// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100

#define GAUDI_RESET_TIMEOUT_MSEC	2000
#define GAUDI_RESET_WAIT_MSEC		1
#define GAUDI_CPU_RESET_WAIT_MSEC	200
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT,	/* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU,	/* GAUDI_QUEUE_ID_CPU_PQ */
	/* GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_NIC_9_3 */
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_INT,
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

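/* External queues always act as collective masters; the DMA5, TPC7 and NIC
 * queues act as collective slaves. All other queues don't take part in
 * collective operations.
 */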
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* The first SOBs are reserved for collective usage, so the sync
	 * stream SOBs start right after them.
	 */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* Monitors reserved for collective usage: one per internal queue
	 * stream plus two per external queue stream. The sync stream
	 * monitors follow.
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	return 0;
}

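/* Map the three PCI BARs (SRAM, CFG and HBM; HBM is mapped write-combined)
 * and derive the register I/O base from the CFG BAR.
 */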
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

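/* Configure the PCIe iATU: inbound region 0 maps the SRAM BAR, region 1 maps
 * the CFG BAR (starting at the SPI flash base) and region 2 maps the HBM BAR.
 * A single outbound region points to host memory. Skipped entirely when the
 * firmware has already configured the iATU.
 */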
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

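/* Early init: set the fixed properties, sanity-check BAR sizes, initialize
 * PCI access and read the preboot status. If the H/W state is dirty, the
 * device is reset before initialization continues.
 */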
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine need to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

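/* kref release callback - resets all SOBs in the group via a register memset
 * scheduled on the group's queue, then re-arms the kref for the next user.
 */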
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);

	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

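/* Build the collective master wait CB: two monitors are armed, each watching
 * one half of the slave SOB group (via mstr_sob_mask[0]/[1]), and each
 * followed by a fence.
 */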
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

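/* Initialize a collective wait CS: copy the signal SOB into the completion
 * object, generate wait/signal CBs for every job in the CS and handle the
 * SOB group refcounting and wraparound.
 */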
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;

		/* Move to the next SOB group of the stream in a cyclic
		 * manner and remap the slave queues accordingly.
		 */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

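/* Allocate a single collective job together with its kernel CB. Master jobs
 * get a patched CB sized for two monitor configurations; slave jobs get a
 * mapped CB sized for one monitor configuration plus a signal packet.
 */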
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, the user CB is used as the patched CB.
	 * hl_cb_destroy() is called here because the CB is not needed in the
	 * CB idr anymore and to decrement its refcount, as it was incremented
	 * inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

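/* Create the full set of collective wait jobs: one master job on the wait
 * queue, one slave job per enabled NIC engine and a single slave job on the
 * DMA5 or TPC7 queue, depending on the requested collective engine.
 */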
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The collective slave jobs are submitted after the master job:
	 * one job per enabled NIC engine and a single job for the DMA5/TPC7
	 * queue that takes part in the reduction.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses. Because a single HW
	 * register holds the address extension bits for the entire range,
	 * the MSBs (as extracted by GAUDI_CPU_PCI_MSB_ADDR) of the start and
	 * end of the allocated range must be identical, so allocation is
	 * retried until such a range is found.
	 */
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}

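/* Allocate PQ buffers for all internal QMANs; the PQ size depends on the
 * queue class (HBM DMA, MME, TPC or NIC).
 */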
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

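/* Describe the PCI-accessible memory regions (CFG, SRAM, DRAM and the PSOC
 * scratchpad), including which BAR exposes each region and at what offset.
 */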
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}

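/* S/W init: allocate the gaudi device structure, build the event map, create
 * the DMA pools and the CPU-accessible memory, and allocate the internal
 * QMAN PQs.
 */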
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;

	gaudi_set_pci_memory_regions(hdev);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

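/* Note that only a single MSI vector is requested below (min and max are
 * both 1), so - assuming NUMBER_OF_INTERRUPTS is greater than one - the
 * driver currently always takes the single-MSI path. The multi-MSI path is
 * kept for configurations that allocate more vectors.
 */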
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

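/* Enable the SRAM scrambler in all NIF/SIF routers and DMA interfaces,
 * unless the firmware is secured or has already enabled it, or scrambling
 * is disabled by the module parameter.
 */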
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

2079static void gaudi_init_e2e(struct hl_device *hdev)
2080{
2081 if (hdev->asic_prop.fw_security_enabled)
2082 return;
2083
2084 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2085 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2086 return;
2087
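	/* Per-router E2E credit window sizes for HBM and PCI traffic */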
2088 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2089 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2090 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2091 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2092
2093 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2094 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2095 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2096 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2097
2098 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2099 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2100 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2101 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2102
2103 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2104 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2105 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2106 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2107
2108 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2109 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2110 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2111 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2112
2113 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2114 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2115 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2116 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2117
2118 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2119 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2120 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2121 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2122
2123 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2124 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2125 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2126 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2127
2128 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2129 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2130 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2131 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2132
2133 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2134 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2135 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2136 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2137
2138 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2139 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2140 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2141 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2142
2143 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2144 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2145 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2146 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2147
2148 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2149 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2150 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2151 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2152
2153 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2154 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2155 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2156 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2157
2158 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2159 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2160 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2161 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2162
2163 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2164 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2165 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2166 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2167
2168 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2169 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2170 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2171 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2172
2173 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2174 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2175 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2176 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2177
2178 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2179 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2180 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2181 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2182
2183 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2184 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2185 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2186 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2187
2188 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2189 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2190 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2191 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2192
2193 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2194 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2195 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2196 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2197
2198 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2199 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2200 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2201 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2202
2203 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2204 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2205 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2206 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2207
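	/* With HBM scrambling off, the NL_HBM selectors need non-default values */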
2208 if (!hdev->dram_scrambler_enable) {
2209 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2210 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2211 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2212 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2213
2214 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2215 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2216 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2217 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2218
2219 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2220 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2221 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2222 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2223
2224 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2225 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2226 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2227 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2228
2229 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2230 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2231 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2232 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2233
2234 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2235 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2236 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2237 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2238
2239 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2240 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2241 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2242 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2243
2244 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2245 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2246 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2247 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2248
2249 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2250 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2251 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2252 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2253
2254 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2255 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2256 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2257 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2258
2259 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2260 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2261 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2262 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2263
2264 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2265 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2266 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2267 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2268
2269 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2270 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2271 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2272 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2273
2274 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2275 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2276 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2277 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2278
2279 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2280 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2281 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2282 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2283
2284 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2285 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2286 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2287 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2288
2289 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2290 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2291 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2292 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2293
2294 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2295 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2296 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2297 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2298
2299 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2300 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2301 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2302 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2303
2304 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2305 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2306 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2307 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2308
2309 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2310 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2311 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2312 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2313
2314 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2315 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2316 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2317 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2318
2319 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2320 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2321 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2322 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2323
2324 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2325 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2326 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2327 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2328 }
2329
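	/* Enable E2E credits on all routers and DMA interfaces */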
2330 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2331 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2332 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2333 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2334
2335 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2336 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2338 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339
2340 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2341 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2343 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344
2345 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2346 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2348 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349
2350 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2351 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2353 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354
2355 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2356 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2358 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359
2360 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2361 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2363 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364
2365 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2366 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2368 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369
2370 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2371 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2373 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374
2375 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2376 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2378 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379
2380 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2381 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2383 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384
2385 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2386 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2388 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389
2390 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2391 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2393 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2396 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2398 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2401 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2403 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2406 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2408 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2411 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2412 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2413 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2414
2415 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2416 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2418 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419
2420 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2421 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2423 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424
2425 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2426 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2428 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429
2430 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2431 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2433 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434
2435 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2436 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2438 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439
2440 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2441 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2443 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444
2445 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2446 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2448 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449}
2450
2451static void gaudi_init_hbm_cred(struct hl_device *hdev)
2452{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2454
2455 if (hdev->asic_prop.fw_security_enabled)
2456 return;
2457
2458 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2459 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2460 return;
2461
2462 hbm0_wr = 0x33333333;
2463 hbm0_rd = 0x77777777;
2464 hbm1_wr = 0x55555555;
2465 hbm1_rd = 0xDDDDDDDD;
2466
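	/* Program per-interface HBM read/write credit counts */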
2467 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2468 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2469 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2470 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2471
2472 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2473 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2474 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2475 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2476
2477 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2478 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2479 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2480 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2481
2482 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2483 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2484 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2485 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2486
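	/* Enable the read and write HBM credits */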
2487 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2488 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2491 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
2500 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2501 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512}
2513
2514static void gaudi_init_golden_registers(struct hl_device *hdev)
2515{
2516 u32 tpc_offset;
2517 int tpc_id, i;
2518
2519 gaudi_init_e2e(hdev);
2520 gaudi_init_hbm_cred(hdev);
2521
2522 for (tpc_id = 0, tpc_offset = 0;
2523 tpc_id < TPC_NUMBER_OF_ENGINES;
2524 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from the TPC */
2526 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
		/* Set the number of instruction-cache fetch lines */
2528 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2529 ICACHE_FETCH_LINE_NUM, 2);
2530 }
2531
	/* Zero the first 128 bytes of SRAM */
2533 for (i = 0 ; i < 128 ; i += 8)
2534 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2535
2536 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2537 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2538 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540}
2541
2542static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2543 int qman_id, dma_addr_t qman_pq_addr)
2544{
2545 struct cpu_dyn_regs *dyn_regs =
2546 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2547 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2548 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2549 u32 q_off, dma_qm_offset;
2550 u32 dma_qm_err_cfg, irq_handler_offset;
2551
2552 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2553
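	/* Monitor payload and sync object base addresses of the E_N and W_S
	 * sync managers
	 */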
2554 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2555 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2556 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2557 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558 so_base_en_lo = lower_32_bits(CFG_BASE +
2559 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2560 so_base_en_hi = upper_32_bits(CFG_BASE +
2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2563 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2565 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566 so_base_ws_lo = lower_32_bits(CFG_BASE +
2567 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568 so_base_ws_hi = upper_32_bits(CFG_BASE +
2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570
2571 q_off = dma_qm_offset + qman_id * 4;
2572
2573 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2574 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2575
2576 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2577 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2578 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2579
2580 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2581 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2582 QMAN_LDMA_SRC_OFFSET);
2583 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2584 QMAN_LDMA_DST_OFFSET);
2585
2586 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2587 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2588 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2589 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2590 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2591 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2592 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2594
2595 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2596
	/* The following configuration is done only once per QMAN */
2598 if (qman_id == 0) {
2599 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2600 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2601 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2602
		/* Configure the QMAN error reporting interrupt */
2604 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2605 if (hdev->stop_on_err)
2606 dma_qm_err_cfg |=
2607 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2608
2609 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2610
2611 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2612 lower_32_bits(CFG_BASE + irq_handler_offset));
2613 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2614 upper_32_bits(CFG_BASE + irq_handler_offset));
2615
2616 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2617 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2618 dma_id);
2619
2620 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2621 QM_ARB_ERR_MSG_EN_MASK);
2622
		/* Increase the arbiter watchdog timeout to support streams */
2624 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2625 GAUDI_ARB_WDT_TIMEOUT);
2626
2627 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2628 QMAN_EXTERNAL_MAKE_TRUSTED);
2629
2630 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2631 }
2632}
2633
2634static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2635{
2636 struct cpu_dyn_regs *dyn_regs =
2637 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2638 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2639 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2640 u32 irq_handler_offset;
2641
	/* Set read outstanding count and max read size to the maximum possible */
2643 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2644 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2645
	/* Limit the number of outstanding LBW transactions */
2647 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2648
	/* Optionally halt the DMA core on the first error */
2650 if (hdev->stop_on_err)
2651 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2652
2653 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2654
2655 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2656 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2657 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2658
2659 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2660 lower_32_bits(CFG_BASE + irq_handler_offset));
2661 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2662 upper_32_bits(CFG_BASE + irq_handler_offset));
2663
2664 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2665 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2666 WREG32(mmDMA0_CORE_PROT + dma_offset,
2667 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2668
2669 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2670 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2671 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2672}
2673
2674static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2675 u32 enable_mask)
2676{
2677 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2678
2679 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2680}
2681
2682static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2683{
2684 struct gaudi_device *gaudi = hdev->asic_specific;
2685 struct hl_hw_queue *q;
2686 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2687
2688 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2689 return;
2690
2691 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2692 dma_id = gaudi_dma_assignment[i];
2693
		/*
		 * For queues after the CPU queue, add 1 to get the correct
		 * queue index. In addition, account for the CPU EQ and the
		 * NIC IRQs in order to get the correct MSI vector.
		 */
2698 if (dma_id > 1) {
2699 cpu_skip = 1;
2700 nic_skip = NIC_NUMBER_OF_ENGINES;
2701 } else {
2702 cpu_skip = 0;
2703 nic_skip = 0;
2704 }
2705
2706 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2707 q_idx = 4 * dma_id + j + cpu_skip;
2708 q = &hdev->kernel_queues[q_idx];
2709 q->cq_id = cq_id++;
2710 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2711 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2712 q->bus_address);
2713 }
2714
2715 gaudi_init_dma_core(hdev, dma_id);
2716
2717 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2718 }
2719
2720 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2721}
2722
2723static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2724 int qman_id, u64 qman_base_addr)
2725{
2726 struct cpu_dyn_regs *dyn_regs =
2727 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2728 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2729 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2730 u32 dma_qm_err_cfg, irq_handler_offset;
2731 u32 q_off, dma_qm_offset;
2732
2733 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2734
2735 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2736 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2737 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739 so_base_en_lo = lower_32_bits(CFG_BASE +
2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2741 so_base_en_hi = upper_32_bits(CFG_BASE +
2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2744 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747 so_base_ws_lo = lower_32_bits(CFG_BASE +
2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749 so_base_ws_hi = upper_32_bits(CFG_BASE +
2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
2752 q_off = dma_qm_offset + qman_id * 4;
2753
2754 if (qman_id < 4) {
2755 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2756 lower_32_bits(qman_base_addr));
2757 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2758 upper_32_bits(qman_base_addr));
2759
2760 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2761 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2763
2764 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2765 QMAN_CPDMA_SIZE_OFFSET);
2766 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767 QMAN_CPDMA_SRC_OFFSET);
2768 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769 QMAN_CPDMA_DST_OFFSET);
2770 } else {
2771 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2772 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2773 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2774
2775 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2776 QMAN_LDMA_SIZE_OFFSET);
2777 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2778 QMAN_LDMA_SRC_OFFSET);
2779 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2780 QMAN_LDMA_DST_OFFSET);
2781
		/* Configure the QMAN error reporting interrupt */
2783 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2784 if (hdev->stop_on_err)
2785 dma_qm_err_cfg |=
2786 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2787
2788 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2789
2790 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2791 lower_32_bits(CFG_BASE + irq_handler_offset));
2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2793 upper_32_bits(CFG_BASE + irq_handler_offset));
2794
2795 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2796 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2797 dma_id);
2798
2799 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2800 QM_ARB_ERR_MSG_EN_MASK);
2801
		/* Increase the arbiter watchdog timeout to support streams */
2803 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2804 GAUDI_ARB_WDT_TIMEOUT);
2805
2806 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808 QMAN_INTERNAL_MAKE_TRUSTED);
2809 }
2810
2811 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
	/* Configure DMA5's CP_MSG_BASE 2/3 for the sync-stream collective */
2817 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819 mtr_base_ws_lo);
2820 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821 mtr_base_ws_hi);
2822 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823 so_base_ws_lo);
2824 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825 so_base_ws_hi);
2826 }
2827}
2828
2829static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830{
2831 struct gaudi_device *gaudi = hdev->asic_specific;
2832 struct gaudi_internal_qman_info *q;
2833 u64 qman_base_addr;
2834 int i, j, dma_id, internal_q_index;
2835
2836 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837 return;
2838
2839 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842 for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add 1 to the queue index to account for the CPU
			 * queue, which is placed before all the internal
			 * queues.
			 */
2847 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849 q = &gaudi->internal_qmans[internal_q_index];
2850 qman_base_addr = (u64) q->pq_dma_addr;
2851 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852 qman_base_addr);
2853 }
2854
		/* Initialize the lower CP (stream 4) of this QMAN */
2856 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858 gaudi_init_dma_core(hdev, dma_id);
2859
2860 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861 }
2862
2863 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864}
2865
2866static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867 int qman_id, u64 qman_base_addr)
2868{
2869 struct cpu_dyn_regs *dyn_regs =
2870 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871 u32 mtr_base_lo, mtr_base_hi;
2872 u32 so_base_lo, so_base_hi;
2873 u32 irq_handler_offset;
2874 u32 q_off, mme_id;
2875 u32 mme_qm_err_cfg;
2876
2877 mtr_base_lo = lower_32_bits(CFG_BASE +
2878 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879 mtr_base_hi = upper_32_bits(CFG_BASE +
2880 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881 so_base_lo = lower_32_bits(CFG_BASE +
2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883 so_base_hi = upper_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886 q_off = mme_offset + qman_id * 4;
2887
2888 if (qman_id < 4) {
2889 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890 lower_32_bits(qman_base_addr));
2891 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892 upper_32_bits(qman_base_addr));
2893
2894 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899 QMAN_CPDMA_SIZE_OFFSET);
2900 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901 QMAN_CPDMA_SRC_OFFSET);
2902 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903 QMAN_CPDMA_DST_OFFSET);
2904 } else {
2905 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910 QMAN_LDMA_SIZE_OFFSET);
2911 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912 QMAN_LDMA_SRC_OFFSET);
2913 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914 QMAN_LDMA_DST_OFFSET);
2915
		/* Configure the QMAN error reporting interrupt */
2917 mme_id = mme_offset /
2918 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921 if (hdev->stop_on_err)
2922 mme_qm_err_cfg |=
2923 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928 lower_32_bits(CFG_BASE + irq_handler_offset));
2929 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930 upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934 mme_id);
2935
2936 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937 QM_ARB_ERR_MSG_EN_MASK);
2938
		/* Increase the arbiter watchdog timeout to support streams */
2940 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2941 GAUDI_ARB_WDT_TIMEOUT);
2942
2943 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945 QMAN_INTERNAL_MAKE_TRUSTED);
2946 }
2947
2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952}
2953
2954static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955{
2956 struct gaudi_device *gaudi = hdev->asic_specific;
2957 struct gaudi_internal_qman_info *q;
2958 u64 qman_base_addr;
2959 u32 mme_offset;
2960 int i, internal_q_index;
2961
2962 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963 return;
2964
	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the MME2 QMAN and
	 * GAUDI_QUEUE_ID_MME_1_X to the MME0 QMAN
	 */
2970 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974 q = &gaudi->internal_qmans[internal_q_index];
2975 qman_base_addr = (u64) q->pq_dma_addr;
2976 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977 qman_base_addr);
2978 if (i == 3)
2979 mme_offset = 0;
2980 }
2981
	/* Initialize the lower CP (stream 4) of both MME QMANs */
2983 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985 gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990 gaudi->hw_cap_initialized |= HW_CAP_MME;
2991}
2992
2993static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994 int qman_id, u64 qman_base_addr)
2995{
2996 struct cpu_dyn_regs *dyn_regs =
2997 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000 u32 tpc_qm_err_cfg, irq_handler_offset;
3001 u32 q_off, tpc_id;
3002
3003 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007 so_base_en_lo = lower_32_bits(CFG_BASE +
3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009 so_base_en_hi = upper_32_bits(CFG_BASE +
3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015 so_base_ws_lo = lower_32_bits(CFG_BASE +
3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017 so_base_ws_hi = upper_32_bits(CFG_BASE +
3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020 q_off = tpc_offset + qman_id * 4;
3021
3022 tpc_id = tpc_offset /
3023 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025 if (qman_id < 4) {
3026 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027 lower_32_bits(qman_base_addr));
3028 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029 upper_32_bits(qman_base_addr));
3030
3031 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036 QMAN_CPDMA_SIZE_OFFSET);
3037 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038 QMAN_CPDMA_SRC_OFFSET);
3039 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040 QMAN_CPDMA_DST_OFFSET);
3041 } else {
3042 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047 QMAN_LDMA_SIZE_OFFSET);
3048 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049 QMAN_LDMA_SRC_OFFSET);
3050 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051 QMAN_LDMA_DST_OFFSET);
3052
		/* Configure the QMAN error reporting interrupt */
3054 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055 if (hdev->stop_on_err)
3056 tpc_qm_err_cfg |=
3057 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062 lower_32_bits(CFG_BASE + irq_handler_offset));
3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064 upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068 tpc_id);
3069
3070 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071 QM_ARB_ERR_MSG_EN_MASK);
3072
		/* Increase the arbiter watchdog timeout to support streams */
3074 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3075 GAUDI_ARB_WDT_TIMEOUT);
3076
3077 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3078 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3079 QMAN_INTERNAL_MAKE_TRUSTED);
3080 }
3081
3082 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3083 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3084 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3085 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3086
	/* Configure TPC6's CP_MSG_BASE 2/3 for the sync-stream collective */
3088 if (tpc_id == 6) {
3089 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3090 mtr_base_ws_lo);
3091 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3092 mtr_base_ws_hi);
3093 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3094 so_base_ws_lo);
3095 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3096 so_base_ws_hi);
3097 }
3098}
3099
3100static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3101{
3102 struct gaudi_device *gaudi = hdev->asic_specific;
3103 struct gaudi_internal_qman_info *q;
3104 u64 qman_base_addr;
3105 u32 so_base_hi, tpc_offset = 0;
3106 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3107 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3108 int i, tpc_id, internal_q_index;
3109
3110 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3111 return;
3112
3113 so_base_hi = upper_32_bits(CFG_BASE +
3114 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3115
3116 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3117 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3118 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3119 tpc_id * QMAN_STREAMS + i;
3120 q = &gaudi->internal_qmans[internal_q_index];
3121 qman_base_addr = (u64) q->pq_dma_addr;
3122 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3123 qman_base_addr);
3124
3125 if (i == 3) {
				/* Initialize the lower CP (stream 4) */
3127 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3128
				/* Enable the QMAN and the TPC channel */
3130 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3131 QMAN_TPC_ENABLE);
3132 }
3133 }
3134
3135 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3136 so_base_hi);
3137
3138 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3139
3140 gaudi->hw_cap_initialized |=
3141 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3142 }
3143}
3144
3145static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3146 int qman_id, u64 qman_base_addr, int nic_id)
3147{
3148 struct cpu_dyn_regs *dyn_regs =
3149 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3150 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3151 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3152 u32 nic_qm_err_cfg, irq_handler_offset;
3153 u32 q_off;
3154
3155 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3156 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159 so_base_en_lo = lower_32_bits(CFG_BASE +
3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161 so_base_en_hi = upper_32_bits(CFG_BASE +
3162 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3164 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167 so_base_ws_lo = lower_32_bits(CFG_BASE +
3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169 so_base_ws_hi = upper_32_bits(CFG_BASE +
3170 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172 q_off = nic_offset + qman_id * 4;
3173
3174 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3175 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3176
3177 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3178 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3179 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3180
3181 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182 QMAN_LDMA_SIZE_OFFSET);
3183 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184 QMAN_LDMA_SRC_OFFSET);
3185 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186 QMAN_LDMA_DST_OFFSET);
3187
3188 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3189 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3190 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3191 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3192
	/* Configure CP_MSG_BASE 2/3 for the sync-stream collective */
3194 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3195 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3196 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3197 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3198
3199 if (qman_id == 0) {
3200 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3201 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3202 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3203
		/* Configure the QMAN error reporting interrupt */
3205 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3206 if (hdev->stop_on_err)
3207 nic_qm_err_cfg |=
3208 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3209
3210 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3211
3212 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3213 lower_32_bits(CFG_BASE + irq_handler_offset));
3214 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3215 upper_32_bits(CFG_BASE + irq_handler_offset));
3216
3217 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3218 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3219 nic_id);
3220
3221 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3222 QM_ARB_ERR_MSG_EN_MASK);
3223
		/* Increase the arbiter watchdog timeout to support streams */
3225 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3226 GAUDI_ARB_WDT_TIMEOUT);
3227
3228 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230 QMAN_INTERNAL_MAKE_TRUSTED);
3231 }
3232}
3233
3234static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235{
3236 struct gaudi_device *gaudi = hdev->asic_specific;
3237 struct gaudi_internal_qman_info *q;
3238 u64 qman_base_addr;
3239 u32 nic_offset = 0;
3240 u32 nic_delta_between_qmans =
3241 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242 u32 nic_delta_between_nics =
3243 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244 int i, nic_id, internal_q_index;
3245
3246 if (!hdev->nic_ports_mask)
3247 return;
3248
3249 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250 return;
3251
3252 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253
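	/* Each NIC macro hosts two QMANs; after the odd one, advance to the
	 * next NIC macro
	 */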
3254 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256 nic_offset += nic_delta_between_qmans;
3257 if (nic_id & 1) {
3258 nic_offset -= (nic_delta_between_qmans * 2);
3259 nic_offset += nic_delta_between_nics;
3260 }
3261 continue;
3262 }
3263
3264 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266 nic_id * QMAN_STREAMS + i;
3267 q = &gaudi->internal_qmans[internal_q_index];
3268 qman_base_addr = (u64) q->pq_dma_addr;
3269 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270 qman_base_addr, nic_id);
3271 }
3272
		/* Enable the QMAN */
3274 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275
3276 nic_offset += nic_delta_between_qmans;
3277 if (nic_id & 1) {
3278 nic_offset -= (nic_delta_between_qmans * 2);
3279 nic_offset += nic_delta_between_nics;
3280 }
3281
3282 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283 }
3284}
3285
3286static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287{
3288 struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291 return;
3292
3293 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296}
3297
3298static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299{
3300 struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303 return;
3304
3305 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310}
3311
3312static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313{
3314 struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317 return;
3318
3319 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321}
3322
3323static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324{
3325 struct gaudi_device *gaudi = hdev->asic_specific;
3326 u32 tpc_offset = 0;
3327 int tpc_id;
3328
3329 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330 return;
3331
3332 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335 }
3336}
3337
3338static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339{
3340 struct gaudi_device *gaudi = hdev->asic_specific;
3341 u32 nic_mask, nic_offset = 0;
3342 u32 nic_delta_between_qmans =
3343 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 u32 nic_delta_between_nics =
3345 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346 int nic_id;
3347
3348 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351 if (gaudi->hw_cap_initialized & nic_mask)
3352 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354 nic_offset += nic_delta_between_qmans;
3355 if (nic_id & 1) {
3356 nic_offset -= (nic_delta_between_qmans * 2);
3357 nic_offset += nic_delta_between_nics;
3358 }
3359 }
3360}
3361
3362static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363{
3364 struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367 return;
3368
	/* Stop the upper CPs (streams 0-3) of the DMA 0/1/5 QMANs */
3370 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373}
3374
3375static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376{
3377 struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380 return;
3381
	/* Stop all five CPs (four upper plus the lower CP) of these QMANs */
3383
3384 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389}
3390
3391static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392{
3393 struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396 return;
3397
	/* Stop all five CPs of both MME QMANs */
3399 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401}
3402
3403static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404{
3405 struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408 return;
3409
3410 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418}
3419
3420static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421{
3422 struct gaudi_device *gaudi = hdev->asic_specific;
3423
	/* Stop the PQF, CQF and CP of every initialized NIC QMAN */
3425
3426 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427 WREG32(mmNIC0_QM0_GLBL_CFG1,
3428 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433 WREG32(mmNIC0_QM1_GLBL_CFG1,
3434 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439 WREG32(mmNIC1_QM0_GLBL_CFG1,
3440 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445 WREG32(mmNIC1_QM1_GLBL_CFG1,
3446 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451 WREG32(mmNIC2_QM0_GLBL_CFG1,
3452 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457 WREG32(mmNIC2_QM1_GLBL_CFG1,
3458 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463 WREG32(mmNIC3_QM0_GLBL_CFG1,
3464 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469 WREG32(mmNIC3_QM1_GLBL_CFG1,
3470 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475 WREG32(mmNIC4_QM0_GLBL_CFG1,
3476 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481 WREG32(mmNIC4_QM1_GLBL_CFG1,
3482 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485}
3486
3487static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488{
3489 struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492 return;
3493
3494 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497}
3498
3499static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500{
3501 struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504 return;
3505
3506 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511}
3512
3513static void gaudi_mme_stall(struct hl_device *hdev)
3514{
3515 struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518 return;
3519
	/* Each stall register is deliberately written twice (H/W workaround) */
3521 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537}
3538
3539static void gaudi_tpc_stall(struct hl_device *hdev)
3540{
3541 struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544 return;
3545
3546 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554}
3555
3556static void gaudi_set_clock_gating(struct hl_device *hdev)
3557{
3558 struct gaudi_device *gaudi = hdev->asic_specific;
3559 u32 qman_offset;
3560 bool enable;
3561 int i;
3562
	/* During a debug session, don't enable clock gating as it may
	 * interfere with the debug activity
	 */
3566 if (hdev->in_debug)
3567 return;
3568
3569 if (hdev->asic_prop.fw_security_enabled)
3570 return;
3571
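	/* Apply the user's clock-gating mask per engine */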
3572 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3573 enable = !!(hdev->clock_gating_mask &
3574 (BIT_ULL(gaudi_dma_assignment[i])));
3575
3576 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3577 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3578 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3579 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3580 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3581 }
3582
3583 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3584 enable = !!(hdev->clock_gating_mask &
3585 (BIT_ULL(gaudi_dma_assignment[i])));
3586
		/* Clock gating must always stay disabled on GAUDI_HBM_DMA_4
		 * (engine DMA5)
		 */
3590 if (i == GAUDI_HBM_DMA_4)
3591 enable = 0;
3592
3593 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3594 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3595 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3596 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3597 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3598 }
3599
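	/* Only MME0 and MME2 expose QMAN clock-gating registers; MME1 and
	 * MME3 appear to act as slaves of MME0/MME2 respectively (note that
	 * GAUDI_CLK_GATE_DEBUGFS_MASK also covers only MME_0 and MME_2)
	 */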
3600 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3601 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3602 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3603
3604 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3605 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3606 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3607
3608 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3609 enable = !!(hdev->clock_gating_mask &
3610 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3611
3612 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3613 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3614 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3615 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3616
3617 qman_offset += TPC_QMAN_OFFSET;
3618 }
3619
3620 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3621}
3622
3623static void gaudi_disable_clock_gating(struct hl_device *hdev)
3624{
3625 struct gaudi_device *gaudi = hdev->asic_specific;
3626 u32 qman_offset;
3627 int i;
3628
3629 if (hdev->asic_prop.fw_security_enabled)
3630 return;
3631
3632 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3633 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3634 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3635
3636 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3637 }
3638
3639 WREG32(mmMME0_QM_CGM_CFG, 0);
3640 WREG32(mmMME0_QM_CGM_CFG1, 0);
3641 WREG32(mmMME2_QM_CGM_CFG, 0);
3642 WREG32(mmMME2_QM_CGM_CFG1, 0);
3643
3644 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3645 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3646 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3647
3648 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3649 }
3650
3651 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3652}
3653
3654static void gaudi_enable_timestamp(struct hl_device *hdev)
3655{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower and upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3665}
3666
3667static void gaudi_disable_timestamp(struct hl_device *hdev)
3668{
	/* Disable the timestamp counter */
3670 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3671}
3672
3673static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3674{
3675 u32 wait_timeout_ms;
3676
3677 dev_info(hdev->dev,
3678 "Halting compute engines and disabling interrupts\n");
3679
3680 if (hdev->pldm)
3681 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3682 else
3683 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3684
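	/* Ordering: first stop the QMANs so no new work is fetched, then
	 * stall the engine cores, and only then disable the QMANs, with a
	 * wait between the steps so in-flight work can drain
	 */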
3685 gaudi_stop_nic_qmans(hdev);
3686 gaudi_stop_mme_qmans(hdev);
3687 gaudi_stop_tpc_qmans(hdev);
3688 gaudi_stop_hbm_dma_qmans(hdev);
3689 gaudi_stop_pci_dma_qmans(hdev);
3690
3691 hdev->asic_funcs->disable_clock_gating(hdev);
3692
3693 msleep(wait_timeout_ms);
3694
3695 gaudi_pci_dma_stall(hdev);
3696 gaudi_hbm_dma_stall(hdev);
3697 gaudi_tpc_stall(hdev);
3698 gaudi_mme_stall(hdev);
3699
3700 msleep(wait_timeout_ms);
3701
3702 gaudi_disable_nic_qmans(hdev);
3703 gaudi_disable_mme_qmans(hdev);
3704 gaudi_disable_tpc_qmans(hdev);
3705 gaudi_disable_hbm_dma_qmans(hdev);
3706 gaudi_disable_pci_dma_qmans(hdev);
3707
3708 gaudi_disable_timestamp(hdev);
3709
3710 gaudi_disable_msi(hdev);
3711}
3712
3713static int gaudi_mmu_init(struct hl_device *hdev)
3714{
3715 struct asic_fixed_properties *prop = &hdev->asic_prop;
3716 struct gaudi_device *gaudi = hdev->asic_specific;
3717 u64 hop0_addr;
3718 int rc, i;
3719
3720 if (!hdev->mmu_enable)
3721 return 0;
3722
3723 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3724 return 0;
3725
3726 for (i = 0 ; i < prop->max_asid ; i++) {
3727 hop0_addr = prop->mmu_pgt_addr +
3728 (i * prop->mmu_hop_table_size);
3729
3730 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3731 if (rc) {
3732 dev_err(hdev->dev,
3733 "failed to set hop0 addr for asid %d\n", i);
3734 goto err;
3735 }
3736 }
3737
	/* init MMU cache manage page */
3739 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3740 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3741
3742 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3743
3744 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3745 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3746
3747 WREG32(mmSTLB_HOP_CONFIGURATION,
3748 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3749
	/*
	 * The H/W expects the first PI after init to be 1, so the MMU
	 * cache invalidation producer index starts from 1 rather than 0
	 */
3754 gaudi->mmu_cache_inv_pi = 1;
3755
3756 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3757
3758 return 0;
3759
3760err:
3761 return rc;
3762}
3763
3764static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3765{
3766 void __iomem *dst;
3767
3768 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3769
3770 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3771}
3772
3773static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3774{
3775 void __iomem *dst;
3776
3777 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3778
3779 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3780}
3781
3782static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3783{
3784 struct dynamic_fw_load_mgr *dynamic_loader;
3785 struct cpu_dyn_regs *dyn_regs;
3786
3787 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3788
	/*
	 * here we update initial values for few specific dynamic regs (as
	 * before reading the first descriptor from FW those values have to
	 * be hard-coded). in later stages of the protocol those values will
	 * be updated automatically by reading the FW descriptor so data
	 * there will always be up-to-date
	 */
3796 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3797 dyn_regs->kmd_msg_to_cpu =
3798 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3799 dyn_regs->cpu_cmd_status_to_host =
3800 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3801
3802 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3803}
3804
3805static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3806{
3807 struct static_fw_load_mgr *static_loader;
3808
3809 static_loader = &hdev->fw_loader.static_loader;
3810
3811 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3812 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3813 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3814 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3815 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3816 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3817 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3818 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3819 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3820 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3821 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3822 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3823 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3824 GAUDI_PLDM_RESET_WAIT_MSEC :
3825 GAUDI_CPU_RESET_WAIT_MSEC;
3826}
3827
3828static void gaudi_init_firmware_loader(struct hl_device *hdev)
3829{
3830 struct asic_fixed_properties *prop = &hdev->asic_prop;
3831 struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
3834 fw_loader->linux_loaded = false;
3835 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3836 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3837 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3838 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3839 fw_loader->skip_bmc = !hdev->bmc_enable;
3840 fw_loader->sram_bar_id = SRAM_BAR_ID;
3841 fw_loader->dram_bar_id = HBM_BAR_ID;
3842
3843 if (prop->dynamic_fw_load)
3844 gaudi_init_dynamic_firmware_loader(hdev);
3845 else
3846 gaudi_init_static_firmware_loader(hdev);
3847}
3848
3849static int gaudi_init_cpu(struct hl_device *hdev)
3850{
3851 struct gaudi_device *gaudi = hdev->asic_specific;
3852 int rc;
3853
3854 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3855 return 0;
3856
3857 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3858 return 0;
3859
	/*
	 * The device CPU works with 40 bits addresses.
	 * This register sets the extension to 50 bits.
	 */
3864 if (!hdev->asic_prop.fw_security_enabled)
3865 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3866
3867 rc = hl_fw_init_cpu(hdev);
3868
3869 if (rc)
3870 return rc;
3871
3872 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3873
3874 return 0;
3875}
3876
3877static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3878{
3879 struct cpu_dyn_regs *dyn_regs =
3880 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3881 struct asic_fixed_properties *prop = &hdev->asic_prop;
3882 struct gaudi_device *gaudi = hdev->asic_specific;
3883 u32 status, irq_handler_offset;
3884 struct hl_eq *eq;
3885 struct hl_hw_queue *cpu_pq =
3886 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3887 int err;
3888
3889 if (!hdev->cpu_queues_enable)
3890 return 0;
3891
3892 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3893 return 0;
3894
3895 eq = &hdev->event_queue;
3896
3897 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3898 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3899
3900 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3901 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3902
3903 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3904 lower_32_bits(hdev->cpu_accessible_dma_address));
3905 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3906 upper_32_bits(hdev->cpu_accessible_dma_address));
3907
3908 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3909 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3910 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3911
	/* Used for EQ CI */
3913 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3914
3915 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3916
3917 if (gaudi->multi_msi_mode)
3918 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3919 else
3920 WREG32(mmCPU_IF_QUEUE_INIT,
3921 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3922
3923 irq_handler_offset = prop->gic_interrupts_enable ?
3924 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3925 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3926
3927 WREG32(irq_handler_offset,
3928 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3929
3930 err = hl_poll_timeout(
3931 hdev,
3932 mmCPU_IF_QUEUE_INIT,
3933 status,
3934 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3935 1000,
3936 cpu_timeout);
3937
3938 if (err) {
3939 dev_err(hdev->dev,
3940 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3941 return -EIO;
3942 }
3943
	/* update FW application security bits */
3945 if (prop->fw_cpu_boot_dev_sts0_valid)
3946 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3947 if (prop->fw_cpu_boot_dev_sts1_valid)
3948 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3949
3950 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3951 return 0;
3952}
3953
3954static void gaudi_pre_hw_init(struct hl_device *hdev)
3955{
	/* Perform read from the device to make sure device is up */
3957 RREG32(mmHW_STATE);
3958
3959 if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
3963 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3964 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3965 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3966
		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
3970 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3971 }
3972
	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
3979 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3980}
3981
3982static int gaudi_hw_init(struct hl_device *hdev)
3983{
3984 struct gaudi_device *gaudi = hdev->asic_specific;
3985 int rc;
3986
3987 gaudi_pre_hw_init(hdev);
3988
	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE,
	 * so cache that address here; a later attempt to move the bar will
	 * be flagged as an error
	 */
3993 if (hdev->asic_prop.iatu_done_by_fw)
3994 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3995
	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
4000 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4001 dev_err(hdev->dev,
4002 "failed to map HBM bar to DRAM base address\n");
4003 return -EIO;
4004 }
4005
4006 rc = gaudi_init_cpu(hdev);
4007 if (rc) {
4008 dev_err(hdev->dev, "failed to initialize CPU\n");
4009 return rc;
4010 }
4011
	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 * There is no need to take the clk gating mutex because when this
	 * function runs, no other relevant code can run
	 */
4017 hdev->asic_funcs->disable_clock_gating(hdev);
4018
	/* SRAM scrambler must be initialized after CPU is running from HBM */
4020 gaudi_init_scrambler_sram(hdev);
4021
	/* This is here just in case we are working without the device CPU */
4023 gaudi_init_scrambler_hbm(hdev);
4024
4025 gaudi_init_golden_registers(hdev);
4026
4027 rc = gaudi_mmu_init(hdev);
4028 if (rc)
4029 return rc;
4030
4031 gaudi_init_security(hdev);
4032
4033 gaudi_init_pci_dma_qmans(hdev);
4034
4035 gaudi_init_hbm_dma_qmans(hdev);
4036
4037 gaudi_init_mme_qmans(hdev);
4038
4039 gaudi_init_tpc_qmans(hdev);
4040
4041 gaudi_init_nic_qmans(hdev);
4042
4043 hdev->asic_funcs->set_clock_gating(hdev);
4044
4045 gaudi_enable_timestamp(hdev);
4046
	/* MSI must be enabled before CPU queues and NIC are initialized */
4048 rc = gaudi_enable_msi(hdev);
4049 if (rc)
4050 goto disable_queues;
4051
	/* must be called after MSI was enabled */
4053 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4054 if (rc) {
4055 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4056 rc);
4057 goto disable_msi;
4058 }
4059
	/* Perform read from the device to flush all configuration */
4061 RREG32(mmHW_STATE);
4062
4063 return 0;
4064
4065disable_msi:
4066 gaudi_disable_msi(hdev);
4067disable_queues:
4068 gaudi_disable_mme_qmans(hdev);
4069 gaudi_disable_pci_dma_qmans(hdev);
4070
4071 return rc;
4072}
4073
4074static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4075{
4076 struct cpu_dyn_regs *dyn_regs =
4077 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4078 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4079 struct gaudi_device *gaudi = hdev->asic_specific;
4080 bool driver_performs_reset;
4081
4082 if (!hard_reset) {
4083 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4084 return;
4085 }
4086
4087 if (hdev->pldm) {
4088 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4089 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4090 } else {
4091 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4092 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4093 }
4094
4095 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4096 !hdev->asic_prop.hard_reset_done_by_fw);
4097
	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
4101 if (driver_performs_reset)
4102 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4103 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4104
	/* If linux is loaded in the device CPU we need to communicate with
	 * it via the GIC. Otherwise, we need to communicate via the
	 * MSG_TO_CPU registers (COMMS or legacy F/W protocol)
	 */
4109 if (hdev->fw_loader.linux_loaded) {
4110 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4111 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4112 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4113
4114 WREG32(irq_handler_offset,
4115 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4116 } else {
4117 if (hdev->asic_prop.hard_reset_done_by_fw)
4118 hl_fw_ask_hard_reset_without_linux(hdev);
4119 else
4120 hl_fw_ask_halt_machine_without_linux(hdev);
4121 }
4122
4123 if (driver_performs_reset) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
4128 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4129 (CFG_RST_H_DMA_MASK |
4130 CFG_RST_H_MME_MASK |
4131 CFG_RST_H_SM_MASK |
4132 CFG_RST_H_TPC_7_MASK));
4133
4134 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4135
4136 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4137 (CFG_RST_H_HBM_MASK |
4138 CFG_RST_H_TPC_7_MASK |
4139 CFG_RST_H_NIC_MASK |
4140 CFG_RST_H_SM_MASK |
4141 CFG_RST_H_DMA_MASK |
4142 CFG_RST_H_MME_MASK |
4143 CFG_RST_H_CPU_MASK |
4144 CFG_RST_H_MMU_MASK));
4145
4146 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4147 (CFG_RST_L_IF_MASK |
4148 CFG_RST_L_PSOC_MASK |
4149 CFG_RST_L_TPC_MASK));
4150
4151 msleep(cpu_timeout_ms);
4152
		/* Tell ASIC not to re-initialize PCIe */
4154 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4155
		/* Restart BTL/BLR upon hard-reset */
4157 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4158
4159 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4160 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4161
4162 dev_info(hdev->dev,
4163 "Issued HARD reset command, going to wait %dms\n",
4164 reset_timeout_ms);
4165 } else {
4166 dev_info(hdev->dev,
4167 "Firmware performs HARD reset, going to wait %dms\n",
4168 reset_timeout_ms);
4169 }
4170
	/*
	 * After hard reset, we can't poll the BTM_FSM register because the
	 * PSOC itself is in reset. Need to wait until the reset is deasserted
	 */
4175 msleep(reset_timeout_ms);
4176
4177 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4178 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4179 dev_err(hdev->dev,
4180 "Timeout while waiting for device to reset 0x%x\n",
4181 status);
4182
4183 if (gaudi) {
4184 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4185 HW_CAP_HBM | HW_CAP_PCI_DMA |
4186 HW_CAP_MME | HW_CAP_TPC_MASK |
4187 HW_CAP_HBM_DMA | HW_CAP_PLL |
4188 HW_CAP_NIC_MASK | HW_CAP_MMU |
4189 HW_CAP_SRAM_SCRAMBLER |
4190 HW_CAP_HBM_SCRAMBLER |
4191 HW_CAP_CLK_GATE);
4192
4193 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4194
4195 hdev->device_cpu_is_halted = false;
4196 }
4197}
4198
4199static int gaudi_suspend(struct hl_device *hdev)
4200{
4201 int rc;
4202
4203 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4204 if (rc)
4205 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4206
4207 return rc;
4208}
4209
4210static int gaudi_resume(struct hl_device *hdev)
4211{
4212 return gaudi_init_iatu(hdev);
4213}
4214
4215static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4216 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4217{
4218 int rc;
4219
4220 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4221 VM_DONTCOPY | VM_NORESERVE;
4222
4223 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4224 (dma_addr - HOST_PHYS_BASE), size);
4225 if (rc)
4226 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4227
4228 return rc;
4229}
4230
4231static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4232{
4233 struct cpu_dyn_regs *dyn_regs =
4234 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4235 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4236 struct gaudi_device *gaudi = hdev->asic_specific;
4237 bool invalid_queue = false;
4238 int dma_id;
4239
4240 switch (hw_queue_id) {
4241 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4242 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4243 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
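		/* The four PQ_PI registers of a QMAN are consecutive 32-bit
		 * registers, hence (queue index & 0x3) * 4 below
		 */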
4244 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4245 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4246 break;
4247
4248 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4249 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4250 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4251 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4252 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4253 break;
4254
4255 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4256 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4257 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
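		/* From DMA_2 onwards the queue ids are shifted by one,
		 * apparently because GAUDI_QUEUE_ID_CPU_PQ sits between
		 * DMA_1_3 and DMA_2_0 in the enumeration, hence the
		 * (hw_queue_id - 1) before masking
		 */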
4258 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4259 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4260 break;
4261
4262 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4263 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4264 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4265 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4266 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4267 break;
4268
4269 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4270 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4271 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4273 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274 break;
4275
4276 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4277 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4278 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4280 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281 break;
4282
4283 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4284 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4285 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288 break;
4289
4290 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4291 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4292 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295 break;
4296
4297 case GAUDI_QUEUE_ID_CPU_PQ:
4298 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4299 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4300 else
4301 invalid_queue = true;
4302 break;
4303
4304 case GAUDI_QUEUE_ID_MME_0_0:
4305 db_reg_offset = mmMME2_QM_PQ_PI_0;
4306 break;
4307
4308 case GAUDI_QUEUE_ID_MME_0_1:
4309 db_reg_offset = mmMME2_QM_PQ_PI_1;
4310 break;
4311
4312 case GAUDI_QUEUE_ID_MME_0_2:
4313 db_reg_offset = mmMME2_QM_PQ_PI_2;
4314 break;
4315
4316 case GAUDI_QUEUE_ID_MME_0_3:
4317 db_reg_offset = mmMME2_QM_PQ_PI_3;
4318 break;
4319
4320 case GAUDI_QUEUE_ID_MME_1_0:
4321 db_reg_offset = mmMME0_QM_PQ_PI_0;
4322 break;
4323
4324 case GAUDI_QUEUE_ID_MME_1_1:
4325 db_reg_offset = mmMME0_QM_PQ_PI_1;
4326 break;
4327
4328 case GAUDI_QUEUE_ID_MME_1_2:
4329 db_reg_offset = mmMME0_QM_PQ_PI_2;
4330 break;
4331
4332 case GAUDI_QUEUE_ID_MME_1_3:
4333 db_reg_offset = mmMME0_QM_PQ_PI_3;
4334 break;
4335
4336 case GAUDI_QUEUE_ID_TPC_0_0:
4337 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4338 break;
4339
4340 case GAUDI_QUEUE_ID_TPC_0_1:
4341 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4342 break;
4343
4344 case GAUDI_QUEUE_ID_TPC_0_2:
4345 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4346 break;
4347
4348 case GAUDI_QUEUE_ID_TPC_0_3:
4349 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4350 break;
4351
4352 case GAUDI_QUEUE_ID_TPC_1_0:
4353 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4354 break;
4355
4356 case GAUDI_QUEUE_ID_TPC_1_1:
4357 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4358 break;
4359
4360 case GAUDI_QUEUE_ID_TPC_1_2:
4361 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4362 break;
4363
4364 case GAUDI_QUEUE_ID_TPC_1_3:
4365 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4366 break;
4367
4368 case GAUDI_QUEUE_ID_TPC_2_0:
4369 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4370 break;
4371
4372 case GAUDI_QUEUE_ID_TPC_2_1:
4373 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4374 break;
4375
4376 case GAUDI_QUEUE_ID_TPC_2_2:
4377 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4378 break;
4379
4380 case GAUDI_QUEUE_ID_TPC_2_3:
4381 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4382 break;
4383
4384 case GAUDI_QUEUE_ID_TPC_3_0:
4385 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4386 break;
4387
4388 case GAUDI_QUEUE_ID_TPC_3_1:
4389 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4390 break;
4391
4392 case GAUDI_QUEUE_ID_TPC_3_2:
4393 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4394 break;
4395
4396 case GAUDI_QUEUE_ID_TPC_3_3:
4397 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4398 break;
4399
4400 case GAUDI_QUEUE_ID_TPC_4_0:
4401 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4402 break;
4403
4404 case GAUDI_QUEUE_ID_TPC_4_1:
4405 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4406 break;
4407
4408 case GAUDI_QUEUE_ID_TPC_4_2:
4409 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4410 break;
4411
4412 case GAUDI_QUEUE_ID_TPC_4_3:
4413 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4414 break;
4415
4416 case GAUDI_QUEUE_ID_TPC_5_0:
4417 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4418 break;
4419
4420 case GAUDI_QUEUE_ID_TPC_5_1:
4421 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4422 break;
4423
4424 case GAUDI_QUEUE_ID_TPC_5_2:
4425 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4426 break;
4427
4428 case GAUDI_QUEUE_ID_TPC_5_3:
4429 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4430 break;
4431
4432 case GAUDI_QUEUE_ID_TPC_6_0:
4433 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4434 break;
4435
4436 case GAUDI_QUEUE_ID_TPC_6_1:
4437 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4438 break;
4439
4440 case GAUDI_QUEUE_ID_TPC_6_2:
4441 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4442 break;
4443
4444 case GAUDI_QUEUE_ID_TPC_6_3:
4445 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4446 break;
4447
4448 case GAUDI_QUEUE_ID_TPC_7_0:
4449 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4450 break;
4451
4452 case GAUDI_QUEUE_ID_TPC_7_1:
4453 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4454 break;
4455
4456 case GAUDI_QUEUE_ID_TPC_7_2:
4457 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4458 break;
4459
4460 case GAUDI_QUEUE_ID_TPC_7_3:
4461 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4462 break;
4463
4464 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4465 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4466 invalid_queue = true;
4467
4468 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4469 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4470 break;
4471
4472 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4473 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4474 invalid_queue = true;
4475
4476 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4477 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4478 break;
4479
4480 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4481 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4482 invalid_queue = true;
4483
4484 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4485 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4486 break;
4487
4488 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4489 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4490 invalid_queue = true;
4491
4492 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4493 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4494 break;
4495
4496 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4497 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4498 invalid_queue = true;
4499
4500 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4501 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4502 break;
4503
4504 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4505 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4506 invalid_queue = true;
4507
4508 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4509 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4510 break;
4511
4512 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4513 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4514 invalid_queue = true;
4515
4516 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4517 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4518 break;
4519
4520 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4521 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4522 invalid_queue = true;
4523
4524 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4525 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4526 break;
4527
4528 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4529 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4530 invalid_queue = true;
4531
4532 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4533 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4534 break;
4535
4536 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4537 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4538 invalid_queue = true;
4539
4540 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4541 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4542 break;
4543
4544 default:
4545 invalid_queue = true;
4546 }
4547
4548 if (invalid_queue) {
		/* Should never get here */
4550 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4551 hw_queue_id);
4552 return;
4553 }
4554
4555 db_value = pi;

	/* ring the doorbell */
4558 WREG32(db_reg_offset, db_value);
4559
4560 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
4562 mb();
4563
4564 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4565 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4566 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4567
4568 WREG32(irq_handler_offset,
4569 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4570 }
4571}
4572
4573static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4574 struct hl_bd *bd)
4575{
4576 __le64 *pbd = (__le64 *) bd;
4577
	/* The QMANs are on the host memory so a simple copy suffice */
4579 pqe[0] = pbd[0];
4580 pqe[1] = pbd[1];
4581}
4582
4583static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4584 dma_addr_t *dma_handle, gfp_t flags)
4585{
4586 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4587 dma_handle, flags);
4588
	/* Shift to the device's base physical address of host memory */
4590 if (kernel_addr)
4591 *dma_handle += HOST_PHYS_BASE;
4592
4593 return kernel_addr;
4594}
4595
4596static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4597 void *cpu_addr, dma_addr_t dma_handle)
4598{
	/* Cancel the device's base physical address of host memory */
4600 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4601
4602 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4603}
4604
4605static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4606{
4607 struct asic_fixed_properties *prop = &hdev->asic_prop;
4608 u64 cur_addr = DRAM_BASE_ADDR_USER;
4609 u32 val;
4610 u32 chunk_size;
4611 int rc, dma_id;
4612
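	/* Scrub HBM in SZ_2G chunks, fanning the chunks out across all DMA
	 * channels. The commit uses the MEM_SET bit, so the engines run in
	 * memset mode with the value taken from the SRC_BASE registers
	 * (programmed to zero here)
	 */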
4613 while (cur_addr < prop->dram_end_address) {
4614 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4615 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4616
4617 chunk_size =
4618 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4619
4620 dev_dbg(hdev->dev,
4621 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4622 cur_addr, cur_addr + chunk_size);
4623
4624 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4625 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4626 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4627 lower_32_bits(cur_addr));
4628 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4629 upper_32_bits(cur_addr));
4630 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4631 chunk_size);
4632 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4633 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4634 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4635
4636 cur_addr += chunk_size;
4637
4638 if (cur_addr == prop->dram_end_address)
4639 break;
4640 }
4641
4642 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4643 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4644
4645 rc = hl_poll_timeout(
4646 hdev,
4647 mmDMA0_CORE_STS0 + dma_offset,
4648 val,
4649 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4650 1000,
4651 HBM_SCRUBBING_TIMEOUT_US);
4652
4653 if (rc) {
4654 dev_err(hdev->dev,
4655 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4656 dma_id);
4657 return -EIO;
4658 }
4659 }
4660 }
4661
4662 return 0;
4663}
4664
4665static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4666{
4667 struct asic_fixed_properties *prop = &hdev->asic_prop;
4668 struct gaudi_device *gaudi = hdev->asic_specific;
4669 int rc = 0;
4670 u64 val = 0;
4671
4672 if (!hdev->memory_scrub)
4673 return 0;
4674
4675 if (!addr && !size) {
		/* Wait till device is idle */
4677 rc = hl_poll_timeout(
4678 hdev,
4679 mmDMA0_CORE_STS0,
4680 val,
4681 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4682 0, NULL)),
4683 1000,
4684 HBM_SCRUBBING_TIMEOUT_US);
4685 if (rc) {
4686 dev_err(hdev->dev, "waiting for idle timeout\n");
4687 return -EIO;
4688 }
4689
		/* Scrub SRAM */
4691 addr = prop->sram_user_base_address;
4692 size = hdev->pldm ? 0x10000 :
4693 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4694 val = 0x7777777777777777ull;
4695
4696 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4697 if (rc) {
4698 dev_err(hdev->dev,
4699 "Failed to clear SRAM in mem scrub all\n");
4700 return rc;
4701 }
4702
4703 mutex_lock(&gaudi->clk_gate_mutex);
4704 hdev->asic_funcs->disable_clock_gating(hdev);

		/* Scrub HBM using all DMA channels in parallel */
4707 rc = gaudi_hbm_scrubbing(hdev);
4708 if (rc)
4709 dev_err(hdev->dev,
4710 "Failed to clear HBM in mem scrub all\n");
4711
4712 hdev->asic_funcs->set_clock_gating(hdev);
4713 mutex_unlock(&gaudi->clk_gate_mutex);
4714 }
4715
4716 return rc;
4717}
4718
4719static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4720 u32 queue_id, dma_addr_t *dma_handle,
4721 u16 *queue_len)
4722{
4723 struct gaudi_device *gaudi = hdev->asic_specific;
4724 struct gaudi_internal_qman_info *q;
4725
4726 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4727 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4728 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4729 return NULL;
4730 }
4731
4732 q = &gaudi->internal_qmans[queue_id];
4733 *dma_handle = q->pq_dma_addr;
4734 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4735
4736 return q->pq_kernel_addr;
4737}
4738
4739static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4740 u16 len, u32 timeout, u64 *result)
4741{
4742 struct gaudi_device *gaudi = hdev->asic_specific;
4743
4744 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4745 if (result)
4746 *result = 0;
4747 return 0;
4748 }
4749
4750 if (!timeout)
4751 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4752
4753 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4754 timeout, result);
4755}
4756
4757static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4758{
4759 struct packet_msg_prot *fence_pkt;
4760 dma_addr_t pkt_dma_addr;
4761 u32 fence_val, tmp, timeout_usec;
4762 dma_addr_t fence_dma_addr;
4763 u32 *fence_ptr;
4764 int rc;
4765
4766 if (hdev->pldm)
4767 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4768 else
4769 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4770
4771 fence_val = GAUDI_QMAN0_FENCE_VAL;
4772
4773 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4774 &fence_dma_addr);
4775 if (!fence_ptr) {
4776 dev_err(hdev->dev,
4777 "Failed to allocate memory for H/W queue %d testing\n",
4778 hw_queue_id);
4779 return -ENOMEM;
4780 }
4781
4782 *fence_ptr = 0;
4783
4784 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4785 sizeof(struct packet_msg_prot),
4786 GFP_KERNEL, &pkt_dma_addr);
4787 if (!fence_pkt) {
4788 dev_err(hdev->dev,
4789 "Failed to allocate packet for H/W queue %d testing\n",
4790 hw_queue_id);
4791 rc = -ENOMEM;
4792 goto free_fence_ptr;
4793 }
4794
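	/* Build a MSG_PROT packet that writes the fence value to the host
	 * buffer; the queue test passes once that value shows up there
	 */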
4795 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4796 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4797 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4798
4799 fence_pkt->ctl = cpu_to_le32(tmp);
4800 fence_pkt->value = cpu_to_le32(fence_val);
4801 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4802
4803 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4804 sizeof(struct packet_msg_prot),
4805 pkt_dma_addr);
4806 if (rc) {
4807 dev_err(hdev->dev,
4808 "Failed to send fence packet to H/W queue %d\n",
4809 hw_queue_id);
4810 goto free_pkt;
4811 }
4812
4813 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4814 1000, timeout_usec, true);
4815
4816 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4817
4818 if (rc == -ETIMEDOUT) {
4819 dev_err(hdev->dev,
4820 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4821 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4822 rc = -EIO;
4823 }
4824
4825free_pkt:
4826 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4827 pkt_dma_addr);
4828free_fence_ptr:
4829 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4830 fence_dma_addr);
4831 return rc;
4832}
4833
4834static int gaudi_test_cpu_queue(struct hl_device *hdev)
4835{
4836 struct gaudi_device *gaudi = hdev->asic_specific;
4837
	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
4842 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4843 return 0;
4844
4845 return hl_fw_test_cpu_queue(hdev);
4846}
4847
4848static int gaudi_test_queues(struct hl_device *hdev)
4849{
4850 int i, rc, ret_val = 0;
4851
4852 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4853 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4854 rc = gaudi_test_queue(hdev, i);
4855 if (rc)
4856 ret_val = -EINVAL;
4857 }
4858 }
4859
4860 rc = gaudi_test_cpu_queue(hdev);
4861 if (rc)
4862 ret_val = -EINVAL;
4863
4864 return ret_val;
4865}
4866
4867static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4868 gfp_t mem_flags, dma_addr_t *dma_handle)
4869{
4870 void *kernel_addr;
4871
4872 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4873 return NULL;
4874
4875 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4876
	/* Shift to the device's base physical address of host memory */
4878 if (kernel_addr)
4879 *dma_handle += HOST_PHYS_BASE;
4880
4881 return kernel_addr;
4882}
4883
4884static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4885 dma_addr_t dma_addr)
4886{
	/* Cancel the device's base physical address of host memory */
4888 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4889
4890 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4891}
4892
4893static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4894 size_t size, dma_addr_t *dma_handle)
4895{
4896 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4897}
4898
4899static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4900 size_t size, void *vaddr)
4901{
4902 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4903}
4904
4905static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4906 int nents, enum dma_data_direction dir)
4907{
4908 struct scatterlist *sg;
4909 int i;
4910
4911 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4912 return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
4915 for_each_sg(sgl, sg, nents, i)
4916 sg->dma_address += HOST_PHYS_BASE;
4917
4918 return 0;
4919}
4920
4921static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4922 int nents, enum dma_data_direction dir)
4923{
4924 struct scatterlist *sg;
4925 int i;

	/* Cancel the device's base physical address of host memory */
4928 for_each_sg(sgl, sg, nents, i)
4929 sg->dma_address -= HOST_PHYS_BASE;
4930
4931 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4932}
4933
4934static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4935 struct sg_table *sgt)
4936{
4937 struct scatterlist *sg, *sg_next_iter;
4938 u32 count, dma_desc_cnt;
4939 u64 len, len_next;
4940 dma_addr_t addr, addr_next;
4941
4942 dma_desc_cnt = 0;
4943
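	/* Merge physically contiguous SG entries (capped at
	 * DMA_MAX_TRANSFER_SIZE per descriptor) and count how many LIN_DMA
	 * packets the patched CB will need for this SG table
	 */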
4944 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4945
4946 len = sg_dma_len(sg);
4947 addr = sg_dma_address(sg);
4948
4949 if (len == 0)
4950 break;
4951
4952 while ((count + 1) < sgt->nents) {
4953 sg_next_iter = sg_next(sg);
4954 len_next = sg_dma_len(sg_next_iter);
4955 addr_next = sg_dma_address(sg_next_iter);
4956
4957 if (len_next == 0)
4958 break;
4959
4960 if ((addr + len == addr_next) &&
4961 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4962 len += len_next;
4963 count++;
4964 sg = sg_next_iter;
4965 } else {
4966 break;
4967 }
4968 }
4969
4970 dma_desc_cnt++;
4971 }
4972
4973 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4974}
4975
4976static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4977 struct hl_cs_parser *parser,
4978 struct packet_lin_dma *user_dma_pkt,
4979 u64 addr, enum dma_data_direction dir)
4980{
4981 struct hl_userptr *userptr;
4982 int rc;
4983
4984 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4985 parser->job_userptr_list, &userptr))
4986 goto already_pinned;
4987
4988 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4989 if (!userptr)
4990 return -ENOMEM;
4991
4992 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4993 userptr);
4994 if (rc)
4995 goto free_userptr;
4996
4997 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4998
4999 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5000 userptr->sgt->nents, dir);
5001 if (rc) {
5002 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5003 goto unpin_memory;
5004 }
5005
5006 userptr->dma_mapped = true;
5007 userptr->dir = dir;
5008
5009already_pinned:
5010 parser->patched_cb_size +=
5011 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5012
5013 return 0;
5014
5015unpin_memory:
5016 list_del(&userptr->job_node);
5017 hl_unpin_host_memory(hdev, userptr);
5018free_userptr:
5019 kfree(userptr);
5020 return rc;
5021}
5022
5023static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5024 struct hl_cs_parser *parser,
5025 struct packet_lin_dma *user_dma_pkt,
5026 bool src_in_host)
5027{
5028 enum dma_data_direction dir;
5029 bool skip_host_mem_pin = false, user_memset;
5030 u64 addr;
5031 int rc = 0;
5032
5033 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5034 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5035 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5036
5037 if (src_in_host) {
5038 if (user_memset)
5039 skip_host_mem_pin = true;
5040
5041 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5042 dir = DMA_TO_DEVICE;
5043 addr = le64_to_cpu(user_dma_pkt->src_addr);
5044 } else {
5045 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5046 dir = DMA_FROM_DEVICE;
5047 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5048 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5049 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5050 }
5051
5052 if (skip_host_mem_pin)
5053 parser->patched_cb_size += sizeof(*user_dma_pkt);
5054 else
5055 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5056 addr, dir);
5057
5058 return rc;
5059}
5060
5061static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5062 struct hl_cs_parser *parser,
5063 struct packet_lin_dma *user_dma_pkt)
5064{
5065 bool src_in_host = false;
5066 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5067 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5068 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5069
5070 dev_dbg(hdev->dev, "DMA packet details:\n");
5071 dev_dbg(hdev->dev, "source == 0x%llx\n",
5072 le64_to_cpu(user_dma_pkt->src_addr));
5073 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5074 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5075
	/*
	 * Special handling for DMA with size 0. Bypass all validations
	 * because no transactions will be done except for WR_COMP, which
	 * is not a security issue
	 */
5081 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5082 parser->patched_cb_size += sizeof(*user_dma_pkt);
5083 return 0;
5084 }
5085
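	/* Queues DMA_0_* appear to serve the host-to-device direction of
	 * the PCI DMA, so only for them the source resides in host memory
	 */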
5086 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5087 src_in_host = true;
5088
5089 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5090 src_in_host);
5091}
5092
5093static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5094 struct hl_cs_parser *parser,
5095 struct packet_load_and_exe *user_pkt)
5096{
5097 u32 cfg;
5098
5099 cfg = le32_to_cpu(user_pkt->cfg);
5100
5101 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5102 dev_err(hdev->dev,
5103 "User not allowed to use Load and Execute\n");
5104 return -EPERM;
5105 }
5106
5107 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5108
5109 return 0;
5110}
5111
5112static int gaudi_validate_cb(struct hl_device *hdev,
5113 struct hl_cs_parser *parser, bool is_mmu)
5114{
5115 u32 cb_parsed_length = 0;
5116 int rc = 0;
5117
5118 parser->patched_cb_size = 0;
5119
	/* cb_user_size is more than 0 so loop will always be executed */
5121 while (cb_parsed_length < parser->user_cb_size) {
5122 enum packet_id pkt_id;
5123 u16 pkt_size;
5124 struct gaudi_packet *user_pkt;
5125
5126 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5127
5128 pkt_id = (enum packet_id) (
5129 (le64_to_cpu(user_pkt->header) &
5130 PACKET_HEADER_PACKET_ID_MASK) >>
5131 PACKET_HEADER_PACKET_ID_SHIFT);
5132
5133 if (!validate_packet_id(pkt_id)) {
5134 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5135 rc = -EINVAL;
5136 break;
5137 }
5138
5139 pkt_size = gaudi_packet_sizes[pkt_id];
5140 cb_parsed_length += pkt_size;
5141 if (cb_parsed_length > parser->user_cb_size) {
5142 dev_err(hdev->dev,
5143 "packet 0x%x is out of CB boundary\n", pkt_id);
5144 rc = -EINVAL;
5145 break;
5146 }
5147
5148 switch (pkt_id) {
5149 case PACKET_MSG_PROT:
5150 dev_err(hdev->dev,
5151 "User not allowed to use MSG_PROT\n");
5152 rc = -EPERM;
5153 break;
5154
5155 case PACKET_CP_DMA:
5156 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5157 rc = -EPERM;
5158 break;
5159
5160 case PACKET_STOP:
5161 dev_err(hdev->dev, "User not allowed to use STOP\n");
5162 rc = -EPERM;
5163 break;
5164
5165 case PACKET_WREG_BULK:
5166 dev_err(hdev->dev,
5167 "User not allowed to use WREG_BULK\n");
5168 rc = -EPERM;
5169 break;
5170
5171 case PACKET_LOAD_AND_EXE:
5172 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5173 (struct packet_load_and_exe *) user_pkt);
5174 break;
5175
5176 case PACKET_LIN_DMA:
5177 parser->contains_dma_pkt = true;
5178 if (is_mmu)
5179 parser->patched_cb_size += pkt_size;
5180 else
5181 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5182 (struct packet_lin_dma *) user_pkt);
5183 break;
5184
5185 case PACKET_WREG_32:
5186 case PACKET_MSG_LONG:
5187 case PACKET_MSG_SHORT:
5188 case PACKET_REPEAT:
5189 case PACKET_FENCE:
5190 case PACKET_NOP:
5191 case PACKET_ARB_POINT:
5192 parser->patched_cb_size += pkt_size;
5193 break;
5194
5195 default:
5196 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5197 pkt_id);
5198 rc = -EINVAL;
5199 break;
5200 }
5201
5202 if (rc)
5203 break;
5204 }
5205
	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate the MSI interrupt
	 */
5211 if (parser->completion)
5212 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5213
5214 return rc;
5215}
5216
5217static int gaudi_patch_dma_packet(struct hl_device *hdev,
5218 struct hl_cs_parser *parser,
5219 struct packet_lin_dma *user_dma_pkt,
5220 struct packet_lin_dma *new_dma_pkt,
5221 u32 *new_dma_pkt_size)
5222{
5223 struct hl_userptr *userptr;
5224 struct scatterlist *sg, *sg_next_iter;
5225 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5226 u64 len, len_next;
5227 dma_addr_t dma_addr, dma_addr_next;
5228 u64 device_memory_addr, addr;
5229 enum dma_data_direction dir;
5230 struct sg_table *sgt;
5231 bool src_in_host = false;
5232 bool skip_host_mem_pin = false;
5233 bool user_memset;
5234
5235 ctl = le32_to_cpu(user_dma_pkt->ctl);
5236
5237 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5238 src_in_host = true;
5239
5240 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5241 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5242
5243 if (src_in_host) {
5244 addr = le64_to_cpu(user_dma_pkt->src_addr);
5245 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5246 dir = DMA_TO_DEVICE;
5247 if (user_memset)
5248 skip_host_mem_pin = true;
5249 } else {
5250 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5251 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5252 dir = DMA_FROM_DEVICE;
5253 }
5254
5255 if ((!skip_host_mem_pin) &&
5256 (!hl_userptr_is_pinned(hdev, addr,
5257 le32_to_cpu(user_dma_pkt->tsize),
5258 parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
5261 return -EFAULT;
5262 }
5263
5264 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5265 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5266 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5267 return 0;
5268 }
5269
5270 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5271
5272 sgt = userptr->sgt;
5273 dma_desc_cnt = 0;
5274
5275 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5276 len = sg_dma_len(sg);
5277 dma_addr = sg_dma_address(sg);
5278
5279 if (len == 0)
5280 break;
5281
5282 while ((count + 1) < sgt->nents) {
5283 sg_next_iter = sg_next(sg);
5284 len_next = sg_dma_len(sg_next_iter);
5285 dma_addr_next = sg_dma_address(sg_next_iter);
5286
5287 if (len_next == 0)
5288 break;
5289
5290 if ((dma_addr + len == dma_addr_next) &&
5291 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5292 len += len_next;
5293 count++;
5294 sg = sg_next_iter;
5295 } else {
5296 break;
5297 }
5298 }
5299
5300 ctl = le32_to_cpu(user_dma_pkt->ctl);
5301 if (likely(dma_desc_cnt))
5302 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5303 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5304 new_dma_pkt->ctl = cpu_to_le32(ctl);
5305 new_dma_pkt->tsize = cpu_to_le32(len);
5306
5307 if (dir == DMA_TO_DEVICE) {
5308 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5309 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5310 } else {
5311 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5312 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5313 }
5314
5315 if (!user_memset)
5316 device_memory_addr += len;
5317 dma_desc_cnt++;
5318 new_dma_pkt++;
5319 }
5320
5321 if (!dma_desc_cnt) {
5322 dev_err(hdev->dev,
5323 "Error of 0 SG entries when patching DMA packet\n");
5324 return -EFAULT;
5325 }
5326
	/* Fix the last dma packet - wrcomp must be as user set it */
5328 new_dma_pkt--;
5329 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5330
5331 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5332
5333 return 0;
5334}
5335
5336static int gaudi_patch_cb(struct hl_device *hdev,
5337 struct hl_cs_parser *parser)
5338{
5339 u32 cb_parsed_length = 0;
5340 u32 cb_patched_cur_length = 0;
5341 int rc = 0;
5342
	/* cb_user_size is more than 0 so loop will always be executed */
5344 while (cb_parsed_length < parser->user_cb_size) {
5345 enum packet_id pkt_id;
5346 u16 pkt_size;
5347 u32 new_pkt_size = 0;
5348 struct gaudi_packet *user_pkt, *kernel_pkt;
5349
5350 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5351 kernel_pkt = parser->patched_cb->kernel_address +
5352 cb_patched_cur_length;
5353
5354 pkt_id = (enum packet_id) (
5355 (le64_to_cpu(user_pkt->header) &
5356 PACKET_HEADER_PACKET_ID_MASK) >>
5357 PACKET_HEADER_PACKET_ID_SHIFT);
5358
5359 if (!validate_packet_id(pkt_id)) {
5360 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5361 rc = -EINVAL;
5362 break;
5363 }
5364
5365 pkt_size = gaudi_packet_sizes[pkt_id];
5366 cb_parsed_length += pkt_size;
5367 if (cb_parsed_length > parser->user_cb_size) {
5368 dev_err(hdev->dev,
5369 "packet 0x%x is out of CB boundary\n", pkt_id);
5370 rc = -EINVAL;
5371 break;
5372 }
5373
5374 switch (pkt_id) {
5375 case PACKET_LIN_DMA:
5376 rc = gaudi_patch_dma_packet(hdev, parser,
5377 (struct packet_lin_dma *) user_pkt,
5378 (struct packet_lin_dma *) kernel_pkt,
5379 &new_pkt_size);
5380 cb_patched_cur_length += new_pkt_size;
5381 break;
5382
5383 case PACKET_MSG_PROT:
5384 dev_err(hdev->dev,
5385 "User not allowed to use MSG_PROT\n");
5386 rc = -EPERM;
5387 break;
5388
5389 case PACKET_CP_DMA:
5390 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5391 rc = -EPERM;
5392 break;
5393
5394 case PACKET_STOP:
5395 dev_err(hdev->dev, "User not allowed to use STOP\n");
5396 rc = -EPERM;
5397 break;
5398
5399 case PACKET_WREG_32:
5400 case PACKET_WREG_BULK:
5401 case PACKET_MSG_LONG:
5402 case PACKET_MSG_SHORT:
5403 case PACKET_REPEAT:
5404 case PACKET_FENCE:
5405 case PACKET_NOP:
5406 case PACKET_ARB_POINT:
5407 case PACKET_LOAD_AND_EXE:
5408 memcpy(kernel_pkt, user_pkt, pkt_size);
5409 cb_patched_cur_length += pkt_size;
5410 break;
5411
5412 default:
5413 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5414 pkt_id);
5415 rc = -EINVAL;
5416 break;
5417 }
5418
5419 if (rc)
5420 break;
5421 }
5422
5423 return rc;
5424}
5425
5426static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5427 struct hl_cs_parser *parser)
5428{
5429 u64 patched_cb_handle;
5430 u32 patched_cb_size;
5431 struct hl_cb *user_cb;
5432 int rc;
5433
	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate the MSI interrupt
	 */
5439 if (parser->completion)
5440 parser->patched_cb_size = parser->user_cb_size +
5441 sizeof(struct packet_msg_prot) * 2;
5442 else
5443 parser->patched_cb_size = parser->user_cb_size;
5444
5445 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5446 parser->patched_cb_size, false, false,
5447 &patched_cb_handle);
5448
5449 if (rc) {
5450 dev_err(hdev->dev,
5451 "Failed to allocate patched CB for DMA CS %d\n",
5452 rc);
5453 return rc;
5454 }
5455
5456 patched_cb_handle >>= PAGE_SHIFT;
5457 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5458 (u32) patched_cb_handle);
5459
5460 if (!parser->patched_cb) {
5461 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5462 (u32) patched_cb_handle);
5463 rc = -EFAULT;
5464 goto out;
5465 }
5466
	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was
	 * done in validate_queue_index().
	 */
5471 memcpy(parser->patched_cb->kernel_address,
5472 parser->user_cb->kernel_address,
5473 parser->user_cb_size);
5474
5475 patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
5478 user_cb = parser->user_cb;
5479 parser->user_cb = parser->patched_cb;
5480 rc = gaudi_validate_cb(hdev, parser, true);
5481 parser->user_cb = user_cb;
5482
5483 if (rc) {
5484 hl_cb_put(parser->patched_cb);
5485 goto out;
5486 }
5487
5488 if (patched_cb_size != parser->patched_cb_size) {
5489 dev_err(hdev->dev, "user CB size mismatch\n");
5490 hl_cb_put(parser->patched_cb);
5491 rc = -EINVAL;
5492 goto out;
5493 }
5494
5495out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
5502 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5503 patched_cb_handle << PAGE_SHIFT);
5504
5505 return rc;
5506}
5507
5508static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5509 struct hl_cs_parser *parser)
5510{
5511 u64 patched_cb_handle;
5512 int rc;
5513
5514 rc = gaudi_validate_cb(hdev, parser, false);
5515
5516 if (rc)
5517 goto free_userptr;
5518
5519 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5520 parser->patched_cb_size, false, false,
5521 &patched_cb_handle);
5522 if (rc) {
5523 dev_err(hdev->dev,
5524 "Failed to allocate patched CB for DMA CS %d\n", rc);
5525 goto free_userptr;
5526 }
5527
5528 patched_cb_handle >>= PAGE_SHIFT;
5529 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5530 (u32) patched_cb_handle);
5531
5532 if (!parser->patched_cb) {
5533 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5534 (u32) patched_cb_handle);
5535 rc = -EFAULT;
5536 goto out;
5537 }
5538
5539 rc = gaudi_patch_cb(hdev, parser);
5540
5541 if (rc)
5542 hl_cb_put(parser->patched_cb);
5543
5544out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
5551 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5552 patched_cb_handle << PAGE_SHIFT);
5553
5554free_userptr:
5555 if (rc)
5556 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5557 return rc;
5558}
5559
5560static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5561 struct hl_cs_parser *parser)
5562{
5563 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5564 struct gaudi_device *gaudi = hdev->asic_specific;
5565 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5566 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5567
5568 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5569 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5570 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5571 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5572 parser->hw_queue_id);
5573 return -EINVAL;
5574 }
5575
	/* For internal queue jobs just check if CB address is valid */
5577 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5578 parser->user_cb_size,
5579 asic_prop->sram_user_base_address,
5580 asic_prop->sram_end_address))
5581 return 0;
5582
5583 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5584 parser->user_cb_size,
5585 asic_prop->dram_user_base_address,
5586 asic_prop->dram_end_address))
5587 return 0;
5588
	/* PMMU and HPMMU addresses are equal, check only one of them */
5590 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5591 parser->user_cb_size,
5592 asic_prop->pmmu.start_addr,
5593 asic_prop->pmmu.end_addr))
5594 return 0;
5595
5596 dev_err(hdev->dev,
5597 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5598 parser->user_cb, parser->user_cb_size);
5599
5600 return -EFAULT;
5601}
5602
5603static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5604{
5605 struct gaudi_device *gaudi = hdev->asic_specific;
5606
5607 if (parser->queue_type == QUEUE_TYPE_INT)
5608 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5609
5610 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5611 return gaudi_parse_cb_mmu(hdev, parser);
5612 else
5613 return gaudi_parse_cb_no_mmu(hdev, parser);
5614}
5615
5616static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5617 void *kernel_address, u32 len,
5618 u64 cq_addr, u32 cq_val, u32 msi_vec,
5619 bool eb)
5620{
5621 struct gaudi_device *gaudi = hdev->asic_specific;
5622 struct packet_msg_prot *cq_pkt;
5623 u32 tmp;
5624
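	/* The two MSG_PROT packets sit at the very end of the CB: the first
	 * writes cq_val to the completion queue address, the second triggers
	 * the MSI by writing 1 to the PCIE_MSI_INTR_0 register region
	 */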
5625 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5626
5627 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5628 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5629
5630 if (eb)
5631 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5632
5633 cq_pkt->ctl = cpu_to_le32(tmp);
5634 cq_pkt->value = cpu_to_le32(cq_val);
5635 cq_pkt->addr = cpu_to_le64(cq_addr);
5636
5637 cq_pkt++;
5638
5639 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5640 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5641 cq_pkt->ctl = cpu_to_le32(tmp);
5642 cq_pkt->value = cpu_to_le32(1);
5643
5644 if (!gaudi->multi_msi_mode)
5645 msi_vec = 0;
5646
5647 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5648}
5649
5650static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5651{
5652 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5653}
5654
5655static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5656 u32 size, u64 val)
5657{
5658 struct packet_lin_dma *lin_dma_pkt;
5659 struct hl_cs_job *job;
5660 u32 cb_size, ctl, err_cause;
5661 struct hl_cb *cb;
5662 u64 id;
5663 int rc;
5664
5665 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5666 if (!cb)
5667 return -EFAULT;
5668
5669 lin_dma_pkt = cb->kernel_address;
5670 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5671 cb_size = sizeof(*lin_dma_pkt);
5672
5673 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5674 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5675 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5676 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5677 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5678
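	/* In memset mode the DMA engine treats the 64-bit source field as
	 * the value to replicate, so val goes into src_addr below
	 */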
5679 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5680 lin_dma_pkt->src_addr = cpu_to_le64(val);
5681 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5682 lin_dma_pkt->tsize = cpu_to_le32(size);
5683
5684 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5685 if (!job) {
5686 dev_err(hdev->dev, "Failed to allocate a new job\n");
5687 rc = -ENOMEM;
5688 goto release_cb;
5689 }
5690
	/* Verify DMA is OK */
5692 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5693 if (err_cause && !hdev->init_done) {
5694 dev_dbg(hdev->dev,
5695 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5696 err_cause);
5697 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5698 }
5699
5700 job->id = 0;
5701 job->user_cb = cb;
5702 atomic_inc(&job->user_cb->cs_cnt);
5703 job->user_cb_size = cb_size;
5704 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5705 job->patched_cb = job->user_cb;
5706 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5707
5708 hl_debugfs_add_job(hdev, job);
5709
5710 rc = gaudi_send_job_on_qman0(hdev, job);
5711 hl_debugfs_remove_job(hdev, job);
5712 kfree(job);
5713 atomic_dec(&cb->cs_cnt);
5714
	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	id = cb->id;
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);

	return rc;
}

static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	u64 id;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %uMB\n",
			SZ_2M >> 20);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0 ; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	id = cb->id;
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);

	return rc;
}

static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
	struct hl_ctx *ctx;
	struct hl_pending_cb *pending_cb;
	struct packet_msg_long *pkt;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int i, rc;

	mutex_lock(&hdev->fpriv_list_lock);
	ctx = hdev->compute_ctx;

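	/* If no compute context is available or the context is going down,
	 * write the registers directly through QMAN0
	 */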
	if (!ctx || kref_read(&ctx->refcount) == 0) {
		rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
		mutex_unlock(&hdev->fpriv_list_lock);
		return rc;
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	cb_size = (sizeof(*pkt) * num_regs) +
			sizeof(struct packet_msg_prot) * 2;

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %uMB\n",
			SZ_2M >> 20);
		return -ENOMEM;
	}

	pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
	if (!pending_cb)
		return -ENOMEM;

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb) {
		kfree(pending_cb);
		return -EFAULT;
	}

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0 ; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	pending_cb->cb = cb;
	pending_cb->cb_size = cb_size;

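	/* The queue ID MUST be an external queue ID, otherwise the
	 * submission of the pending CB has undefined behavior
	 */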
	pending_cb->hw_queue_id = hw_queue_id;

	spin_lock(&ctx->pending_cb_lock);
	list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
	spin_unlock(&ctx->pending_cb_lock);

	return 0;
}

static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	u64 base_addr;
	u32 num_regs;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers\n");
		return rc;
	}

	return 0;
}

static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(i * sob_delta);
		u32 dma_offset = i * DMA_CORE_OFFSET;

		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
				lower_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
				upper_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);

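		/* The first two channels are the PCI DMAs and are skipped;
		 * for the rest, restore WR_AWUSER_31_11 to its default
		 * value of 1
		 */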
		if (i > 1)
			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
					0x00000001);
	}
}

static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		qman_offset = i * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = i * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
	}
}

static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_restore_sm_registers(hdev);
	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}

static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return gaudi_restore_user_registers(hdev);
}

static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}

static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
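	/* Intentionally left empty */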
}

static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
					GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't read register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			*val = RREG32(addr - CFG_BASE);
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
					GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't write register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			WREG32(addr - CFG_BASE, val);
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
					GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't read register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			u32 val_l = RREG32(addr - CFG_BASE);
			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

			*val = (((u64) val_h) << 32) | val_l;
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr <=
			DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
					GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't write register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			WREG32(addr - CFG_BASE, lower_32_bits(val));
			WREG32(addr + sizeof(u32) - CFG_BASE,
				upper_32_bits(val));
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr <=
			DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

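	/* Verify the DMA completed without errors */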
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}

static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, SZ_2M,
						&dma_addr,
						GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	hdev->asic_funcs->hw_queues_lock(hdev);

	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

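	/* Put the DMA core in privileged mode for the duration of the
	 * transfer; the protection setting is restored once the transfer
	 * is done
	 */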
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

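	/* Clear any stale error indications before starting the transfer */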
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

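	/* Restore the DMA core protection configuration now that the
	 * transfer is done
	 */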
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
						dma_addr);

	return rc;
}

static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
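	/* Zero bits [10:0] of the register (the ASID and MMU-bypass fields)
	 * and then program the requested ASID
	 */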
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);
}

static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}

static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
							u32 x_y, bool is_write)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
			DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA0";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA2";
		else
			return "DMA0 or DMA2";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA1";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA3";
		else
			return "DMA1 or DMA3";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA4";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA6";
		else
			return "DMA4 or DMA6";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA5";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA7";
		else
			return "DMA5 or DMA7";
	}

unknown_initiator:
	return "unknown initiator";
}

static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
							bool is_write)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC0";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC0";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC3";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC1";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC2";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC7";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC5";
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}

static void gaudi_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}

static void gaudi_print_mmu_error_info(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev,
				"MMU access error on va 0x%llx\n", addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}

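/*
 * gaudi_extract_ecc_info - read the ECC error details directly from the
 * reporting block's registers: find the memory wrapper whose error bit is
 * set, fetch the failing address and syndrome, and clear the indication.
 */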
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;
	int rc = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	if (params->disable_clock_gating) {
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);
	}

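	/* Set an invalid wrapper index as a "not found" marker */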
	*memory_wrapper_idx = 0xFF;

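	/* Scan the per-wrapper error bitmap registers; a set bit identifies
	 * the failing memory wrapper
	 */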
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

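	/* Clear the ECC error indication in the block */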
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
	if (params->disable_clock_gating) {
		hdev->asic_funcs->set_clock_gating(hdev);

		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

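/*
 * gaudi_queue_idx_dec - decrement a queue index (pi/ci) with wraparound
 *
 * @idx: the current index
 * @q_len: the queue length, must be a power of 2
 */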
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

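	/* A modular decrement is equivalent to adding (queue_length - 1);
	 * masking with (queue_length - 1) keeps the result in the range
	 * [0, queue_length - 1] because the queue length is a power of 2
	 */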
	return (idx + q_len - 1) & mask;
}

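/*
 * gaudi_print_sw_config_stream_data - print the CQ configuration (buffer
 * address and size) that the software programmed for a given stream
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of the QMAN registers block
 */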
static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
			stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
		stream, cq_ptr, size);
}

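/*
 * gaudi_print_last_pqes_on_err - print the last PQ entries that were fetched
 * by a stream's PQ fetcher, to aid debugging a stop-on-error state
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of the QMAN registers block
 * @pr_sw_conf: if true, print the stream's software configuration as well
 */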
static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci;
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
			stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);

	ci = RREG32(pq_ci);

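	/* The consumer index points at the next entry to fetch, so the most
	 * recently fetched PQE is at ci - 1
	 */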
	ci = gaudi_queue_idx_dec(ci, queue_len);

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u64 addr;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);

		if (!len)
			break;

		addr = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
			stream, ci, addr, len);

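		/* Walk backwards to the previous PQE */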
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}

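/*
 * print_qman_data_on_err - print the QMAN stream configuration and the last
 * fetched PQEs when a QMAN error is detected while stop_on_err is enabled
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream, or QMAN_STREAMS to dump all four streams
 * @qman_base: base address of the QMAN registers block
 */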
static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
						true);
		return;
	}

	gaudi_print_sw_config_stream_data(hdev, stream, qman_base);

	for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
						false);
}

static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

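	/* Iterate through the GLBL_STS1 register of each stream, plus one
	 * extra iteration for the lower CP
	 */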
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

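		/* Clear the sticky errors only if the driver is not
		 * configured to stop on error; otherwise dump the queue
		 * state for debug
		 */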
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			print_qman_data_on_err(hdev, qid_base, i, qman_base);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}

static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err(hdev->dev,
			"SM %u SEI Error: SO %u overflow/underflow",
			index, le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err(hdev->dev,
			"SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			index, le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err(hdev->dev,
			"SM %u SEI Error: AXI ID %u response error",
			index, le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err(hdev->dev, "Unknown SM SEI cause %u",
			sei_data->sei_cause);
		break;
	}
}

static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}

static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
}

7535static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7536 bool razwi)
7537{
7538 char desc[64] = "";
7539
7540 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7541 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7542 event_type, desc);
7543
7544 if (razwi) {
7545 gaudi_print_razwi_info(hdev);
7546 gaudi_print_mmu_error_info(hdev);
7547 }
7548}
7549
7550static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7551 struct cpucp_pkt_sync_err *sync_err)
7552{
7553 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7554
7555 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7556 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7557}
7558
7559static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7560 struct hl_eq_fw_alive *fw_alive)
7561{
7562 dev_err(hdev->dev,
7563 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7564 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7565 "Minor" : "Critical", fw_alive->process_id,
7566 fw_alive->thread_id, fw_alive->uptime_seconds);
7567}
7568
7569static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7570{
7571 struct gaudi_device *gaudi = hdev->asic_specific;
7572
	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
7576 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7577}
7578
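/* Report HBM ECC/parity state for a single HBM device. Two sources exist:
 * when the FW advertises CPU_BOOT_DEV_STS0_HBM_ECC_EN, the FW-supplied
 * hl_eq_hbm_ecc_data is decoded; otherwise (and only if FW security is
 * disabled) the MC registers are read directly, one pseudo-channel pair per
 * channel, and the latched error state is cleared afterwards.
 */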
7579static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7580 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7581{
7582 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7583 int rc = 0;
7584
7585 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7586 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7587 if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data\n");
7589 return 0;
7590 }
7591
7592 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7593 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7594 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7595 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7596 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7597 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7598 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7599 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7600 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7601 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7602 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7603 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7604 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7605 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7606
7607 dev_err(hdev->dev,
7608 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7609 device, ch, wr_par, rd_par, ca_par, serr, derr);
7610 dev_err(hdev->dev,
7611 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7612 device, ch, hbm_ecc_data->first_addr, type,
7613 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7614 hbm_ecc_data->dec_cnt);
7615 return 0;
7616 }
7617
7618 if (hdev->asic_prop.fw_security_enabled) {
7619 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7620 return 0;
7621 }
7622
7623 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7624 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7625 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7626 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7627 if (val) {
7628 rc = -EIO;
7629 dev_err(hdev->dev,
7630 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7631 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7632 (val >> 2) & 0x1, (val >> 3) & 0x1,
7633 (val >> 4) & 0x1);
7634
7635 val2 = RREG32(base + ch * 0x1000 + 0x060);
7636 dev_err(hdev->dev,
7637 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7638 device, ch * 2,
7639 RREG32(base + ch * 0x1000 + 0x064),
7640 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7641 (val2 & 0xFF0000) >> 16,
7642 (val2 & 0xFF000000) >> 24);
7643 }
7644
7645 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7646 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7647 if (val) {
7648 rc = -EIO;
7649 dev_err(hdev->dev,
7650 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7651 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7652 (val >> 2) & 0x1, (val >> 3) & 0x1,
7653 (val >> 4) & 0x1);
7654
7655 val2 = RREG32(base + ch * 0x1000 + 0x070);
7656 dev_err(hdev->dev,
7657 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7658 device, ch * 2 + 1,
7659 RREG32(base + ch * 0x1000 + 0x074),
7660 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7661 (val2 & 0xFF0000) >> 16,
7662 (val2 & 0xFF000000) >> 24);
7663 }
7664
		/* Clear interrupts */
7666 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7667 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7668 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7669 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7670 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7671 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7672 }
7673
7674 val = RREG32(base + 0x8F30);
7675 val2 = RREG32(base + 0x8F34);
	if (val || val2) {
7677 rc = -EIO;
7678 dev_err(hdev->dev,
7679 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7680 device, val, val2);
7681 }
7682 val = RREG32(base + 0x8F40);
7683 val2 = RREG32(base + 0x8F44);
	if (val || val2) {
7685 rc = -EIO;
7686 dev_err(hdev->dev,
7687 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7688 device, val, val2);
7689 }
7690
7691 return rc;
7692}
7693
7694static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7695{
7696 switch (hbm_event_type) {
7697 case GAUDI_EVENT_HBM0_SPI_0:
7698 case GAUDI_EVENT_HBM0_SPI_1:
7699 return 0;
7700 case GAUDI_EVENT_HBM1_SPI_0:
7701 case GAUDI_EVENT_HBM1_SPI_1:
7702 return 1;
7703 case GAUDI_EVENT_HBM2_SPI_0:
7704 case GAUDI_EVENT_HBM2_SPI_1:
7705 return 2;
7706 case GAUDI_EVENT_HBM3_SPI_0:
7707 case GAUDI_EVENT_HBM3_SPI_1:
7708 return 3;
7709 default:
7710 break;
7711 }
7712
	/* Not an HBM SPI event; default to HBM device 0 */
	return 0;
7715}
7716
7717static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7718 char *interrupt_name)
7719{
7720 struct gaudi_device *gaudi = hdev->asic_specific;
7721 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7722 bool soft_reset_required = false;
7723
	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

7729 mutex_lock(&gaudi->clk_gate_mutex);
7730
7731 hdev->asic_funcs->disable_clock_gating(hdev);
7732
7733 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7734 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7735
7736 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7737 if (tpc_interrupts_cause & BIT(i)) {
7738 dev_err_ratelimited(hdev->dev,
7739 "TPC%d_%s interrupt cause: %s\n",
7740 tpc_id, interrupt_name,
7741 gaudi_tpc_interrupts_cause[i]);
7742
7743 if (i == 15)
7744 soft_reset_required = true;
7745 }
7746
	/* Clear interrupts */
7748 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7749
7750 hdev->asic_funcs->set_clock_gating(hdev);
7751
7752 mutex_unlock(&gaudi->clk_gate_mutex);
7753
7754 return soft_reset_required;
7755}
7756
7757static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7758{
7759 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7760}
7761
7762static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7763{
7764 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7765}
7766
7767static void gaudi_print_clk_change_info(struct hl_device *hdev,
7768 u16 event_type)
7769{
7770 switch (event_type) {
7771 case GAUDI_EVENT_FIX_POWER_ENV_S:
7772 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7773 dev_info_ratelimited(hdev->dev,
7774 "Clock throttling due to power consumption\n");
7775 break;
7776
7777 case GAUDI_EVENT_FIX_POWER_ENV_E:
7778 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7779 dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
7781 break;
7782
7783 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7784 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7785 dev_info_ratelimited(hdev->dev,
7786 "Clock throttling due to overheating\n");
7787 break;
7788
7789 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7790 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7791 dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
7793 break;
7794
7795 default:
7796 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7797 event_type);
7798 break;
7799 }
7800}
7801
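/* Top-level event-queue entry dispatcher. The event type is decoded from
 * the EQ entry header:
 *
 *   event_type = (le32_to_cpu(hdr.ctl) & EQ_CTL_EVENT_TYPE_MASK)
 *			>> EQ_CTL_EVENT_TYPE_SHIFT;
 *
 * Fatal events jump to the reset_device label, which hard-resets only when
 * hard_reset_on_fw_events is set and otherwise just unmasks the IRQ in FW;
 * recoverable events are logged and unmasked inline.
 */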
7802static void gaudi_handle_eqe(struct hl_device *hdev,
7803 struct hl_eq_entry *eq_entry)
7804{
7805 struct gaudi_device *gaudi = hdev->asic_specific;
7806 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7807 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7808 >> EQ_CTL_EVENT_TYPE_SHIFT);
7809 u8 cause;
7810 bool reset_required;
7811
7812 gaudi->events_stat[event_type]++;
7813 gaudi->events_stat_aggregate[event_type]++;
7814
7815 switch (event_type) {
7816 case GAUDI_EVENT_PCIE_CORE_DERR:
7817 case GAUDI_EVENT_PCIE_IF_DERR:
7818 case GAUDI_EVENT_PCIE_PHY_DERR:
7819 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7820 case GAUDI_EVENT_MME0_ACC_DERR:
7821 case GAUDI_EVENT_MME0_SBAB_DERR:
7822 case GAUDI_EVENT_MME1_ACC_DERR:
7823 case GAUDI_EVENT_MME1_SBAB_DERR:
7824 case GAUDI_EVENT_MME2_ACC_DERR:
7825 case GAUDI_EVENT_MME2_SBAB_DERR:
7826 case GAUDI_EVENT_MME3_ACC_DERR:
7827 case GAUDI_EVENT_MME3_SBAB_DERR:
7828 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7830 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7831 case GAUDI_EVENT_PSOC_MEM_DERR:
7832 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7833 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7834 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7835 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7836 case GAUDI_EVENT_MMU_DERR:
7837 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7838 gaudi_print_irq_info(hdev, event_type, true);
7839 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7840 goto reset_device;
7841
7842 case GAUDI_EVENT_GIC500:
7843 case GAUDI_EVENT_AXI_ECC:
7844 case GAUDI_EVENT_L2_RAM_ECC:
7845 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7846 gaudi_print_irq_info(hdev, event_type, false);
7847 goto reset_device;
7848
7849 case GAUDI_EVENT_HBM0_SPI_0:
7850 case GAUDI_EVENT_HBM1_SPI_0:
7851 case GAUDI_EVENT_HBM2_SPI_0:
7852 case GAUDI_EVENT_HBM3_SPI_0:
7853 gaudi_print_irq_info(hdev, event_type, false);
7854 gaudi_hbm_read_interrupts(hdev,
7855 gaudi_hbm_event_to_dev(event_type),
7856 &eq_entry->hbm_ecc_data);
7857 goto reset_device;
7858
7859 case GAUDI_EVENT_HBM0_SPI_1:
7860 case GAUDI_EVENT_HBM1_SPI_1:
7861 case GAUDI_EVENT_HBM2_SPI_1:
7862 case GAUDI_EVENT_HBM3_SPI_1:
7863 gaudi_print_irq_info(hdev, event_type, false);
7864 gaudi_hbm_read_interrupts(hdev,
7865 gaudi_hbm_event_to_dev(event_type),
7866 &eq_entry->hbm_ecc_data);
7867 hl_fw_unmask_irq(hdev, event_type);
7868 break;
7869
7870 case GAUDI_EVENT_TPC0_DEC:
7871 case GAUDI_EVENT_TPC1_DEC:
7872 case GAUDI_EVENT_TPC2_DEC:
7873 case GAUDI_EVENT_TPC3_DEC:
7874 case GAUDI_EVENT_TPC4_DEC:
7875 case GAUDI_EVENT_TPC5_DEC:
7876 case GAUDI_EVENT_TPC6_DEC:
7877 case GAUDI_EVENT_TPC7_DEC:
7878 gaudi_print_irq_info(hdev, event_type, true);
7879 reset_required = gaudi_tpc_read_interrupts(hdev,
7880 tpc_dec_event_to_tpc_id(event_type),
7881 "AXI_SLV_DEC_Error");
7882 if (reset_required) {
7883 dev_err(hdev->dev, "hard reset required due to %s\n",
7884 gaudi_irq_map_table[event_type].name);
7885
7886 goto reset_device;
7887 } else {
7888 hl_fw_unmask_irq(hdev, event_type);
7889 }
7890 break;
7891
7892 case GAUDI_EVENT_TPC0_KRN_ERR:
7893 case GAUDI_EVENT_TPC1_KRN_ERR:
7894 case GAUDI_EVENT_TPC2_KRN_ERR:
7895 case GAUDI_EVENT_TPC3_KRN_ERR:
7896 case GAUDI_EVENT_TPC4_KRN_ERR:
7897 case GAUDI_EVENT_TPC5_KRN_ERR:
7898 case GAUDI_EVENT_TPC6_KRN_ERR:
7899 case GAUDI_EVENT_TPC7_KRN_ERR:
7900 gaudi_print_irq_info(hdev, event_type, true);
7901 reset_required = gaudi_tpc_read_interrupts(hdev,
7902 tpc_krn_event_to_tpc_id(event_type),
7903 "KRN_ERR");
7904 if (reset_required) {
7905 dev_err(hdev->dev, "hard reset required due to %s\n",
7906 gaudi_irq_map_table[event_type].name);
7907
7908 goto reset_device;
7909 } else {
7910 hl_fw_unmask_irq(hdev, event_type);
7911 }
7912 break;
7913
7914 case GAUDI_EVENT_PCIE_CORE_SERR:
7915 case GAUDI_EVENT_PCIE_IF_SERR:
7916 case GAUDI_EVENT_PCIE_PHY_SERR:
7917 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7918 case GAUDI_EVENT_MME0_ACC_SERR:
7919 case GAUDI_EVENT_MME0_SBAB_SERR:
7920 case GAUDI_EVENT_MME1_ACC_SERR:
7921 case GAUDI_EVENT_MME1_SBAB_SERR:
7922 case GAUDI_EVENT_MME2_ACC_SERR:
7923 case GAUDI_EVENT_MME2_SBAB_SERR:
7924 case GAUDI_EVENT_MME3_ACC_SERR:
7925 case GAUDI_EVENT_MME3_SBAB_SERR:
7926 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7927 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7928 case GAUDI_EVENT_PSOC_MEM_SERR:
7929 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7930 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7931 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7932 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7934 case GAUDI_EVENT_MMU_SERR:
7935 gaudi_print_irq_info(hdev, event_type, true);
7936 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7937 hl_fw_unmask_irq(hdev, event_type);
7938 break;
7939
7940 case GAUDI_EVENT_PCIE_DEC:
7941 case GAUDI_EVENT_MME0_WBC_RSP:
7942 case GAUDI_EVENT_MME0_SBAB0_RSP:
7943 case GAUDI_EVENT_MME1_WBC_RSP:
7944 case GAUDI_EVENT_MME1_SBAB0_RSP:
7945 case GAUDI_EVENT_MME2_WBC_RSP:
7946 case GAUDI_EVENT_MME2_SBAB0_RSP:
7947 case GAUDI_EVENT_MME3_WBC_RSP:
7948 case GAUDI_EVENT_MME3_SBAB0_RSP:
7949 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7950 case GAUDI_EVENT_PSOC_AXI_DEC:
7951 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7952 case GAUDI_EVENT_MMU_PAGE_FAULT:
7953 case GAUDI_EVENT_MMU_WR_PERM:
7954 case GAUDI_EVENT_RAZWI_OR_ADC:
7955 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7956 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7957 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7959 case GAUDI_EVENT_NIC0_QM0:
7960 case GAUDI_EVENT_NIC0_QM1:
7961 case GAUDI_EVENT_NIC1_QM0:
7962 case GAUDI_EVENT_NIC1_QM1:
7963 case GAUDI_EVENT_NIC2_QM0:
7964 case GAUDI_EVENT_NIC2_QM1:
7965 case GAUDI_EVENT_NIC3_QM0:
7966 case GAUDI_EVENT_NIC3_QM1:
7967 case GAUDI_EVENT_NIC4_QM0:
7968 case GAUDI_EVENT_NIC4_QM1:
7969 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7970 gaudi_print_irq_info(hdev, event_type, true);
7971 gaudi_handle_qman_err(hdev, event_type);
7972 hl_fw_unmask_irq(hdev, event_type);
7973 break;
7974
7975 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7976 gaudi_print_irq_info(hdev, event_type, true);
7977 goto reset_device;
7978
7979 case GAUDI_EVENT_TPC0_BMON_SPMU:
7980 case GAUDI_EVENT_TPC1_BMON_SPMU:
7981 case GAUDI_EVENT_TPC2_BMON_SPMU:
7982 case GAUDI_EVENT_TPC3_BMON_SPMU:
7983 case GAUDI_EVENT_TPC4_BMON_SPMU:
7984 case GAUDI_EVENT_TPC5_BMON_SPMU:
7985 case GAUDI_EVENT_TPC6_BMON_SPMU:
7986 case GAUDI_EVENT_TPC7_BMON_SPMU:
7987 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7988 gaudi_print_irq_info(hdev, event_type, false);
7989 hl_fw_unmask_irq(hdev, event_type);
7990 break;
7991
7992 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7993 gaudi_print_irq_info(hdev, event_type, false);
7994 gaudi_print_sm_sei_info(hdev, event_type,
7995 &eq_entry->sm_sei_data);
7996 hl_fw_unmask_irq(hdev, event_type);
7997 break;
7998
7999 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8000 gaudi_print_clk_change_info(hdev, event_type);
8001 hl_fw_unmask_irq(hdev, event_type);
8002 break;
8003
8004 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8005 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8006 dev_err(hdev->dev,
8007 "Received high temp H/W interrupt %d (cause %d)\n",
8008 event_type, cause);
8009 break;
8010
8011 case GAUDI_EVENT_DEV_RESET_REQ:
8012 gaudi_print_irq_info(hdev, event_type, false);
8013 goto reset_device;
8014
8015 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8016 gaudi_print_irq_info(hdev, event_type, false);
8017 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8018 goto reset_device;
8019
8020 case GAUDI_EVENT_FW_ALIVE_S:
8021 gaudi_print_irq_info(hdev, event_type, false);
8022 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8023 goto reset_device;
8024
8025 default:
8026 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8027 event_type);
8028 break;
8029 }
8030
8031 return;
8032
8033reset_device:
8034 if (hdev->hard_reset_on_fw_events)
8035 hl_device_reset(hdev, HL_RESET_HARD);
8036 else
8037 hl_fw_unmask_irq(hdev, event_type);
8038}
8039
8040static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8041 u32 *size)
8042{
8043 struct gaudi_device *gaudi = hdev->asic_specific;
8044
8045 if (aggregate) {
8046 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8047 return gaudi->events_stat_aggregate;
8048 }
8049
8050 *size = (u32) sizeof(gaudi->events_stat);
8051 return gaudi->events_stat;
8052}
8053
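/* Full MMU cache (STLB) invalidation. The H/W handshake, as implemented
 * below: write 3 to mmSTLB_INV_PS, advance the invalidation producer index
 * in mmSTLB_CACHE_INV, write 2 to mmSTLB_INV_PS and poll it until it reads
 * zero. A timeout is treated as unrecoverable and escalates to a hard
 * reset.
 */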
8054static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8055 u32 flags)
8056{
8057 struct gaudi_device *gaudi = hdev->asic_specific;
8058 u32 status, timeout_usec;
8059 int rc;
8060
8061 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8062 hdev->hard_reset_pending)
8063 return 0;
8064
8065 if (hdev->pldm)
8066 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8067 else
8068 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8069
	/* L0 & L1 invalidation */
8071 WREG32(mmSTLB_INV_PS, 3);
8072 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8073 WREG32(mmSTLB_INV_PS, 2);
8074
8075 rc = hl_poll_timeout(
8076 hdev,
8077 mmSTLB_INV_PS,
8078 status,
8079 !status,
8080 1000,
8081 timeout_usec);
8082
8083 WREG32(mmSTLB_INV_SET, 0);
8084
8085 if (rc) {
8086 dev_err_ratelimited(hdev->dev,
8087 "MMU cache invalidation timeout\n");
8088 hl_device_reset(hdev, HL_RESET_HARD);
8089 }
8090
8091 return rc;
8092}
8093
8094static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8095 bool is_hard, u32 flags,
8096 u32 asid, u64 va, u64 size)
8097{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
8101 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8102}
8103
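/* Program the hop-0 page-table address of an ASID. The physical address is
 * split across two registers (bits 43:12 and 49:44), the request is latched
 * by setting the busy bit in MMU_BUSY, and completion is detected by
 * polling that bit until it clears.
 */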
8104static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8105 u32 asid, u64 phys_addr)
8106{
8107 u32 status, timeout_usec;
8108 int rc;
8109
8110 if (hdev->pldm)
8111 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8112 else
8113 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8114
8115 WREG32(MMU_ASID, asid);
8116 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8117 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8118 WREG32(MMU_BUSY, 0x80000000);
8119
8120 rc = hl_poll_timeout(
8121 hdev,
8122 MMU_BUSY,
8123 status,
8124 !(status & 0x80000000),
8125 1000,
8126 timeout_usec);
8127
8128 if (rc) {
8129 dev_err(hdev->dev,
8130 "Timeout during MMU hop0 config of asid %d\n", asid);
8131 return rc;
8132 }
8133
8134 return 0;
8135}
8136
8137static int gaudi_send_heartbeat(struct hl_device *hdev)
8138{
8139 struct gaudi_device *gaudi = hdev->asic_specific;
8140
8141 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8142 return 0;
8143
8144 return hl_fw_send_heartbeat(hdev);
8145}
8146
8147static int gaudi_cpucp_info_get(struct hl_device *hdev)
8148{
8149 struct gaudi_device *gaudi = hdev->asic_specific;
8150 struct asic_fixed_properties *prop = &hdev->asic_prop;
8151 int rc;
8152
8153 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8154 return 0;
8155
8156 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8157 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8158 mmCPU_BOOT_ERR1);
8159 if (rc)
8160 return rc;
8161
8162 if (!strlen(prop->cpucp_info.card_name))
8163 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8164 CARD_NAME_MAX_LEN);
8165
	hdev->card_type = le32_to_cpu(prop->cpucp_info.card_type);
8167
8168 set_default_power_values(hdev);
8169
8170 hdev->max_power = prop->max_power_default;
8171
8172 return 0;
8173}
8174
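/* Device-wide idle check, used by debugfs and the reset flow. Clock gating
 * is disabled around the reads since the engine/QMAN status registers
 * require it to be off (the same constraint as in
 * gaudi_tpc_read_interrupts()). Every non-idle engine sets its bit in the
 * caller-supplied mask; when a seq_file is given, a per-engine status table
 * is printed as well.
 */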
8175static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8176 u8 mask_len, struct seq_file *s)
8177{
8178 struct gaudi_device *gaudi = hdev->asic_specific;
8179 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8180 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8181 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8182 unsigned long *mask = (unsigned long *)mask_arr;
8183 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8184 bool is_idle = true, is_eng_idle, is_slave;
8185 u64 offset;
8186 int i, dma_id, port;
8187
8188 mutex_lock(&gaudi->clk_gate_mutex);
8189
8190 hdev->asic_funcs->disable_clock_gating(hdev);
8191
8192 if (s)
8193 seq_puts(s,
8194 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8195 "--- ------- ------------ ---------- -------------\n");
8196
8197 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8198 dma_id = gaudi_dma_assignment[i];
8199 offset = dma_id * DMA_QMAN_OFFSET;
8200
8201 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8202 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8203 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8204 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8205 IS_DMA_IDLE(dma_core_sts0);
8206 is_idle &= is_eng_idle;
8207
8208 if (mask && !is_eng_idle)
8209 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8210 if (s)
8211 seq_printf(s, fmt, dma_id,
8212 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8213 qm_cgm_sts, dma_core_sts0);
8214 }
8215
8216 if (s)
8217 seq_puts(s,
8218 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8219 "--- ------- ------------ ---------- ----------\n");
8220
8221 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8222 offset = i * TPC_QMAN_OFFSET;
8223 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8224 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8225 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8226 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8227 IS_TPC_IDLE(tpc_cfg_sts);
8228 is_idle &= is_eng_idle;
8229
8230 if (mask && !is_eng_idle)
8231 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8232 if (s)
8233 seq_printf(s, fmt, i,
8234 is_eng_idle ? "Y" : "N",
8235 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8236 }
8237
8238 if (s)
8239 seq_puts(s,
8240 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8241 "--- ------- ------------ ---------- -----------\n");
8242
8243 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8244 offset = i * MME_QMAN_OFFSET;
8245 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8246 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8247
		/* MME 1 & 3 are slaves, no need to check their QMANs */
8249 is_slave = i % 2;
8250 if (!is_slave) {
8251 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8252 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8253 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8254 }
8255
8256 is_idle &= is_eng_idle;
8257
8258 if (mask && !is_eng_idle)
8259 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8260 if (s) {
8261 if (!is_slave)
8262 seq_printf(s, fmt, i,
8263 is_eng_idle ? "Y" : "N",
8264 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8265 else
8266 seq_printf(s, mme_slave_fmt, i,
8267 is_eng_idle ? "Y" : "N", "-",
8268 "-", mme_arch_sts);
8269 }
8270 }
8271
8272 if (s)
8273 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8274 "--- ------- ------------ ----------\n");
8275
8276 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8277 offset = i * NIC_MACRO_QMAN_OFFSET;
8278 port = 2 * i;
8279 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8280 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8281 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8282 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8283 is_idle &= is_eng_idle;
8284
8285 if (mask && !is_eng_idle)
8286 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8287 if (s)
8288 seq_printf(s, nic_fmt, port,
8289 is_eng_idle ? "Y" : "N",
8290 qm_glbl_sts0, qm_cgm_sts);
8291 }
8292
8293 port = 2 * i + 1;
8294 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8295 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8296 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8297 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8298 is_idle &= is_eng_idle;
8299
8300 if (mask && !is_eng_idle)
8301 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8302 if (s)
8303 seq_printf(s, nic_fmt, port,
8304 is_eng_idle ? "Y" : "N",
8305 qm_glbl_sts0, qm_cgm_sts);
8306 }
8307 }
8308
8309 if (s)
8310 seq_puts(s, "\n");
8311
8312 hdev->asic_funcs->set_clock_gating(hdev);
8313
8314 mutex_unlock(&gaudi->clk_gate_mutex);
8315
8316 return is_idle;
8317}
8318
8319static void gaudi_hw_queues_lock(struct hl_device *hdev)
8320 __acquires(&gaudi->hw_queues_lock)
8321{
8322 struct gaudi_device *gaudi = hdev->asic_specific;
8323
8324 spin_lock(&gaudi->hw_queues_lock);
8325}
8326
8327static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8328 __releases(&gaudi->hw_queues_lock)
8329{
8330 struct gaudi_device *gaudi = hdev->asic_specific;
8331
8332 spin_unlock(&gaudi->hw_queues_lock);
8333}
8334
8335static u32 gaudi_get_pci_id(struct hl_device *hdev)
8336{
8337 return hdev->pdev->device;
8338}
8339
8340static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8341 size_t max_size)
8342{
8343 struct gaudi_device *gaudi = hdev->asic_specific;
8344
8345 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8346 return 0;
8347
8348 return hl_fw_get_eeprom_data(hdev, data, max_size);
8349}
8350
/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
8355static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8356 u32 tpc_id)
8357{
8358 struct gaudi_device *gaudi = hdev->asic_specific;
8359 u64 kernel_timeout;
8360 u32 status, offset;
8361 int rc;
8362
8363 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8364
8365 if (hdev->pldm)
8366 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8367 else
8368 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8369
8370 mutex_lock(&gaudi->clk_gate_mutex);
8371
8372 hdev->asic_funcs->disable_clock_gating(hdev);
8373
8374 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8375 lower_32_bits(tpc_kernel));
8376 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8377 upper_32_bits(tpc_kernel));
8378
8379 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8380 lower_32_bits(tpc_kernel));
8381 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8382 upper_32_bits(tpc_kernel));
8383
8384 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8385 lower_32_bits(tpc_kernel));
8386 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8387 upper_32_bits(tpc_kernel));
8388
8389 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8390 lower_32_bits(CFG_BASE +
8391 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8392
8393 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8394 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8395 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));

	/* wait a bit for the engine to start executing */
8397 usleep_range(1000, 1500);

	/* wait until engine has finished executing */
8400 rc = hl_poll_timeout(
8401 hdev,
8402 mmTPC0_CFG_STATUS + offset,
8403 status,
8404 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8405 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8406 1000,
8407 kernel_timeout);
8408
8409 if (rc) {
8410 dev_err(hdev->dev,
8411 "Timeout while waiting for TPC%d icache prefetch\n",
8412 tpc_id);
8413 hdev->asic_funcs->set_clock_gating(hdev);
8414 mutex_unlock(&gaudi->clk_gate_mutex);
8415 return -EIO;
8416 }
8417
8418 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8419 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8420
	/* wait a bit for the engine to start executing */
8422 usleep_range(1000, 1500);
8423
	/* wait until engine has finished executing */
8425 rc = hl_poll_timeout(
8426 hdev,
8427 mmTPC0_CFG_STATUS + offset,
8428 status,
8429 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8430 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8431 1000,
8432 kernel_timeout);
8433
8434 if (rc) {
8435 dev_err(hdev->dev,
8436 "Timeout while waiting for TPC%d vector pipe\n",
8437 tpc_id);
8438 hdev->asic_funcs->set_clock_gating(hdev);
8439 mutex_unlock(&gaudi->clk_gate_mutex);
8440 return -EIO;
8441 }
8442
8443 rc = hl_poll_timeout(
8444 hdev,
8445 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8446 status,
8447 (status == 0),
8448 1000,
8449 kernel_timeout);
8450
8451 hdev->asic_funcs->set_clock_gating(hdev);
8452 mutex_unlock(&gaudi->clk_gate_mutex);
8453
8454 if (rc) {
8455 dev_err(hdev->dev,
8456 "Timeout while waiting for TPC%d kernel to execute\n",
8457 tpc_id);
8458 return -EIO;
8459 }
8460
8461 return 0;
8462}
8463
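/* Set up the per-context pool of internal command buffers used by the
 * collective-wait flow: allocate a coherent host buffer, wrap it in a
 * gen_pool whose minimal order fits one collective CB (5 MSG_SHORTs plus a
 * FENCE), reserve a device VA block in the host range and map it through
 * the MMU. The error path unwinds in exact reverse order.
 */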
8464static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8465 struct hl_ctx *ctx)
8466{
8467 struct gaudi_device *gaudi = hdev->asic_specific;
8468 int min_alloc_order, rc, collective_cb_size;
8469
8470 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8471 return 0;
8472
8473 hdev->internal_cb_pool_virt_addr =
8474 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8475 HOST_SPACE_INTERNAL_CB_SZ,
8476 &hdev->internal_cb_pool_dma_addr,
8477 GFP_KERNEL | __GFP_ZERO);
8478
8479 if (!hdev->internal_cb_pool_virt_addr)
8480 return -ENOMEM;
8481
8482 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8483 sizeof(struct packet_fence);
8484 min_alloc_order = ilog2(collective_cb_size);
8485
8486 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8487 if (!hdev->internal_cb_pool) {
8488 dev_err(hdev->dev,
8489 "Failed to create internal CB pool\n");
8490 rc = -ENOMEM;
8491 goto free_internal_cb_pool;
8492 }
8493
8494 rc = gen_pool_add(hdev->internal_cb_pool,
8495 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8496 HOST_SPACE_INTERNAL_CB_SZ, -1);
8497 if (rc) {
8498 dev_err(hdev->dev,
8499 "Failed to add memory to internal CB pool\n");
8500 rc = -EFAULT;
8501 goto destroy_internal_cb_pool;
8502 }
8503
8504 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8505 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8506 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8507
8508 if (!hdev->internal_cb_va_base) {
8509 rc = -ENOMEM;
8510 goto destroy_internal_cb_pool;
8511 }
8512
8513 mutex_lock(&ctx->mmu_lock);
8514 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8515 hdev->internal_cb_pool_dma_addr,
8516 HOST_SPACE_INTERNAL_CB_SZ);
8517
8518 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8519 mutex_unlock(&ctx->mmu_lock);
8520
8521 if (rc)
8522 goto unreserve_internal_cb_pool;
8523
8524 return 0;
8525
8526unreserve_internal_cb_pool:
8527 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8528 HOST_SPACE_INTERNAL_CB_SZ);
8529destroy_internal_cb_pool:
8530 gen_pool_destroy(hdev->internal_cb_pool);
8531free_internal_cb_pool:
8532 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8533 HOST_SPACE_INTERNAL_CB_SZ,
8534 hdev->internal_cb_pool_virt_addr,
8535 hdev->internal_cb_pool_dma_addr);
8536
8537 return rc;
8538}
8539
8540static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8541 struct hl_ctx *ctx)
8542{
8543 struct gaudi_device *gaudi = hdev->asic_specific;
8544
8545 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8546 return;
8547
8548 mutex_lock(&ctx->mmu_lock);
8549 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8550 HOST_SPACE_INTERNAL_CB_SZ);
8551 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8552 HOST_SPACE_INTERNAL_CB_SZ);
8553 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8554 mutex_unlock(&ctx->mmu_lock);
8555
8556 gen_pool_destroy(hdev->internal_cb_pool);
8557
8558 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8559 HOST_SPACE_INTERNAL_CB_SZ,
8560 hdev->internal_cb_pool_virt_addr,
8561 hdev->internal_cb_pool_dma_addr);
8562}
8563
8564static int gaudi_ctx_init(struct hl_ctx *ctx)
8565{
8566 if (ctx->asid == HL_KERNEL_ASID_ID)
8567 return 0;
8568
8569 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8570 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8571}
8572
8573static void gaudi_ctx_fini(struct hl_ctx *ctx)
8574{
8575 if (ctx->asid == HL_KERNEL_ASID_ID)
8576 return;
8577
8578 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8579}
8580
8581static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8582{
8583 return gaudi_cq_assignment[cq_idx];
8584}
8585
8586static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8587{
8588 return sizeof(struct packet_msg_short) +
8589 sizeof(struct packet_msg_prot) * 2;
8590}
8591
8592static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8593{
8594 return sizeof(struct packet_msg_short) * 4 +
8595 sizeof(struct packet_fence) +
8596 sizeof(struct packet_msg_prot) * 2;
8597}
8598
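/* Emit a signal CB: a single MSG_SHORT packet that atomically adds 1 to the
 * given sync object. The value word encodes "inc by 1, ADD mode"; the ctl
 * word addresses sob_id * 4 relative to base 3 and sets the RB/MB barrier
 * bits so the packet is ordered within the stream.
 */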
8599static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8600 u32 size, bool eb)
8601{
8602 struct hl_cb *cb = (struct hl_cb *) data;
8603 struct packet_msg_short *pkt;
8604 u32 value, ctl, pkt_size = sizeof(*pkt);
8605
8606 pkt = cb->kernel_address + size;
8607 memset(pkt, 0, pkt_size);
8608
	/* Inc by 1, Mode ADD */
8610 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8611 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8612
8613 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8614 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0);
8615 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3);
8616 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8617 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8618 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8619 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8620
8621 pkt->value = cpu_to_le32(value);
8622 pkt->ctl = cpu_to_le32(ctl);
8623
8624 return size + pkt_size;
8625}
8626
8627static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8628 u16 addr)
8629{
8630 u32 ctl, pkt_size = sizeof(*pkt);
8631
8632 memset(pkt, 0, pkt_size);
8633
8634 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8635 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);
8636 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8637 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8638 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8639 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0);
8640
8641 pkt->value = cpu_to_le32(value);
8642 pkt->ctl = cpu_to_le32(ctl);
8643
8644 return pkt_size;
8645}
8646
8647static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8648 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8649 u16 sob_val, u16 mon_id)
8650{
8651 u64 monitor_base;
8652 u32 ctl, value, pkt_size = sizeof(*pkt);
8653 u16 msg_addr_offset;
8654 u8 mask;
8655
8656 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8657 dev_err(hdev->dev,
8658 "sob_base %u (mask %#x) is not valid\n",
8659 sob_base, sob_mask);
8660 return 0;
8661 }
8662
	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
8667 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8668
8669 msg_addr_offset =
8670 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8671 monitor_base;
8672
8673 memset(pkt, 0, pkt_size);
8674
	/* Monitor config packet: bind the monitor to a sync object */
8676 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8677 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
8680 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8681
8682 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8683 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0);
8684 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);
8685 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8686 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8687 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8688 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8689
8690 pkt->value = cpu_to_le32(value);
8691 pkt->ctl = cpu_to_le32(ctl);
8692
8693 return pkt_size;
8694}
8695
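/* Emit a FENCE packet that stalls the queue until fence counter ID 2
 * reaches a target value of 1, then decrements it by 1 so it is re-armed
 * for the next wait.
 */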
8696static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8697{
8698 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8699
8700 memset(pkt, 0, pkt_size);
8701
8702 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8703 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8704 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8705
8706 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8707 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8708 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8709 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8710
8711 pkt->cfg = cpu_to_le32(cfg);
8712 pkt->ctl = cpu_to_le32(ctl);
8713
8714 return pkt_size;
8715}
8716
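/* Resolve the CP FENCE2 RDATA register that the armed monitor must write
 * for a given queue. DMA/TPC queues map 1:1 to explicit registers; NIC
 * queues are computed, relying on the contiguous queue-ID enumeration
 * (4 streams per NIC engine, 2 engines per NIC macro):
 *
 *   nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_<stream>) >> 2;
 *   offset = mmNIC0_QM0_CP_FENCE2_RDATA_<stream> +
 *		(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
 *		(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
 *
 * e.g. GAUDI_QUEUE_ID_NIC_3_1 yields nic_index 3, i.e. macro 1, engine 1.
 */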
8717static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8718{
8719 u32 offset, nic_index;
8720
8721 switch (queue_id) {
8722 case GAUDI_QUEUE_ID_DMA_0_0:
8723 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8724 break;
8725 case GAUDI_QUEUE_ID_DMA_0_1:
8726 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8727 break;
8728 case GAUDI_QUEUE_ID_DMA_0_2:
8729 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8730 break;
8731 case GAUDI_QUEUE_ID_DMA_0_3:
8732 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8733 break;
8734 case GAUDI_QUEUE_ID_DMA_1_0:
8735 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8736 break;
8737 case GAUDI_QUEUE_ID_DMA_1_1:
8738 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8739 break;
8740 case GAUDI_QUEUE_ID_DMA_1_2:
8741 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8742 break;
8743 case GAUDI_QUEUE_ID_DMA_1_3:
8744 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8745 break;
8746 case GAUDI_QUEUE_ID_DMA_5_0:
8747 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8748 break;
8749 case GAUDI_QUEUE_ID_DMA_5_1:
8750 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8751 break;
8752 case GAUDI_QUEUE_ID_DMA_5_2:
8753 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8754 break;
8755 case GAUDI_QUEUE_ID_DMA_5_3:
8756 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8757 break;
8758 case GAUDI_QUEUE_ID_TPC_7_0:
8759 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8760 break;
8761 case GAUDI_QUEUE_ID_TPC_7_1:
8762 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8763 break;
8764 case GAUDI_QUEUE_ID_TPC_7_2:
8765 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8766 break;
8767 case GAUDI_QUEUE_ID_TPC_7_3:
8768 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8769 break;
8770 case GAUDI_QUEUE_ID_NIC_0_0:
8771 case GAUDI_QUEUE_ID_NIC_1_0:
8772 case GAUDI_QUEUE_ID_NIC_2_0:
8773 case GAUDI_QUEUE_ID_NIC_3_0:
8774 case GAUDI_QUEUE_ID_NIC_4_0:
8775 case GAUDI_QUEUE_ID_NIC_5_0:
8776 case GAUDI_QUEUE_ID_NIC_6_0:
8777 case GAUDI_QUEUE_ID_NIC_7_0:
8778 case GAUDI_QUEUE_ID_NIC_8_0:
8779 case GAUDI_QUEUE_ID_NIC_9_0:
8780 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8781 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8782 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8783 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8784 break;
8785 case GAUDI_QUEUE_ID_NIC_0_1:
8786 case GAUDI_QUEUE_ID_NIC_1_1:
8787 case GAUDI_QUEUE_ID_NIC_2_1:
8788 case GAUDI_QUEUE_ID_NIC_3_1:
8789 case GAUDI_QUEUE_ID_NIC_4_1:
8790 case GAUDI_QUEUE_ID_NIC_5_1:
8791 case GAUDI_QUEUE_ID_NIC_6_1:
8792 case GAUDI_QUEUE_ID_NIC_7_1:
8793 case GAUDI_QUEUE_ID_NIC_8_1:
8794 case GAUDI_QUEUE_ID_NIC_9_1:
8795 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8796 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8797 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8798 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8799 break;
8800 case GAUDI_QUEUE_ID_NIC_0_2:
8801 case GAUDI_QUEUE_ID_NIC_1_2:
8802 case GAUDI_QUEUE_ID_NIC_2_2:
8803 case GAUDI_QUEUE_ID_NIC_3_2:
8804 case GAUDI_QUEUE_ID_NIC_4_2:
8805 case GAUDI_QUEUE_ID_NIC_5_2:
8806 case GAUDI_QUEUE_ID_NIC_6_2:
8807 case GAUDI_QUEUE_ID_NIC_7_2:
8808 case GAUDI_QUEUE_ID_NIC_8_2:
8809 case GAUDI_QUEUE_ID_NIC_9_2:
8810 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8811 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8812 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8813 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8814 break;
8815 case GAUDI_QUEUE_ID_NIC_0_3:
8816 case GAUDI_QUEUE_ID_NIC_1_3:
8817 case GAUDI_QUEUE_ID_NIC_2_3:
8818 case GAUDI_QUEUE_ID_NIC_3_3:
8819 case GAUDI_QUEUE_ID_NIC_4_3:
8820 case GAUDI_QUEUE_ID_NIC_5_3:
8821 case GAUDI_QUEUE_ID_NIC_6_3:
8822 case GAUDI_QUEUE_ID_NIC_7_3:
8823 case GAUDI_QUEUE_ID_NIC_8_3:
8824 case GAUDI_QUEUE_ID_NIC_9_3:
8825 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8826 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8827 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8828 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8829 break;
8830 default:
8831 return -EINVAL;
8832 }
8833
8834 *addr = CFG_BASE + offset;
8835
8836 return 0;
8837}
8838
8839static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8840{
8841 u64 monitor_base;
8842 u32 size = 0;
8843 u16 msg_addr_offset;
8844
	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
8849 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8850
	/* First monitor config packet: low address of the sync */
8852 msg_addr_offset =
8853 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8854 monitor_base;
8855
8856 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8857 msg_addr_offset);
8858
	/* Second monitor config packet: high address of the sync */
8860 msg_addr_offset =
8861 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8862 monitor_base;
8863
8864 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8865 msg_addr_offset);
8866
	/*
	 * Third monitor config packet: the payload, i.e. what to write
	 * when the sync triggers
	 */
8871 msg_addr_offset =
8872 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8873 monitor_base;
8874
8875 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8876
8877 return size;
8878}
8879
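/* Assemble a wait CB at the given offset inside the CB: three MSG_SHORTs
 * configure the monitor payload (fence address low/high and the data value
 * 1), a fourth MSG_SHORT arms the monitor on the SOB group/value/mask, and
 * a FENCE packet stalls until the monitor fires and writes the fence. This
 * is the 4 * msg_short + fence portion of gaudi_get_wait_cb_size().
 */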
8880static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8881 struct hl_gen_wait_properties *prop)
8882{
8883 struct hl_cb *cb = (struct hl_cb *) prop->data;
8884 void *buf = cb->kernel_address;
8885 u64 fence_addr = 0;
8886 u32 size = prop->size;
8887
8888 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8889 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8890 prop->q_idx);
8891 return 0;
8892 }
8893
8894 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8895 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8896 prop->sob_mask, prop->sob_val, prop->mon_id);
8897 size += gaudi_add_fence_pkt(buf + size);
8898
8899 return size;
8900}
8901
8902static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8903{
8904 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8905 int rc;
8906
8907 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8908 hw_sob->sob_id);
8909
8910 rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8911 CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8912 hw_sob->sob_id * 4, 1, 0);
8913 if (rc)
		dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
8915
8916 kref_init(&hw_sob->kref);
8917}
8918
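/* The boot FW leaves HL_POWER9_HOST_MAGIC in a non-reset scratchpad
 * register when the host is a POWER9 machine capable of 64-bit DMA
 * addressing; otherwise the device falls back to a 48-bit DMA mask.
 */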
8919static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8920{
8921 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8922 HL_POWER9_HOST_MAGIC) {
8923 hdev->power9_64bit_dma_enable = 1;
8924 hdev->dma_mask = 64;
8925 } else {
8926 hdev->power9_64bit_dma_enable = 0;
8927 hdev->dma_mask = 48;
8928 }
8929}
8930
8931static u64 gaudi_get_device_time(struct hl_device *hdev)
8932{
8933 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8934
8935 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8936}
8937
8938static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8939 u32 *block_size, u32 *block_id)
8940{
8941 return -EPERM;
8942}
8943
8944static int gaudi_block_mmap(struct hl_device *hdev,
8945 struct vm_area_struct *vma,
8946 u32 block_id, u32 block_size)
8947{
8948 return -EPERM;
8949}
8950
8951static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8952{
8953 struct cpu_dyn_regs *dyn_regs =
8954 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8955 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8956 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8957 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8958
8959 WREG32(irq_handler_offset,
8960 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8961}
8962
8963static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8964{
8965 switch (pll_idx) {
8966 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8967 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8968 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8969 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8970 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8971 case HL_GAUDI_MME_PLL: return MME_PLL;
8972 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8973 case HL_GAUDI_IF_PLL: return IF_PLL;
8974 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8975 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8976 default: return -EINVAL;
8977 }
8978}
8979
8980static const struct hl_asic_funcs gaudi_funcs = {
8981 .early_init = gaudi_early_init,
8982 .early_fini = gaudi_early_fini,
8983 .late_init = gaudi_late_init,
8984 .late_fini = gaudi_late_fini,
8985 .sw_init = gaudi_sw_init,
8986 .sw_fini = gaudi_sw_fini,
8987 .hw_init = gaudi_hw_init,
8988 .hw_fini = gaudi_hw_fini,
8989 .halt_engines = gaudi_halt_engines,
8990 .suspend = gaudi_suspend,
8991 .resume = gaudi_resume,
8992 .cb_mmap = gaudi_cb_mmap,
8993 .ring_doorbell = gaudi_ring_doorbell,
8994 .pqe_write = gaudi_pqe_write,
8995 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8996 .asic_dma_free_coherent = gaudi_dma_free_coherent,
8997 .scrub_device_mem = gaudi_scrub_device_mem,
8998 .get_int_queue_base = gaudi_get_int_queue_base,
8999 .test_queues = gaudi_test_queues,
9000 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9001 .asic_dma_pool_free = gaudi_dma_pool_free,
9002 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9003 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9004 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9005 .cs_parser = gaudi_cs_parser,
9006 .asic_dma_map_sg = gaudi_dma_map_sg,
9007 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9008 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9009 .update_eq_ci = gaudi_update_eq_ci,
9010 .context_switch = gaudi_context_switch,
9011 .restore_phase_topology = gaudi_restore_phase_topology,
9012 .debugfs_read32 = gaudi_debugfs_read32,
9013 .debugfs_write32 = gaudi_debugfs_write32,
9014 .debugfs_read64 = gaudi_debugfs_read64,
9015 .debugfs_write64 = gaudi_debugfs_write64,
9016 .debugfs_read_dma = gaudi_debugfs_read_dma,
9017 .add_device_attr = gaudi_add_device_attr,
9018 .handle_eqe = gaudi_handle_eqe,
9019 .set_pll_profile = gaudi_set_pll_profile,
9020 .get_events_stat = gaudi_get_events_stat,
9021 .read_pte = gaudi_read_pte,
9022 .write_pte = gaudi_write_pte,
9023 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9024 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9025 .send_heartbeat = gaudi_send_heartbeat,
9026 .set_clock_gating = gaudi_set_clock_gating,
9027 .disable_clock_gating = gaudi_disable_clock_gating,
9028 .debug_coresight = gaudi_debug_coresight,
9029 .is_device_idle = gaudi_is_device_idle,
9030 .soft_reset_late_init = gaudi_soft_reset_late_init,
9031 .hw_queues_lock = gaudi_hw_queues_lock,
9032 .hw_queues_unlock = gaudi_hw_queues_unlock,
9033 .get_pci_id = gaudi_get_pci_id,
9034 .get_eeprom_data = gaudi_get_eeprom_data,
9035 .send_cpu_message = gaudi_send_cpu_message,
9036 .pci_bars_map = gaudi_pci_bars_map,
9037 .init_iatu = gaudi_init_iatu,
9038 .rreg = hl_rreg,
9039 .wreg = hl_wreg,
9040 .halt_coresight = gaudi_halt_coresight,
9041 .ctx_init = gaudi_ctx_init,
9042 .ctx_fini = gaudi_ctx_fini,
9043 .get_clk_rate = gaudi_get_clk_rate,
9044 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9045 .load_firmware_to_device = gaudi_load_firmware_to_device,
9046 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9047 .get_signal_cb_size = gaudi_get_signal_cb_size,
9048 .get_wait_cb_size = gaudi_get_wait_cb_size,
9049 .gen_signal_cb = gaudi_gen_signal_cb,
9050 .gen_wait_cb = gaudi_gen_wait_cb,
9051 .reset_sob = gaudi_reset_sob,
9052 .reset_sob_group = gaudi_reset_sob_group,
9053 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9054 .get_device_time = gaudi_get_device_time,
9055 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9056 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9057 .scramble_addr = hl_mmu_scramble_addr,
9058 .descramble_addr = hl_mmu_descramble_addr,
9059 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9060 .get_hw_block_id = gaudi_get_hw_block_id,
9061 .hw_block_mmap = gaudi_block_mmap,
9062 .enable_events_from_fw = gaudi_enable_events_from_fw,
9063 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9064 .init_firmware_loader = gaudi_init_firmware_loader,
9065 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
9066};
9067
9068
9069
9070
9071
9072
9073
9074void gaudi_set_asic_funcs(struct hl_device *hdev)
9075{
9076 hdev->asic_funcs = &gaudi_funcs;
9077}
9078