linux/drivers/misc/habanalabs/gaudi/gaudi.c
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2020 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 */
   7
   8#include "gaudiP.h"
   9#include "../include/hw_ip/mmu/mmu_general.h"
  10#include "../include/hw_ip/mmu/mmu_v1_1.h"
  11#include "../include/gaudi/gaudi_masks.h"
  12#include "../include/gaudi/gaudi_fw_if.h"
  13#include "../include/gaudi/gaudi_reg_map.h"
  14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
  15
  16#include <linux/module.h>
  17#include <linux/pci.h>
  18#include <linux/firmware.h>
  19#include <linux/hwmon.h>
  20#include <linux/iommu.h>
  21#include <linux/seq_file.h>
  22
  23/*
  24 * Gaudi security scheme:
  25 *
  26 * 1. Host is protected by:
  27 *        - Range registers
  28 *        - MMU
  29 *
  30 * 2. DDR is protected by:
  31 *        - Range registers (protect the first 512MB)
  32 *
  33 * 3. Configuration is protected by:
  34 *        - Range registers
  35 *        - Protection bits
  36 *
  37 * MMU is always enabled.
  38 *
  39 * QMAN DMA channels 0,1 (PCI DMA):
  40 *     - DMA is not secured.
  41 *     - PQ and CQ are secured.
  42 *     - CP is secured: The driver needs to parse the CB but WREG should be
  43 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
  44 *                      never secured.
  45 *
  46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
  47 * channel 0 to be secured, execute the DMA and change it back to not secured.
  48 * Currently, the driver doesn't use the DMA while there are compute jobs
  49 * running.
  50 *
  51 * The current use cases for the driver to use the DMA are:
  52 *     - Clear SRAM on context switch (happens on context switch when device is
  53 *       idle)
  54 *     - MMU page tables area clear (happens on init)
  55 *
  56 * QMAN DMA 2-7, TPC, MME, NIC:
  57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
  58 * CQ, CP and the engine are not secured
  59 *
  60 */
  61
  62#define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
  63#define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
  64#define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
  65
  66#define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
  67
  68#define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
  69#define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
  70#define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
  71#define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
  72
  73#define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
  74#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
  75#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
  76#define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
  77#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
  78#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
  79#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
  80#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
  81#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
  82
  83#define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
  84
  85#define GAUDI_MAX_STRING_LEN            20
  86
  87#define GAUDI_CB_POOL_CB_CNT            512
  88#define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
  89
  90#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
  91
  92#define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
  93
  94#define GAUDI_NUM_OF_QM_ERR_CAUSE       16
  95
  96#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
  97
  98#define GAUDI_ARB_WDT_TIMEOUT           0x1000000
  99
 100#define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
 101                BIT(GAUDI_ENGINE_ID_MME_0) |\
 102                BIT(GAUDI_ENGINE_ID_MME_2) |\
 103                GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
 104
 105#define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
 106
 107#define GAUDI_PLL_MAX 10
 108
 109static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 110                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 111                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
 112                "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
 113                "gaudi cpu eq"
 114};
 115
 116static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
 117        [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
 118        [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
 119        [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
 120        [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
 121        [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
 122        [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
 123        [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
 124        [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
 125};
 126
 127static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
 128        [0] = GAUDI_QUEUE_ID_DMA_0_0,
 129        [1] = GAUDI_QUEUE_ID_DMA_0_1,
 130        [2] = GAUDI_QUEUE_ID_DMA_0_2,
 131        [3] = GAUDI_QUEUE_ID_DMA_0_3,
 132        [4] = GAUDI_QUEUE_ID_DMA_1_0,
 133        [5] = GAUDI_QUEUE_ID_DMA_1_1,
 134        [6] = GAUDI_QUEUE_ID_DMA_1_2,
 135        [7] = GAUDI_QUEUE_ID_DMA_1_3,
 136};
 137
 138static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 139        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
 140        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
 141        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
 142        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
 143        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
 144        [PACKET_REPEAT]         = sizeof(struct packet_repeat),
 145        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
 146        [PACKET_FENCE]          = sizeof(struct packet_fence),
 147        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
 148        [PACKET_NOP]            = sizeof(struct packet_nop),
 149        [PACKET_STOP]           = sizeof(struct packet_stop),
 150        [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
 151        [PACKET_WAIT]           = sizeof(struct packet_wait),
 152        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
 153};
 154
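/*
 * validate_packet_id() - return true if @id is one of the Gaudi packet
 * opcodes known to the driver, so packets with unknown opcodes can be
 * rejected before any further parsing.
 */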
 155static inline bool validate_packet_id(enum packet_id id)
 156{
 157        switch (id) {
 158        case PACKET_WREG_32:
 159        case PACKET_WREG_BULK:
 160        case PACKET_MSG_LONG:
 161        case PACKET_MSG_SHORT:
 162        case PACKET_CP_DMA:
 163        case PACKET_REPEAT:
 164        case PACKET_MSG_PROT:
 165        case PACKET_FENCE:
 166        case PACKET_LIN_DMA:
 167        case PACKET_NOP:
 168        case PACKET_STOP:
 169        case PACKET_ARB_POINT:
 170        case PACKET_WAIT:
 171        case PACKET_LOAD_AND_EXE:
 172                return true;
 173        default:
 174                return false;
 175        }
 176}
 177
 178static const char * const
 179gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
 180        "tpc_address_exceed_slm",
 181        "tpc_div_by_0",
 182        "tpc_spu_mac_overflow",
 183        "tpc_spu_addsub_overflow",
 184        "tpc_spu_abs_overflow",
 185        "tpc_spu_fp_dst_nan_inf",
 186        "tpc_spu_fp_dst_denorm",
 187        "tpc_vpu_mac_overflow",
 188        "tpc_vpu_addsub_overflow",
 189        "tpc_vpu_abs_overflow",
 190        "tpc_vpu_fp_dst_nan_inf",
 191        "tpc_vpu_fp_dst_denorm",
 192        "tpc_assertions",
 193        "tpc_illegal_instruction",
 194        "tpc_pc_wrap_around",
 195        "tpc_qm_sw_err",
 196        "tpc_hbw_rresp_err",
 197        "tpc_hbw_bresp_err",
 198        "tpc_lbw_rresp_err",
 199        "tpc_lbw_bresp_err"
 200};
 201
 202static const char * const
 203gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
 204        "PQ AXI HBW error",
 205        "CQ AXI HBW error",
 206        "CP AXI HBW error",
 207        "CP error due to undefined OPCODE",
 208        "CP encountered STOP OPCODE",
 209        "CP AXI LBW error",
 210        "CP WRREG32 or WRBULK returned error",
 211        "N/A",
 212        "FENCE 0 inc over max value and clipped",
 213        "FENCE 1 inc over max value and clipped",
 214        "FENCE 2 inc over max value and clipped",
 215        "FENCE 3 inc over max value and clipped",
 216        "FENCE 0 dec under min value and clipped",
 217        "FENCE 1 dec under min value and clipped",
 218        "FENCE 2 dec under min value and clipped",
 219        "FENCE 3 dec under min value and clipped"
 220};
 221
 222static const char * const
 223gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
 224        "Choice push while full error",
 225        "Choice Q watchdog error",
 226        "MSG AXI LBW returned with error"
 227};
 228
 229enum gaudi_sm_sei_cause {
 230        GAUDI_SM_SEI_SO_OVERFLOW,
 231        GAUDI_SM_SEI_LBW_4B_UNALIGNED,
 232        GAUDI_SM_SEI_AXI_RESPONSE_ERR
 233};
 234
 235static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 236        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
 237        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
 238        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
 239        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
 240        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
 241        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
 242        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
 243        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
 244        QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
 245        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
 246        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
 247        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
 248        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
 249        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
 250        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
 251        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
 252        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
 253        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
 254        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
 255        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
 256        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
 257        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
 258        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
 259        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
 260        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
 261        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
 262        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
 263        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
 264        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
 265        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
 266        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
 267        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
 268        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
 269        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
 270        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
 271        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
 272        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
 273        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
 274        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
 275        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
 276        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
 277        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
 278        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
 279        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
 280        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
 281        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
 282        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
 283        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
 284        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
 285        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
 286        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
 287        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
 288        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
 289        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
 290        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
 291        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
 292        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
 293        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
 294        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
 295        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
 296        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
 297        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
 298        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
 299        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
 300        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
 301        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
 302        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
 303        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
 304        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
 305        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
 306        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
 307        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
 308        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
 309        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
 310        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
 311        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
 312        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
 313        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
 314        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
 315        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
 316        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
 317        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
 318        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
 319        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
 320        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
 321        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
 322        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
 323        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
 324        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
 325        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
 326        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
 327        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
 328        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
 329        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
 330        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
 331        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
 332        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
 333        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
 334        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
 335        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
 336        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
 337        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
 338        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
 339        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
 340        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
 341        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
 342        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
 343        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
 344        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
 345        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
 346        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
 347        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
 348        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
 349};
 350
 351struct ecc_info_extract_params {
 352        u64 block_address;
 353        u32 num_memories;
 354        bool derr;
 355        bool disable_clock_gating;
 356};
 357
 358static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
 359                                                                u64 phys_addr);
 360static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 361                                        struct hl_cs_job *job);
 362static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 363                                        u32 size, u64 val);
 364static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
 365                                        u32 num_regs, u32 val);
 366static int gaudi_schedule_register_memset(struct hl_device *hdev,
 367                u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 368static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 369                                u32 tpc_id);
 370static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
 371static int gaudi_cpucp_info_get(struct hl_device *hdev);
 372static void gaudi_disable_clock_gating(struct hl_device *hdev);
 373static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 374static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 375                                u32 size, bool eb);
 376static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 377                                struct hl_gen_wait_properties *prop);
 378
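/*
 * get_collective_mode() - external queues act as collective masters, the
 * DMA5, TPC7 and NIC queues act as collective slaves, and all other queues
 * do not support collective operations.
 */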
 379static inline enum hl_collective_mode
 380get_collective_mode(struct hl_device *hdev, u32 queue_id)
 381{
 382        if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
 383                return HL_COLLECTIVE_MASTER;
 384
 385        if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
 386                        queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
 387                return HL_COLLECTIVE_SLAVE;
 388
 389        if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
 390                        queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
 391                return HL_COLLECTIVE_SLAVE;
 392
 393        if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
 394                        queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
 395                return HL_COLLECTIVE_SLAVE;
 396
 397        return HL_COLLECTIVE_NOT_SUPPORTED;
 398}
 399
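/* Pick the default maximum and DC power values according to the card type */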
 400static inline void set_default_power_values(struct hl_device *hdev)
 401{
 402        struct asic_fixed_properties *prop = &hdev->asic_prop;
 403
 404        if (hdev->card_type == cpucp_card_type_pmc) {
 405                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
 406                prop->dc_power_default = DC_POWER_DEFAULT_PMC;
 407        } else {
 408                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
 409                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
 410        }
 411}
 412
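/*
 * gaudi_set_fixed_properties() - fill the ASIC fixed properties: per-queue
 * properties, DRAM/SRAM ranges, MMU configuration, default power values and
 * the SOB/monitor ranges reserved for sync streams and collective operations.
 */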
 413static int gaudi_set_fixed_properties(struct hl_device *hdev)
 414{
 415        struct asic_fixed_properties *prop = &hdev->asic_prop;
 416        u32 num_sync_stream_queues = 0;
 417        int i;
 418
 419        prop->max_queues = GAUDI_QUEUE_ID_SIZE;
 420        prop->hw_queues_props = kcalloc(prop->max_queues,
 421                        sizeof(struct hw_queue_properties),
 422                        GFP_KERNEL);
 423
 424        if (!prop->hw_queues_props)
 425                return -ENOMEM;
 426
 427        for (i = 0 ; i < prop->max_queues ; i++) {
 428                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
 429                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 430                        prop->hw_queues_props[i].driver_only = 0;
 431                        prop->hw_queues_props[i].supports_sync_stream = 1;
 432                        prop->hw_queues_props[i].cb_alloc_flags =
 433                                CB_ALLOC_KERNEL;
 434                        num_sync_stream_queues++;
 435                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 436                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 437                        prop->hw_queues_props[i].driver_only = 1;
 438                        prop->hw_queues_props[i].supports_sync_stream = 0;
 439                        prop->hw_queues_props[i].cb_alloc_flags =
 440                                CB_ALLOC_KERNEL;
 441                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
 442                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 443                        prop->hw_queues_props[i].driver_only = 0;
 444                        prop->hw_queues_props[i].supports_sync_stream = 0;
 445                        prop->hw_queues_props[i].cb_alloc_flags =
 446                                CB_ALLOC_USER;
 447
 448                }
 449                prop->hw_queues_props[i].collective_mode =
 450                                                get_collective_mode(hdev, i);
 451        }
 452
 453        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 454        prop->collective_first_sob = 0;
 455        prop->collective_first_mon = 0;
 456
 457        /* 2 SOBs per internal queue stream are reserved for collective */
 458        prop->sync_stream_first_sob =
 459                        ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
 460                        * QMAN_STREAMS * HL_RSVD_SOBS;
 461
 462        /* 1 monitor per internal queue stream is reserved for collective
 463         * 2 monitors per external queue stream are reserved for collective
 464         */
 465        prop->sync_stream_first_mon =
 466                        (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
 467                        (NUMBER_OF_EXT_HW_QUEUES * 2);
 468
 469        prop->dram_base_address = DRAM_PHYS_BASE;
 470        prop->dram_size = GAUDI_HBM_SIZE_32GB;
 471        prop->dram_end_address = prop->dram_base_address +
 472                                        prop->dram_size;
 473        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
 474
 475        prop->sram_base_address = SRAM_BASE_ADDR;
 476        prop->sram_size = SRAM_SIZE;
 477        prop->sram_end_address = prop->sram_base_address +
 478                                        prop->sram_size;
 479        prop->sram_user_base_address = prop->sram_base_address +
 480                                        SRAM_USER_BASE_OFFSET;
 481
 482        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
 483        if (hdev->pldm)
 484                prop->mmu_pgt_size = 0x800000; /* 8MB */
 485        else
 486                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
 487        prop->mmu_pte_size = HL_PTE_SIZE;
 488        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
 489        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 490        prop->dram_page_size = PAGE_SIZE_2MB;
 491        prop->dram_supports_virtual_memory = false;
 492
 493        prop->pmmu.hop0_shift = HOP0_SHIFT;
 494        prop->pmmu.hop1_shift = HOP1_SHIFT;
 495        prop->pmmu.hop2_shift = HOP2_SHIFT;
 496        prop->pmmu.hop3_shift = HOP3_SHIFT;
 497        prop->pmmu.hop4_shift = HOP4_SHIFT;
 498        prop->pmmu.hop0_mask = HOP0_MASK;
 499        prop->pmmu.hop1_mask = HOP1_MASK;
 500        prop->pmmu.hop2_mask = HOP2_MASK;
 501        prop->pmmu.hop3_mask = HOP3_MASK;
 502        prop->pmmu.hop4_mask = HOP4_MASK;
 503        prop->pmmu.start_addr = VA_HOST_SPACE_START;
 504        prop->pmmu.end_addr =
 505                        (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
 506        prop->pmmu.page_size = PAGE_SIZE_4KB;
 507        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
 508
 509        /* PMMU and HPMMU are the same except for the page size */
 510        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
 511        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
 512
 513        /* shifts and masks are the same in PMMU and DMMU */
 514        memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
 515        prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
 516        prop->dmmu.end_addr = VA_HOST_SPACE_END;
 517        prop->dmmu.page_size = PAGE_SIZE_2MB;
 518
 519        prop->cfg_size = CFG_SIZE;
 520        prop->max_asid = MAX_ASID;
 521        prop->num_of_events = GAUDI_EVENT_SIZE;
 522        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 523
 524        set_default_power_values(hdev);
 525
 526        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
 527        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
 528
 529        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 530        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 531
 532        strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 533                                        CARD_NAME_MAX_LEN);
 534
 535        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
 536
 537        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
 538                        prop->sync_stream_first_sob +
 539                        (num_sync_stream_queues * HL_RSVD_SOBS);
 540        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
 541                        prop->sync_stream_first_mon +
 542                        (num_sync_stream_queues * HL_RSVD_MONS);
 543
 544        prop->first_available_user_msix_interrupt = USHRT_MAX;
 545
 546        for (i = 0 ; i < HL_MAX_DCORES ; i++)
 547                prop->first_available_cq[i] = USHRT_MAX;
 548
 549        prop->fw_cpu_boot_dev_sts0_valid = false;
 550        prop->fw_cpu_boot_dev_sts1_valid = false;
 551        prop->hard_reset_done_by_fw = false;
 552        prop->gic_interrupts_enable = true;
 553
 554        return 0;
 555}
 556
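/*
 * Map the SRAM, CFG and HBM PCI BARs (HBM as write-combined) and derive the
 * register I/O base from the CFG BAR.
 */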
 557static int gaudi_pci_bars_map(struct hl_device *hdev)
 558{
 559        static const char * const name[] = {"SRAM", "CFG", "HBM"};
 560        bool is_wc[3] = {false, false, true};
 561        int rc;
 562
 563        rc = hl_pci_bars_map(hdev, name, is_wc);
 564        if (rc)
 565                return rc;
 566
 567        hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
 568                        (CFG_BASE - SPI_FLASH_BASE_ADDR);
 569
 570        return 0;
 571}
 572
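/*
 * gaudi_set_hbm_bar_base() - move inbound PCI region 2 so the HBM BAR points
 * at @addr. Returns the previous BAR base, or U64_MAX if the iATU is owned by
 * the firmware or the region could not be configured.
 */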
 573static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 574{
 575        struct gaudi_device *gaudi = hdev->asic_specific;
 576        struct hl_inbound_pci_region pci_region;
 577        u64 old_addr = addr;
 578        int rc;
 579
 580        if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
 581                return old_addr;
 582
 583        if (hdev->asic_prop.iatu_done_by_fw)
 584                return U64_MAX;
 585
 586        /* Inbound Region 2 - Bar 4 - Point to HBM */
 587        pci_region.mode = PCI_BAR_MATCH_MODE;
 588        pci_region.bar = HBM_BAR_ID;
 589        pci_region.addr = addr;
 590        rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
 591        if (rc)
 592                return U64_MAX;
 593
 594        if (gaudi) {
 595                old_addr = gaudi->hbm_bar_cur_addr;
 596                gaudi->hbm_bar_cur_addr = addr;
 597        }
 598
 599        return old_addr;
 600}
 601
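/*
 * gaudi_init_iatu() - configure the PCI iATU: inbound regions for SRAM+CFG,
 * SPI flash and HBM, plus an outbound region towards host memory. Skipped
 * entirely when the firmware has already configured the iATU.
 */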
 602static int gaudi_init_iatu(struct hl_device *hdev)
 603{
 604        struct hl_inbound_pci_region inbound_region;
 605        struct hl_outbound_pci_region outbound_region;
 606        int rc;
 607
 608        if (hdev->asic_prop.iatu_done_by_fw)
 609                return 0;
 610
 611        /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
 612        inbound_region.mode = PCI_BAR_MATCH_MODE;
 613        inbound_region.bar = SRAM_BAR_ID;
 614        inbound_region.addr = SRAM_BASE_ADDR;
 615        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
 616        if (rc)
 617                goto done;
 618
 619        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
 620        inbound_region.mode = PCI_BAR_MATCH_MODE;
 621        inbound_region.bar = CFG_BAR_ID;
 622        inbound_region.addr = SPI_FLASH_BASE_ADDR;
 623        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
 624        if (rc)
 625                goto done;
 626
 627        /* Inbound Region 2 - Bar 4 - Point to HBM */
 628        inbound_region.mode = PCI_BAR_MATCH_MODE;
 629        inbound_region.bar = HBM_BAR_ID;
 630        inbound_region.addr = DRAM_PHYS_BASE;
 631        rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
 632        if (rc)
 633                goto done;
 634
 635        hdev->asic_funcs->set_dma_mask_from_fw(hdev);
 636
 637        /* Outbound Region 0 - Point to Host */
 638        outbound_region.addr = HOST_PHYS_BASE;
 639        outbound_region.size = HOST_PHYS_SIZE;
 640        rc = hl_pci_set_outbound_region(hdev, &outbound_region);
 641
 642done:
 643        return rc;
 644}
 645
 646static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
 647{
 648        return RREG32(mmHW_STATE);
 649}
 650
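/*
 * gaudi_early_init() - first initialization stage: set the fixed properties,
 * sanity-check the BAR sizes, initialize PCI access, read the preboot status
 * and reset the device if its H/W state is dirty.
 */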
 651static int gaudi_early_init(struct hl_device *hdev)
 652{
 653        struct asic_fixed_properties *prop = &hdev->asic_prop;
 654        struct pci_dev *pdev = hdev->pdev;
 655        u32 fw_boot_status;
 656        int rc;
 657
 658        rc = gaudi_set_fixed_properties(hdev);
 659        if (rc) {
 660                dev_err(hdev->dev, "Failed setting fixed properties\n");
 661                return rc;
 662        }
 663
 664        /* Check BAR sizes */
 665        if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
 666                dev_err(hdev->dev,
 667                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 668                        SRAM_BAR_ID,
 669                        (unsigned long long) pci_resource_len(pdev,
 670                                                        SRAM_BAR_ID),
 671                        SRAM_BAR_SIZE);
 672                rc = -ENODEV;
 673                goto free_queue_props;
 674        }
 675
 676        if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
 677                dev_err(hdev->dev,
 678                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 679                        CFG_BAR_ID,
 680                        (unsigned long long) pci_resource_len(pdev,
 681                                                                CFG_BAR_ID),
 682                        CFG_BAR_SIZE);
 683                rc = -ENODEV;
 684                goto free_queue_props;
 685        }
 686
 687        prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 688
 689        /* If FW security is enabled at this point it means no access to ELBI */
 690        if (hdev->asic_prop.fw_security_enabled) {
 691                hdev->asic_prop.iatu_done_by_fw = true;
 692
 693                /*
 694                 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
 695                 * the decision can only be taken based on PCI ID security.
 696                 */
 697                hdev->asic_prop.gic_interrupts_enable = false;
 698                goto pci_init;
 699        }
 700
 701        rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
 702                                &fw_boot_status);
 703        if (rc)
 704                goto free_queue_props;
 705
 706        /* Check whether FW is configuring iATU */
 707        if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
 708                        (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
 709                hdev->asic_prop.iatu_done_by_fw = true;
 710
 711pci_init:
 712        rc = hl_pci_init(hdev);
 713        if (rc)
 714                goto free_queue_props;
 715
 716        /* Before continuing with the initialization, we need to read the preboot
 717         * version to determine whether we run with security-enabled firmware
 718         */
 719        rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 720                                        mmCPU_BOOT_DEV_STS0,
 721                                        mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
 722                                        mmCPU_BOOT_ERR1,
 723                                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 724        if (rc) {
 725                if (hdev->reset_on_preboot_fail)
 726                        hdev->asic_funcs->hw_fini(hdev, true);
 727                goto pci_fini;
 728        }
 729
 730        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 731                dev_info(hdev->dev,
 732                        "H/W state is dirty, must reset before initializing\n");
 733                hdev->asic_funcs->hw_fini(hdev, true);
 734        }
 735
 736        return 0;
 737
 738pci_fini:
 739        hl_pci_fini(hdev);
 740free_queue_props:
 741        kfree(hdev->asic_prop.hw_queues_props);
 742        return rc;
 743}
 744
 745static int gaudi_early_fini(struct hl_device *hdev)
 746{
 747        kfree(hdev->asic_prop.hw_queues_props);
 748        hl_pci_fini(hdev);
 749
 750        return 0;
 751}
 752
 753/**
 754 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 755 *
 756 * @hdev: pointer to hl_device structure
 757 * Return: 0 on success, negative error code on failure.
 758 */
 759static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 760{
 761        struct asic_fixed_properties *prop = &hdev->asic_prop;
 762        u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
 763        u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 764        int rc;
 765
 766        if (hdev->asic_prop.fw_security_enabled) {
 767                rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
 768
 769                if (rc)
 770                        return rc;
 771
 772                freq = pll_freq_arr[2];
 773        } else {
 774                /* Backward compatibility */
 775                div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
 776                div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
 777                nr = RREG32(mmPSOC_CPU_PLL_NR);
 778                nf = RREG32(mmPSOC_CPU_PLL_NF);
 779                od = RREG32(mmPSOC_CPU_PLL_OD);
 780
 781                if (div_sel == DIV_SEL_REF_CLK ||
 782                                div_sel == DIV_SEL_DIVIDED_REF) {
 783                        if (div_sel == DIV_SEL_REF_CLK)
 784                                freq = PLL_REF_CLK;
 785                        else
 786                                freq = PLL_REF_CLK / (div_fctr + 1);
 787                } else if (div_sel == DIV_SEL_PLL_CLK ||
 788                        div_sel == DIV_SEL_DIVIDED_PLL) {
 789                        pll_clk = PLL_REF_CLK * (nf + 1) /
 790                                        ((nr + 1) * (od + 1));
 791                        if (div_sel == DIV_SEL_PLL_CLK)
 792                                freq = pll_clk;
 793                        else
 794                                freq = pll_clk / (div_fctr + 1);
 795                } else {
 796                        dev_warn(hdev->dev,
 797                                "Received invalid div select value: %d",
 798                                div_sel);
 799                        freq = 0;
 800                }
 801        }
 802
 803        prop->psoc_timestamp_frequency = freq;
 804        prop->psoc_pci_pll_nr = nr;
 805        prop->psoc_pci_pll_nf = nf;
 806        prop->psoc_pci_pll_od = od;
 807        prop->psoc_pci_pll_div_factor = div_fctr;
 808
 809        return 0;
 810}
 811
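/*
 * _gaudi_init_tpc_mem() - DMA the TPC kernel binary from host memory to the
 * user SRAM area via QMAN0, then run it on every TPC engine.
 */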
 812static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 813                dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
 814{
 815        struct asic_fixed_properties *prop = &hdev->asic_prop;
 816        struct packet_lin_dma *init_tpc_mem_pkt;
 817        struct hl_cs_job *job;
 818        struct hl_cb *cb;
 819        u64 dst_addr;
 820        u32 cb_size, ctl;
 821        u8 tpc_id;
 822        int rc;
 823
 824        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 825        if (!cb)
 826                return -EFAULT;
 827
 828        init_tpc_mem_pkt = cb->kernel_address;
 829        cb_size = sizeof(*init_tpc_mem_pkt);
 830        memset(init_tpc_mem_pkt, 0, cb_size);
 831
 832        init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
 833
 834        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
 835        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
 836        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
 837        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 838
 839        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
 840
 841        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
 842        dst_addr = (prop->sram_user_base_address &
 843                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
 844                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
 845        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
 846
 847        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
 848        if (!job) {
 849                dev_err(hdev->dev, "Failed to allocate a new job\n");
 850                rc = -ENOMEM;
 851                goto release_cb;
 852        }
 853
 854        job->id = 0;
 855        job->user_cb = cb;
 856        atomic_inc(&job->user_cb->cs_cnt);
 857        job->user_cb_size = cb_size;
 858        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
 859        job->patched_cb = job->user_cb;
 860        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
 861
 862        hl_debugfs_add_job(hdev, job);
 863
 864        rc = gaudi_send_job_on_qman0(hdev, job);
 865
 866        if (rc)
 867                goto free_job;
 868
 869        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
 870                rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
 871                if (rc)
 872                        break;
 873        }
 874
 875free_job:
 876        hl_userptr_delete_list(hdev, &job->userptr_list);
 877        hl_debugfs_remove_job(hdev, job);
 878        kfree(job);
 879        atomic_dec(&cb->cs_cnt);
 880
 881release_cb:
 882        hl_cb_put(cb);
 883        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 884
 885        return rc;
 886}
 887
 888/*
 889 * gaudi_init_tpc_mem() - Initialize TPC memories.
 890 * @hdev: Pointer to hl_device structure.
 891 *
 892 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 893 *
 894 * Return: 0 for success, negative value for error.
 895 */
 896static int gaudi_init_tpc_mem(struct hl_device *hdev)
 897{
 898        const struct firmware *fw;
 899        size_t fw_size;
 900        void *cpu_addr;
 901        dma_addr_t dma_handle;
 902        int rc, count = 5;
 903
 904again:
 905        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
 906        if (rc == -EINTR && count-- > 0) {
 907                msleep(50);
 908                goto again;
 909        }
 910
 911        if (rc) {
 912                dev_err(hdev->dev, "Failed to load firmware file %s\n",
 913                                GAUDI_TPC_FW_FILE);
 914                goto out;
 915        }
 916
 917        fw_size = fw->size;
 918        cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
 919                        &dma_handle, GFP_KERNEL | __GFP_ZERO);
 920        if (!cpu_addr) {
 921                dev_err(hdev->dev,
 922                        "Failed to allocate %zu of dma memory for TPC kernel\n",
 923                        fw_size);
 924                rc = -ENOMEM;
 925                goto out;
 926        }
 927
 928        memcpy(cpu_addr, fw->data, fw_size);
 929
 930        rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
 931
 932        hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
 933                        dma_handle);
 934
 935out:
 936        release_firmware(fw);
 937        return rc;
 938}
 939
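/*
 * gaudi_collective_map_sobs() - assign a SOB from the stream's current SOB
 * group to every collective slave queue of that stream: all NIC queues plus
 * the DMA5 and TPC7 reduction queues.
 */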
 940static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
 941{
 942        struct gaudi_device *gaudi = hdev->asic_specific;
 943        struct gaudi_collective_properties *prop = &gaudi->collective_props;
 944        struct hl_hw_queue *q;
 945        u32 i, sob_id, sob_group_id, queue_id;
 946
 947        /* Iterate through SOB groups and assign a SOB for each slave queue */
 948        sob_group_id =
 949                stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
 950        sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
 951
 952        queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
 953        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
 954                q = &hdev->kernel_queues[queue_id + (4 * i)];
 955                q->sync_stream_prop.collective_sob_id = sob_id + i;
 956        }
 957
 958        /* Both DMA5 and TPC7 use the same resources since only a single
 959         * engine needs to participate in the reduction process
 960         */
 961        queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
 962        q = &hdev->kernel_queues[queue_id];
 963        q->sync_stream_prop.collective_sob_id =
 964                        sob_id + NIC_NUMBER_OF_ENGINES;
 965
 966        queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
 967        q = &hdev->kernel_queues[queue_id];
 968        q->sync_stream_prop.collective_sob_id =
 969                        sob_id + NIC_NUMBER_OF_ENGINES;
 970}
 971
 972static void gaudi_sob_group_hw_reset(struct kref *ref)
 973{
 974        struct gaudi_hw_sob_group *hw_sob_group =
 975                container_of(ref, struct gaudi_hw_sob_group, kref);
 976        struct hl_device *hdev = hw_sob_group->hdev;
 977        u64 base_addr;
 978        int rc;
 979
 980        base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
 981                        hw_sob_group->base_sob_id * 4;
 982        rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
 983                        base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
 984        if (rc)
 985                dev_err(hdev->dev,
 986                        "failed resetting sob group - sob base %u, count %u",
 987                        hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
 988
 989        kref_init(&hw_sob_group->kref);
 990}
 991
 992static void gaudi_sob_group_reset_error(struct kref *ref)
 993{
 994        struct gaudi_hw_sob_group *hw_sob_group =
 995                container_of(ref, struct gaudi_hw_sob_group, kref);
 996        struct hl_device *hdev = hw_sob_group->hdev;
 997
 998        dev_crit(hdev->dev,
 999                "SOB release shouldn't be called here, base_sob_id: %d\n",
1000                hw_sob_group->base_sob_id);
1001}
1002
1003static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1004{
1005        struct gaudi_collective_properties *prop;
1006        int i;
1007
1008        prop = &gaudi->collective_props;
1009
1010        memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1011
1012        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1013                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1014                        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1015                                        BIT(i % HL_MAX_SOBS_PER_MONITOR);
1016        /* Set collective engine bit */
1017        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1018                                BIT(i % HL_MAX_SOBS_PER_MONITOR);
1019}
1020
1021static int gaudi_collective_init(struct hl_device *hdev)
1022{
1023        u32 i, sob_id, reserved_sobs_per_group;
1024        struct gaudi_collective_properties *prop;
1025        struct gaudi_device *gaudi;
1026
1027        gaudi = hdev->asic_specific;
1028        prop = &gaudi->collective_props;
1029        sob_id = hdev->asic_prop.collective_first_sob;
1030
1031        /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1032        reserved_sobs_per_group =
1033                ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1034
1035        /* Init SOB groups */
1036        for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1037                prop->hw_sob_group[i].hdev = hdev;
1038                prop->hw_sob_group[i].base_sob_id = sob_id;
1039                sob_id += reserved_sobs_per_group;
1040                gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1041        }
1042
1043        for (i = 0 ; i < QMAN_STREAMS; i++) {
1044                prop->next_sob_group_val[i] = 1;
1045                prop->curr_sob_group_idx[i] = 0;
1046                gaudi_collective_map_sobs(hdev, i);
1047        }
1048
1049        gaudi_collective_mstr_sob_mask_set(gaudi);
1050
1051        return 0;
1052}
1053
1054static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1055{
1056        struct gaudi_device *gaudi = hdev->asic_specific;
1057        struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1058
1059        kref_put(&cprop->hw_sob_group[sob_group].kref,
1060                                        gaudi_sob_group_hw_reset);
1061}
1062
1063static void gaudi_collective_master_init_job(struct hl_device *hdev,
1064                struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1065{
1066        u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1067        struct gaudi_collective_properties *cprop;
1068        struct hl_gen_wait_properties wait_prop;
1069        struct hl_sync_stream_properties *prop;
1070        struct gaudi_device *gaudi;
1071
1072        gaudi = hdev->asic_specific;
1073        cprop = &gaudi->collective_props;
1074        queue_id = job->hw_queue_id;
1075        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1076
1077        master_sob_base =
1078                cprop->hw_sob_group[sob_group_offset].base_sob_id;
1079        master_monitor = prop->collective_mstr_mon_id[0];
1080
1081        cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1082
1083        dev_dbg(hdev->dev,
1084                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1085                master_sob_base, cprop->mstr_sob_mask[0],
1086                cprop->next_sob_group_val[stream],
1087                master_monitor, queue_id);
1088
1089        wait_prop.data = (void *) job->patched_cb;
1090        wait_prop.sob_base = master_sob_base;
1091        wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1092        wait_prop.sob_val = cprop->next_sob_group_val[stream];
1093        wait_prop.mon_id = master_monitor;
1094        wait_prop.q_idx = queue_id;
1095        wait_prop.size = cb_size;
1096        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1097
1098        master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1099        master_monitor = prop->collective_mstr_mon_id[1];
1100
1101        dev_dbg(hdev->dev,
1102                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1103                master_sob_base, cprop->mstr_sob_mask[1],
1104                cprop->next_sob_group_val[stream],
1105                master_monitor, queue_id);
1106
1107        wait_prop.sob_base = master_sob_base;
1108        wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1109        wait_prop.mon_id = master_monitor;
1110        wait_prop.size = cb_size;
1111        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1112}
1113
1114static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1115                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1116{
1117        struct hl_gen_wait_properties wait_prop;
1118        struct hl_sync_stream_properties *prop;
1119        u32 queue_id, cb_size = 0;
1120
1121        queue_id = job->hw_queue_id;
1122        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1123
1124        /* Add to wait CBs using slave monitor */
1125        wait_prop.data = (void *) job->user_cb;
1126        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1127        wait_prop.sob_mask = 0x1;
1128        wait_prop.sob_val = cs_cmpl->sob_val;
1129        wait_prop.mon_id = prop->collective_slave_mon_id;
1130        wait_prop.q_idx = queue_id;
1131        wait_prop.size = cb_size;
1132
1133        dev_dbg(hdev->dev,
1134                "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1135                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1136                prop->collective_slave_mon_id, queue_id);
1137
1138        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1139
1140        dev_dbg(hdev->dev,
1141                "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1142                prop->collective_sob_id, queue_id);
1143
1144        cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1145                        prop->collective_sob_id, cb_size, false);
1146}
1147
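/*
 * gaudi_collective_wait_init_cs() - initialize the jobs of a collective wait
 * CS: the master job waits on the slaves' SOB group, the slave jobs wait on
 * the signal CS SOB, and the SOB group refcount/index are advanced with
 * wraparound handling.
 */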
1148static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1149{
1150        struct hl_cs_compl *signal_cs_cmpl =
1151                container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1152        struct hl_cs_compl *cs_cmpl =
1153                container_of(cs->fence, struct hl_cs_compl, base_fence);
1154        struct gaudi_collective_properties *cprop;
1155        u32 stream, queue_id, sob_group_offset;
1156        struct gaudi_device *gaudi;
1157        struct hl_device *hdev;
1158        struct hl_cs_job *job;
1159        struct hl_ctx *ctx;
1160
1161        ctx = cs->ctx;
1162        hdev = ctx->hdev;
1163        gaudi = hdev->asic_specific;
1164        cprop = &gaudi->collective_props;
1165
1166        /* copy the SOB id and value of the signal CS */
1167        cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1168        cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1169
1170        /* Calculate the stream from collective master queue (1st job) */
1171        job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1172        stream = job->hw_queue_id % 4;
1173        sob_group_offset =
1174                stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1175
1176        list_for_each_entry(job, &cs->job_list, cs_node) {
1177                queue_id = job->hw_queue_id;
1178
1179                if (hdev->kernel_queues[queue_id].collective_mode ==
1180                                HL_COLLECTIVE_MASTER)
1181                        gaudi_collective_master_init_job(hdev, job, stream,
1182                                                sob_group_offset);
1183                else
1184                        gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1185        }
1186
1187        cs_cmpl->sob_group = sob_group_offset;
1188
1189        /* Handle sob group kref and wraparound */
1190        kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1191        cprop->next_sob_group_val[stream]++;
1192
1193        if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1194                /*
1195                 * Decrement as we reached the max value.
1196                 * The release function won't be called here as we've
1197                 * just incremented the refcount.
1198                 */
1199                kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1200                                gaudi_sob_group_reset_error);
1201                cprop->next_sob_group_val[stream] = 1;
1202                /* only two SOBs are currently in use */
1203                cprop->curr_sob_group_idx[stream] =
1204                        (cprop->curr_sob_group_idx[stream] + 1) &
1205                                                        (HL_RSVD_SOBS - 1);
1206
1207                gaudi_collective_map_sobs(hdev, stream);
1208
1209                dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1210                                cprop->curr_sob_group_idx[stream], stream);
1211        }
1212
1213        /* Increment kref since all slave queues are now waiting on it */
1214        kref_get(&cs_cmpl->hw_sob->kref);
1215        /*
1216         * Must put the signal fence after the SOB refcnt increment so
1217         * the SOB refcnt won't turn 0 and reset the SOB before the
1218         * wait CS was submitted.
1219         */
1220        mb();
1221        hl_fence_put(cs->signal_fence);
1222        cs->signal_fence = NULL;
1223}
1224
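/*
 * gaudi_collective_wait_create_job() - allocate a job and a kernel CB for a
 * single master or slave queue of a collective wait CS and add it to the CS
 * job list. The CB size is derived from the packets each role must generate.
 */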
1225static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1226                struct hl_ctx *ctx, struct hl_cs *cs,
1227                enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1228{
1229        struct hw_queue_properties *hw_queue_prop;
1230        struct hl_cs_counters_atomic *cntr;
1231        struct hl_cs_job *job;
1232        struct hl_cb *cb;
1233        u32 cb_size;
1234        bool patched_cb;
1235
1236        cntr = &hdev->aggregated_cs_counters;
1237
1238        if (mode == HL_COLLECTIVE_MASTER) {
1239                /* CB size of collective master queue contains
1240                 * 4 msg short packets for monitor 1 configuration
1241                 * 1 fence packet
1242                 * 4 msg short packets for monitor 2 configuration
1243                 * 1 fence packet
1244                 * 2 msg prot packets for completion and MSI-X
1245                 */
1246                cb_size = sizeof(struct packet_msg_short) * 8 +
1247                                sizeof(struct packet_fence) * 2 +
1248                                sizeof(struct packet_msg_prot) * 2;
1249                patched_cb = true;
1250        } else {
1251                /* CB size of collective slave queues contains
1252                 * 4 msg short packets for monitor configuration
1253                 * 1 fence packet
1254                 * 1 additional msg short packet for sob signal
1255                 */
1256                cb_size = sizeof(struct packet_msg_short) * 5 +
1257                                sizeof(struct packet_fence);
1258                patched_cb = false;
1259        }
1260
1261        hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1262        job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1263        if (!job) {
1264                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1265                atomic64_inc(&cntr->out_of_mem_drop_cnt);
1266                dev_err(hdev->dev, "Failed to allocate a new job\n");
1267                return -ENOMEM;
1268        }
1269
1270        /* Allocate an internal mapped CB for non-patched CBs */
1271        cb = hl_cb_kernel_create(hdev, cb_size,
1272                        hdev->mmu_enable && !patched_cb);
1273        if (!cb) {
1274                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1275                atomic64_inc(&cntr->out_of_mem_drop_cnt);
1276                kfree(job);
1277                return -EFAULT;
1278        }
1279
1280        job->id = 0;
1281        job->cs = cs;
1282        job->user_cb = cb;
1283        atomic_inc(&job->user_cb->cs_cnt);
1284        job->user_cb_size = cb_size;
1285        job->hw_queue_id = queue_id;
1286
1287        /*
1288         * No need for parsing, the user CB is the patched CB.
1289         * We call hl_cb_destroy() for two reasons: we don't need the CB in
1290         * the CB idr anymore, and we want to decrement its refcount as it
1291         * was incremented inside hl_cb_kernel_create().
1292         */
1293        if (patched_cb)
1294                job->patched_cb = job->user_cb;
1295        else
1296                job->patched_cb = NULL;
1297
1298        job->job_cb_size = job->user_cb_size;
1299        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1300
1301        /* increment refcount, as we get a completion for external queues */
1302        if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1303                cs_get(cs);
1304
1305        cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1306
1307        list_add_tail(&job->cs_node, &cs->job_list);
1308
1309        hl_debugfs_add_job(hdev, job);
1310
1311        return 0;
1312}
1313
1314static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1315                struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1316                u32 collective_engine_id)
1317{
1318        struct gaudi_device *gaudi = hdev->asic_specific;
1319        struct hw_queue_properties *hw_queue_prop;
1320        u32 queue_id, collective_queue, num_jobs;
1321        u32 stream, nic_queue, nic_idx = 0;
1322        bool skip;
1323        int i, rc = 0;
1324
1325        /* Verify wait queue id is configured as master */
1326        hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1327        if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1328                dev_err(hdev->dev,
1329                        "Queue %d is not configured as collective master\n",
1330                        wait_queue_id);
1331                return -EINVAL;
1332        }
1333
1334        /* Verify engine id is supported */
1335        if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1336                        collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1337                dev_err(hdev->dev,
1338                        "Collective wait does not support engine %u\n",
1339                        collective_engine_id);
1340                return -EINVAL;
1341        }
1342
1343        stream = wait_queue_id % 4;
1344
1345        if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1346                collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1347        else
1348                collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1349
1350        num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1351        nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1352
1353        /* The first job goes to the collective master queue; it waits for
1354         * the collective slave queues to finish execution.
1355         * The synchronization is done using two monitors:
1356         * the first monitor for NICs 0-7, the second for NICs 8-9 and the
1357         * reduction engine (DMA5/TPC7).
1358         *
1359         * The rest of the jobs go to the collective slave queues, which
1360         * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1361         */
1362        for (i = 0 ; i < num_jobs ; i++) {
1363                if (i == 0) {
1364                        queue_id = wait_queue_id;
1365                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1366                                HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1367                } else {
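                            /*
                             * Slave jobs go to the NIC queues first; skip NICs
                             * whose QMANs were not initialized (disabled
                             * ports). The final job goes to the reduction
                             * engine queue (DMA5/TPC7).
                             */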
1368                        if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1369                                if (gaudi->hw_cap_initialized &
1370                                        BIT(HW_CAP_NIC_SHIFT + nic_idx))
1371                                        skip = false;
1372                                else
1373                                        skip = true;
1374
1375                                queue_id = nic_queue;
1376                                nic_queue += 4;
1377                                nic_idx++;
1378
1379                                if (skip)
1380                                        continue;
1381                        } else {
1382                                queue_id = collective_queue;
1383                        }
1384
1385                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1386                                HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1387                }
1388
1389                if (rc)
1390                        return rc;
1391        }
1392
1393        return rc;
1394}
1395
1396static int gaudi_late_init(struct hl_device *hdev)
1397{
1398        struct gaudi_device *gaudi = hdev->asic_specific;
1399        int rc;
1400
1401        rc = gaudi->cpucp_info_get(hdev);
1402        if (rc) {
1403                dev_err(hdev->dev, "Failed to get cpucp info\n");
1404                return rc;
1405        }
1406
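            /*
             * On PCI cards only 8 of the 10 NIC ports are usable, so mask out
             * ports 0 and 1 and stop their QMANs (the two QMANs of the NIC0
             * macro).
             */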
1407        if ((hdev->card_type == cpucp_card_type_pci) &&
1408                        (hdev->nic_ports_mask & 0x3)) {
1409                dev_info(hdev->dev,
1410                        "PCI card detected, only 8 ports are enabled\n");
1411                hdev->nic_ports_mask &= ~0x3;
1412
1413                /* Stop and disable unused NIC QMANs */
1414                WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1415                                        NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1416                                        NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1417
1418                WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1419                                        NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1420                                        NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1421
1422                WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1423                WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1424
1425                gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1426        }
1427
1428        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1429        if (rc) {
1430                dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1431                return rc;
1432        }
1433
1434        rc = gaudi_fetch_psoc_frequency(hdev);
1435        if (rc) {
1436                dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1437                goto disable_pci_access;
1438        }
1439
1440        rc = gaudi_mmu_clear_pgt_range(hdev);
1441        if (rc) {
1442                dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1443                goto disable_pci_access;
1444        }
1445
1446        rc = gaudi_init_tpc_mem(hdev);
1447        if (rc) {
1448                dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1449                goto disable_pci_access;
1450        }
1451
1452        rc = gaudi_collective_init(hdev);
1453        if (rc) {
1454                dev_err(hdev->dev, "Failed to init collective\n");
1455                goto disable_pci_access;
1456        }
1457
1458        return 0;
1459
1460disable_pci_access:
1461        hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1462
1463        return rc;
1464}
1465
1466static void gaudi_late_fini(struct hl_device *hdev)
1467{
1468        const struct hwmon_channel_info **channel_info_arr;
1469        int i = 0;
1470
1471        if (!hdev->hl_chip_info->info)
1472                return;
1473
1474        channel_info_arr = hdev->hl_chip_info->info;
1475
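            /* The channel info array is NULL-terminated; free each entry's config and the entry itself */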
1476        while (channel_info_arr[i]) {
1477                kfree(channel_info_arr[i]->config);
1478                kfree(channel_info_arr[i]);
1479                i++;
1480        }
1481
1482        kfree(channel_info_arr);
1483
1484        hdev->hl_chip_info->info = NULL;
1485}
1486
1487static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1488{
1489        dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1490        void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1491        int i, j, rc = 0;
1492
1493        /*
1494         * The device CPU works with 40-bit addresses, while bit 39 must be set
1495         * to '1' when accessing the host.
1496         * Bits 49:39 of the full host address are saved for a later
1497         * configuration of the HW to extend the address to 50 bits.
1498         * Because a single HW register holds the extension bits, these bits
1499         * must be identical across the entire allocated range.
1500         */
1501
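            /*
             * Retry the allocation until the start and end of the range share
             * the same MSBs; attempts that do not qualify are freed below.
             */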
1502        for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1503                virt_addr_arr[i] =
1504                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1505                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
1506                                                &dma_addr_arr[i],
1507                                                GFP_KERNEL | __GFP_ZERO);
1508                if (!virt_addr_arr[i]) {
1509                        rc = -ENOMEM;
1510                        goto free_dma_mem_arr;
1511                }
1512
1513                end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1514                if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1515                                GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1516                        break;
1517        }
1518
1519        if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1520                dev_err(hdev->dev,
1521                        "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1522                rc = -EFAULT;
1523                goto free_dma_mem_arr;
1524        }
1525
1526        hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1527        hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1528        hdev->cpu_pci_msb_addr =
1529                GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1530
1531        if (!hdev->asic_prop.fw_security_enabled)
1532                GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1533
1534free_dma_mem_arr:
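            /* Free every attempt below index i; on success the allocation at index i is kept */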
1535        for (j = 0 ; j < i ; j++)
1536                hdev->asic_funcs->asic_dma_free_coherent(hdev,
1537                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
1538                                                virt_addr_arr[j],
1539                                                dma_addr_arr[j]);
1540
1541        return rc;
1542}
1543
1544static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1545{
1546        struct gaudi_device *gaudi = hdev->asic_specific;
1547        struct gaudi_internal_qman_info *q;
1548        u32 i;
1549
1550        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1551                q = &gaudi->internal_qmans[i];
1552                if (!q->pq_kernel_addr)
1553                        continue;
1554                hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1555                                                        q->pq_kernel_addr,
1556                                                        q->pq_dma_addr);
1557        }
1558}
1559
1560static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1561{
1562        struct gaudi_device *gaudi = hdev->asic_specific;
1563        struct gaudi_internal_qman_info *q;
1564        int rc, i;
1565
1566        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1567                if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1568                        continue;
1569
1570                q = &gaudi->internal_qmans[i];
1571
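                    /* The PQ size depends on the engine family the queue serves */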
1572                switch (i) {
1573                case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1574                        q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1575                        break;
1576                case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1577                        q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1578                        break;
1579                case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1580                        q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1581                        break;
1582                case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1583                        q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1584                        break;
1585                default:
1586                        dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1587                        rc = -EINVAL;
1588                        goto free_internal_qmans_pq_mem;
1589                }
1590
1591                q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1592                                                hdev, q->pq_size,
1593                                                &q->pq_dma_addr,
1594                                                GFP_KERNEL | __GFP_ZERO);
1595                if (!q->pq_kernel_addr) {
1596                        rc = -ENOMEM;
1597                        goto free_internal_qmans_pq_mem;
1598                }
1599        }
1600
1601        return 0;
1602
1603free_internal_qmans_pq_mem:
1604        gaudi_free_internal_qmans_pq_mem(hdev);
1605        return rc;
1606}
1607
1608static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1609{
1610        struct asic_fixed_properties *prop = &hdev->asic_prop;
1611        struct pci_mem_region *region;
1612
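            /*
             * Each region records its base and size in the device address
             * space, plus its offset and size within the PCI BAR that exposes
             * it. The CFG and SP SRAM offsets are taken relative to
             * SPI_FLASH_BASE_ADDR, where the CFG BAR mapping begins.
             */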
1613        /* CFG */
1614        region = &hdev->pci_mem_region[PCI_REGION_CFG];
1615        region->region_base = CFG_BASE;
1616        region->region_size = CFG_SIZE;
1617        region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1618        region->bar_size = CFG_BAR_SIZE;
1619        region->bar_id = CFG_BAR_ID;
1620        region->used = 1;
1621
1622        /* SRAM */
1623        region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1624        region->region_base = SRAM_BASE_ADDR;
1625        region->region_size = SRAM_SIZE;
1626        region->offset_in_bar = 0;
1627        region->bar_size = SRAM_BAR_SIZE;
1628        region->bar_id = SRAM_BAR_ID;
1629        region->used = 1;
1630
1631        /* DRAM */
1632        region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1633        region->region_base = DRAM_PHYS_BASE;
1634        region->region_size = hdev->asic_prop.dram_size;
1635        region->offset_in_bar = 0;
1636        region->bar_size = prop->dram_pci_bar_size;
1637        region->bar_id = HBM_BAR_ID;
1638        region->used = 1;
1639
1640        /* SP SRAM */
1641        region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1642        region->region_base = PSOC_SCRATCHPAD_ADDR;
1643        region->region_size = PSOC_SCRATCHPAD_SIZE;
1644        region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1645        region->bar_size = CFG_BAR_SIZE;
1646        region->bar_id = CFG_BAR_ID;
1647        region->used = 1;
1648}
1649
1650static int gaudi_sw_init(struct hl_device *hdev)
1651{
1652        struct gaudi_device *gaudi;
1653        u32 i, event_id = 0;
1654        int rc;
1655
1656        /* Allocate device structure */
1657        gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1658        if (!gaudi)
1659                return -ENOMEM;
1660
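            /* Collect the FW event ids (fc_id) of all valid IRQ map table entries */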
1661        for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1662                if (gaudi_irq_map_table[i].valid) {
1663                        if (event_id == GAUDI_EVENT_SIZE) {
1664                                dev_err(hdev->dev,
1665                                        "Event array exceeds the limit of %u events\n",
1666                                        GAUDI_EVENT_SIZE);
1667                                rc = -EINVAL;
1668                                goto free_gaudi_device;
1669                        }
1670
1671                        gaudi->events[event_id++] =
1672                                        gaudi_irq_map_table[i].fc_id;
1673                }
1674        }
1675
1676        gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1677
1678        gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1679
1680        hdev->asic_specific = gaudi;
1681
1682        /* Create DMA pool for small allocations */
1683        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1684                        &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1685        if (!hdev->dma_pool) {
1686                dev_err(hdev->dev, "failed to create DMA pool\n");
1687                rc = -ENOMEM;
1688                goto free_gaudi_device;
1689        }
1690
1691        rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1692        if (rc)
1693                goto free_dma_pool;
1694
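            /*
             * Manage the CPU accessible memory with a gen pool: 32-byte
             * minimum allocation granularity, no NUMA node restriction.
             */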
1695        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1696        if (!hdev->cpu_accessible_dma_pool) {
1697                dev_err(hdev->dev,
1698                        "Failed to create CPU accessible DMA pool\n");
1699                rc = -ENOMEM;
1700                goto free_cpu_dma_mem;
1701        }
1702
1703        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1704                                (uintptr_t) hdev->cpu_accessible_dma_mem,
1705                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1706        if (rc) {
1707                dev_err(hdev->dev,
1708                        "Failed to add memory to CPU accessible DMA pool\n");
1709                rc = -EFAULT;
1710                goto free_cpu_accessible_dma_pool;
1711        }
1712
1713        rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1714        if (rc)
1715                goto free_cpu_accessible_dma_pool;
1716
1717        spin_lock_init(&gaudi->hw_queues_lock);
1718        mutex_init(&gaudi->clk_gate_mutex);
1719
1720        hdev->supports_sync_stream = true;
1721        hdev->supports_coresight = true;
1722        hdev->supports_staged_submission = true;
1723
1724        gaudi_set_pci_memory_regions(hdev);
1725
1726        return 0;
1727
1728free_cpu_accessible_dma_pool:
1729        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1730free_cpu_dma_mem:
1731        if (!hdev->asic_prop.fw_security_enabled)
1732                GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1733                                        hdev->cpu_pci_msb_addr);
1734        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1735                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1736                        hdev->cpu_accessible_dma_mem,
1737                        hdev->cpu_accessible_dma_address);
1738free_dma_pool:
1739        dma_pool_destroy(hdev->dma_pool);
1740free_gaudi_device:
1741        kfree(gaudi);
1742        return rc;
1743}
1744
1745static int gaudi_sw_fini(struct hl_device *hdev)
1746{
1747        struct gaudi_device *gaudi = hdev->asic_specific;
1748
1749        gaudi_free_internal_qmans_pq_mem(hdev);
1750
1751        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1752
1753        if (!hdev->asic_prop.fw_security_enabled)
1754                GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1755                                        hdev->cpu_pci_msb_addr);
1756
1757        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1758                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1759                        hdev->cpu_accessible_dma_mem,
1760                        hdev->cpu_accessible_dma_address);
1761
1762        dma_pool_destroy(hdev->dma_pool);
1763
1764        mutex_destroy(&gaudi->clk_gate_mutex);
1765
1766        kfree(gaudi);
1767
1768        return 0;
1769}
1770
1771static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1772{
1773        struct hl_device *hdev = arg;
1774        int i;
1775
1776        if (hdev->disabled)
1777                return IRQ_HANDLED;
1778
1779        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1780                hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1781
1782        hl_irq_handler_eq(irq, &hdev->event_queue);
1783
1784        return IRQ_HANDLED;
1785}
1786
1787/*
1788 * For backward compatibility, new MSI interrupts should be set after the
1789 * existing CPU and NIC interrupts.
1790 */
1791static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1792                                bool cpu_eq)
1793{
1794        int msi_vec;
1795
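            /*
             * Completion queue interrupts map 1:1 to the first MSI vectors and
             * the CPU event queue uses vector GAUDI_EVENT_QUEUE_MSI_IDX; any
             * newer interrupt index is shifted past the NIC vectors to keep
             * the legacy vector layout intact.
             */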
1796        if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1797                dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1798                                GAUDI_EVENT_QUEUE_MSI_IDX);
1799
1800        msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1801                        (nr + NIC_NUMBER_OF_ENGINES + 1);
1802
1803        return pci_irq_vector(hdev->pdev, msi_vec);
1804}
1805
1806static int gaudi_enable_msi_single(struct hl_device *hdev)
1807{
1808        int rc, irq;
1809
1810        dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1811
1812        irq = gaudi_pci_irq_vector(hdev, 0, false);
1813        rc = request_irq(irq, gaudi_irq_handler_single, 0,
1814                        "gaudi single msi", hdev);
1815        if (rc)
1816                dev_err(hdev->dev,
1817                        "Failed to request single MSI IRQ\n");
1818
1819        return rc;
1820}
1821
1822static int gaudi_enable_msi_multi(struct hl_device *hdev)
1823{
1824        int cq_cnt = hdev->asic_prop.completion_queues_count;
1825        int rc, i, irq_cnt_init, irq;
1826
1827        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1828                irq = gaudi_pci_irq_vector(hdev, i, false);
1829                rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1830                                &hdev->completion_queue[i]);
1831                if (rc) {
1832                        dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1833                        goto free_irqs;
1834                }
1835        }
1836
1837        irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1838        rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1839                                &hdev->event_queue);
1840        if (rc) {
1841                dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1842                goto free_irqs;
1843        }
1844
1845        return 0;
1846
1847free_irqs:
1848        for (i = 0 ; i < irq_cnt_init ; i++)
1849                free_irq(gaudi_pci_irq_vector(hdev, i, false),
1850                                &hdev->completion_queue[i]);
1851        return rc;
1852}
1853
1854static int gaudi_enable_msi(struct hl_device *hdev)
1855{
1856        struct gaudi_device *gaudi = hdev->asic_specific;
1857        int rc;
1858
1859        if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1860                return 0;
1861
1862        rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1863        if (rc < 0) {
1864                dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1865                return rc;
1866        }
1867
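            /*
             * rc holds the number of vectors actually allocated; fall back to
             * a single shared IRQ handler when fewer than NUMBER_OF_INTERRUPTS
             * vectors are available.
             */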
1868        if (rc < NUMBER_OF_INTERRUPTS) {
1869                gaudi->multi_msi_mode = false;
1870                rc = gaudi_enable_msi_single(hdev);
1871        } else {
1872                gaudi->multi_msi_mode = true;
1873                rc = gaudi_enable_msi_multi(hdev);
1874        }
1875
1876        if (rc)
1877                goto free_pci_irq_vectors;
1878
1879        gaudi->hw_cap_initialized |= HW_CAP_MSI;
1880
1881        return 0;
1882
1883free_pci_irq_vectors:
1884        pci_free_irq_vectors(hdev->pdev);
1885        return rc;
1886}
1887
1888static void gaudi_sync_irqs(struct hl_device *hdev)
1889{
1890        struct gaudi_device *gaudi = hdev->asic_specific;
1891        int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1892
1893        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1894                return;
1895
1896        /* Wait for all pending IRQs to finish */
1897        if (gaudi->multi_msi_mode) {
1898                for (i = 0 ; i < cq_cnt ; i++)
1899                        synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1900
1901                synchronize_irq(gaudi_pci_irq_vector(hdev,
1902                                                GAUDI_EVENT_QUEUE_MSI_IDX,
1903                                                true));
1904        } else {
1905                synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1906        }
1907}
1908
1909static void gaudi_disable_msi(struct hl_device *hdev)
1910{
1911        struct gaudi_device *gaudi = hdev->asic_specific;
1912        int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1913
1914        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1915                return;
1916
1917        gaudi_sync_irqs(hdev);
1918
1919        if (gaudi->multi_msi_mode) {
1920                irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1921                                                true);
1922                free_irq(irq, &hdev->event_queue);
1923
1924                for (i = 0 ; i < cq_cnt ; i++) {
1925                        irq = gaudi_pci_irq_vector(hdev, i, false);
1926                        free_irq(irq, &hdev->completion_queue[i]);
1927                }
1928        } else {
1929                free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1930        }
1931
1932        pci_free_irq_vectors(hdev->pdev);
1933
1934        gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1935}
1936
1937static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1938{
1939        struct gaudi_device *gaudi = hdev->asic_specific;
1940
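            /*
             * Skip if the FW owns the security configuration, if the FW
             * already enabled SRAM scrambling (boot status bit), if scrambling
             * was already enabled here, or if it is disabled by the driver
             * option.
             */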
1941        if (hdev->asic_prop.fw_security_enabled)
1942                return;
1943
1944        if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
1945                                                CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
1946                return;
1947
1948        if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1949                return;
1950
1951        if (!hdev->sram_scrambler_enable)
1952                return;
1953
1954        WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1955                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1956        WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1957                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958        WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1959                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960        WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1961                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962        WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1963                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1964        WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1965                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1966        WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1967                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1968        WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1969                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1970
1971        WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1972                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1973        WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1974                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1975        WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1976                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1977        WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1978                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1979        WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1980                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1981        WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1982                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1983        WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1984                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1985        WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1986                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1987
1988        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1989                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1990        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1991                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1992        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1993                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1994        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1995                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1996        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1997                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1998        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1999                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2000        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2001                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2002        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2003                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2004
2005        gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2006}
2007
2008static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2009{
2010        struct gaudi_device *gaudi = hdev->asic_specific;
2011
2012        if (hdev->asic_prop.fw_security_enabled)
2013                return;
2014
2015        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2016                                        CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2017                return;
2018
2019        if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2020                return;
2021
2022        if (!hdev->dram_scrambler_enable)
2023                return;
2024
2025        WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2026                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2027        WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2028                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2029        WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2030                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2031        WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2032                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2033        WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2034                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2035        WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2036                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2037        WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2038                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2039        WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2040                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2041
2042        WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2043                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2044        WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2045                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2046        WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2047                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2048        WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2049                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2050        WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2051                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2052        WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2053                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2054        WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2055                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2056        WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2057                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2058
2059        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2060                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2061        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2062                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2063        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2064                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2065        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2066                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2067        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2068                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2069        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2070                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2071        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2072                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2073        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2074                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2075
2076        gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2077}
2078
2079static void gaudi_init_e2e(struct hl_device *hdev)
2080{
2081        if (hdev->asic_prop.fw_security_enabled)
2082                return;
2083
2084        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2085                                        CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2086                return;
2087
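            /*
             * Program the end-to-end credit window sizes for HBM and PCI
             * traffic per router and DMA interface, then enable E2E credits
             * below; the '>> 3' shifts suggest the HBM sizes are expressed in
             * 8-byte units.
             */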
2088        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2089        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2090        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2091        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2092
2093        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2094        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2095        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2096        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2097
2098        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2099        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2100        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2101        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2102
2103        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2104        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2105        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2106        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2107
2108        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2109        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2110        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2111        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2112
2113        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2114        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2115        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2116        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2117
2118        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2119        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2120        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2121        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2122
2123        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2124        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2125        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2126        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2127
2128        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2129        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2130        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2131        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2132
2133        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2134        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2135        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2136        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2137
2138        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2139        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2140        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2141        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2142
2143        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2144        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2145        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2146        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2147
2148        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2149        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2150        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2151        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2152
2153        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2154        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2155        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2156        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2157
2158        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2159        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2160        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2161        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2162
2163        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2164        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2165        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2166        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2167
2168        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2169        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2170        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2171        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2172
2173        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2174        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2175        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2176        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2177
2178        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2179        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2180        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2181        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2182
2183        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2184        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2185        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2186        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2187
2188        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2189        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2190        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2191        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2192
2193        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2194        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2195        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2196        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2197
2198        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2199        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2200        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2201        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2202
2203        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2204        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2205        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2206        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2207
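            /* HBM address-selection registers need different values when HBM scrambling is disabled */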
2208        if (!hdev->dram_scrambler_enable) {
2209                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2210                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2211                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2212                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2213
2214                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2215                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2216                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2217                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2218
2219                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2220                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2221                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2222                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2223
2224                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2225                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2226                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2227                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2228
2229                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2230                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2231                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2232                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2233
2234                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2235                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2236                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2237                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2238
2239                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2240                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2241                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2242                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2243
2244                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2245                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2246                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2247                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2248
2249                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2250                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2251                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2252                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2253
2254                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2255                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2256                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2257                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2258
2259                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2260                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2261                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2262                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2263
2264                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2265                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2266                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2267                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2268
2269                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2270                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2271                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2272                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2273
2274                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2275                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2276                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2277                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2278
2279                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2280                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2281                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2282                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2283
2284                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2285                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2286                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2287                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2288
2289                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2290                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2291                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2292                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2293
2294                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2295                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2296                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2297                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2298
2299                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2300                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2301                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2302                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2303
2304                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2305                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2306                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2307                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2308
2309                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2310                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2311                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2312                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2313
2314                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2315                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2316                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2317                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2318
2319                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2320                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2321                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2322                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2323
2324                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2325                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2326                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2327                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2328        }
2329
2330        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2331                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2332        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2333                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2334
2335        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2336                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2338                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339
2340        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2341                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2343                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344
2345        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2346                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2348                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349
2350        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2351                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2353                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354
2355        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2356                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2358                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359
2360        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2361                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2363                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364
2365        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2366                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2368                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369
2370        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2371                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2373                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374
2375        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2376                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2378                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379
2380        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2381                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2383                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384
2385        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2386                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2388                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389
2390        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2391                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2393                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2396                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2398                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2401                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2403                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2406                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2408                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2411                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2412        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2413                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2414
2415        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2416                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2418                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419
2420        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2421                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2423                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424
2425        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2426                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2428                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429
2430        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2431                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2433                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434
2435        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2436                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2438                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439
2440        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2441                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2443                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444
2445        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2446                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2448                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449}
2450
2451static void gaudi_init_hbm_cred(struct hl_device *hdev)
2452{
2453        uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2454
2455        if (hdev->asic_prop.fw_security_enabled)
2456                return;
2457
2458        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2459                                                CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2460                return;
2461
2462        hbm0_wr = 0x33333333;
2463        hbm0_rd = 0x77777777;
2464        hbm1_wr = 0x55555555;
2465        hbm1_rd = 0xDDDDDDDD;
2466
2467        WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2468        WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2469        WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2470        WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2471
2472        WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2473        WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2474        WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2475        WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2476
2477        WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2478        WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2479        WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2480        WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2481
2482        WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2483        WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2484        WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2485        WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2486
2487        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2488                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2491                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2494                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2497                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
2500        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2501                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2504                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2507                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2510                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512}
2513
2514static void gaudi_init_golden_registers(struct hl_device *hdev)
2515{
2516        u32 tpc_offset;
2517        int tpc_id, i;
2518
2519        gaudi_init_e2e(hdev);
2520        gaudi_init_hbm_cred(hdev);
2521
2522        for (tpc_id = 0, tpc_offset = 0;
2523                                tpc_id < TPC_NUMBER_OF_ENGINES;
2524                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2525                /* Mask all arithmetic interrupts from TPC */
2526                WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2527                /* Set 16 cache lines */
2528                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2529                                ICACHE_FETCH_LINE_NUM, 2);
2530        }
2531
2532        /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2533        for (i = 0 ; i < 128 ; i += 8)
2534                writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2535
2536        WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2537        WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2538        WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539        WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540}
2541
2542static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2543                                        int qman_id, dma_addr_t qman_pq_addr)
2544{
2545        struct cpu_dyn_regs *dyn_regs =
2546                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2547        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2548        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2549        u32 q_off, dma_qm_offset;
2550        u32 dma_qm_err_cfg, irq_handler_offset;
2551
2552        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2553
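            /*
             * Cache the sync manager monitor-payload and sync-object base
             * addresses (east-north and west-south instances) programmed into
             * the CP message base registers below.
             */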
2554        mtr_base_en_lo = lower_32_bits(CFG_BASE +
2555                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2556        mtr_base_en_hi = upper_32_bits(CFG_BASE +
2557                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558        so_base_en_lo = lower_32_bits(CFG_BASE +
2559                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2560        so_base_en_hi = upper_32_bits(CFG_BASE +
2561                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2563                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2565                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566        so_base_ws_lo = lower_32_bits(CFG_BASE +
2567                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568        so_base_ws_hi = upper_32_bits(CFG_BASE +
2569                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570
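            /* Per-stream QMAN registers are 4 bytes apart within the QMAN block */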
2571        q_off = dma_qm_offset + qman_id * 4;
2572
2573        WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2574        WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2575
2576        WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2577        WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2578        WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2579
2580        WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2581        WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2582                                                        QMAN_LDMA_SRC_OFFSET);
2583        WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2584                                                        QMAN_LDMA_DST_OFFSET);
2585
2586        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2587        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2588        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2589        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2590        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2591        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2592        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2593        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2594
2595        WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2596
2597        /* The following configuration is needed only once per QMAN */
2598        if (qman_id == 0) {
2599                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2600                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2601                                le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2602
2603                /* Configure RAZWI IRQ */
2604                dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2605                if (hdev->stop_on_err)
2606                        dma_qm_err_cfg |=
2607                                PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2608
2609                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2610
2611                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2612                        lower_32_bits(CFG_BASE + irq_handler_offset));
2613                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2614                        upper_32_bits(CFG_BASE + irq_handler_offset));
2615
2616                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2617                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2618                                                                        dma_id);
2619
2620                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2621                                QM_ARB_ERR_MSG_EN_MASK);
2622
2623                /* Increase ARB WDT to support streams architecture */
2624                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2625                                GAUDI_ARB_WDT_TIMEOUT);
2626
2627                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2628                                QMAN_EXTERNAL_MAKE_TRUSTED);
2629
2630                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2631        }
2632}
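/*
 * A short worked example of the per-stream addressing used above, under the
 * assumption that consecutive stream registers are 4 bytes apart (which is
 * what the "qman_id * 4" stride implies): for dma_id 0 and qman_id 2,
 * q_off is 8, so the PQ base write lands on the stream-2 instance of
 * mmDMA0_QM_PQ_BASE_LO_0. MSG_BASE0/1 carry the east-north sync manager
 * monitor payload address and SOB base, MSG_BASE2/3 the west-south ones.
 * The RAZWI/error and arbiter configuration is global to the QMAN, so it is
 * written only once, for qman_id 0, and the error payload is the DMA0_QM
 * event cpu_id plus the channel index so each channel reports a distinct
 * event.
 */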
2633
2634static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2635{
2636        struct cpu_dyn_regs *dyn_regs =
2637                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2638        u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2639        u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2640        u32 irq_handler_offset;
2641
2642        /* Set to maximum possible according to physical size */
2643        WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2644        WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2645
2646        /* WA for H/W bug H3-2116 */
2647        WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2648
2649        /* The STOP_ON bit means the operation does not complete in case of a RAZWI */
2650        if (hdev->stop_on_err)
2651                dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2652
2653        WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2654
2655        irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2656                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2657                        le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2658
2659        WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2660                lower_32_bits(CFG_BASE + irq_handler_offset));
2661        WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2662                upper_32_bits(CFG_BASE + irq_handler_offset));
2663
2664        WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2665                gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2666        WREG32(mmDMA0_CORE_PROT + dma_offset,
2667                        1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2668        /* If the channel is secured, it should be in MMU bypass mode */
2669        WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2670                        1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2671        WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2672}
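/*
 * Summary of the DMA core setup above: the read limits are written as 0,
 * which per the comment selects the maximum the physical size allows, the
 * LBW outstanding limit of 15 is the H3-2116 workaround, and error messages
 * are routed either to the GIC distributor SETSPI register or to the
 * FW-provided handler register, with the DMA0_CORE event cpu_id plus the
 * channel index as payload. The channel is left in MMU bypass since a
 * secured channel must not go through the MMU, and only then is the core
 * enabled.
 */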
2673
2674static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2675                                u32 enable_mask)
2676{
2677        u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2678
2679        WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2680}
2681
2682static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2683{
2684        struct gaudi_device *gaudi = hdev->asic_specific;
2685        struct hl_hw_queue *q;
2686        int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2687
2688        if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2689                return;
2690
2691        for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2692                dma_id = gaudi_dma_assignment[i];
2693                /*
2694                 * For queues after the CPU Q we need to add 1 to get the
2695                 * correct queue index. In addition, we need to skip the CPU
2696                 * EQ and the NIC IRQs to get the correct MSI register.
2697                 */
2698                if (dma_id > 1) {
2699                        cpu_skip = 1;
2700                        nic_skip = NIC_NUMBER_OF_ENGINES;
2701                } else {
2702                        cpu_skip = 0;
2703                        nic_skip = 0;
2704                }
2705
2706                for (j = 0 ; j < QMAN_STREAMS ; j++) {
2707                        q_idx = 4 * dma_id + j + cpu_skip;
2708                        q = &hdev->kernel_queues[q_idx];
2709                        q->cq_id = cq_id++;
2710                        q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2711                        gaudi_init_pci_dma_qman(hdev, dma_id, j,
2712                                                q->bus_address);
2713                }
2714
2715                gaudi_init_dma_core(hdev, dma_id);
2716
2717                gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2718        }
2719
2720        gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2721}
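/*
 * Worked example of the index math above, assuming the PCI DMA channels are
 * DMA 0, 1 and 5 (the same channels that gaudi_disable_pci_dma_qmans()
 * below touches): for dma_id 1, stream 2, q_idx is 4 * 1 + 2 = 6 and no
 * skips are applied. For dma_id 5, stream 0, cpu_skip is 1 and nic_skip is
 * NIC_NUMBER_OF_ENGINES, so q_idx is 4 * 5 + 0 + 1 = 21 and the MSI vector
 * is pushed past the CPU EQ and the NIC interrupts. cq_id simply counts up
 * across all PCI DMA streams.
 */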
2722
2723static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2724                                        int qman_id, u64 qman_base_addr)
2725{
2726        struct cpu_dyn_regs *dyn_regs =
2727                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2728        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2729        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2730        u32 dma_qm_err_cfg, irq_handler_offset;
2731        u32 q_off, dma_qm_offset;
2732
2733        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2734
2735        mtr_base_en_lo = lower_32_bits(CFG_BASE +
2736                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2737        mtr_base_en_hi = upper_32_bits(CFG_BASE +
2738                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739        so_base_en_lo = lower_32_bits(CFG_BASE +
2740                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2741        so_base_en_hi = upper_32_bits(CFG_BASE +
2742                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2744                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2746                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747        so_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749        so_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
2752        q_off = dma_qm_offset + qman_id * 4;
2753
2754        if (qman_id < 4) {
2755                WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2756                                        lower_32_bits(qman_base_addr));
2757                WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2758                                        upper_32_bits(qman_base_addr));
2759
2760                WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2761                WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762                WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2763
2764                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2765                                                        QMAN_CPDMA_SIZE_OFFSET);
2766                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767                                                        QMAN_CPDMA_SRC_OFFSET);
2768                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769                                                        QMAN_CPDMA_DST_OFFSET);
2770        } else {
2771                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2772                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2773                                le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2774
2775                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2776                                                        QMAN_LDMA_SIZE_OFFSET);
2777                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2778                                                        QMAN_LDMA_SRC_OFFSET);
2779                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2780                                                        QMAN_LDMA_DST_OFFSET);
2781
2782                /* Configure RAZWI IRQ */
2783                dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2784                if (hdev->stop_on_err)
2785                        dma_qm_err_cfg |=
2786                                HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2787
2788                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2789
2790                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2791                        lower_32_bits(CFG_BASE + irq_handler_offset));
2792                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2793                        upper_32_bits(CFG_BASE + irq_handler_offset));
2794
2795                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2796                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2797                                                                        dma_id);
2798
2799                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2800                                QM_ARB_ERR_MSG_EN_MASK);
2801
2802                /* Increase ARB WDT to support streams architecture */
2803                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2804                                GAUDI_ARB_WDT_TIMEOUT);
2805
2806                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808                                QMAN_INTERNAL_MAKE_TRUSTED);
2809        }
2810
2811        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
2816        /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817        if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818                WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819                                mtr_base_ws_lo);
2820                WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821                                mtr_base_ws_hi);
2822                WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823                                so_base_ws_lo);
2824                WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825                                so_base_ws_hi);
2826        }
2827}
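/*
 * The split above follows the QMAN structure: streams 0-3 are the upper CPs
 * and get a PQ (the internal QMAN buffer passed in qman_base_addr) plus
 * CPDMA offsets, while qman_id 4 is the lower CP, which has no PQ of its
 * own and instead gets the LDMA offsets together with the QMAN-global
 * RAZWI/error, arbiter and protection configuration. Only the QMAN of DMA5
 * gets the west-south sync manager in MSG_BASE2/3, as it serves the sync
 * stream collective.
 */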
2828
2829static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830{
2831        struct gaudi_device *gaudi = hdev->asic_specific;
2832        struct gaudi_internal_qman_info *q;
2833        u64 qman_base_addr;
2834        int i, j, dma_id, internal_q_index;
2835
2836        if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837                return;
2838
2839        for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842                for (j = 0 ; j < QMAN_STREAMS ; j++) {
2843                         /*
2844                          * Add the CPU queue in order to get the correct queue
2845                          * number, as all internal queues are placed after it
2846                          */
2847                        internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849                        q = &gaudi->internal_qmans[internal_q_index];
2850                        qman_base_addr = (u64) q->pq_dma_addr;
2851                        gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852                                                qman_base_addr);
2853                }
2854
2855                /* Initializing lower CP for HBM DMA QMAN */
2856                gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858                gaudi_init_dma_core(hdev, dma_id);
2859
2860                gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861        }
2862
2863        gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864}
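/*
 * Example of the internal queue index computed above, assuming QMAN_STREAMS
 * is 4 (matching the four upper-CP streams configured per QMAN): for
 * dma_id 2, stream 1, internal_q_index is 2 * 4 + 1 + 1 = 10, where the
 * trailing + 1 accounts for the CPU queue that sits before the internal
 * queues in the numbering.
 */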
2865
2866static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867                                        int qman_id, u64 qman_base_addr)
2868{
2869        struct cpu_dyn_regs *dyn_regs =
2870                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871        u32 mtr_base_lo, mtr_base_hi;
2872        u32 so_base_lo, so_base_hi;
2873        u32 irq_handler_offset;
2874        u32 q_off, mme_id;
2875        u32 mme_qm_err_cfg;
2876
2877        mtr_base_lo = lower_32_bits(CFG_BASE +
2878                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879        mtr_base_hi = upper_32_bits(CFG_BASE +
2880                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881        so_base_lo = lower_32_bits(CFG_BASE +
2882                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883        so_base_hi = upper_32_bits(CFG_BASE +
2884                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886        q_off = mme_offset + qman_id * 4;
2887
2888        if (qman_id < 4) {
2889                WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890                                        lower_32_bits(qman_base_addr));
2891                WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892                                        upper_32_bits(qman_base_addr));
2893
2894                WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895                WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896                WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899                                                        QMAN_CPDMA_SIZE_OFFSET);
2900                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901                                                        QMAN_CPDMA_SRC_OFFSET);
2902                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903                                                        QMAN_CPDMA_DST_OFFSET);
2904        } else {
2905                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907                                le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910                                                        QMAN_LDMA_SIZE_OFFSET);
2911                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912                                                        QMAN_LDMA_SRC_OFFSET);
2913                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914                                                        QMAN_LDMA_DST_OFFSET);
2915
2916                /* Configure RAZWI IRQ */
2917                mme_id = mme_offset /
2918                                (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920                mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921                if (hdev->stop_on_err)
2922                        mme_qm_err_cfg |=
2923                                MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925                WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927                WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928                        lower_32_bits(CFG_BASE + irq_handler_offset));
2929                WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930                        upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932                WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933                        gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934                                                                        mme_id);
2935
2936                WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937                                QM_ARB_ERR_MSG_EN_MASK);
2938
2939                /* Increase ARB WDT to support streams architecture */
2940                WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2941                                GAUDI_ARB_WDT_TIMEOUT);
2942
2943                WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944                WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945                                QMAN_INTERNAL_MAKE_TRUSTED);
2946        }
2947
2948        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952}
2953
2954static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955{
2956        struct gaudi_device *gaudi = hdev->asic_specific;
2957        struct gaudi_internal_qman_info *q;
2958        u64 qman_base_addr;
2959        u32 mme_offset;
2960        int i, internal_q_index;
2961
2962        if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963                return;
2964
2965        /*
2966         * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967         * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968         */
2969
2970        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972        for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973                internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974                q = &gaudi->internal_qmans[internal_q_index];
2975                qman_base_addr = (u64) q->pq_dma_addr;
2976                gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977                                        qman_base_addr);
2978                if (i == 3)
2979                        mme_offset = 0;
2980        }
2981
2982        /* Initializing lower CP for MME QMANs */
2983        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984        gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985        gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987        WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988        WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990        gaudi->hw_cap_initialized |= HW_CAP_MME;
2991}
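/*
 * Queue-to-engine mapping used above, assuming the MME queue IDs are
 * contiguous and MME_NUMBER_OF_QMANS is 8: the first four internal queues
 * (MME_0_0..MME_0_3) are programmed into the north-west MME QM block
 * (mmMME2_QM_*) and, once mme_offset drops to 0 after i == 3, the next four
 * (MME_1_0..MME_1_3) go to the south-west MME QM block (mmMME0_QM_*). The
 * lower CPs (qman_id 4) of both blocks are initialized with a zero PQ
 * address and only then are the two QM blocks enabled.
 */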
2992
2993static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994                                int qman_id, u64 qman_base_addr)
2995{
2996        struct cpu_dyn_regs *dyn_regs =
2997                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000        u32 tpc_qm_err_cfg, irq_handler_offset;
3001        u32 q_off, tpc_id;
3002
3003        mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005        mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007        so_base_en_lo = lower_32_bits(CFG_BASE +
3008                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009        so_base_en_hi = upper_32_bits(CFG_BASE +
3010                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015        so_base_ws_lo = lower_32_bits(CFG_BASE +
3016                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017        so_base_ws_hi = upper_32_bits(CFG_BASE +
3018                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020        q_off = tpc_offset + qman_id * 4;
3021
3022        tpc_id = tpc_offset /
3023                        (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025        if (qman_id < 4) {
3026                WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027                                        lower_32_bits(qman_base_addr));
3028                WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029                                        upper_32_bits(qman_base_addr));
3030
3031                WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032                WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033                WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036                                                        QMAN_CPDMA_SIZE_OFFSET);
3037                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038                                                        QMAN_CPDMA_SRC_OFFSET);
3039                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040                                                        QMAN_CPDMA_DST_OFFSET);
3041        } else {
3042                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044                                le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047                                                        QMAN_LDMA_SIZE_OFFSET);
3048                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049                                                        QMAN_LDMA_SRC_OFFSET);
3050                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051                                                        QMAN_LDMA_DST_OFFSET);
3052
3053                /* Configure RAZWI IRQ */
3054                tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055                if (hdev->stop_on_err)
3056                        tpc_qm_err_cfg |=
3057                                TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059                WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062                        lower_32_bits(CFG_BASE + irq_handler_offset));
3063                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064                        upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066                WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067                        gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068                                                                        tpc_id);
3069
3070                WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071                                QM_ARB_ERR_MSG_EN_MASK);
3072
3073                /* Increase ARB WDT to support streams architecture */
3074                WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3075                                GAUDI_ARB_WDT_TIMEOUT);
3076
3077                WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3078                WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3079                                QMAN_INTERNAL_MAKE_TRUSTED);
3080        }
3081
3082        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3083        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3084        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3085        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3086
3087        /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3088        if (tpc_id == 6) {
3089                WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3090                                mtr_base_ws_lo);
3091                WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3092                                mtr_base_ws_hi);
3093                WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3094                                so_base_ws_lo);
3095                WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3096                                so_base_ws_hi);
3097        }
3098}
3099
3100static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3101{
3102        struct gaudi_device *gaudi = hdev->asic_specific;
3103        struct gaudi_internal_qman_info *q;
3104        u64 qman_base_addr;
3105        u32 so_base_hi, tpc_offset = 0;
3106        u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3107                        mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3108        int i, tpc_id, internal_q_index;
3109
3110        if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3111                return;
3112
3113        so_base_hi = upper_32_bits(CFG_BASE +
3114                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3115
3116        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3117                for (i = 0 ; i < QMAN_STREAMS ; i++) {
3118                        internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3119                                                tpc_id * QMAN_STREAMS + i;
3120                        q = &gaudi->internal_qmans[internal_q_index];
3121                        qman_base_addr = (u64) q->pq_dma_addr;
3122                        gaudi_init_tpc_qman(hdev, tpc_offset, i,
3123                                                qman_base_addr);
3124
3125                        if (i == 3) {
3126                                /* Initializing lower CP for TPC QMAN */
3127                                gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3128
3129                                /* Enable the QMAN and TPC channel */
3130                                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3131                                                QMAN_TPC_ENABLE);
3132                        }
3133                }
3134
3135                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3136                                so_base_hi);
3137
3138                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3139
3140                gaudi->hw_cap_initialized |=
3141                                FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3142        }
3143}
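/*
 * Per TPC, the loop above sets up the four upper-CP streams with their
 * host-resident PQs and, when the last stream (i == 3) is reached, also
 * initializes the lower CP (qman_id 4) and enables the QMAN together with
 * the TPC channel. Each TPC's SM_BASE_ADDRESS_HIGH is pointed at the
 * east-north sync manager SOB space, and a per-TPC bit is recorded in
 * HW_CAP_TPC_MASK via FIELD_PREP.
 */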
3144
3145static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3146                                int qman_id, u64 qman_base_addr, int nic_id)
3147{
3148        struct cpu_dyn_regs *dyn_regs =
3149                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3150        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3151        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3152        u32 nic_qm_err_cfg, irq_handler_offset;
3153        u32 q_off;
3154
3155        mtr_base_en_lo = lower_32_bits(CFG_BASE +
3156                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157        mtr_base_en_hi = upper_32_bits(CFG_BASE +
3158                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159        so_base_en_lo = lower_32_bits(CFG_BASE +
3160                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161        so_base_en_hi = upper_32_bits(CFG_BASE +
3162                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3164                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3166                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167        so_base_ws_lo = lower_32_bits(CFG_BASE +
3168                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169        so_base_ws_hi = upper_32_bits(CFG_BASE +
3170                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172        q_off = nic_offset + qman_id * 4;
3173
3174        WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3175        WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3176
3177        WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3178        WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3179        WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3180
3181        WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182                                                        QMAN_LDMA_SIZE_OFFSET);
3183        WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184                                                        QMAN_LDMA_SRC_OFFSET);
3185        WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186                                                        QMAN_LDMA_DST_OFFSET);
3187
3188        WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3189        WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3190        WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3191        WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3192
3193        /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3194        WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3195        WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3196        WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3197        WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3198
3199        if (qman_id == 0) {
3200                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3201                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3202                                le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3203
3204                /* Configure RAZWI IRQ */
3205                nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3206                if (hdev->stop_on_err)
3207                        nic_qm_err_cfg |=
3208                                NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3209
3210                WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3211
3212                WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3213                        lower_32_bits(CFG_BASE + irq_handler_offset));
3214                WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3215                        upper_32_bits(CFG_BASE + irq_handler_offset));
3216
3217                WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3218                        gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3219                                                                        nic_id);
3220
3221                WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3222                                QM_ARB_ERR_MSG_EN_MASK);
3223
3224                /* Increase ARB WDT to support streams architecture */
3225                WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3226                                GAUDI_ARB_WDT_TIMEOUT);
3227
3228                WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229                WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230                                QMAN_INTERNAL_MAKE_TRUSTED);
3231        }
3232}
3233
3234static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235{
3236        struct gaudi_device *gaudi = hdev->asic_specific;
3237        struct gaudi_internal_qman_info *q;
3238        u64 qman_base_addr;
3239        u32 nic_offset = 0;
3240        u32 nic_delta_between_qmans =
3241                        mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242        u32 nic_delta_between_nics =
3243                        mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244        int i, nic_id, internal_q_index;
3245
3246        if (!hdev->nic_ports_mask)
3247                return;
3248
3249        if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250                return;
3251
3252        dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253
3254        for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255                if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256                        nic_offset += nic_delta_between_qmans;
3257                        if (nic_id & 1) {
3258                                nic_offset -= (nic_delta_between_qmans * 2);
3259                                nic_offset += nic_delta_between_nics;
3260                        }
3261                        continue;
3262                }
3263
3264                for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265                        internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266                                                nic_id * QMAN_STREAMS + i;
3267                        q = &gaudi->internal_qmans[internal_q_index];
3268                        qman_base_addr = (u64) q->pq_dma_addr;
3269                        gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270                                                qman_base_addr, nic_id);
3271                }
3272
3273                /* Enable the QMAN */
3274                WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275
3276                nic_offset += nic_delta_between_qmans;
3277                if (nic_id & 1) {
3278                        nic_offset -= (nic_delta_between_qmans * 2);
3279                        nic_offset += nic_delta_between_nics;
3280                }
3281
3282                gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283        }
3284}
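/*
 * The offset walk above reflects the fact that every NIC macro contains two
 * QMANs: after each port the offset advances by the QMAN-to-QMAN delta, and
 * after every odd port it is rewound by two QMAN deltas and advanced by the
 * NIC-to-NIC delta instead. For example, starting from 0, nic_id 3 ends up
 * at nic_delta_between_nics + nic_delta_between_qmans, i.e. the second QMAN
 * of the second NIC macro. Ports that are masked out in nic_ports_mask
 * still advance the offset so the remaining ports stay aligned with their
 * register blocks.
 */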
3285
3286static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287{
3288        struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291                return;
3292
3293        WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294        WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295        WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296}
3297
3298static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299{
3300        struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303                return;
3304
3305        WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306        WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307        WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308        WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309        WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310}
3311
3312static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313{
3314        struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317                return;
3318
3319        WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320        WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321}
3322
3323static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324{
3325        struct gaudi_device *gaudi = hdev->asic_specific;
3326        u32 tpc_offset = 0;
3327        int tpc_id;
3328
3329        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330                return;
3331
3332        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335        }
3336}
3337
3338static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339{
3340        struct gaudi_device *gaudi = hdev->asic_specific;
3341        u32 nic_mask, nic_offset = 0;
3342        u32 nic_delta_between_qmans =
3343                        mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344        u32 nic_delta_between_nics =
3345                        mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346        int nic_id;
3347
3348        for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349                nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351                if (gaudi->hw_cap_initialized & nic_mask)
3352                        WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354                nic_offset += nic_delta_between_qmans;
3355                if (nic_id & 1) {
3356                        nic_offset -= (nic_delta_between_qmans * 2);
3357                        nic_offset += nic_delta_between_nics;
3358                }
3359        }
3360}
3361
3362static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363{
3364        struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367                return;
3368
3369        /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370        WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371        WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372        WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373}
3374
3375static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376{
3377        struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380                return;
3381
3382        /* Stop CPs of HBM DMA QMANs */
3383
3384        WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385        WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386        WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387        WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388        WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389}
3390
3391static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392{
3393        struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396                return;
3397
3398        /* Stop CPs of MME QMANs */
3399        WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400        WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401}
3402
3403static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404{
3405        struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408                return;
3409
3410        WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411        WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412        WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413        WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414        WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415        WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416        WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417        WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418}
3419
3420static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421{
3422        struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424        /* Stop upper CPs of QMANs */
3425
3426        if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427                WREG32(mmNIC0_QM0_GLBL_CFG1,
3428                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432        if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433                WREG32(mmNIC0_QM1_GLBL_CFG1,
3434                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438        if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439                WREG32(mmNIC1_QM0_GLBL_CFG1,
3440                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444        if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445                WREG32(mmNIC1_QM1_GLBL_CFG1,
3446                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450        if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451                WREG32(mmNIC2_QM0_GLBL_CFG1,
3452                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456        if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457                WREG32(mmNIC2_QM1_GLBL_CFG1,
3458                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462        if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463                WREG32(mmNIC3_QM0_GLBL_CFG1,
3464                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468        if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469                WREG32(mmNIC3_QM1_GLBL_CFG1,
3470                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474        if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475                WREG32(mmNIC4_QM0_GLBL_CFG1,
3476                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480        if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481                WREG32(mmNIC4_QM1_GLBL_CFG1,
3482                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485}
3486
3487static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488{
3489        struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492                return;
3493
3494        WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495        WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496        WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497}
3498
3499static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500{
3501        struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504                return;
3505
3506        WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507        WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508        WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509        WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510        WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511}
3512
3513static void gaudi_mme_stall(struct hl_device *hdev)
3514{
3515        struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518                return;
3519
3520        /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537}
3538
3539static void gaudi_tpc_stall(struct hl_device *hdev)
3540{
3541        struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544                return;
3545
3546        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554}
3555
3556static void gaudi_set_clock_gating(struct hl_device *hdev)
3557{
3558        struct gaudi_device *gaudi = hdev->asic_specific;
3559        u32 qman_offset;
3560        bool enable;
3561        int i;
3562
3563        /* If we are in a debug session, don't enable clock gating as it
3564         * may interfere
3565         */
3566        if (hdev->in_debug)
3567                return;
3568
3569        if (hdev->asic_prop.fw_security_enabled)
3570                return;
3571
3572        for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3573                enable = !!(hdev->clock_gating_mask &
3574                                (BIT_ULL(gaudi_dma_assignment[i])));
3575
3576                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3577                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3578                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3579                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3580                                enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3581        }
3582
3583        for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3584                enable = !!(hdev->clock_gating_mask &
3585                                (BIT_ULL(gaudi_dma_assignment[i])));
3586
3587                /* GC sends work to the DMA engine through the upper CP in
3588                 * DMA5, so we must not enable clock gating for that DMA
3589                 */
3590                if (i == GAUDI_HBM_DMA_4)
3591                        enable = 0;
3592
3593                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3594                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3595                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3596                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3597                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3598        }
3599
3600        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3601        WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3602        WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3603
3604        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3605        WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3606        WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3607
3608        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3609                enable = !!(hdev->clock_gating_mask &
3610                                (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3611
3612                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3613                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3614                WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3615                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3616
3617                qman_offset += TPC_QMAN_OFFSET;
3618        }
3619
3620        gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3621}
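/*
 * Clock gating above is controlled per engine by hdev->clock_gating_mask:
 * a set bit enables gating for that engine's QMAN. The PCI DMA QMANs use
 * the upper-CP gating value while the HBM DMA, MME and TPC QMANs use the
 * common-CP value, and DMA5 is always left ungated because its upper CP is
 * driven by GC. Nothing is touched during a debug session or when FW
 * security is enabled.
 */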
3622
3623static void gaudi_disable_clock_gating(struct hl_device *hdev)
3624{
3625        struct gaudi_device *gaudi = hdev->asic_specific;
3626        u32 qman_offset;
3627        int i;
3628
3629        if (hdev->asic_prop.fw_security_enabled)
3630                return;
3631
3632        for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3633                WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3634                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3635
3636                qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3637        }
3638
3639        WREG32(mmMME0_QM_CGM_CFG, 0);
3640        WREG32(mmMME0_QM_CGM_CFG1, 0);
3641        WREG32(mmMME2_QM_CGM_CFG, 0);
3642        WREG32(mmMME2_QM_CGM_CFG1, 0);
3643
3644        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3645                WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3646                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3647
3648                qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3649        }
3650
3651        gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3652}
3653
3654static void gaudi_enable_timestamp(struct hl_device *hdev)
3655{
3656        /* Disable the timestamp counter */
3657        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3658
3659        /* Zero the lower/upper parts of the 64-bit counter */
3660        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3661        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3662
3663        /* Enable the counter */
3664        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3665}
3666
3667static void gaudi_disable_timestamp(struct hl_device *hdev)
3668{
3669        /* Disable the timestamp counter */
3670        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3671}
3672
3673static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3674{
3675        u32 wait_timeout_ms;
3676
3677        dev_info(hdev->dev,
3678                "Halting compute engines and disabling interrupts\n");
3679
3680        if (hdev->pldm)
3681                wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3682        else
3683                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3684
3685        gaudi_stop_nic_qmans(hdev);
3686        gaudi_stop_mme_qmans(hdev);
3687        gaudi_stop_tpc_qmans(hdev);
3688        gaudi_stop_hbm_dma_qmans(hdev);
3689        gaudi_stop_pci_dma_qmans(hdev);
3690
3691        hdev->asic_funcs->disable_clock_gating(hdev);
3692
3693        msleep(wait_timeout_ms);
3694
3695        gaudi_pci_dma_stall(hdev);
3696        gaudi_hbm_dma_stall(hdev);
3697        gaudi_tpc_stall(hdev);
3698        gaudi_mme_stall(hdev);
3699
3700        msleep(wait_timeout_ms);
3701
3702        gaudi_disable_nic_qmans(hdev);
3703        gaudi_disable_mme_qmans(hdev);
3704        gaudi_disable_tpc_qmans(hdev);
3705        gaudi_disable_hbm_dma_qmans(hdev);
3706        gaudi_disable_pci_dma_qmans(hdev);
3707
3708        gaudi_disable_timestamp(hdev);
3709
3710        gaudi_disable_msi(hdev);
3711}
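/*
 * The halt sequence above is ordered: first the QMAN upper CPs are stopped
 * so no new work is fetched, clock gating is disabled, and after a settle
 * period the engine cores themselves are stalled. Only then are the QMANs
 * fully disabled, the timestamp counter stopped and MSI interrupts turned
 * off. The settle period is much longer on Palladium (pldm) than on real
 * silicon.
 */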
3712
3713static int gaudi_mmu_init(struct hl_device *hdev)
3714{
3715        struct asic_fixed_properties *prop = &hdev->asic_prop;
3716        struct gaudi_device *gaudi = hdev->asic_specific;
3717        u64 hop0_addr;
3718        int rc, i;
3719
3720        if (!hdev->mmu_enable)
3721                return 0;
3722
3723        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3724                return 0;
3725
3726        for (i = 0 ; i < prop->max_asid ; i++) {
3727                hop0_addr = prop->mmu_pgt_addr +
3728                                (i * prop->mmu_hop_table_size);
3729
3730                rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3731                if (rc) {
3732                        dev_err(hdev->dev,
3733                                "failed to set hop0 addr for asid %d\n", i);
3734                        goto err;
3735                }
3736        }
3737
3738        /* Init the MMU cache management page */
3739        WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3740        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3741
3742        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3743
3744        WREG32(mmMMU_UP_MMU_ENABLE, 1);
3745        WREG32(mmMMU_UP_SPI_MASK, 0xF);
3746
3747        WREG32(mmSTLB_HOP_CONFIGURATION,
3748                        hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3749
3750        /*
3751         * The H/W expects the first PI after init to be 1. After wraparound
3752         * we'll write 0.
3753         */
3754        gaudi->mmu_cache_inv_pi = 1;
3755
3756        gaudi->hw_cap_initialized |= HW_CAP_MMU;
3757
3758        return 0;
3759
3760err:
3761        return rc;
3762}
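/*
 * Hop-0 layout used above: the page tables start at prop->mmu_pgt_addr and
 * each ASID owns one hop table, so e.g. ASID 3 gets
 * mmu_pgt_addr + 3 * mmu_hop_table_size. The cache management base is split
 * across the two STLB registers as bits 39:8 and 49:40 of
 * MMU_CACHE_MNG_ADDR, and the first cache-invalidation PI after init must
 * be 1, which is why mmu_cache_inv_pi is seeded with 1 here.
 */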
3763
3764static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3765{
3766        void __iomem *dst;
3767
3768        dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3769
3770        return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3771}
3772
3773static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3774{
3775        void __iomem *dst;
3776
3777        dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3778
3779        return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3780}
3781
3782static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3783{
3784        struct dynamic_fw_load_mgr *dynamic_loader;
3785        struct cpu_dyn_regs *dyn_regs;
3786
3787        dynamic_loader = &hdev->fw_loader.dynamic_loader;
3788
3789        /*
3790         * Here we set initial values for a few specific dynamic registers,
3791         * because before the first descriptor is read from the FW these
3792         * values have to be hard-coded. In later stages of the protocol
3793         * these values are updated automatically by reading the FW
3794         * descriptor, so the data there is always up-to-date.
3795         */
3796        dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3797        dyn_regs->kmd_msg_to_cpu =
3798                                cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3799        dyn_regs->cpu_cmd_status_to_host =
3800                                cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3801
3802        dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3803}
3804
3805static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3806{
3807        struct static_fw_load_mgr *static_loader;
3808
3809        static_loader = &hdev->fw_loader.static_loader;
3810
3811        static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3812        static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3813        static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3814        static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3815        static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3816        static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3817        static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3818        static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3819        static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3820        static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3821        static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3822        static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3823        static_loader->cpu_reset_wait_msec = hdev->pldm ?
3824                        GAUDI_PLDM_RESET_WAIT_MSEC :
3825                        GAUDI_CPU_RESET_WAIT_MSEC;
3826}
3827
3828static void gaudi_init_firmware_loader(struct hl_device *hdev)
3829{
3830        struct asic_fixed_properties *prop = &hdev->asic_prop;
3831        struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3832
3833        /* fill common fields */
3834        fw_loader->linux_loaded = false;
3835        fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3836        fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3837        fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3838        fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3839        fw_loader->skip_bmc = !hdev->bmc_enable;
3840        fw_loader->sram_bar_id = SRAM_BAR_ID;
3841        fw_loader->dram_bar_id = HBM_BAR_ID;
3842
3843        if (prop->dynamic_fw_load)
3844                gaudi_init_dynamic_firmware_loader(hdev);
3845        else
3846                gaudi_init_static_firmware_loader(hdev);
3847}
3848
3849static int gaudi_init_cpu(struct hl_device *hdev)
3850{
3851        struct gaudi_device *gaudi = hdev->asic_specific;
3852        int rc;
3853
3854        if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3855                return 0;
3856
3857        if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3858                return 0;
3859
3860        /*
3861         * The device CPU works with 40-bit addresses, and this register
3862         * extends them to 50 bits.
3863         */
3864        if (!hdev->asic_prop.fw_security_enabled)
3865                WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3866
3867        rc = hl_fw_init_cpu(hdev);
3868
3869        if (rc)
3870                return rc;
3871
3872        gaudi->hw_cap_initialized |= HW_CAP_CPU;
3873
3874        return 0;
3875}
3876
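/*
 * Hand the CPU PQ, event queue and CQ base addresses and sizes to the device
 * CPU, notify it through the GIC (or the dynamic-regs host PI update
 * register) and poll mmCPU_IF_QUEUE_INIT until the CPU-CP reports
 * PQ_INIT_STATUS_READY_FOR_HOST or the given timeout expires. On success,
 * the F/W application security bits are latched into the ASIC properties.
 */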
3877static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3878{
3879        struct cpu_dyn_regs *dyn_regs =
3880                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3881        struct asic_fixed_properties *prop = &hdev->asic_prop;
3882        struct gaudi_device *gaudi = hdev->asic_specific;
3883        u32 status, irq_handler_offset;
3884        struct hl_eq *eq;
3885        struct hl_hw_queue *cpu_pq =
3886                        &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3887        int err;
3888
3889        if (!hdev->cpu_queues_enable)
3890                return 0;
3891
3892        if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3893                return 0;
3894
3895        eq = &hdev->event_queue;
3896
3897        WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3898        WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3899
3900        WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3901        WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3902
3903        WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3904                        lower_32_bits(hdev->cpu_accessible_dma_address));
3905        WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3906                        upper_32_bits(hdev->cpu_accessible_dma_address));
3907
3908        WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3909        WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3910        WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3911
3912        /* Used for EQ CI */
3913        WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3914
3915        WREG32(mmCPU_IF_PF_PQ_PI, 0);
3916
3917        if (gaudi->multi_msi_mode)
3918                WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3919        else
3920                WREG32(mmCPU_IF_QUEUE_INIT,
3921                        PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3922
3923        irq_handler_offset = prop->gic_interrupts_enable ?
3924                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3925                        le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3926
3927        WREG32(irq_handler_offset,
3928                gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3929
3930        err = hl_poll_timeout(
3931                hdev,
3932                mmCPU_IF_QUEUE_INIT,
3933                status,
3934                (status == PQ_INIT_STATUS_READY_FOR_HOST),
3935                1000,
3936                cpu_timeout);
3937
3938        if (err) {
3939                dev_err(hdev->dev,
3940                        "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3941                return -EIO;
3942        }
3943
3944        /* update FW application security bits */
3945        if (prop->fw_cpu_boot_dev_sts0_valid)
3946                prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3947        if (prop->fw_cpu_boot_dev_sts1_valid)
3948                prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3949
3950        gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3951        return 0;
3952}
3953
3954static void gaudi_pre_hw_init(struct hl_device *hdev)
3955{
3956        /* Perform read from the device to make sure device is up */
3957        RREG32(mmHW_STATE);
3958
3959        if (!hdev->asic_prop.fw_security_enabled) {
3960                /* Set the access through PCI bars (Linux driver only) as
3961                 * secured
3962                 */
3963                WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3964                                (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3965                                PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3966
3967                /* Perform read to flush the waiting writes to ensure
3968                 * configuration was set in the device
3969                 */
3970                RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3971        }
3972
3973        /*
3974         * Let's mark in the H/W that we have reached this point. We check
3975         * this value in the reset_before_init function to understand whether
3976         * we need to reset the chip before doing H/W init. This register is
3977         * cleared by the H/W upon H/W reset
3978         */
3979        WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3980}
3981
3982static int gaudi_hw_init(struct hl_device *hdev)
3983{
3984        struct gaudi_device *gaudi = hdev->asic_specific;
3985        int rc;
3986
3987        gaudi_pre_hw_init(hdev);
3988
3989        /* If the iATU is configured by the FW, the HBM BAR ALWAYS points to
3990         * DRAM_PHYS_BASE. Set it here so that any later attempt to move it
3991         * to a different address will result in an error.
3992         */
3993        if (hdev->asic_prop.iatu_done_by_fw)
3994                gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3995
3996        /*
3997         * Before pushing u-boot/Linux to the device, we need to set the HBM
3998         * BAR to the base address of the DRAM.
3999         */
4000        if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4001                dev_err(hdev->dev,
4002                        "failed to map HBM bar to DRAM base address\n");
4003                return -EIO;
4004        }
4005
4006        rc = gaudi_init_cpu(hdev);
4007        if (rc) {
4008                dev_err(hdev->dev, "failed to initialize CPU\n");
4009                return rc;
4010        }
4011
4012        /* In case clock gating was enabled in preboot, we need to disable it
4013         * here before touching the MME/TPC registers.
4014         * There is no need to take the clock gating mutex because no other
4015         * relevant code can run while this function runs.
4016         */
4017        hdev->asic_funcs->disable_clock_gating(hdev);
4018
4019        /* SRAM scrambler must be initialized after CPU is running from HBM */
4020        gaudi_init_scrambler_sram(hdev);
4021
4022        /* This is here just in case we are working without CPU */
4023        gaudi_init_scrambler_hbm(hdev);
4024
4025        gaudi_init_golden_registers(hdev);
4026
4027        rc = gaudi_mmu_init(hdev);
4028        if (rc)
4029                return rc;
4030
4031        gaudi_init_security(hdev);
4032
4033        gaudi_init_pci_dma_qmans(hdev);
4034
4035        gaudi_init_hbm_dma_qmans(hdev);
4036
4037        gaudi_init_mme_qmans(hdev);
4038
4039        gaudi_init_tpc_qmans(hdev);
4040
4041        gaudi_init_nic_qmans(hdev);
4042
4043        hdev->asic_funcs->set_clock_gating(hdev);
4044
4045        gaudi_enable_timestamp(hdev);
4046
4047        /* MSI must be enabled before CPU queues and NIC are initialized */
4048        rc = gaudi_enable_msi(hdev);
4049        if (rc)
4050                goto disable_queues;
4051
4052        /* must be called after MSI was enabled */
4053        rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4054        if (rc) {
4055                dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4056                        rc);
4057                goto disable_msi;
4058        }
4059
4060        /* Perform read from the device to flush all configuration */
4061        RREG32(mmHW_STATE);
4062
4063        return 0;
4064
4065disable_msi:
4066        gaudi_disable_msi(hdev);
4067disable_queues:
4068        gaudi_disable_mme_qmans(hdev);
4069        gaudi_disable_pci_dma_qmans(hdev);
4070
4071        return rc;
4072}
4073
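/*
 * Hard-reset flow: halt the device CPU (via the GIC when Linux is loaded,
 * otherwise through the F/W messaging path), then either let the F/W perform
 * the reset or, when the driver owns it, configure the SW_ALL_RST registers
 * and assert the reset itself. Finally wait reset_timeout_ms, check the boot
 * FSM state to verify the reset completed and clear the relevant hw_cap bits.
 */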
4074static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4075{
4076        struct cpu_dyn_regs *dyn_regs =
4077                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4078        u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4079        struct gaudi_device *gaudi = hdev->asic_specific;
4080        bool driver_performs_reset;
4081
4082        if (!hard_reset) {
4083                dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4084                return;
4085        }
4086
4087        if (hdev->pldm) {
4088                reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4089                cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4090        } else {
4091                reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4092                cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4093        }
4094
4095        driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4096                                        !hdev->asic_prop.hard_reset_done_by_fw);
4097
4098        /* Set device to handle FLR by H/W as we will put the device CPU to
4099         * halt mode
4100         */
4101        if (driver_performs_reset)
4102                WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4103                                        PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4104
4105        /* If Linux is loaded on the device CPU we need to communicate with it
4106         * via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU registers
4107         * in case of old F/Ws.
4108         */
4109        if (hdev->fw_loader.linux_loaded) {
4110                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4111                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4112                                le32_to_cpu(dyn_regs->gic_host_halt_irq);
4113
4114                WREG32(irq_handler_offset,
4115                        gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4116        } else {
4117                if (hdev->asic_prop.hard_reset_done_by_fw)
4118                        hl_fw_ask_hard_reset_without_linux(hdev);
4119                else
4120                        hl_fw_ask_halt_machine_without_linux(hdev);
4121        }
4122
4123        if (driver_performs_reset) {
4124
4125                /* Configure the reset registers. Must be done as early as
4126                 * possible in case we fail during H/W initialization
4127                 */
4128                WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4129                                                (CFG_RST_H_DMA_MASK |
4130                                                CFG_RST_H_MME_MASK |
4131                                                CFG_RST_H_SM_MASK |
4132                                                CFG_RST_H_TPC_7_MASK));
4133
4134                WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4135
4136                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4137                                                (CFG_RST_H_HBM_MASK |
4138                                                CFG_RST_H_TPC_7_MASK |
4139                                                CFG_RST_H_NIC_MASK |
4140                                                CFG_RST_H_SM_MASK |
4141                                                CFG_RST_H_DMA_MASK |
4142                                                CFG_RST_H_MME_MASK |
4143                                                CFG_RST_H_CPU_MASK |
4144                                                CFG_RST_H_MMU_MASK));
4145
4146                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4147                                                (CFG_RST_L_IF_MASK |
4148                                                CFG_RST_L_PSOC_MASK |
4149                                                CFG_RST_L_TPC_MASK));
4150
4151                msleep(cpu_timeout_ms);
4152
4153                /* Tell ASIC not to re-initialize PCIe */
4154                WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4155
4156                /* Restart BTL/BLR upon hard-reset */
4157                WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4158
4159                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4160                        1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4161
4162                dev_info(hdev->dev,
4163                        "Issued HARD reset command, going to wait %dms\n",
4164                        reset_timeout_ms);
4165        } else {
4166                dev_info(hdev->dev,
4167                        "Firmware performs HARD reset, going to wait %dms\n",
4168                        reset_timeout_ms);
4169        }
4170
4171        /*
4172         * After a hard reset, we can't poll the BTM_FSM register because the
4173         * PSOC itself is in reset. We need to wait until it is deasserted.
4174         */
4175        msleep(reset_timeout_ms);
4176
4177        status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4178        if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4179                dev_err(hdev->dev,
4180                        "Timeout while waiting for device to reset 0x%x\n",
4181                        status);
4182
4183        if (gaudi) {
4184                gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4185                                HW_CAP_HBM | HW_CAP_PCI_DMA |
4186                                HW_CAP_MME | HW_CAP_TPC_MASK |
4187                                HW_CAP_HBM_DMA | HW_CAP_PLL |
4188                                HW_CAP_NIC_MASK | HW_CAP_MMU |
4189                                HW_CAP_SRAM_SCRAMBLER |
4190                                HW_CAP_HBM_SCRAMBLER |
4191                                HW_CAP_CLK_GATE);
4192
4193                memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4194
4195                hdev->device_cpu_is_halted = false;
4196        }
4197}
4198
4199static int gaudi_suspend(struct hl_device *hdev)
4200{
4201        int rc;
4202
4203        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4204        if (rc)
4205                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4206
4207        return rc;
4208}
4209
4210static int gaudi_resume(struct hl_device *hdev)
4211{
4212        return gaudi_init_iatu(hdev);
4213}
4214
4215static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4216                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
4217{
4218        int rc;
4219
4220        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4221                        VM_DONTCOPY | VM_NORESERVE;
4222
4223        rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4224                                (dma_addr - HOST_PHYS_BASE), size);
4225        if (rc)
4226                dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4227
4228        return rc;
4229}
4230
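/*
 * Translate the logical hw_queue_id into the matching QMAN PQ_PI doorbell
 * register and write the new producer index to it. For the CPU PQ, also
 * notify the device CPU (through the GIC or the dynamic-regs PI update
 * register) that the PI was updated.
 */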
4231static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4232{
4233        struct cpu_dyn_regs *dyn_regs =
4234                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4235        u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4236        struct gaudi_device *gaudi = hdev->asic_specific;
4237        bool invalid_queue = false;
4238        int dma_id;
4239
4240        switch (hw_queue_id) {
4241        case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4242                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4243                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4244                q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4245                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4246                break;
4247
4248        case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4249                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4250                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4251                q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4252                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4253                break;
4254
4255        case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4256                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4257                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4258                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4259                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4260                break;
4261
4262        case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4263                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4264                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4265                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4266                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4267                break;
4268
4269        case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4270                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4271                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4273                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274                break;
4275
4276        case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4277                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4278                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4280                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281                break;
4282
4283        case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4284                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4285                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288                break;
4289
4290        case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4291                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4292                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295                break;
4296
4297        case GAUDI_QUEUE_ID_CPU_PQ:
4298                if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4299                        db_reg_offset = mmCPU_IF_PF_PQ_PI;
4300                else
4301                        invalid_queue = true;
4302                break;
4303
4304        case GAUDI_QUEUE_ID_MME_0_0:
4305                db_reg_offset = mmMME2_QM_PQ_PI_0;
4306                break;
4307
4308        case GAUDI_QUEUE_ID_MME_0_1:
4309                db_reg_offset = mmMME2_QM_PQ_PI_1;
4310                break;
4311
4312        case GAUDI_QUEUE_ID_MME_0_2:
4313                db_reg_offset = mmMME2_QM_PQ_PI_2;
4314                break;
4315
4316        case GAUDI_QUEUE_ID_MME_0_3:
4317                db_reg_offset = mmMME2_QM_PQ_PI_3;
4318                break;
4319
4320        case GAUDI_QUEUE_ID_MME_1_0:
4321                db_reg_offset = mmMME0_QM_PQ_PI_0;
4322                break;
4323
4324        case GAUDI_QUEUE_ID_MME_1_1:
4325                db_reg_offset = mmMME0_QM_PQ_PI_1;
4326                break;
4327
4328        case GAUDI_QUEUE_ID_MME_1_2:
4329                db_reg_offset = mmMME0_QM_PQ_PI_2;
4330                break;
4331
4332        case GAUDI_QUEUE_ID_MME_1_3:
4333                db_reg_offset = mmMME0_QM_PQ_PI_3;
4334                break;
4335
4336        case GAUDI_QUEUE_ID_TPC_0_0:
4337                db_reg_offset = mmTPC0_QM_PQ_PI_0;
4338                break;
4339
4340        case GAUDI_QUEUE_ID_TPC_0_1:
4341                db_reg_offset = mmTPC0_QM_PQ_PI_1;
4342                break;
4343
4344        case GAUDI_QUEUE_ID_TPC_0_2:
4345                db_reg_offset = mmTPC0_QM_PQ_PI_2;
4346                break;
4347
4348        case GAUDI_QUEUE_ID_TPC_0_3:
4349                db_reg_offset = mmTPC0_QM_PQ_PI_3;
4350                break;
4351
4352        case GAUDI_QUEUE_ID_TPC_1_0:
4353                db_reg_offset = mmTPC1_QM_PQ_PI_0;
4354                break;
4355
4356        case GAUDI_QUEUE_ID_TPC_1_1:
4357                db_reg_offset = mmTPC1_QM_PQ_PI_1;
4358                break;
4359
4360        case GAUDI_QUEUE_ID_TPC_1_2:
4361                db_reg_offset = mmTPC1_QM_PQ_PI_2;
4362                break;
4363
4364        case GAUDI_QUEUE_ID_TPC_1_3:
4365                db_reg_offset = mmTPC1_QM_PQ_PI_3;
4366                break;
4367
4368        case GAUDI_QUEUE_ID_TPC_2_0:
4369                db_reg_offset = mmTPC2_QM_PQ_PI_0;
4370                break;
4371
4372        case GAUDI_QUEUE_ID_TPC_2_1:
4373                db_reg_offset = mmTPC2_QM_PQ_PI_1;
4374                break;
4375
4376        case GAUDI_QUEUE_ID_TPC_2_2:
4377                db_reg_offset = mmTPC2_QM_PQ_PI_2;
4378                break;
4379
4380        case GAUDI_QUEUE_ID_TPC_2_3:
4381                db_reg_offset = mmTPC2_QM_PQ_PI_3;
4382                break;
4383
4384        case GAUDI_QUEUE_ID_TPC_3_0:
4385                db_reg_offset = mmTPC3_QM_PQ_PI_0;
4386                break;
4387
4388        case GAUDI_QUEUE_ID_TPC_3_1:
4389                db_reg_offset = mmTPC3_QM_PQ_PI_1;
4390                break;
4391
4392        case GAUDI_QUEUE_ID_TPC_3_2:
4393                db_reg_offset = mmTPC3_QM_PQ_PI_2;
4394                break;
4395
4396        case GAUDI_QUEUE_ID_TPC_3_3:
4397                db_reg_offset = mmTPC3_QM_PQ_PI_3;
4398                break;
4399
4400        case GAUDI_QUEUE_ID_TPC_4_0:
4401                db_reg_offset = mmTPC4_QM_PQ_PI_0;
4402                break;
4403
4404        case GAUDI_QUEUE_ID_TPC_4_1:
4405                db_reg_offset = mmTPC4_QM_PQ_PI_1;
4406                break;
4407
4408        case GAUDI_QUEUE_ID_TPC_4_2:
4409                db_reg_offset = mmTPC4_QM_PQ_PI_2;
4410                break;
4411
4412        case GAUDI_QUEUE_ID_TPC_4_3:
4413                db_reg_offset = mmTPC4_QM_PQ_PI_3;
4414                break;
4415
4416        case GAUDI_QUEUE_ID_TPC_5_0:
4417                db_reg_offset = mmTPC5_QM_PQ_PI_0;
4418                break;
4419
4420        case GAUDI_QUEUE_ID_TPC_5_1:
4421                db_reg_offset = mmTPC5_QM_PQ_PI_1;
4422                break;
4423
4424        case GAUDI_QUEUE_ID_TPC_5_2:
4425                db_reg_offset = mmTPC5_QM_PQ_PI_2;
4426                break;
4427
4428        case GAUDI_QUEUE_ID_TPC_5_3:
4429                db_reg_offset = mmTPC5_QM_PQ_PI_3;
4430                break;
4431
4432        case GAUDI_QUEUE_ID_TPC_6_0:
4433                db_reg_offset = mmTPC6_QM_PQ_PI_0;
4434                break;
4435
4436        case GAUDI_QUEUE_ID_TPC_6_1:
4437                db_reg_offset = mmTPC6_QM_PQ_PI_1;
4438                break;
4439
4440        case GAUDI_QUEUE_ID_TPC_6_2:
4441                db_reg_offset = mmTPC6_QM_PQ_PI_2;
4442                break;
4443
4444        case GAUDI_QUEUE_ID_TPC_6_3:
4445                db_reg_offset = mmTPC6_QM_PQ_PI_3;
4446                break;
4447
4448        case GAUDI_QUEUE_ID_TPC_7_0:
4449                db_reg_offset = mmTPC7_QM_PQ_PI_0;
4450                break;
4451
4452        case GAUDI_QUEUE_ID_TPC_7_1:
4453                db_reg_offset = mmTPC7_QM_PQ_PI_1;
4454                break;
4455
4456        case GAUDI_QUEUE_ID_TPC_7_2:
4457                db_reg_offset = mmTPC7_QM_PQ_PI_2;
4458                break;
4459
4460        case GAUDI_QUEUE_ID_TPC_7_3:
4461                db_reg_offset = mmTPC7_QM_PQ_PI_3;
4462                break;
4463
4464        case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4465                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4466                        invalid_queue = true;
4467
4468                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4469                db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4470                break;
4471
4472        case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4473                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4474                        invalid_queue = true;
4475
4476                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4477                db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4478                break;
4479
4480        case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4481                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4482                        invalid_queue = true;
4483
4484                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4485                db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4486                break;
4487
4488        case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4489                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4490                        invalid_queue = true;
4491
4492                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4493                db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4494                break;
4495
4496        case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4497                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4498                        invalid_queue = true;
4499
4500                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4501                db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4502                break;
4503
4504        case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4505                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4506                        invalid_queue = true;
4507
4508                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4509                db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4510                break;
4511
4512        case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4513                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4514                        invalid_queue = true;
4515
4516                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4517                db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4518                break;
4519
4520        case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4521                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4522                        invalid_queue = true;
4523
4524                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4525                db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4526                break;
4527
4528        case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4529                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4530                        invalid_queue = true;
4531
4532                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4533                db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4534                break;
4535
4536        case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4537                if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4538                        invalid_queue = true;
4539
4540                q_off = ((hw_queue_id - 1) & 0x3) * 4;
4541                db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4542                break;
4543
4544        default:
4545                invalid_queue = true;
4546        }
4547
4548        if (invalid_queue) {
4549                /* Should never get here */
4550                dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4551                        hw_queue_id);
4552                return;
4553        }
4554
4555        db_value = pi;
4556
4557        /* ring the doorbell */
4558        WREG32(db_reg_offset, db_value);
4559
4560        if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4561                /* make sure device CPU will read latest data from host */
4562                mb();
4563
4564                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4565                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4566                                le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4567
4568                WREG32(irq_handler_offset,
4569                        gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4570        }
4571}
4572
4573static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4574                                struct hl_bd *bd)
4575{
4576        __le64 *pbd = (__le64 *) bd;
4577
4578        /* The QMANs are in host memory so a simple copy suffices */
4579        pqe[0] = pbd[0];
4580        pqe[1] = pbd[1];
4581}
4582
4583static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4584                                        dma_addr_t *dma_handle, gfp_t flags)
4585{
4586        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4587                                                dma_handle, flags);
4588
4589        /* Shift to the device's base physical address of host memory */
4590        if (kernel_addr)
4591                *dma_handle += HOST_PHYS_BASE;
4592
4593        return kernel_addr;
4594}
4595
4596static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4597                void *cpu_addr, dma_addr_t dma_handle)
4598{
4599        /* Cancel the device's base physical address of host memory */
4600        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4601
4602        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4603}
4604
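/*
 * Scrub the user portion of the HBM by programming all DMA core channels in
 * memset mode, each clearing a chunk of up to 2GB, then polling every
 * channel's status register until it is no longer busy.
 */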
4605static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4606{
4607        struct asic_fixed_properties *prop = &hdev->asic_prop;
4608        u64  cur_addr = DRAM_BASE_ADDR_USER;
4609        u32 val;
4610        u32 chunk_size;
4611        int rc, dma_id;
4612
4613        while (cur_addr < prop->dram_end_address) {
4614                for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4615                        u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4616
4617                        chunk_size =
4618                        min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4619
4620                        dev_dbg(hdev->dev,
4621                                "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4622                                cur_addr, cur_addr + chunk_size);
4623
4624                        WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4625                        WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4626                        WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4627                                                lower_32_bits(cur_addr));
4628                        WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4629                                                upper_32_bits(cur_addr));
4630                        WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4631                                        chunk_size);
4632                        WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4633                                        ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4634                                        (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4635
4636                        cur_addr += chunk_size;
4637
4638                        if (cur_addr == prop->dram_end_address)
4639                                break;
4640                }
4641
4642                for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4643                        u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4644
4645                        rc = hl_poll_timeout(
4646                                hdev,
4647                                mmDMA0_CORE_STS0 + dma_offset,
4648                                val,
4649                                ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4650                                1000,
4651                                HBM_SCRUBBING_TIMEOUT_US);
4652
4653                        if (rc) {
4654                                dev_err(hdev->dev,
4655                                        "DMA Timeout during HBM scrubbing of DMA #%d\n",
4656                                        dma_id);
4657                                return -EIO;
4658                        }
4659                }
4660        }
4661
4662        return 0;
4663}
4664
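/*
 * When called with addr == 0 and size == 0, scrub all device memory: wait
 * until the device is idle, fill the user SRAM with a fixed pattern and then
 * clear the HBM using all DMA channels in parallel. A specific address range
 * is currently ignored and treated as a no-op.
 */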
4665static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4666{
4667        struct asic_fixed_properties *prop = &hdev->asic_prop;
4668        struct gaudi_device *gaudi = hdev->asic_specific;
4669        int rc = 0;
4670        u64 val = 0;
4671
4672        if (!hdev->memory_scrub)
4673                return 0;
4674
4675        if (!addr && !size) {
4676                /* Wait till device is idle */
4677                rc = hl_poll_timeout(
4678                                hdev,
4679                                mmDMA0_CORE_STS0/* dummy */,
4680                                val/* dummy */,
4681                                (hdev->asic_funcs->is_device_idle(hdev, NULL,
4682                                                0, NULL)),
4683                                                1000,
4684                                                HBM_SCRUBBING_TIMEOUT_US);
4685                if (rc) {
4686                        dev_err(hdev->dev, "waiting for idle timeout\n");
4687                        return -EIO;
4688                }
4689
4690                /* Scrub SRAM */
4691                addr = prop->sram_user_base_address;
4692                size = hdev->pldm ? 0x10000 :
4693                                (prop->sram_size - SRAM_USER_BASE_OFFSET);
4694                val = 0x7777777777777777ull;
4695
4696                rc = gaudi_memset_device_memory(hdev, addr, size, val);
4697                if (rc) {
4698                        dev_err(hdev->dev,
4699                                "Failed to clear SRAM in mem scrub all\n");
4700                        return rc;
4701                }
4702
4703                mutex_lock(&gaudi->clk_gate_mutex);
4704                hdev->asic_funcs->disable_clock_gating(hdev);
4705
4706                /* Scrub HBM using all DMA channels in parallel */
4707                rc = gaudi_hbm_scrubbing(hdev);
4708                if (rc)
4709                        dev_err(hdev->dev,
4710                                "Failed to clear HBM in mem scrub all\n");
4711
4712                hdev->asic_funcs->set_clock_gating(hdev);
4713                mutex_unlock(&gaudi->clk_gate_mutex);
4714        }
4715
4716        return rc;
4717}
4718
4719static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4720                                u32 queue_id, dma_addr_t *dma_handle,
4721                                u16 *queue_len)
4722{
4723        struct gaudi_device *gaudi = hdev->asic_specific;
4724        struct gaudi_internal_qman_info *q;
4725
4726        if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4727                        gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4728                dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4729                return NULL;
4730        }
4731
4732        q = &gaudi->internal_qmans[queue_id];
4733        *dma_handle = q->pq_dma_addr;
4734        *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4735
4736        return q->pq_kernel_addr;
4737}
4738
4739static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4740                                u16 len, u32 timeout, u64 *result)
4741{
4742        struct gaudi_device *gaudi = hdev->asic_specific;
4743
4744        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4745                if (result)
4746                        *result = 0;
4747                return 0;
4748        }
4749
4750        if (!timeout)
4751                timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4752
4753        return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4754                                                timeout, result);
4755}
4756
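/*
 * Sanity-test an external H/W queue: submit a single MSG_PROT packet that
 * writes a known fence value to a host buffer allocated from the DMA pool,
 * then poll that buffer until the value appears or the timeout expires.
 */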
4757static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4758{
4759        struct packet_msg_prot *fence_pkt;
4760        dma_addr_t pkt_dma_addr;
4761        u32 fence_val, tmp, timeout_usec;
4762        dma_addr_t fence_dma_addr;
4763        u32 *fence_ptr;
4764        int rc;
4765
4766        if (hdev->pldm)
4767                timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4768        else
4769                timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4770
4771        fence_val = GAUDI_QMAN0_FENCE_VAL;
4772
4773        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4774                                                        &fence_dma_addr);
4775        if (!fence_ptr) {
4776                dev_err(hdev->dev,
4777                        "Failed to allocate memory for H/W queue %d testing\n",
4778                        hw_queue_id);
4779                return -ENOMEM;
4780        }
4781
4782        *fence_ptr = 0;
4783
4784        fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4785                                        sizeof(struct packet_msg_prot),
4786                                        GFP_KERNEL, &pkt_dma_addr);
4787        if (!fence_pkt) {
4788                dev_err(hdev->dev,
4789                        "Failed to allocate packet for H/W queue %d testing\n",
4790                        hw_queue_id);
4791                rc = -ENOMEM;
4792                goto free_fence_ptr;
4793        }
4794
4795        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4796        tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4797        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4798
4799        fence_pkt->ctl = cpu_to_le32(tmp);
4800        fence_pkt->value = cpu_to_le32(fence_val);
4801        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4802
4803        rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4804                                        sizeof(struct packet_msg_prot),
4805                                        pkt_dma_addr);
4806        if (rc) {
4807                dev_err(hdev->dev,
4808                        "Failed to send fence packet to H/W queue %d\n",
4809                        hw_queue_id);
4810                goto free_pkt;
4811        }
4812
4813        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4814                                        1000, timeout_usec, true);
4815
4816        hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4817
4818        if (rc == -ETIMEDOUT) {
4819                dev_err(hdev->dev,
4820                        "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4821                        hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4822                rc = -EIO;
4823        }
4824
4825free_pkt:
4826        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4827                                        pkt_dma_addr);
4828free_fence_ptr:
4829        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4830                                        fence_dma_addr);
4831        return rc;
4832}
4833
4834static int gaudi_test_cpu_queue(struct hl_device *hdev)
4835{
4836        struct gaudi_device *gaudi = hdev->asic_specific;
4837
4838        /*
4839         * Check the capability here because send_cpu_message() won't update
4840         * the result value if the capability is not set.
4841         */
4842        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4843                return 0;
4844
4845        return hl_fw_test_cpu_queue(hdev);
4846}
4847
4848static int gaudi_test_queues(struct hl_device *hdev)
4849{
4850        int i, rc, ret_val = 0;
4851
4852        for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4853                if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4854                        rc = gaudi_test_queue(hdev, i);
4855                        if (rc)
4856                                ret_val = -EINVAL;
4857                }
4858        }
4859
4860        rc = gaudi_test_cpu_queue(hdev);
4861        if (rc)
4862                ret_val = -EINVAL;
4863
4864        return ret_val;
4865}
4866
4867static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4868                gfp_t mem_flags, dma_addr_t *dma_handle)
4869{
4870        void *kernel_addr;
4871
4872        if (size > GAUDI_DMA_POOL_BLK_SIZE)
4873                return NULL;
4874
4875        kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4876
4877        /* Shift to the device's base physical address of host memory */
4878        if (kernel_addr)
4879                *dma_handle += HOST_PHYS_BASE;
4880
4881        return kernel_addr;
4882}
4883
4884static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4885                        dma_addr_t dma_addr)
4886{
4887        /* Cancel the device's base physical address of host memory */
4888        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4889
4890        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4891}
4892
4893static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4894                                        size_t size, dma_addr_t *dma_handle)
4895{
4896        return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4897}
4898
4899static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4900                                                size_t size, void *vaddr)
4901{
4902        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4903}
4904
4905static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4906                        int nents, enum dma_data_direction dir)
4907{
4908        struct scatterlist *sg;
4909        int i;
4910
4911        if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4912                return -ENOMEM;
4913
4914        /* Shift to the device's base physical address of host memory */
4915        for_each_sg(sgl, sg, nents, i)
4916                sg->dma_address += HOST_PHYS_BASE;
4917
4918        return 0;
4919}
4920
4921static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4922                        int nents, enum dma_data_direction dir)
4923{
4924        struct scatterlist *sg;
4925        int i;
4926
4927        /* Cancel the device's base physical address of host memory */
4928        for_each_sg(sgl, sg, nents, i)
4929                sg->dma_address -= HOST_PHYS_BASE;
4930
4931        dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4932}
4933
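/*
 * Return the number of bytes needed for the LIN_DMA packets that will cover
 * this scatter-gather table. Physically contiguous entries are coalesced as
 * long as the combined length does not exceed DMA_MAX_TRANSFER_SIZE, so one
 * packet is counted per coalesced segment.
 */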
4934static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4935                                        struct sg_table *sgt)
4936{
4937        struct scatterlist *sg, *sg_next_iter;
4938        u32 count, dma_desc_cnt;
4939        u64 len, len_next;
4940        dma_addr_t addr, addr_next;
4941
4942        dma_desc_cnt = 0;
4943
4944        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4945
4946                len = sg_dma_len(sg);
4947                addr = sg_dma_address(sg);
4948
4949                if (len == 0)
4950                        break;
4951
4952                while ((count + 1) < sgt->nents) {
4953                        sg_next_iter = sg_next(sg);
4954                        len_next = sg_dma_len(sg_next_iter);
4955                        addr_next = sg_dma_address(sg_next_iter);
4956
4957                        if (len_next == 0)
4958                                break;
4959
4960                        if ((addr + len == addr_next) &&
4961                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4962                                len += len_next;
4963                                count++;
4964                                sg = sg_next_iter;
4965                        } else {
4966                                break;
4967                        }
4968                }
4969
4970                dma_desc_cnt++;
4971        }
4972
4973        return dma_desc_cnt * sizeof(struct packet_lin_dma);
4974}
4975
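/*
 * Pin the host memory referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map its scatter-gather table and add the
 * space its descriptors will need to the patched CB size.
 */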
4976static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4977                                struct hl_cs_parser *parser,
4978                                struct packet_lin_dma *user_dma_pkt,
4979                                u64 addr, enum dma_data_direction dir)
4980{
4981        struct hl_userptr *userptr;
4982        int rc;
4983
4984        if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4985                        parser->job_userptr_list, &userptr))
4986                goto already_pinned;
4987
4988        userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4989        if (!userptr)
4990                return -ENOMEM;
4991
4992        rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4993                                userptr);
4994        if (rc)
4995                goto free_userptr;
4996
4997        list_add_tail(&userptr->job_node, parser->job_userptr_list);
4998
4999        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5000                                        userptr->sgt->nents, dir);
5001        if (rc) {
5002                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5003                goto unpin_memory;
5004        }
5005
5006        userptr->dma_mapped = true;
5007        userptr->dir = dir;
5008
5009already_pinned:
5010        parser->patched_cb_size +=
5011                        gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5012
5013        return 0;
5014
5015unpin_memory:
5016        list_del(&userptr->job_node);
5017        hl_unpin_host_memory(hdev, userptr);
5018free_userptr:
5019        kfree(userptr);
5020        return rc;
5021}
5022
5023static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5024                                struct hl_cs_parser *parser,
5025                                struct packet_lin_dma *user_dma_pkt,
5026                                bool src_in_host)
5027{
5028        enum dma_data_direction dir;
5029        bool skip_host_mem_pin = false, user_memset;
5030        u64 addr;
5031        int rc = 0;
5032
5033        user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5034                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5035                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5036
5037        if (src_in_host) {
5038                if (user_memset)
5039                        skip_host_mem_pin = true;
5040
5041                dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5042                dir = DMA_TO_DEVICE;
5043                addr = le64_to_cpu(user_dma_pkt->src_addr);
5044        } else {
5045                dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5046                dir = DMA_FROM_DEVICE;
5047                addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5048                                GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5049                                GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5050        }
5051
5052        if (skip_host_mem_pin)
5053                parser->patched_cb_size += sizeof(*user_dma_pkt);
5054        else
5055                rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5056                                                addr, dir);
5057
5058        return rc;
5059}
5060
5061static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5062                                struct hl_cs_parser *parser,
5063                                struct packet_lin_dma *user_dma_pkt)
5064{
5065        bool src_in_host = false;
5066        u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5067                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5068                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5069
5070        dev_dbg(hdev->dev, "DMA packet details:\n");
5071        dev_dbg(hdev->dev, "source == 0x%llx\n",
5072                                le64_to_cpu(user_dma_pkt->src_addr));
5073        dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5074        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5075
5076        /*
5077         * Special handling for DMA with size 0. Bypass all validations
5078         * because no transactions will be done except for WR_COMP, which
5079         * is not a security issue
5080         */
5081        if (!le32_to_cpu(user_dma_pkt->tsize)) {
5082                parser->patched_cb_size += sizeof(*user_dma_pkt);
5083                return 0;
5084        }
5085
5086        if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5087                src_in_host = true;
5088
5089        return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5090                                                src_in_host);
5091}
5092
5093static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5094                                        struct hl_cs_parser *parser,
5095                                        struct packet_load_and_exe *user_pkt)
5096{
5097        u32 cfg;
5098
5099        cfg = le32_to_cpu(user_pkt->cfg);
5100
5101        if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5102                dev_err(hdev->dev,
5103                        "User not allowed to use Load and Execute\n");
5104                return -EPERM;
5105        }
5106
5107        parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5108
5109        return 0;
5110}
5111
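/*
 * First pass over the user CB: walk it packet by packet, reject packets the
 * user is not allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * privileged LOAD_AND_EXE), validate LIN_DMA packets when the MMU is
 * disabled and accumulate the size the patched CB will need, including room
 * for the two trailing MSG_PROT packets when a completion is requested.
 */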
5112static int gaudi_validate_cb(struct hl_device *hdev,
5113                        struct hl_cs_parser *parser, bool is_mmu)
5114{
5115        u32 cb_parsed_length = 0;
5116        int rc = 0;
5117
5118        parser->patched_cb_size = 0;
5119
5120        /* user_cb_size is more than 0 so the loop will always be executed */
5121        while (cb_parsed_length < parser->user_cb_size) {
5122                enum packet_id pkt_id;
5123                u16 pkt_size;
5124                struct gaudi_packet *user_pkt;
5125
5126                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5127
5128                pkt_id = (enum packet_id) (
5129                                (le64_to_cpu(user_pkt->header) &
5130                                PACKET_HEADER_PACKET_ID_MASK) >>
5131                                        PACKET_HEADER_PACKET_ID_SHIFT);
5132
5133                if (!validate_packet_id(pkt_id)) {
5134                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5135                        rc = -EINVAL;
5136                        break;
5137                }
5138
5139                pkt_size = gaudi_packet_sizes[pkt_id];
5140                cb_parsed_length += pkt_size;
5141                if (cb_parsed_length > parser->user_cb_size) {
5142                        dev_err(hdev->dev,
5143                                "packet 0x%x is out of CB boundary\n", pkt_id);
5144                        rc = -EINVAL;
5145                        break;
5146                }
5147
5148                switch (pkt_id) {
5149                case PACKET_MSG_PROT:
5150                        dev_err(hdev->dev,
5151                                "User not allowed to use MSG_PROT\n");
5152                        rc = -EPERM;
5153                        break;
5154
5155                case PACKET_CP_DMA:
5156                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5157                        rc = -EPERM;
5158                        break;
5159
5160                case PACKET_STOP:
5161                        dev_err(hdev->dev, "User not allowed to use STOP\n");
5162                        rc = -EPERM;
5163                        break;
5164
5165                case PACKET_WREG_BULK:
5166                        dev_err(hdev->dev,
5167                                "User not allowed to use WREG_BULK\n");
5168                        rc = -EPERM;
5169                        break;
5170
5171                case PACKET_LOAD_AND_EXE:
5172                        rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5173                                (struct packet_load_and_exe *) user_pkt);
5174                        break;
5175
5176                case PACKET_LIN_DMA:
5177                        parser->contains_dma_pkt = true;
5178                        if (is_mmu)
5179                                parser->patched_cb_size += pkt_size;
5180                        else
5181                                rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5182                                        (struct packet_lin_dma *) user_pkt);
5183                        break;
5184
5185                case PACKET_WREG_32:
5186                case PACKET_MSG_LONG:
5187                case PACKET_MSG_SHORT:
5188                case PACKET_REPEAT:
5189                case PACKET_FENCE:
5190                case PACKET_NOP:
5191                case PACKET_ARB_POINT:
5192                        parser->patched_cb_size += pkt_size;
5193                        break;
5194
5195                default:
5196                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5197                                pkt_id);
5198                        rc = -EINVAL;
5199                        break;
5200                }
5201
5202                if (rc)
5203                        break;
5204        }
5205
5206        /*
5207         * The new CB should have space at the end for two MSG_PROT packets:
5208         * 1. A packet that will act as a completion packet
5209         * 2. A packet that will generate an MSI-X interrupt
5210         */
5211        if (parser->completion)
5212                parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5213
5214        return rc;
5215}
5216
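/*
 * Expand a user LIN_DMA packet that references host memory into one LIN_DMA
 * packet per coalesced scatter-gather segment of the pinned userptr. Only
 * the first generated packet keeps the engine-barrier bit and only the last
 * one keeps the user's WR_COMP setting; a host-to-device memset packet is
 * copied as-is.
 */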
5217static int gaudi_patch_dma_packet(struct hl_device *hdev,
5218                                struct hl_cs_parser *parser,
5219                                struct packet_lin_dma *user_dma_pkt,
5220                                struct packet_lin_dma *new_dma_pkt,
5221                                u32 *new_dma_pkt_size)
5222{
5223        struct hl_userptr *userptr;
5224        struct scatterlist *sg, *sg_next_iter;
5225        u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5226        u64 len, len_next;
5227        dma_addr_t dma_addr, dma_addr_next;
5228        u64 device_memory_addr, addr;
5229        enum dma_data_direction dir;
5230        struct sg_table *sgt;
5231        bool src_in_host = false;
5232        bool skip_host_mem_pin = false;
5233        bool user_memset;
5234
5235        ctl = le32_to_cpu(user_dma_pkt->ctl);
5236
5237        if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5238                src_in_host = true;
5239
5240        user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5241                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5242
5243        if (src_in_host) {
5244                addr = le64_to_cpu(user_dma_pkt->src_addr);
5245                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5246                dir = DMA_TO_DEVICE;
5247                if (user_memset)
5248                        skip_host_mem_pin = true;
5249        } else {
5250                addr = le64_to_cpu(user_dma_pkt->dst_addr);
5251                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5252                dir = DMA_FROM_DEVICE;
5253        }
5254
5255        if ((!skip_host_mem_pin) &&
5256                (!hl_userptr_is_pinned(hdev, addr,
5257                                        le32_to_cpu(user_dma_pkt->tsize),
5258                                        parser->job_userptr_list, &userptr))) {
5259                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5260                                addr, le32_to_cpu(user_dma_pkt->tsize));
5261                return -EFAULT;
5262        }
5263
5264        if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5265                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5266                *new_dma_pkt_size = sizeof(*user_dma_pkt);
5267                return 0;
5268        }
5269
5270        user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5271
5272        sgt = userptr->sgt;
5273        dma_desc_cnt = 0;
5274
5275        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5276                len = sg_dma_len(sg);
5277                dma_addr = sg_dma_address(sg);
5278
5279                if (len == 0)
5280                        break;
5281
5282                while ((count + 1) < sgt->nents) {
5283                        sg_next_iter = sg_next(sg);
5284                        len_next = sg_dma_len(sg_next_iter);
5285                        dma_addr_next = sg_dma_address(sg_next_iter);
5286
5287                        if (len_next == 0)
5288                                break;
5289
5290                        if ((dma_addr + len == dma_addr_next) &&
5291                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5292                                len += len_next;
5293                                count++;
5294                                sg = sg_next_iter;
5295                        } else {
5296                                break;
5297                        }
5298                }
5299
5300                ctl = le32_to_cpu(user_dma_pkt->ctl);
5301                if (likely(dma_desc_cnt))
5302                        ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5303                ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5304                new_dma_pkt->ctl = cpu_to_le32(ctl);
5305                new_dma_pkt->tsize = cpu_to_le32(len);
5306
5307                if (dir == DMA_TO_DEVICE) {
5308                        new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5309                        new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5310                } else {
5311                        new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5312                        new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5313                }
5314
5315                if (!user_memset)
5316                        device_memory_addr += len;
5317                dma_desc_cnt++;
5318                new_dma_pkt++;
5319        }
5320
5321        if (!dma_desc_cnt) {
5322                dev_err(hdev->dev,
5323                        "Got 0 SG entries when patching DMA packet\n");
5324                return -EFAULT;
5325        }
5326
5327        /* Fix the last dma packet - wrcomp must be as user set it */
5328        new_dma_pkt--;
5329        new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5330
5331        *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5332
5333        return 0;
5334}
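
/*
 * A stand-alone sketch of the scatter-gather coalescing rule applied in
 * gaudi_patch_dma_packet() above: two consecutive DMA segments are merged
 * only when they are physically contiguous and the combined length still
 * fits in one LIN_DMA transfer. The struct and helper below are
 * illustrative only.
 */
struct example_dma_seg {
	dma_addr_t addr;
	u64 len;
};

static inline bool example_can_merge(const struct example_dma_seg *cur,
					const struct example_dma_seg *next)
{
	return (cur->addr + cur->len == next->addr) &&
		(cur->len + next->len <= DMA_MAX_TRANSFER_SIZE);
}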
5335
5336static int gaudi_patch_cb(struct hl_device *hdev,
5337                                struct hl_cs_parser *parser)
5338{
5339        u32 cb_parsed_length = 0;
5340        u32 cb_patched_cur_length = 0;
5341        int rc = 0;
5342
5343        /* user_cb_size is greater than 0 so the loop will always execute */
5344        while (cb_parsed_length < parser->user_cb_size) {
5345                enum packet_id pkt_id;
5346                u16 pkt_size;
5347                u32 new_pkt_size = 0;
5348                struct gaudi_packet *user_pkt, *kernel_pkt;
5349
5350                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5351                kernel_pkt = parser->patched_cb->kernel_address +
5352                                        cb_patched_cur_length;
5353
5354                pkt_id = (enum packet_id) (
5355                                (le64_to_cpu(user_pkt->header) &
5356                                PACKET_HEADER_PACKET_ID_MASK) >>
5357                                        PACKET_HEADER_PACKET_ID_SHIFT);
5358
5359                if (!validate_packet_id(pkt_id)) {
5360                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5361                        rc = -EINVAL;
5362                        break;
5363                }
5364
5365                pkt_size = gaudi_packet_sizes[pkt_id];
5366                cb_parsed_length += pkt_size;
5367                if (cb_parsed_length > parser->user_cb_size) {
5368                        dev_err(hdev->dev,
5369                                "packet 0x%x is out of CB boundary\n", pkt_id);
5370                        rc = -EINVAL;
5371                        break;
5372                }
5373
5374                switch (pkt_id) {
5375                case PACKET_LIN_DMA:
5376                        rc = gaudi_patch_dma_packet(hdev, parser,
5377                                        (struct packet_lin_dma *) user_pkt,
5378                                        (struct packet_lin_dma *) kernel_pkt,
5379                                        &new_pkt_size);
5380                        cb_patched_cur_length += new_pkt_size;
5381                        break;
5382
5383                case PACKET_MSG_PROT:
5384                        dev_err(hdev->dev,
5385                                "User not allowed to use MSG_PROT\n");
5386                        rc = -EPERM;
5387                        break;
5388
5389                case PACKET_CP_DMA:
5390                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5391                        rc = -EPERM;
5392                        break;
5393
5394                case PACKET_STOP:
5395                        dev_err(hdev->dev, "User not allowed to use STOP\n");
5396                        rc = -EPERM;
5397                        break;
5398
5399                case PACKET_WREG_32:
5400                case PACKET_WREG_BULK:
5401                case PACKET_MSG_LONG:
5402                case PACKET_MSG_SHORT:
5403                case PACKET_REPEAT:
5404                case PACKET_FENCE:
5405                case PACKET_NOP:
5406                case PACKET_ARB_POINT:
5407                case PACKET_LOAD_AND_EXE:
5408                        memcpy(kernel_pkt, user_pkt, pkt_size);
5409                        cb_patched_cur_length += pkt_size;
5410                        break;
5411
5412                default:
5413                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5414                                pkt_id);
5415                        rc = -EINVAL;
5416                        break;
5417                }
5418
5419                if (rc)
5420                        break;
5421        }
5422
5423        return rc;
5424}
5425
5426static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5427                struct hl_cs_parser *parser)
5428{
5429        u64 patched_cb_handle;
5430        u32 patched_cb_size;
5431        struct hl_cb *user_cb;
5432        int rc;
5433
5434        /*
5435         * The new CB should have space at the end for two MSG_PROT packets:
5436         * 1. A packet that will act as a completion packet
5437         * 2. A packet that will generate an MSI interrupt
5438         */
5439        if (parser->completion)
5440                parser->patched_cb_size = parser->user_cb_size +
5441                                sizeof(struct packet_msg_prot) * 2;
5442        else
5443                parser->patched_cb_size = parser->user_cb_size;
5444
5445        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5446                                parser->patched_cb_size, false, false,
5447                                &patched_cb_handle);
5448
5449        if (rc) {
5450                dev_err(hdev->dev,
5451                        "Failed to allocate patched CB for DMA CS %d\n",
5452                        rc);
5453                return rc;
5454        }
5455
5456        patched_cb_handle >>= PAGE_SHIFT;
5457        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5458                                (u32) patched_cb_handle);
5459        /* hl_cb_get should never fail */
5460        if (!parser->patched_cb) {
5461                dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5462                        (u32) patched_cb_handle);
5463                rc = -EFAULT;
5464                goto out;
5465        }
5466
5467        /*
5468         * The check that parser->user_cb_size <= parser->user_cb->size was done
5469         * in validate_queue_index().
5470         */
5471        memcpy(parser->patched_cb->kernel_address,
5472                parser->user_cb->kernel_address,
5473                parser->user_cb_size);
5474
5475        patched_cb_size = parser->patched_cb_size;
5476
5477        /* Validate patched CB instead of user CB */
5478        user_cb = parser->user_cb;
5479        parser->user_cb = parser->patched_cb;
5480        rc = gaudi_validate_cb(hdev, parser, true);
5481        parser->user_cb = user_cb;
5482
5483        if (rc) {
5484                hl_cb_put(parser->patched_cb);
5485                goto out;
5486        }
5487
5488        if (patched_cb_size != parser->patched_cb_size) {
5489                dev_err(hdev->dev, "user CB size mismatch\n");
5490                hl_cb_put(parser->patched_cb);
5491                rc = -EINVAL;
5492                goto out;
5493        }
5494
5495out:
5496        /*
5497         * Always call cb destroy here because we still hold one reference
5498         * to it from the earlier cb_get. After the job is completed,
5499         * cb_put will release it, but here we want to remove it from the
5500         * idr
5501         */
5502        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5503                                        patched_cb_handle << PAGE_SHIFT);
5504
5505        return rc;
5506}
5507
5508static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5509                struct hl_cs_parser *parser)
5510{
5511        u64 patched_cb_handle;
5512        int rc;
5513
5514        rc = gaudi_validate_cb(hdev, parser, false);
5515
5516        if (rc)
5517                goto free_userptr;
5518
5519        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5520                                parser->patched_cb_size, false, false,
5521                                &patched_cb_handle);
5522        if (rc) {
5523                dev_err(hdev->dev,
5524                        "Failed to allocate patched CB for DMA CS %d\n", rc);
5525                goto free_userptr;
5526        }
5527
5528        patched_cb_handle >>= PAGE_SHIFT;
5529        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5530                                (u32) patched_cb_handle);
5531        /* hl_cb_get should never fail here */
5532        if (!parser->patched_cb) {
5533                dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5534                                (u32) patched_cb_handle);
5535                rc = -EFAULT;
5536                goto out;
5537        }
5538
5539        rc = gaudi_patch_cb(hdev, parser);
5540
5541        if (rc)
5542                hl_cb_put(parser->patched_cb);
5543
5544out:
5545        /*
5546         * Always call cb destroy here because we still hold one reference
5547         * to it from the earlier cb_get. After the job is completed,
5548         * cb_put will release it, but here we want to remove it from the
5549         * idr
5550         */
5551        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5552                                patched_cb_handle << PAGE_SHIFT);
5553
5554free_userptr:
5555        if (rc)
5556                hl_userptr_delete_list(hdev, parser->job_userptr_list);
5557        return rc;
5558}
5559
5560static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5561                                        struct hl_cs_parser *parser)
5562{
5563        struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5564        struct gaudi_device *gaudi = hdev->asic_specific;
5565        u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5566                ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5567
5568        if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5569                        (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5570                        (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5571                dev_err(hdev->dev, "h/w queue %d is disabled\n",
5572                                parser->hw_queue_id);
5573                return -EINVAL;
5574        }
5575
5576        /* For internal queue jobs just check if CB address is valid */
5577        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5578                                        parser->user_cb_size,
5579                                        asic_prop->sram_user_base_address,
5580                                        asic_prop->sram_end_address))
5581                return 0;
5582
5583        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5584                                        parser->user_cb_size,
5585                                        asic_prop->dram_user_base_address,
5586                                        asic_prop->dram_end_address))
5587                return 0;
5588
5589        /* PMMU and HPMMU addresses are equal, check only one of them */
5590        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5591                                        parser->user_cb_size,
5592                                        asic_prop->pmmu.start_addr,
5593                                        asic_prop->pmmu.end_addr))
5594                return 0;
5595
5596        dev_err(hdev->dev,
5597                "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5598                parser->user_cb, parser->user_cb_size);
5599
5600        return -EFAULT;
5601}
5602
5603static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5604{
5605        struct gaudi_device *gaudi = hdev->asic_specific;
5606
5607        if (parser->queue_type == QUEUE_TYPE_INT)
5608                return gaudi_parse_cb_no_ext_queue(hdev, parser);
5609
5610        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5611                return gaudi_parse_cb_mmu(hdev, parser);
5612        else
5613                return gaudi_parse_cb_no_mmu(hdev, parser);
5614}
5615
5616static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5617                                        void *kernel_address, u32 len,
5618                                        u64 cq_addr, u32 cq_val, u32 msi_vec,
5619                                        bool eb)
5620{
5621        struct gaudi_device *gaudi = hdev->asic_specific;
5622        struct packet_msg_prot *cq_pkt;
5623        u32 tmp;
5624
5625        cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5626
5627        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5628        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5629
5630        if (eb)
5631                tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5632
5633        cq_pkt->ctl = cpu_to_le32(tmp);
5634        cq_pkt->value = cpu_to_le32(cq_val);
5635        cq_pkt->addr = cpu_to_le64(cq_addr);
5636
5637        cq_pkt++;
5638
5639        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5640        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5641        cq_pkt->ctl = cpu_to_le32(tmp);
5642        cq_pkt->value = cpu_to_le32(1);
5643
5644        if (!gaudi->multi_msi_mode)
5645                msi_vec = 0;
5646
5647        cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5648}
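
/*
 * Sketch of the MSI doorbell address used by the second MSG_PROT packet
 * built above: each interrupt vector owns a 4-byte register starting at
 * mmPCIE_MSI_INTR_0, and vector 0 is forced when multi-MSI mode is not
 * enabled. Illustrative helper only.
 */
static inline u64 example_msi_doorbell_addr(u32 msi_vec, bool multi_msi)
{
	if (!multi_msi)
		msi_vec = 0;

	return CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4;
}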
5649
5650static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5651{
5652        WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5653}
5654
5655static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5656                                        u32 size, u64 val)
5657{
5658        struct packet_lin_dma *lin_dma_pkt;
5659        struct hl_cs_job *job;
5660        u32 cb_size, ctl, err_cause;
5661        struct hl_cb *cb;
5662        u64 id;
5663        int rc;
5664
5665        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5666        if (!cb)
5667                return -EFAULT;
5668
5669        lin_dma_pkt = cb->kernel_address;
5670        memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5671        cb_size = sizeof(*lin_dma_pkt);
5672
5673        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5674        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5675        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5676        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5677        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5678
5679        lin_dma_pkt->ctl = cpu_to_le32(ctl);
5680        lin_dma_pkt->src_addr = cpu_to_le64(val);
5681        lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5682        lin_dma_pkt->tsize = cpu_to_le32(size);
5683
5684        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5685        if (!job) {
5686                dev_err(hdev->dev, "Failed to allocate a new job\n");
5687                rc = -ENOMEM;
5688                goto release_cb;
5689        }
5690
5691        /* Verify DMA is OK */
5692        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5693        if (err_cause && !hdev->init_done) {
5694                dev_dbg(hdev->dev,
5695                        "Clearing DMA0 engine from errors (cause 0x%x)\n",
5696                        err_cause);
5697                WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5698        }
5699
5700        job->id = 0;
5701        job->user_cb = cb;
5702        atomic_inc(&job->user_cb->cs_cnt);
5703        job->user_cb_size = cb_size;
5704        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5705        job->patched_cb = job->user_cb;
5706        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5707
5708        hl_debugfs_add_job(hdev, job);
5709
5710        rc = gaudi_send_job_on_qman0(hdev, job);
5711        hl_debugfs_remove_job(hdev, job);
5712        kfree(job);
5713        atomic_dec(&cb->cs_cnt);
5714
5715        /* Verify DMA is OK */
5716        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5717        if (err_cause) {
5718                dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5719                rc = -EIO;
5720                if (!hdev->init_done) {
5721                        dev_dbg(hdev->dev,
5722                                "Clearing DMA0 engine from errors (cause 0x%x)\n",
5723                                err_cause);
5724                        WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5725                }
5726        }
5727
5728release_cb:
5729        id = cb->id;
5730        hl_cb_put(cb);
5731        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5732
5733        return rc;
5734}
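
/*
 * Sketch of the memset-mode LIN_DMA packet built above: with the MEMSET
 * control bit set, src_addr carries the 64-bit fill pattern rather than a
 * source address, dst_addr is the device address to fill and tsize is the
 * size in bytes. The helper shows only the field assignments and assumes
 * the remaining control bits are set up as in the function above.
 */
static inline void example_fill_memset_pkt(struct packet_lin_dma *pkt,
						u64 dst_addr, u32 size, u64 val)
{
	u32 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);

	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);

	pkt->ctl = cpu_to_le32(ctl);
	pkt->src_addr = cpu_to_le64(val);	/* fill value, not an address */
	pkt->dst_addr = cpu_to_le64(dst_addr);
	pkt->tsize = cpu_to_le32(size);
}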
5735
5736static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5737                                        u32 num_regs, u32 val)
5738{
5739        struct packet_msg_long *pkt;
5740        struct hl_cs_job *job;
5741        u32 cb_size, ctl;
5742        struct hl_cb *cb;
5743        int i, rc;
5744
5745        cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5746
5747        if (cb_size > SZ_2M) {
5748                dev_err(hdev->dev, "CB size must not exceed %uMB\n", SZ_2M / SZ_1M);
5749                return -ENOMEM;
5750        }
5751
5752        cb = hl_cb_kernel_create(hdev, cb_size, false);
5753        if (!cb)
5754                return -EFAULT;
5755
5756        pkt = cb->kernel_address;
5757
5758        ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5759        ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5760        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5761        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5762        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5763
5764        for (i = 0; i < num_regs ; i++, pkt++) {
5765                pkt->ctl = cpu_to_le32(ctl);
5766                pkt->value = cpu_to_le32(val);
5767                pkt->addr = cpu_to_le64(reg_base + (i * 4));
5768        }
5769
5770        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5771        if (!job) {
5772                dev_err(hdev->dev, "Failed to allocate a new job\n");
5773                rc = -ENOMEM;
5774                goto release_cb;
5775        }
5776
5777        job->id = 0;
5778        job->user_cb = cb;
5779        atomic_inc(&job->user_cb->cs_cnt);
5780        job->user_cb_size = cb_size;
5781        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5782        job->patched_cb = job->user_cb;
5783        job->job_cb_size = cb_size;
5784
5785        hl_debugfs_add_job(hdev, job);
5786
5787        rc = gaudi_send_job_on_qman0(hdev, job);
5788        hl_debugfs_remove_job(hdev, job);
5789        kfree(job);
5790        atomic_dec(&cb->cs_cnt);
5791
5792release_cb:
5793        hl_cb_put(cb);
5794        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5795
5796        return rc;
5797}
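
/*
 * Sketch of the CB sizing used in gaudi_memset_registers() above: one
 * MSG_LONG packet per register to write plus a single trailing MSG_PROT,
 * with the total capped at 2MB (the scheduled variant below reserves two
 * MSG_PROT packets instead). Illustrative helper only.
 */
static inline int example_reg_memset_cb_size(u32 num_regs, u32 *cb_size)
{
	*cb_size = (sizeof(struct packet_msg_long) * num_regs) +
			sizeof(struct packet_msg_prot);

	return (*cb_size > SZ_2M) ? -ENOMEM : 0;
}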
5798
5799static int gaudi_schedule_register_memset(struct hl_device *hdev,
5800                u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5801{
5802        struct hl_ctx *ctx;
5803        struct hl_pending_cb *pending_cb;
5804        struct packet_msg_long *pkt;
5805        u32 cb_size, ctl;
5806        struct hl_cb *cb;
5807        int i, rc;
5808
5809        mutex_lock(&hdev->fpriv_list_lock);
5810        ctx = hdev->compute_ctx;
5811
5812        /* If no compute context is available, or the context is going
5813         * down, memset the registers directly
5814         */
5815        if (!ctx || kref_read(&ctx->refcount) == 0) {
5816                rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5817                mutex_unlock(&hdev->fpriv_list_lock);
5818                return rc;
5819        }
5820
5821        mutex_unlock(&hdev->fpriv_list_lock);
5822
5823        cb_size = (sizeof(*pkt) * num_regs) +
5824                        sizeof(struct packet_msg_prot) * 2;
5825
5826        if (cb_size > SZ_2M) {
5827                dev_err(hdev->dev, "CB size must not exceed %uMB\n", SZ_2M / SZ_1M);
5828                return -ENOMEM;
5829        }
5830
5831        pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5832        if (!pending_cb)
5833                return -ENOMEM;
5834
5835        cb = hl_cb_kernel_create(hdev, cb_size, false);
5836        if (!cb) {
5837                kfree(pending_cb);
5838                return -EFAULT;
5839        }
5840
5841        pkt = cb->kernel_address;
5842
5843        ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5844        ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5845        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5846        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5847        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5848
5849        for (i = 0; i < num_regs ; i++, pkt++) {
5850                pkt->ctl = cpu_to_le32(ctl);
5851                pkt->value = cpu_to_le32(val);
5852                pkt->addr = cpu_to_le64(reg_base + (i * 4));
5853        }
5854
5855        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5856
5857        pending_cb->cb = cb;
5858        pending_cb->cb_size = cb_size;
5859        /* The queue ID MUST be an external queue ID. Otherwise, we will
5860         * have undefined behavior
5861         */
5862        pending_cb->hw_queue_id = hw_queue_id;
5863
5864        spin_lock(&ctx->pending_cb_lock);
5865        list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5866        spin_unlock(&ctx->pending_cb_lock);
5867
5868        return 0;
5869}
5870
5871static int gaudi_restore_sm_registers(struct hl_device *hdev)
5872{
5873        u64 base_addr;
5874        u32 num_regs;
5875        int rc;
5876
5877        base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5878        num_regs = NUM_OF_SOB_IN_BLOCK;
5879        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5880        if (rc) {
5881                dev_err(hdev->dev, "failed resetting SM registers");
5882                return -ENOMEM;
5883        }
5884
5885        base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5886        num_regs = NUM_OF_SOB_IN_BLOCK;
5887        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5888        if (rc) {
5889                dev_err(hdev->dev, "failed resetting SM registers");
5890                return -ENOMEM;
5891        }
5892
5893        base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5894        num_regs = NUM_OF_SOB_IN_BLOCK;
5895        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5896        if (rc) {
5897                dev_err(hdev->dev, "failed resetting SM registers");
5898                return -ENOMEM;
5899        }
5900
5901        base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5902        num_regs = NUM_OF_MONITORS_IN_BLOCK;
5903        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5904        if (rc) {
5905                dev_err(hdev->dev, "failed resetting SM registers");
5906                return -ENOMEM;
5907        }
5908
5909        base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5910        num_regs = NUM_OF_MONITORS_IN_BLOCK;
5911        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5912        if (rc) {
5913                dev_err(hdev->dev, "failed resetting SM registers");
5914                return -ENOMEM;
5915        }
5916
5917        base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5918        num_regs = NUM_OF_MONITORS_IN_BLOCK;
5919        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5920        if (rc) {
5921                dev_err(hdev->dev, "failed resetting SM registers");
5922                return -ENOMEM;
5923        }
5924
5925        base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5926                        (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5927        num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5928        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5929        if (rc) {
5930                dev_err(hdev->dev, "failed resetting SM registers");
5931                return -ENOMEM;
5932        }
5933
5934        base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5935                        (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5936        num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5937        rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5938        if (rc) {
5939                dev_err(hdev->dev, "failed resetting SM registers");
5940                return -ENOMEM;
5941        }
5942
5943        return 0;
5944}
5945
5946static void gaudi_restore_dma_registers(struct hl_device *hdev)
5947{
5948        u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5949                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5950        int i;
5951
5952        for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5953                u64 sob_addr = CFG_BASE +
5954                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5955                                (i * sob_delta);
5956                u32 dma_offset = i * DMA_CORE_OFFSET;
5957
5958                WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5959                                lower_32_bits(sob_addr));
5960                WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5961                                upper_32_bits(sob_addr));
5962                WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5963
5964                /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5965                 * modified by the user for SRAM reduction
5966                 */
5967                if (i > 1)
5968                        WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5969                                                                0x00000001);
5970        }
5971}
5972
5973static void gaudi_restore_qm_registers(struct hl_device *hdev)
5974{
5975        u32 qman_offset;
5976        int i;
5977
5978        for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5979                qman_offset = i * DMA_QMAN_OFFSET;
5980                WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5981        }
5982
5983        for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5984                qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5985                WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5986        }
5987
5988        for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5989                qman_offset = i * TPC_QMAN_OFFSET;
5990                WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5991        }
5992
5993        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5994                qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5995                                (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5996                WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5997        }
5998}
5999
6000static int gaudi_restore_user_registers(struct hl_device *hdev)
6001{
6002        int rc;
6003
6004        rc = gaudi_restore_sm_registers(hdev);
6005        if (rc)
6006                return rc;
6007
6008        gaudi_restore_dma_registers(hdev);
6009        gaudi_restore_qm_registers(hdev);
6010
6011        return 0;
6012}
6013
6014static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6015{
6016        return gaudi_restore_user_registers(hdev);
6017}
6018
6019static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6020{
6021        struct asic_fixed_properties *prop = &hdev->asic_prop;
6022        struct gaudi_device *gaudi = hdev->asic_specific;
6023        u64 addr = prop->mmu_pgt_addr;
6024        u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6025
6026        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6027                return 0;
6028
6029        return gaudi_memset_device_memory(hdev, addr, size, 0);
6030}
6031
6032static void gaudi_restore_phase_topology(struct hl_device *hdev)
6033{
6034
6035}
6036
6037static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6038                        bool user_address, u32 *val)
6039{
6040        struct asic_fixed_properties *prop = &hdev->asic_prop;
6041        struct gaudi_device *gaudi = hdev->asic_specific;
6042        u64 hbm_bar_addr, host_phys_end;
6043        int rc = 0;
6044
6045        host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6046
6047        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6048
6049                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6050                                (hdev->clock_gating_mask &
6051                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6052
6053                        dev_err_ratelimited(hdev->dev,
6054                                "Can't read register - clock gating is enabled!\n");
6055                        rc = -EFAULT;
6056                } else {
6057                        *val = RREG32(addr - CFG_BASE);
6058                }
6059
6060        } else if ((addr >= SRAM_BASE_ADDR) &&
6061                        (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6062                *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6063                                (addr - SRAM_BASE_ADDR));
6064        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6065                u64 bar_base_addr = DRAM_PHYS_BASE +
6066                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6067
6068                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6069                if (hbm_bar_addr != U64_MAX) {
6070                        *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6071                                                (addr - bar_base_addr));
6072
6073                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6074                                                hbm_bar_addr);
6075                }
6076                if (hbm_bar_addr == U64_MAX)
6077                        rc = -EIO;
6078        } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6079                        user_address && !iommu_present(&pci_bus_type)) {
6080                *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6081        } else {
6082                rc = -EFAULT;
6083        }
6084
6085        return rc;
6086}
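
/*
 * Sketch of the address classification shared by the debugfs accessors in
 * this block: the enum and helper are illustrative only and mirror the
 * if/else ladder above (CFG register space, SRAM through its BAR, HBM
 * through the movable HBM BAR, and bare host physical memory, which is
 * only allowed for user addresses when no IOMMU is present).
 */
enum example_dbg_region {
	EXAMPLE_DBG_CFG,
	EXAMPLE_DBG_SRAM,
	EXAMPLE_DBG_HBM,
	EXAMPLE_DBG_HOST,
	EXAMPLE_DBG_INVALID,
};

static inline enum example_dbg_region example_classify_addr(u64 addr,
							u64 dram_size)
{
	if (addr >= CFG_BASE && addr < CFG_BASE + CFG_SIZE)
		return EXAMPLE_DBG_CFG;
	if (addr >= SRAM_BASE_ADDR && addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)
		return EXAMPLE_DBG_SRAM;
	if (addr < DRAM_PHYS_BASE + dram_size)
		return EXAMPLE_DBG_HBM;
	if (addr >= HOST_PHYS_BASE && addr < HOST_PHYS_BASE + HOST_PHYS_SIZE)
		return EXAMPLE_DBG_HOST;
	return EXAMPLE_DBG_INVALID;
}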
6087
6088static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6089                        bool user_address, u32 val)
6090{
6091        struct asic_fixed_properties *prop = &hdev->asic_prop;
6092        struct gaudi_device *gaudi = hdev->asic_specific;
6093        u64 hbm_bar_addr, host_phys_end;
6094        int rc = 0;
6095
6096        host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6097
6098        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6099
6100                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6101                                (hdev->clock_gating_mask &
6102                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6103
6104                        dev_err_ratelimited(hdev->dev,
6105                                "Can't write register - clock gating is enabled!\n");
6106                        rc = -EFAULT;
6107                } else {
6108                        WREG32(addr - CFG_BASE, val);
6109                }
6110
6111        } else if ((addr >= SRAM_BASE_ADDR) &&
6112                        (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6113                writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6114                                        (addr - SRAM_BASE_ADDR));
6115        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6116                u64 bar_base_addr = DRAM_PHYS_BASE +
6117                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6118
6119                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6120                if (hbm_bar_addr != U64_MAX) {
6121                        writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6122                                                (addr - bar_base_addr));
6123
6124                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6125                                                hbm_bar_addr);
6126                }
6127                if (hbm_bar_addr == U64_MAX)
6128                        rc = -EIO;
6129        } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6130                        user_address && !iommu_present(&pci_bus_type)) {
6131                *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6132        } else {
6133                rc = -EFAULT;
6134        }
6135
6136        return rc;
6137}
6138
6139static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6140                                bool user_address, u64 *val)
6141{
6142        struct asic_fixed_properties *prop = &hdev->asic_prop;
6143        struct gaudi_device *gaudi = hdev->asic_specific;
6144        u64 hbm_bar_addr, host_phys_end;
6145        int rc = 0;
6146
6147        host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6148
6149        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6150
6151                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6152                                (hdev->clock_gating_mask &
6153                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6154
6155                        dev_err_ratelimited(hdev->dev,
6156                                "Can't read register - clock gating is enabled!\n");
6157                        rc = -EFAULT;
6158                } else {
6159                        u32 val_l = RREG32(addr - CFG_BASE);
6160                        u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6161
6162                        *val = (((u64) val_h) << 32) | val_l;
6163                }
6164
6165        } else if ((addr >= SRAM_BASE_ADDR) &&
6166                   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6167                *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6168                                (addr - SRAM_BASE_ADDR));
6169        } else if (addr <=
6170                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6171                u64 bar_base_addr = DRAM_PHYS_BASE +
6172                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6173
6174                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6175                if (hbm_bar_addr != U64_MAX) {
6176                        *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6177                                                (addr - bar_base_addr));
6178
6179                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6180                                                hbm_bar_addr);
6181                }
6182                if (hbm_bar_addr == U64_MAX)
6183                        rc = -EIO;
6184        } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6185                        user_address && !iommu_present(&pci_bus_type)) {
6186                *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6187        } else {
6188                rc = -EFAULT;
6189        }
6190
6191        return rc;
6192}
6193
6194static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6195                                bool user_address, u64 val)
6196{
6197        struct asic_fixed_properties *prop = &hdev->asic_prop;
6198        struct gaudi_device *gaudi = hdev->asic_specific;
6199        u64 hbm_bar_addr, host_phys_end;
6200        int rc = 0;
6201
6202        host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6203
6204        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6205
6206                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6207                                (hdev->clock_gating_mask &
6208                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6209
6210                        dev_err_ratelimited(hdev->dev,
6211                                "Can't write register - clock gating is enabled!\n");
6212                        rc = -EFAULT;
6213                } else {
6214                        WREG32(addr - CFG_BASE, lower_32_bits(val));
6215                        WREG32(addr + sizeof(u32) - CFG_BASE,
6216                                upper_32_bits(val));
6217                }
6218
6219        } else if ((addr >= SRAM_BASE_ADDR) &&
6220                   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6221                writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6222                                        (addr - SRAM_BASE_ADDR));
6223        } else if (addr <=
6224                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6225                u64 bar_base_addr = DRAM_PHYS_BASE +
6226                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6227
6228                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6229                if (hbm_bar_addr != U64_MAX) {
6230                        writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6231                                                (addr - bar_base_addr));
6232
6233                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6234                                                hbm_bar_addr);
6235                }
6236                if (hbm_bar_addr == U64_MAX)
6237                        rc = -EIO;
6238        } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6239                        user_address && !iommu_present(&pci_bus_type)) {
6240                *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6241        } else {
6242                rc = -EFAULT;
6243        }
6244
6245        return rc;
6246}
6247
6248static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6249                                        u32 size_to_dma, dma_addr_t dma_addr)
6250{
6251        u32 err_cause, val;
6252        u64 dma_offset;
6253        int rc;
6254
6255        dma_offset = dma_id * DMA_CORE_OFFSET;
6256
6257        WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6258        WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6259        WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6260        WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6261        WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6262        WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6263                        (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6264
6265        rc = hl_poll_timeout(
6266                hdev,
6267                mmDMA0_CORE_STS0 + dma_offset,
6268                val,
6269                ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6270                0,
6271                1000000);
6272
6273        if (rc) {
6274                dev_err(hdev->dev,
6275                        "DMA %d timed out while reading 0x%llx\n",
6276                        dma_id, addr);
6277                return -EIO;
6278        }
6279
6280        /* Verify DMA is OK */
6281        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6282        if (err_cause) {
6283                dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6284                dev_dbg(hdev->dev,
6285                        "Clearing DMA0 engine from errors (cause 0x%x)\n",
6286                        err_cause);
6287                WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6288
6289                return -EIO;
6290        }
6291
6292        return 0;
6293}
6294
6295static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6296                                void *blob_addr)
6297{
6298        u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6299        struct gaudi_device *gaudi = hdev->asic_specific;
6300        u64 dma_offset, qm_offset;
6301        dma_addr_t dma_addr;
6302        void *kernel_addr;
6303        bool is_eng_idle;
6304        int rc = 0, dma_id;
6305
6306        kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6307                                                hdev, SZ_2M,
6308                                                &dma_addr,
6309                                                GFP_KERNEL | __GFP_ZERO);
6310
6311        if (!kernel_addr)
6312                return -ENOMEM;
6313
6314        mutex_lock(&gaudi->clk_gate_mutex);
6315
6316        hdev->asic_funcs->disable_clock_gating(hdev);
6317
6318        hdev->asic_funcs->hw_queues_lock(hdev);
6319
6320        dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6321        dma_offset = dma_id * DMA_CORE_OFFSET;
6322        qm_offset = dma_id * DMA_QMAN_OFFSET;
6323        dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6324        is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6325
6326        if (!is_eng_idle) {
6327                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6328                dma_offset = dma_id * DMA_CORE_OFFSET;
6329                qm_offset = dma_id * DMA_QMAN_OFFSET;
6330                dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6331                is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6332
6333                if (!is_eng_idle) {
6334                        dev_err_ratelimited(hdev->dev,
6335                                "Can't read via DMA because it is BUSY\n");
6336                        rc = -EAGAIN;
6337                        goto out;
6338                }
6339        }
6340
6341        cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6342        WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6343                        0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6344
6345        /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6346         * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6347         * ASID
6348         */
6349        WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6350
6351        /* Verify DMA is OK */
6352        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6353        if (err_cause) {
6354                dev_dbg(hdev->dev,
6355                        "Clearing DMA0 engine from errors (cause 0x%x)\n",
6356                        err_cause);
6357                WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6358        }
6359
6360        pos = 0;
6361        size_left = size;
6362        size_to_dma = SZ_2M;
6363
6364        while (size_left > 0) {
6365
6366                if (size_left < SZ_2M)
6367                        size_to_dma = size_left;
6368
6369                rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6370                                                dma_addr);
6371                if (rc)
6372                        break;
6373
6374                memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6375
6376                if (size_left <= SZ_2M)
6377                        break;
6378
6379                pos += SZ_2M;
6380                addr += SZ_2M;
6381                size_left -= SZ_2M;
6382        }
6383
6384        /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6385         * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6386         * ASID
6387         */
6388        WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6389                        ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6390
6391        WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6392
6393out:
6394        hdev->asic_funcs->hw_queues_unlock(hdev);
6395
6396        hdev->asic_funcs->set_clock_gating(hdev);
6397
6398        mutex_unlock(&gaudi->clk_gate_mutex);
6399
6400        hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6401                                                dma_addr);
6402
6403        return rc;
6404}
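
/*
 * Sketch of the chunking used in gaudi_debugfs_read_dma() above: the read
 * is served through a 2MB bounce buffer, so full 2MB chunks are copied
 * until the remainder fits in one final, shorter transfer. Illustrative
 * helper only; in the real flow a DMA transfer fills the bounce buffer
 * before each copy.
 */
static inline void example_chunked_copy(void *dst, const void *bounce,
					u32 size)
{
	u32 pos = 0, chunk;

	while (size > 0) {
		chunk = (size < SZ_2M) ? size : SZ_2M;
		memcpy(dst + pos, bounce, chunk);
		pos += chunk;
		size -= chunk;
	}
}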
6405
6406static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6407{
6408        struct gaudi_device *gaudi = hdev->asic_specific;
6409
6410        if (hdev->hard_reset_pending)
6411                return U64_MAX;
6412
6413        return readq(hdev->pcie_bar[HBM_BAR_ID] +
6414                        (addr - gaudi->hbm_bar_cur_addr));
6415}
6416
6417static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6418{
6419        struct gaudi_device *gaudi = hdev->asic_specific;
6420
6421        if (hdev->hard_reset_pending)
6422                return;
6423
6424        writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6425                        (addr - gaudi->hbm_bar_cur_addr));
6426}
6427
6428void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6429{
6430        /* mask to zero the MMBP and ASID bits */
6431        WREG32_AND(reg, ~0x7FF);
6432        WREG32_OR(reg, asid);
6433}
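
/*
 * Sketch of the read-modify-write above: the low 11 bits of these
 * registers hold the ASID and the MMU-bypass (MMBP) fields, so clearing
 * them and OR-ing in the new ASID leaves every other bit untouched.
 * Illustrative helper only, e.g. 0x12345FFF with asid 3 becomes 0x12345803.
 */
static inline u32 example_prepare_reg_val(u32 old_val, u32 asid)
{
	return (old_val & ~0x7FF) | asid;
}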
6434
6435static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6436{
6437        struct gaudi_device *gaudi = hdev->asic_specific;
6438
6439        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6440                return;
6441
6442        if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6443                dev_crit(hdev->dev, "asid %u is too big\n", asid);
6444                return;
6445        }
6446
6447        mutex_lock(&gaudi->clk_gate_mutex);
6448
6449        hdev->asic_funcs->disable_clock_gating(hdev);
6450
6451        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6452        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6453        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6454        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6455        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6456
6457        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6458        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6459        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6460        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6461        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6462
6463        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6464        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6465        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6466        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6467        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6468
6469        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6470        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6471        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6472        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6473        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6474
6475        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6480
6481        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6482        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6483        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6484        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6485        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6486
6487        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6488        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6489        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6490        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6491        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6492
6493        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6494        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6495        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6496        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6497        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6498
6499        gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6500        gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6501        gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6502        gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6503        gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6504        gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6505        gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6506        gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6507
6508        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6509        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6510        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6511        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6512        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6513        gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6514        gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6515
6516        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6517        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6518        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6519        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6520        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6521        gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6522        gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6523
6524        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6525        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6526        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6527        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6528        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6529        gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6530        gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6531
6532        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6533        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6534        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6535        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6536        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6537        gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6538        gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6539
6540        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6541        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6542        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6543        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6544        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6545        gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6546        gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6547
6548        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6549        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6550        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6551        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6552        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6553        gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6554        gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6555
6556        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6557        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6558        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6559        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6560        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6561        gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6562        gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6563
6564        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569        gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6570        gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6571
6572        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6578        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6579        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6580        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6581        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6582
6583        gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6584        gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6585        gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6586        gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6587        gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6588        gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6589        gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6590        gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6591        gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6592        gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6593        gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6594        gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6595
6596        if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6597                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6598                                asid);
6599                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6600                                asid);
6601                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6602                                asid);
6603                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6604                                asid);
6605                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6606                                asid);
6607        }
6608
6609        if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6610                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6611                                asid);
6612                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6613                                asid);
6614                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6615                                asid);
6616                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6617                                asid);
6618                gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6619                                asid);
6620        }
6621
6622        if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6623                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6624                                asid);
6625                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6626                                asid);
6627                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6628                                asid);
6629                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6630                                asid);
6631                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6632                                asid);
6633        }
6634
6635        if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6636                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6637                                asid);
6638                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6639                                asid);
6640                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6641                                asid);
6642                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6643                                asid);
6644                gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6645                                asid);
6646        }
6647
6648        if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6649                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6650                                asid);
6651                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6652                                asid);
6653                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6654                                asid);
6655                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6656                                asid);
6657                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6658                                asid);
6659        }
6660
6661        if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6662                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6663                                asid);
6664                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6665                                asid);
6666                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6667                                asid);
6668                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6669                                asid);
6670                gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6671                                asid);
6672        }
6673
6674        if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6675                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6676                                asid);
6677                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6678                                asid);
6679                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6680                                asid);
6681                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6682                                asid);
6683                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6684                                asid);
6685        }
6686
6687        if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6688                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6689                                asid);
6690                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6691                                asid);
6692                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6693                                asid);
6694                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6695                                asid);
6696                gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6697                                asid);
6698        }
6699
6700        if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6701                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6702                                asid);
6703                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6704                                asid);
6705                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6706                                asid);
6707                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6708                                asid);
6709                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6710                                asid);
6711        }
6712
6713        if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6714                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6715                                asid);
6716                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6717                                asid);
6718                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6719                                asid);
6720                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6721                                asid);
6722                gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6723                                asid);
6724        }
6725
6726        hdev->asic_funcs->set_clock_gating(hdev);
6727
6728        mutex_unlock(&gaudi->clk_gate_mutex);
6729}
6730
6731static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6732                struct hl_cs_job *job)
6733{
6734        struct packet_msg_prot *fence_pkt;
6735        u32 *fence_ptr;
6736        dma_addr_t fence_dma_addr;
6737        struct hl_cb *cb;
6738        u32 tmp, timeout, dma_offset;
6739        int rc;
6740
6741        if (hdev->pldm)
6742                timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6743        else
6744                timeout = HL_DEVICE_TIMEOUT_USEC;
6745
6746        if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6747                dev_err_ratelimited(hdev->dev,
6748                        "Can't send driver job on QMAN0 because the device is not idle\n");
6749                return -EBUSY;
6750        }
6751
6752        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6753                                                        &fence_dma_addr);
6754        if (!fence_ptr) {
6755                dev_err(hdev->dev,
6756                        "Failed to allocate fence memory for QMAN0\n");
6757                return -ENOMEM;
6758        }
6759
6760        cb = job->patched_cb;
6761
6762        fence_pkt = cb->kernel_address +
6763                        job->job_cb_size - sizeof(struct packet_msg_prot);
6764
6765        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6766        tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6767        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6768
6769        fence_pkt->ctl = cpu_to_le32(tmp);
6770        fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6771        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6772
6773        dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6774
6775        WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6776
6777        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6778                                        job->job_cb_size, cb->bus_address);
6779        if (rc) {
6780                dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6781                goto free_fence_ptr;
6782        }
6783
6784        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6785                                (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6786                                timeout, true);
6787
6788        hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6789
6790        if (rc == -ETIMEDOUT) {
6791                dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6792                goto free_fence_ptr;
6793        }
6794
6795free_fence_ptr:
6796        WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6797                        ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6798
6799        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6800                                        fence_dma_addr);
6801        return rc;
6802}
6803
6804static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6805{
6806        if (event_type >= GAUDI_EVENT_SIZE)
6807                goto event_not_supported;
6808
6809        if (!gaudi_irq_map_table[event_type].valid)
6810                goto event_not_supported;
6811
6812        snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6813
6814        return;
6815
6816event_not_supported:
6817        snprintf(desc, size, "N/A");
6818}
6819
6820static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6821                                                        u32 x_y, bool is_write)
6822{
6823        u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6824
6825        mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6826                                DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6827
6828        switch (x_y) {
6829        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6830        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6831                dma_id[0] = 0;
6832                dma_id[1] = 2;
6833                break;
6834        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6835        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6836                dma_id[0] = 1;
6837                dma_id[1] = 3;
6838                break;
6839        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6840        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6841                dma_id[0] = 4;
6842                dma_id[1] = 6;
6843                break;
6844        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6845        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6846                dma_id[0] = 5;
6847                dma_id[1] = 7;
6848                break;
6849        default:
6850                goto unknown_initiator;
6851        }
6852
6853        for (i = 0 ; i < 2 ; i++) {
6854                dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6855                err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6856        }
6857
6858        switch (x_y) {
6859        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6860        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6861                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6862                        return "DMA0";
6863                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6864                        return "DMA2";
6865                else
6866                        return "DMA0 or DMA2";
6867        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6868        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6869                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6870                        return "DMA1";
6871                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6872                        return "DMA3";
6873                else
6874                        return "DMA1 or DMA3";
6875        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6876        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6877                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6878                        return "DMA4";
6879                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6880                        return "DMA6";
6881                else
6882                        return "DMA4 or DMA6";
6883        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6884        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6885                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6886                        return "DMA5";
6887                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6888                        return "DMA7";
6889                else
6890                        return "DMA5 or DMA7";
6891        }
6892
6893unknown_initiator:
6894        return "unknown initiator";
6895}
6896
6897static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6898                                                        bool is_write)
6899{
6900        u32 val, x_y, axi_id;
6901
6902        val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6903                                RREG32(mmMMU_UP_RAZWI_READ_ID);
6904        x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6905                        (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6906        axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6907                        RAZWI_INITIATOR_AXI_ID_SHIFT);
6908
6909        switch (x_y) {
6910        case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6911                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6912                        return "TPC0";
6913                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6914                        return "NIC0";
6915                break;
6916        case RAZWI_INITIATOR_ID_X_Y_TPC1:
6917                return "TPC1";
6918        case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6919        case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6920                return "MME0";
6921        case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6922        case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6923                return "MME1";
6924        case RAZWI_INITIATOR_ID_X_Y_TPC2:
6925                return "TPC2";
6926        case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6927                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6928                        return "TPC3";
6929                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6930                        return "PCI";
6931                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6932                        return "CPU";
6933                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6934                        return "PSOC";
6935                break;
6936        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6937        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6938        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6939        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6940        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6941        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6942        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6943        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6944                return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6945        case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6946                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6947                        return "TPC4";
6948                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6949                        return "NIC1";
6950                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6951                        return "NIC2";
6952                break;
6953        case RAZWI_INITIATOR_ID_X_Y_TPC5:
6954                return "TPC5";
6955        case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6956        case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6957                return "MME2";
6958        case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6959        case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6960                return "MME3";
6961        case RAZWI_INITIATOR_ID_X_Y_TPC6:
6962                return "TPC6";
6963        case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6964                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6965                        return "TPC7";
6966                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6967                        return "NIC4";
6968                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6969                        return "NIC5";
6970                break;
6971        default:
6972                break;
6973        }
6974
6975        dev_err(hdev->dev,
6976                "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6977                val,
6978                (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6979                (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6980                (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6981                        RAZWI_INITIATOR_AXI_ID_MASK);
6982
6983        return "unknown initiator";
6984}
6985
6986static void gaudi_print_razwi_info(struct hl_device *hdev)
6987{
6988        if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6989                dev_err_ratelimited(hdev->dev,
6990                        "RAZWI event caused by illegal write of %s\n",
6991                        gaudi_get_razwi_initiator_name(hdev, true));
6992                WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6993        }
6994
6995        if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6996                dev_err_ratelimited(hdev->dev,
6997                        "RAZWI event caused by illegal read of %s\n",
6998                        gaudi_get_razwi_initiator_name(hdev, false));
6999                WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7000        }
7001}
7002
7003static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7004{
7005        struct gaudi_device *gaudi = hdev->asic_specific;
7006        u64 addr;
7007        u32 val;
7008
7009        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7010                return;
7011
7012        val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7013        if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7014                addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7015                addr <<= 32;
7016                addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7017
7018                dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7019                                        addr);
7020
7021                WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7022        }
7023
7024        val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7025        if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7026                addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7027                addr <<= 32;
7028                addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7029
7030                dev_err_ratelimited(hdev->dev,
7031                                "MMU access error on va 0x%llx\n", addr);
7032
7033                WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7034        }
7035}
7036
7037/*
7038 *  +-------------------+------------------------------------------------------+
7039 *  | Configuration Reg |                     Description                      |
7040 *  |      Address      |                                                      |
7041 *  +-------------------+------------------------------------------------------+
7042 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7043 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7044 *  |                   |0xF34 memory wrappers 63:32                           |
7045 *  |                   |0xF38 memory wrappers 95:64                           |
7046 *  |                   |0xF3C memory wrappers 127:96                          |
7047 *  +-------------------+------------------------------------------------------+
7048 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7049 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7050 *  |                   |0xF44 memory wrappers 63:32                           |
7051 *  |                   |0xF48 memory wrappers 95:64                           |
7052 *  |                   |0xF4C memory wrappers 127:96                          |
7053 *  +-------------------+------------------------------------------------------+
7054 */
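/*
 * Illustrative example (derived from the table above and the lookup code
 * below, not from the HW spec): a single-bit ECC error in memory wrapper 40
 * would set bit 8 of the register at offset 0xF34 (wrappers 63:32), so the
 * extraction loop finds i = 1, err_bit = 8 and reports
 * memory_wrapper_idx = 8 + 32 * 1 = 40.
 */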
7055static int gaudi_extract_ecc_info(struct hl_device *hdev,
7056                struct ecc_info_extract_params *params, u64 *ecc_address,
7057                u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7058{
7059        struct gaudi_device *gaudi = hdev->asic_specific;
7060        u32 i, num_mem_regs, reg, err_bit;
7061        u64 err_addr, err_word = 0;
7062        int rc = 0;
7063
7064        num_mem_regs = params->num_memories / 32 +
7065                        ((params->num_memories % 32) ? 1 : 0);
7066
7067        if (params->block_address >= CFG_BASE)
7068                params->block_address -= CFG_BASE;
7069
7070        if (params->derr)
7071                err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7072        else
7073                err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7074
7075        if (params->disable_clock_gating) {
7076                mutex_lock(&gaudi->clk_gate_mutex);
7077                hdev->asic_funcs->disable_clock_gating(hdev);
7078        }
7079
7080        /* Set invalid wrapper index */
7081        *memory_wrapper_idx = 0xFF;
7082
7083        /* Iterate through memory wrappers, a single bit must be set */
7084        for (i = 0 ; i < num_mem_regs ; i++) {
7085                /* read the i-th indication register (4 bytes apart) */
7086                err_word = RREG32(err_addr + i * 4);
7087                if (err_word) {
7088                        err_bit = __ffs(err_word);
7089                        *memory_wrapper_idx = err_bit + (32 * i);
7090                        break;
7091                }
7092        }
7093
7094        if (*memory_wrapper_idx == 0xFF) {
7095                dev_err(hdev->dev, "ECC error information cannot be found\n");
7096                rc = -EINVAL;
7097                goto enable_clk_gate;
7098        }
7099
7100        WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7101                        *memory_wrapper_idx);
7102
7103        *ecc_address =
7104                RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7105        *ecc_syndrom =
7106                RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7107
7108        /* Clear error indication */
7109        reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7110        if (params->derr)
7111                reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7112        else
7113                reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7114
7115        WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7116
7117enable_clk_gate:
7118        if (params->disable_clock_gating) {
7119                hdev->asic_funcs->set_clock_gating(hdev);
7120
7121                mutex_unlock(&gaudi->clk_gate_mutex);
7122        }
7123
7124        return rc;
7125}
7126
7127/*
7128 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7129 *
7130 * @idx: the current pi/ci value
7131 * @q_len: the queue length (power of 2)
7132 *
7133 * @return the cyclically decremented index
7134 */
7135static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7136{
7137        u32 mask = q_len - 1;
7138
7139        /*
7140         * Modular decrement is equivalent to adding (q_len - 1);
7141         * we then take the LSBs to make sure the result stays in the
7142         * range [0, q_len - 1].
7143         */
7144        return (idx + q_len - 1) & mask;
7145}
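/*
 * Worked example (illustration only): with q_len = 8 the mask is 7, so
 * gaudi_queue_idx_dec(0, 8) = (0 + 8 - 1) & 7 = 7, i.e. decrementing the
 * first index wraps around to the last entry of the queue.
 */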
7146
7147/**
7148 * gaudi_print_sw_config_stream_data - print SW config stream data
7149 *
7150 * @hdev: pointer to the habanalabs device structure
7151 * @stream: the QMAN's stream
7152 * @qman_base: base address of QMAN registers block
7153 */
7154static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7155                                                u64 qman_base)
7156{
7157        u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7158        u32 cq_ptr_lo_off, size;
7159
7160        cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7161
7162        cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7163                                                stream * cq_ptr_lo_off;
7164        cq_ptr_hi = cq_ptr_lo +
7165                                (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7166        cq_tsize = cq_ptr_lo +
7167                                (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7168
7169        cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7170        size = RREG32(cq_tsize);
7171        dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7172                                                        stream, cq_ptr, size);
7173}
7174
7175/**
7176 * gaudi_print_last_pqes_on_err - print last PQEs on error
7177 *
7178 * @hdev: pointer to the habanalabs device structure
7179 * @qid_base: first QID of the QMAN (out of 4 streams)
7180 * @stream: the QMAN's stream
7181 * @qman_base: base address of QMAN registers block
7182 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7183 */
7184static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7185                                                u32 stream, u64 qman_base,
7186                                                bool pr_sw_conf)
7187{
7188        u32 ci, qm_ci_stream_off, queue_len;
7189        struct hl_hw_queue *q;
7190        u64 pq_ci;
7191        int i;
7192
7193        q = &hdev->kernel_queues[qid_base + stream];
7194
7195        qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7196        pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7197                                                stream * qm_ci_stream_off;
7198
7199        queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7200                                        q->int_queue_len : HL_QUEUE_LENGTH;
7201
7202        hdev->asic_funcs->hw_queues_lock(hdev);
7203
7204        if (pr_sw_conf)
7205                gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7206
7207        ci = RREG32(pq_ci);
7208
7209        /* we should start printing from ci - 1 */
7210        ci = gaudi_queue_idx_dec(ci, queue_len);
7211
7212        for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7213                struct hl_bd *bd;
7214                u64 addr;
7215                u32 len;
7216
7217                bd = q->kernel_address;
7218                bd += ci;
7219
7220                len = le32_to_cpu(bd->len);
7221                /* len 0 means an uninitialized entry - break */
7222                if (!len)
7223                        break;
7224
7225                addr = le64_to_cpu(bd->ptr);
7226
7227                dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7228                                                        stream, ci, addr, len);
7229
7230                /* get previous ci, wrap if needed */
7231                ci = gaudi_queue_idx_dec(ci, queue_len);
7232        }
7233
7234        hdev->asic_funcs->hw_queues_unlock(hdev);
7235}
7236
7237/**
7238 * print_qman_data_on_err - extract QMAN data on error
7239 *
7240 * @hdev: pointer to the habanalabs device structure
7241 * @qid_base: first QID of the QMAN (out of 4 streams)
7242 * @stream: the QMAN's stream
7243 * @qman_base: base address of QMAN registers block
7244 *
7245 * This function attempts to extract as much data as possible on a QMAN error.
7246 * On an upper CP, print the SW config stream data and the last 8 PQEs.
7247 * On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7248 */
7249static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7250                                                u32 stream, u64 qman_base)
7251{
7252        u32 i;
7253
7254        if (stream != QMAN_STREAMS) {
7255                gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7256                                                                        true);
7257                return;
7258        }
7259
7260        gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7261
7262        for (i = 0; i < QMAN_STREAMS; i++)
7263                gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7264                                                                        false);
7265}
7266
7267static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7268                                          const char *qm_name,
7269                                          u64 qman_base,
7270                                          u32 qid_base)
7271{
7272        u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7273        u64 glbl_sts_addr, arb_err_addr;
7274        char reg_desc[32];
7275
7276        glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7277        arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7278
7279        /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7280        for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7281                glbl_sts_clr_val = 0;
7282                glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7283
7284                if (!glbl_sts_val)
7285                        continue;
7286
7287                if (i == QMAN_STREAMS)
7288                        snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7289                else
7290                        snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7291
7292                for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7293                        if (glbl_sts_val & BIT(j)) {
7294                                dev_err_ratelimited(hdev->dev,
7295                                                "%s %s. err cause: %s\n",
7296                                                qm_name, reg_desc,
7297                                                gaudi_qman_error_cause[j]);
7298                                glbl_sts_clr_val |= BIT(j);
7299                        }
7300                }
7301
7302                /* Write 1 to clear errors */
7303                if (!hdev->stop_on_err)
7304                        WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7305                else
7306                        print_qman_data_on_err(hdev, qid_base, i, qman_base);
7307        }
7308
7309        arb_err_val = RREG32(arb_err_addr);
7310
7311        if (!arb_err_val)
7312                return;
7313
7314        for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7315                if (arb_err_val & BIT(j)) {
7316                        dev_err_ratelimited(hdev->dev,
7317                                        "%s ARB_ERR. err cause: %s\n",
7318                                        qm_name,
7319                                        gaudi_qman_arb_error_cause[j]);
7320                }
7321        }
7322}
7323
7324static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7325                struct hl_eq_sm_sei_data *sei_data)
7326{
7327        u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7328
7329        switch (sei_data->sei_cause) {
7330        case SM_SEI_SO_OVERFLOW:
7331                dev_err(hdev->dev,
7332                        "SM %u SEI Error: SO %u overflow/underflow",
7333                        index, le32_to_cpu(sei_data->sei_log));
7334                break;
7335        case SM_SEI_LBW_4B_UNALIGNED:
7336                dev_err(hdev->dev,
7337                        "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7338                        index, le32_to_cpu(sei_data->sei_log));
7339                break;
7340        case SM_SEI_AXI_RESPONSE_ERR:
7341                dev_err(hdev->dev,
7342                        "SM %u SEI Error: AXI ID %u response error",
7343                        index, le32_to_cpu(sei_data->sei_log));
7344                break;
7345        default:
7346                dev_err(hdev->dev, "Unknown SM SEI cause %u",
7347                                le32_to_cpu(sei_data->sei_log));
7348                break;
7349        }
7350}
7351
7352static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7353                struct hl_eq_ecc_data *ecc_data)
7354{
7355        struct ecc_info_extract_params params;
7356        u64 ecc_address = 0, ecc_syndrom = 0;
7357        u8 index, memory_wrapper_idx = 0;
7358        bool extract_info_from_fw;
7359        int rc;
7360
7361        switch (event_type) {
7362        case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7363        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7364                extract_info_from_fw = true;
7365                break;
7366        case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7367                index = event_type - GAUDI_EVENT_TPC0_SERR;
7368                params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7369                params.num_memories = 90;
7370                params.derr = false;
7371                params.disable_clock_gating = true;
7372                extract_info_from_fw = false;
7373                break;
7374        case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7375                index = event_type - GAUDI_EVENT_TPC0_DERR;
7376                params.block_address =
7377                        mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7378                params.num_memories = 90;
7379                params.derr = true;
7380                params.disable_clock_gating = true;
7381                extract_info_from_fw = false;
7382                break;
7383        case GAUDI_EVENT_MME0_ACC_SERR:
7384        case GAUDI_EVENT_MME1_ACC_SERR:
7385        case GAUDI_EVENT_MME2_ACC_SERR:
7386        case GAUDI_EVENT_MME3_ACC_SERR:
7387                index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7388                params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7389                params.num_memories = 128;
7390                params.derr = false;
7391                params.disable_clock_gating = true;
7392                extract_info_from_fw = false;
7393                break;
7394        case GAUDI_EVENT_MME0_ACC_DERR:
7395        case GAUDI_EVENT_MME1_ACC_DERR:
7396        case GAUDI_EVENT_MME2_ACC_DERR:
7397        case GAUDI_EVENT_MME3_ACC_DERR:
7398                index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7399                params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7400                params.num_memories = 128;
7401                params.derr = true;
7402                params.disable_clock_gating = true;
7403                extract_info_from_fw = false;
7404                break;
7405        case GAUDI_EVENT_MME0_SBAB_SERR:
7406        case GAUDI_EVENT_MME1_SBAB_SERR:
7407        case GAUDI_EVENT_MME2_SBAB_SERR:
7408        case GAUDI_EVENT_MME3_SBAB_SERR:
7409                index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7410                params.block_address =
7411                        mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7412                params.num_memories = 33;
7413                params.derr = false;
7414                params.disable_clock_gating = true;
7415                extract_info_from_fw = false;
7416                break;
7417        case GAUDI_EVENT_MME0_SBAB_DERR:
7418        case GAUDI_EVENT_MME1_SBAB_DERR:
7419        case GAUDI_EVENT_MME2_SBAB_DERR:
7420        case GAUDI_EVENT_MME3_SBAB_DERR:
7421                index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7422                params.block_address =
7423                        mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7424                params.num_memories = 33;
7425                params.derr = true;
7426                params.disable_clock_gating = true;
7427                extract_info_from_fw = false;
7428                break;
7429        default:
7430                return;
7431        }
7432
7433        if (extract_info_from_fw) {
7434                ecc_address = le64_to_cpu(ecc_data->ecc_address);
7435                ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7436                memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7437        } else {
7438                rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7439                                &ecc_syndrom, &memory_wrapper_idx);
7440                if (rc)
7441                        return;
7442        }
7443
7444        dev_err(hdev->dev,
7445                "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7446                ecc_address, ecc_syndrom, memory_wrapper_idx);
7447}
7448
7449static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7450{
7451        u64 qman_base;
7452        char desc[32];
7453        u32 qid_base;
7454        u8 index;
7455
7456        switch (event_type) {
7457        case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7458                index = event_type - GAUDI_EVENT_TPC0_QM;
7459                qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7460                qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7461                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7462                break;
7463        case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7464                index = event_type - GAUDI_EVENT_MME0_QM;
7465                qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7466                qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7467                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7468                break;
7469        case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7470                index = event_type - GAUDI_EVENT_DMA0_QM;
7471                qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7472                /* skip GAUDI_QUEUE_ID_CPU_PQ, which sits between DMA1 and DMA2 queue IDs */
7473                if (index > 1)
7474                        qid_base++;
7475                qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7476                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7477                break;
7478        case GAUDI_EVENT_NIC0_QM0:
7479                qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7480                qman_base = mmNIC0_QM0_BASE;
7481                snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7482                break;
7483        case GAUDI_EVENT_NIC0_QM1:
7484                qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7485                qman_base = mmNIC0_QM1_BASE;
7486                snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7487                break;
7488        case GAUDI_EVENT_NIC1_QM0:
7489                qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7490                qman_base = mmNIC1_QM0_BASE;
7491                snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7492                break;
7493        case GAUDI_EVENT_NIC1_QM1:
7494                qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7495                qman_base = mmNIC1_QM1_BASE;
7496                snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7497                break;
7498        case GAUDI_EVENT_NIC2_QM0:
7499                qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7500                qman_base = mmNIC2_QM0_BASE;
7501                snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7502                break;
7503        case GAUDI_EVENT_NIC2_QM1:
7504                qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7505                qman_base = mmNIC2_QM1_BASE;
7506                snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7507                break;
7508        case GAUDI_EVENT_NIC3_QM0:
7509                qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7510                qman_base = mmNIC3_QM0_BASE;
7511                snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7512                break;
7513        case GAUDI_EVENT_NIC3_QM1:
7514                qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7515                qman_base = mmNIC3_QM1_BASE;
7516                snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7517                break;
7518        case GAUDI_EVENT_NIC4_QM0:
7519                qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7520                qman_base = mmNIC4_QM0_BASE;
7521                snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7522                break;
7523        case GAUDI_EVENT_NIC4_QM1:
7524                qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7525                qman_base = mmNIC4_QM1_BASE;
7526                snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7527                break;
7528        default:
7529                return;
7530        }
7531
7532        gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7533}
7534
7535static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7536                                        bool razwi)
7537{
7538        char desc[64] = "";
7539
7540        gaudi_get_event_desc(event_type, desc, sizeof(desc));
7541        dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7542                event_type, desc);
7543
7544        if (razwi) {
7545                gaudi_print_razwi_info(hdev);
7546                gaudi_print_mmu_error_info(hdev);
7547        }
7548}
7549
7550static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7551                                        struct cpucp_pkt_sync_err *sync_err)
7552{
7553        struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7554
7555        dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7556                        sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7557}
7558
7559static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7560                                        struct hl_eq_fw_alive *fw_alive)
7561{
7562        dev_err(hdev->dev,
7563                "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7564                (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7565                "Minor" : "Critical", fw_alive->process_id,
7566                fw_alive->thread_id, fw_alive->uptime_seconds);
7567}
7568
7569static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7570{
7571        struct gaudi_device *gaudi = hdev->asic_specific;
7572
7573        /* Unmask all IRQs since some could have been received
7574         * during the soft reset
7575         */
7576        return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7577}
7578
7579static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7580                        struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7581{
7582        u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7583        int rc = 0;
7584
7585        if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7586                                        CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7587                if (!hbm_ecc_data) {
7588                        dev_err(hdev->dev, "No FW ECC data");
7589                        return 0;
7590                }
7591
7592                wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7593                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7594                rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7595                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7596                ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7597                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7598                derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7599                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7600                serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7601                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7602                type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7603                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7604                ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7605                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7606
7607                dev_err(hdev->dev,
7608                        "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7609                        device, ch, wr_par, rd_par, ca_par, serr, derr);
7610                dev_err(hdev->dev,
7611                        "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7612                        device, ch, hbm_ecc_data->first_addr, type,
7613                        hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7614                        hbm_ecc_data->dec_cnt);
7615                return 0;
7616        }
7617
7618        if (hdev->asic_prop.fw_security_enabled) {
7619                dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7620                return 0;
7621        }
7622
7623        base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7624        for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7625                val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7626                val = (val & 0xFF) | ((val >> 8) & 0xFF);
7627                if (val) {
7628                        rc = -EIO;
7629                        dev_err(hdev->dev,
7630                                "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7631                                device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7632                                (val >> 2) & 0x1, (val >> 3) & 0x1,
7633                                (val >> 4) & 0x1);
7634
7635                        val2 = RREG32(base + ch * 0x1000 + 0x060);
7636                        dev_err(hdev->dev,
7637                                "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7638                                device, ch * 2,
7639                                RREG32(base + ch * 0x1000 + 0x064),
7640                                (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7641                                (val2 & 0xFF0000) >> 16,
7642                                (val2 & 0xFF000000) >> 24);
7643                }
7644
7645                val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7646                val = (val & 0xFF) | ((val >> 8) & 0xFF);
7647                if (val) {
7648                        rc = -EIO;
7649                        dev_err(hdev->dev,
7650                                "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7651                                device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7652                                (val >> 2) & 0x1, (val >> 3) & 0x1,
7653                                (val >> 4) & 0x1);
7654
7655                        val2 = RREG32(base + ch * 0x1000 + 0x070);
7656                        dev_err(hdev->dev,
7657                                "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7658                                device, ch * 2 + 1,
7659                                RREG32(base + ch * 0x1000 + 0x074),
7660                                (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7661                                (val2 & 0xFF0000) >> 16,
7662                                (val2 & 0xFF000000) >> 24);
7663                }
7664
7665                /* Clear interrupts */
7666                RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7667                RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7668                WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7669                WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7670                RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7671                RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7672        }
7673
7674        val  = RREG32(base + 0x8F30);
7675        val2 = RREG32(base + 0x8F34);
7676        if (val | val2) {
7677                rc = -EIO;
7678                dev_err(hdev->dev,
7679                        "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7680                        device, val, val2);
7681        }
7682        val  = RREG32(base + 0x8F40);
7683        val2 = RREG32(base + 0x8F44);
7684        if (val | val2) {
7685                rc = -EIO;
7686                dev_err(hdev->dev,
7687                        "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7688                        device, val, val2);
7689        }
7690
7691        return rc;
7692}
7693
7694static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7695{
7696        switch (hbm_event_type) {
7697        case GAUDI_EVENT_HBM0_SPI_0:
7698        case GAUDI_EVENT_HBM0_SPI_1:
7699                return 0;
7700        case GAUDI_EVENT_HBM1_SPI_0:
7701        case GAUDI_EVENT_HBM1_SPI_1:
7702                return 1;
7703        case GAUDI_EVENT_HBM2_SPI_0:
7704        case GAUDI_EVENT_HBM2_SPI_1:
7705                return 2;
7706        case GAUDI_EVENT_HBM3_SPI_0:
7707        case GAUDI_EVENT_HBM3_SPI_1:
7708                return 3;
7709        default:
7710                break;
7711        }
7712
7713        /* Should never happen */
7714        return 0;
7715}
7716
7717static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7718                                        char *interrupt_name)
7719{
7720        struct gaudi_device *gaudi = hdev->asic_specific;
7721        u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7722        bool soft_reset_required = false;
7723
7724        /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7725         * gating, so it cannot be done by CPU-CP and is done by the driver
7726         * instead.
7727         */
7728
7729        mutex_lock(&gaudi->clk_gate_mutex);
7730
7731        hdev->asic_funcs->disable_clock_gating(hdev);
7732
7733        tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7734                                TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7735
7736        for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7737                if (tpc_interrupts_cause & BIT(i)) {
7738                        dev_err_ratelimited(hdev->dev,
7739                                        "TPC%d_%s interrupt cause: %s\n",
7740                                        tpc_id, interrupt_name,
7741                                        gaudi_tpc_interrupts_cause[i]);
7742                        /* If this is a QM error, we need to soft-reset */
7743                        if (i == 15)
7744                                soft_reset_required = true;
7745                }
7746
7747        /* Clear interrupts */
7748        WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7749
7750        hdev->asic_funcs->set_clock_gating(hdev);
7751
7752        mutex_unlock(&gaudi->clk_gate_mutex);
7753
7754        return soft_reset_required;
7755}
7756
7757static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7758{
7759        return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7760}
7761
7762static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7763{
7764        return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7765}
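/*
 * Note (inferred from the arithmetic above rather than from the event list
 * itself): consecutive TPCs are assumed to be 2 event IDs apart for the
 * *_DEC events and 6 apart for the *_KRN_ERR events, hence the divide by 2
 * and by 6 in the decoders above.
 */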
7766
7767static void gaudi_print_clk_change_info(struct hl_device *hdev,
7768                                        u16 event_type)
7769{
7770        switch (event_type) {
7771        case GAUDI_EVENT_FIX_POWER_ENV_S:
7772                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7773                dev_info_ratelimited(hdev->dev,
7774                        "Clock throttling due to power consumption\n");
7775                break;
7776
7777        case GAUDI_EVENT_FIX_POWER_ENV_E:
7778                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7779                dev_info_ratelimited(hdev->dev,
7780                        "Power envelope is safe, back to optimal clock\n");
7781                break;
7782
7783        case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7784                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7785                dev_info_ratelimited(hdev->dev,
7786                        "Clock throttling due to overheating\n");
7787                break;
7788
7789        case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7790                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7791                dev_info_ratelimited(hdev->dev,
7792                        "Thermal envelope is safe, back to optimal clock\n");
7793                break;
7794
7795        default:
7796                dev_err(hdev->dev, "Received invalid clock change event %d\n",
7797                        event_type);
7798                break;
7799        }
7800}
7801
7802static void gaudi_handle_eqe(struct hl_device *hdev,
7803                                struct hl_eq_entry *eq_entry)
7804{
7805        struct gaudi_device *gaudi = hdev->asic_specific;
7806        u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7807        u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7808                        >> EQ_CTL_EVENT_TYPE_SHIFT);
7809        u8 cause;
7810        bool reset_required;
7811
7812        gaudi->events_stat[event_type]++;
7813        gaudi->events_stat_aggregate[event_type]++;
7814
7815        switch (event_type) {
7816        case GAUDI_EVENT_PCIE_CORE_DERR:
7817        case GAUDI_EVENT_PCIE_IF_DERR:
7818        case GAUDI_EVENT_PCIE_PHY_DERR:
7819        case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7820        case GAUDI_EVENT_MME0_ACC_DERR:
7821        case GAUDI_EVENT_MME0_SBAB_DERR:
7822        case GAUDI_EVENT_MME1_ACC_DERR:
7823        case GAUDI_EVENT_MME1_SBAB_DERR:
7824        case GAUDI_EVENT_MME2_ACC_DERR:
7825        case GAUDI_EVENT_MME2_SBAB_DERR:
7826        case GAUDI_EVENT_MME3_ACC_DERR:
7827        case GAUDI_EVENT_MME3_SBAB_DERR:
7828        case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7829                fallthrough;
7830        case GAUDI_EVENT_CPU_IF_ECC_DERR:
7831        case GAUDI_EVENT_PSOC_MEM_DERR:
7832        case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7833        case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7834        case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7835        case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7836        case GAUDI_EVENT_MMU_DERR:
7837        case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7838                gaudi_print_irq_info(hdev, event_type, true);
7839                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7840                goto reset_device;
7841
7842        case GAUDI_EVENT_GIC500:
7843        case GAUDI_EVENT_AXI_ECC:
7844        case GAUDI_EVENT_L2_RAM_ECC:
7845        case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7846                gaudi_print_irq_info(hdev, event_type, false);
7847                goto reset_device;
7848
7849        case GAUDI_EVENT_HBM0_SPI_0:
7850        case GAUDI_EVENT_HBM1_SPI_0:
7851        case GAUDI_EVENT_HBM2_SPI_0:
7852        case GAUDI_EVENT_HBM3_SPI_0:
7853                gaudi_print_irq_info(hdev, event_type, false);
7854                gaudi_hbm_read_interrupts(hdev,
7855                                gaudi_hbm_event_to_dev(event_type),
7856                                &eq_entry->hbm_ecc_data);
7857                goto reset_device;
7858
7859        case GAUDI_EVENT_HBM0_SPI_1:
7860        case GAUDI_EVENT_HBM1_SPI_1:
7861        case GAUDI_EVENT_HBM2_SPI_1:
7862        case GAUDI_EVENT_HBM3_SPI_1:
7863                gaudi_print_irq_info(hdev, event_type, false);
7864                gaudi_hbm_read_interrupts(hdev,
7865                                gaudi_hbm_event_to_dev(event_type),
7866                                &eq_entry->hbm_ecc_data);
7867                hl_fw_unmask_irq(hdev, event_type);
7868                break;
7869
7870        case GAUDI_EVENT_TPC0_DEC:
7871        case GAUDI_EVENT_TPC1_DEC:
7872        case GAUDI_EVENT_TPC2_DEC:
7873        case GAUDI_EVENT_TPC3_DEC:
7874        case GAUDI_EVENT_TPC4_DEC:
7875        case GAUDI_EVENT_TPC5_DEC:
7876        case GAUDI_EVENT_TPC6_DEC:
7877        case GAUDI_EVENT_TPC7_DEC:
7878                gaudi_print_irq_info(hdev, event_type, true);
7879                reset_required = gaudi_tpc_read_interrupts(hdev,
7880                                        tpc_dec_event_to_tpc_id(event_type),
7881                                        "AXI_SLV_DEC_Error");
7882                if (reset_required) {
7883                        dev_err(hdev->dev, "hard reset required due to %s\n",
7884                                gaudi_irq_map_table[event_type].name);
7885
7886                        goto reset_device;
7887                } else {
7888                        hl_fw_unmask_irq(hdev, event_type);
7889                }
7890                break;
7891
7892        case GAUDI_EVENT_TPC0_KRN_ERR:
7893        case GAUDI_EVENT_TPC1_KRN_ERR:
7894        case GAUDI_EVENT_TPC2_KRN_ERR:
7895        case GAUDI_EVENT_TPC3_KRN_ERR:
7896        case GAUDI_EVENT_TPC4_KRN_ERR:
7897        case GAUDI_EVENT_TPC5_KRN_ERR:
7898        case GAUDI_EVENT_TPC6_KRN_ERR:
7899        case GAUDI_EVENT_TPC7_KRN_ERR:
7900                gaudi_print_irq_info(hdev, event_type, true);
7901                reset_required = gaudi_tpc_read_interrupts(hdev,
7902                                        tpc_krn_event_to_tpc_id(event_type),
7903                                        "KRN_ERR");
7904                if (reset_required) {
7905                        dev_err(hdev->dev, "hard reset required due to %s\n",
7906                                gaudi_irq_map_table[event_type].name);
7907
7908                        goto reset_device;
7909                } else {
7910                        hl_fw_unmask_irq(hdev, event_type);
7911                }
7912                break;
7913
7914        case GAUDI_EVENT_PCIE_CORE_SERR:
7915        case GAUDI_EVENT_PCIE_IF_SERR:
7916        case GAUDI_EVENT_PCIE_PHY_SERR:
7917        case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7918        case GAUDI_EVENT_MME0_ACC_SERR:
7919        case GAUDI_EVENT_MME0_SBAB_SERR:
7920        case GAUDI_EVENT_MME1_ACC_SERR:
7921        case GAUDI_EVENT_MME1_SBAB_SERR:
7922        case GAUDI_EVENT_MME2_ACC_SERR:
7923        case GAUDI_EVENT_MME2_SBAB_SERR:
7924        case GAUDI_EVENT_MME3_ACC_SERR:
7925        case GAUDI_EVENT_MME3_SBAB_SERR:
7926        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7927        case GAUDI_EVENT_CPU_IF_ECC_SERR:
7928        case GAUDI_EVENT_PSOC_MEM_SERR:
7929        case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7930        case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7931        case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7932        case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7933                fallthrough;
7934        case GAUDI_EVENT_MMU_SERR:
7935                gaudi_print_irq_info(hdev, event_type, true);
7936                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7937                hl_fw_unmask_irq(hdev, event_type);
7938                break;
7939
7940        case GAUDI_EVENT_PCIE_DEC:
7941        case GAUDI_EVENT_MME0_WBC_RSP:
7942        case GAUDI_EVENT_MME0_SBAB0_RSP:
7943        case GAUDI_EVENT_MME1_WBC_RSP:
7944        case GAUDI_EVENT_MME1_SBAB0_RSP:
7945        case GAUDI_EVENT_MME2_WBC_RSP:
7946        case GAUDI_EVENT_MME2_SBAB0_RSP:
7947        case GAUDI_EVENT_MME3_WBC_RSP:
7948        case GAUDI_EVENT_MME3_SBAB0_RSP:
7949        case GAUDI_EVENT_CPU_AXI_SPLITTER:
7950        case GAUDI_EVENT_PSOC_AXI_DEC:
7951        case GAUDI_EVENT_PSOC_PRSTN_FALL:
7952        case GAUDI_EVENT_MMU_PAGE_FAULT:
7953        case GAUDI_EVENT_MMU_WR_PERM:
7954        case GAUDI_EVENT_RAZWI_OR_ADC:
7955        case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7956        case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7957        case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7958                fallthrough;
7959        case GAUDI_EVENT_NIC0_QM0:
7960        case GAUDI_EVENT_NIC0_QM1:
7961        case GAUDI_EVENT_NIC1_QM0:
7962        case GAUDI_EVENT_NIC1_QM1:
7963        case GAUDI_EVENT_NIC2_QM0:
7964        case GAUDI_EVENT_NIC2_QM1:
7965        case GAUDI_EVENT_NIC3_QM0:
7966        case GAUDI_EVENT_NIC3_QM1:
7967        case GAUDI_EVENT_NIC4_QM0:
7968        case GAUDI_EVENT_NIC4_QM1:
7969        case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7970                gaudi_print_irq_info(hdev, event_type, true);
7971                gaudi_handle_qman_err(hdev, event_type);
7972                hl_fw_unmask_irq(hdev, event_type);
7973                break;
7974
7975        case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7976                gaudi_print_irq_info(hdev, event_type, true);
7977                goto reset_device;
7978
7979        case GAUDI_EVENT_TPC0_BMON_SPMU:
7980        case GAUDI_EVENT_TPC1_BMON_SPMU:
7981        case GAUDI_EVENT_TPC2_BMON_SPMU:
7982        case GAUDI_EVENT_TPC3_BMON_SPMU:
7983        case GAUDI_EVENT_TPC4_BMON_SPMU:
7984        case GAUDI_EVENT_TPC5_BMON_SPMU:
7985        case GAUDI_EVENT_TPC6_BMON_SPMU:
7986        case GAUDI_EVENT_TPC7_BMON_SPMU:
7987        case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7988                gaudi_print_irq_info(hdev, event_type, false);
7989                hl_fw_unmask_irq(hdev, event_type);
7990                break;
7991
7992        case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7993                gaudi_print_irq_info(hdev, event_type, false);
7994                gaudi_print_sm_sei_info(hdev, event_type,
7995                                        &eq_entry->sm_sei_data);
7996                hl_fw_unmask_irq(hdev, event_type);
7997                break;
7998
7999        case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8000                gaudi_print_clk_change_info(hdev, event_type);
8001                hl_fw_unmask_irq(hdev, event_type);
8002                break;
8003
8004        case GAUDI_EVENT_PSOC_GPIO_U16_0:
8005                cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8006                dev_err(hdev->dev,
8007                        "Received high temp H/W interrupt %d (cause %d)\n",
8008                        event_type, cause);
8009                break;
8010
8011        case GAUDI_EVENT_DEV_RESET_REQ:
8012                gaudi_print_irq_info(hdev, event_type, false);
8013                goto reset_device;
8014
8015        case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8016                gaudi_print_irq_info(hdev, event_type, false);
8017                gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8018                goto reset_device;
8019
8020        case GAUDI_EVENT_FW_ALIVE_S:
8021                gaudi_print_irq_info(hdev, event_type, false);
8022                gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8023                goto reset_device;
8024
8025        default:
8026                dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8027                                event_type);
8028                break;
8029        }
8030
8031        return;
8032
8033reset_device:
8034        if (hdev->hard_reset_on_fw_events)
8035                hl_device_reset(hdev, HL_RESET_HARD);
8036        else
8037                hl_fw_unmask_irq(hdev, event_type);
8038}
8039
8040static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8041                                        u32 *size)
8042{
8043        struct gaudi_device *gaudi = hdev->asic_specific;
8044
8045        if (aggregate) {
8046                *size = (u32) sizeof(gaudi->events_stat_aggregate);
8047                return gaudi->events_stat_aggregate;
8048        }
8049
8050        *size = (u32) sizeof(gaudi->events_stat);
8051        return gaudi->events_stat;
8052}
8053
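/*
 * Invalidate the entire MMU STLB cache. The register sequence below kicks an
 * L0 + L1 invalidation and polls for completion; a timeout is treated as a
 * fatal error and escalates to a hard reset.
 */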
8054static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8055                                        u32 flags)
8056{
8057        struct gaudi_device *gaudi = hdev->asic_specific;
8058        u32 status, timeout_usec;
8059        int rc;
8060
8061        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8062                hdev->hard_reset_pending)
8063                return 0;
8064
8065        if (hdev->pldm)
8066                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8067        else
8068                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8069
8070        /* L0 & L1 invalidation */
8071        WREG32(mmSTLB_INV_PS, 3);
8072        WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8073        WREG32(mmSTLB_INV_PS, 2);
8074
8075        rc = hl_poll_timeout(
8076                hdev,
8077                mmSTLB_INV_PS,
8078                status,
8079                !status,
8080                1000,
8081                timeout_usec);
8082
8083        WREG32(mmSTLB_INV_SET, 0);
8084
8085        if (rc) {
8086                dev_err_ratelimited(hdev->dev,
8087                                        "MMU cache invalidation timeout\n");
8088                hl_device_reset(hdev, HL_RESET_HARD);
8089        }
8090
8091        return rc;
8092}
8093
8094static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8095                                                bool is_hard, u32 flags,
8096                                                u32 asid, u64 va, u64 size)
8097{
8098        /* Treat as invalidate all because there is no range invalidation
8099         * in Gaudi
8100         */
8101        return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8102}
8103
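/*
 * Program the physical address of the hop-0 page table for the given ASID and
 * poll the MMU busy bit until the configuration is accepted.
 */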
8104static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8105                                        u32 asid, u64 phys_addr)
8106{
8107        u32 status, timeout_usec;
8108        int rc;
8109
8110        if (hdev->pldm)
8111                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8112        else
8113                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8114
8115        WREG32(MMU_ASID, asid);
8116        WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8117        WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8118        WREG32(MMU_BUSY, 0x80000000);
8119
8120        rc = hl_poll_timeout(
8121                hdev,
8122                MMU_BUSY,
8123                status,
8124                !(status & 0x80000000),
8125                1000,
8126                timeout_usec);
8127
8128        if (rc) {
8129                dev_err(hdev->dev,
8130                        "Timeout during MMU hop0 config of asid %d\n", asid);
8131                return rc;
8132        }
8133
8134        return 0;
8135}
8136
8137static int gaudi_send_heartbeat(struct hl_device *hdev)
8138{
8139        struct gaudi_device *gaudi = hdev->asic_specific;
8140
8141        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8142                return 0;
8143
8144        return hl_fw_send_heartbeat(hdev);
8145}
8146
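/*
 * Handshake with the CPU-CP firmware and cache the static card information:
 * card name (with a default fallback), card type and default power values.
 */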
8147static int gaudi_cpucp_info_get(struct hl_device *hdev)
8148{
8149        struct gaudi_device *gaudi = hdev->asic_specific;
8150        struct asic_fixed_properties *prop = &hdev->asic_prop;
8151        int rc;
8152
8153        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8154                return 0;
8155
8156        rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8157                                        mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8158                                        mmCPU_BOOT_ERR1);
8159        if (rc)
8160                return rc;
8161
8162        if (!strlen(prop->cpucp_info.card_name))
8163                strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8164                                CARD_NAME_MAX_LEN);
8165
8166        hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8167
8168        set_default_power_values(hdev);
8169
8170        hdev->max_power = prop->max_power_default;
8171
8172        return 0;
8173}
8174
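/*
 * Check the idle state of every DMA, TPC, MME and NIC engine. When a seq_file
 * is supplied (debugfs), a per-engine status table is printed; when a mask is
 * supplied, a bit is set for every busy engine.
 */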
8175static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8176                                        u8 mask_len, struct seq_file *s)
8177{
8178        struct gaudi_device *gaudi = hdev->asic_specific;
8179        const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8180        const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8181        const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8182        unsigned long *mask = (unsigned long *)mask_arr;
8183        u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8184        bool is_idle = true, is_eng_idle, is_slave;
8185        u64 offset;
8186        int i, dma_id, port;
8187
8188        mutex_lock(&gaudi->clk_gate_mutex);
8189
8190        hdev->asic_funcs->disable_clock_gating(hdev);
8191
8192        if (s)
8193                seq_puts(s,
8194                        "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8195                        "---  -------  ------------  ----------  -------------\n");
8196
8197        for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8198                dma_id = gaudi_dma_assignment[i];
8199                offset = dma_id * DMA_QMAN_OFFSET;
8200
8201                qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8202                qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8203                dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8204                is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8205                                IS_DMA_IDLE(dma_core_sts0);
8206                is_idle &= is_eng_idle;
8207
8208                if (mask && !is_eng_idle)
8209                        set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8210                if (s)
8211                        seq_printf(s, fmt, dma_id,
8212                                is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8213                                qm_cgm_sts, dma_core_sts0);
8214        }
8215
8216        if (s)
8217                seq_puts(s,
8218                        "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8219                        "---  -------  ------------  ----------  ----------\n");
8220
8221        for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8222                offset = i * TPC_QMAN_OFFSET;
8223                qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8224                qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8225                tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8226                is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8227                                IS_TPC_IDLE(tpc_cfg_sts);
8228                is_idle &= is_eng_idle;
8229
8230                if (mask && !is_eng_idle)
8231                        set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8232                if (s)
8233                        seq_printf(s, fmt, i,
8234                                is_eng_idle ? "Y" : "N",
8235                                qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8236        }
8237
8238        if (s)
8239                seq_puts(s,
8240                        "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8241                        "---  -------  ------------  ----------  -----------\n");
8242
8243        for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8244                offset = i * MME_QMAN_OFFSET;
8245                mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8246                is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8247
8248                /* MME 1 & 3 are slaves, no need to check their QMANs */
8249                is_slave = i % 2;
8250                if (!is_slave) {
8251                        qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8252                        qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8253                        is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8254                }
8255
8256                is_idle &= is_eng_idle;
8257
8258                if (mask && !is_eng_idle)
8259                        set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8260                if (s) {
8261                        if (!is_slave)
8262                                seq_printf(s, fmt, i,
8263                                        is_eng_idle ? "Y" : "N",
8264                                        qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8265                        else
8266                                seq_printf(s, mme_slave_fmt, i,
8267                                        is_eng_idle ? "Y" : "N", "-",
8268                                        "-", mme_arch_sts);
8269                }
8270        }
8271
8272        if (s)
8273                seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8274                                "---  -------  ------------  ----------\n");
8275
8276        for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8277                offset = i * NIC_MACRO_QMAN_OFFSET;
8278                port = 2 * i;
8279                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8280                        qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8281                        qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8282                        is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8283                        is_idle &= is_eng_idle;
8284
8285                        if (mask && !is_eng_idle)
8286                                set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8287                        if (s)
8288                                seq_printf(s, nic_fmt, port,
8289                                                is_eng_idle ? "Y" : "N",
8290                                                qm_glbl_sts0, qm_cgm_sts);
8291                }
8292
8293                port = 2 * i + 1;
8294                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8295                        qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8296                        qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8297                        is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8298                        is_idle &= is_eng_idle;
8299
8300                        if (mask && !is_eng_idle)
8301                                set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8302                        if (s)
8303                                seq_printf(s, nic_fmt, port,
8304                                                is_eng_idle ? "Y" : "N",
8305                                                qm_glbl_sts0, qm_cgm_sts);
8306                }
8307        }
8308
8309        if (s)
8310                seq_puts(s, "\n");
8311
8312        hdev->asic_funcs->set_clock_gating(hdev);
8313
8314        mutex_unlock(&gaudi->clk_gate_mutex);
8315
8316        return is_idle;
8317}
8318
8319static void gaudi_hw_queues_lock(struct hl_device *hdev)
8320        __acquires(&gaudi->hw_queues_lock)
8321{
8322        struct gaudi_device *gaudi = hdev->asic_specific;
8323
8324        spin_lock(&gaudi->hw_queues_lock);
8325}
8326
8327static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8328        __releases(&gaudi->hw_queues_lock)
8329{
8330        struct gaudi_device *gaudi = hdev->asic_specific;
8331
8332        spin_unlock(&gaudi->hw_queues_lock);
8333}
8334
8335static u32 gaudi_get_pci_id(struct hl_device *hdev)
8336{
8337        return hdev->pdev->device;
8338}
8339
8340static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8341                                size_t max_size)
8342{
8343        struct gaudi_device *gaudi = hdev->asic_specific;
8344
8345        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8346                return 0;
8347
8348        return hl_fw_get_eeprom_data(hdev, data, max_size);
8349}
8350
8351/*
8352 * this function should be used only during initialization and/or after reset,
8353 * when there are no active users.
8354 */
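/*
 * The flow below programs the kernel address into the QM and icache base
 * registers, invalidates and prefetches the icache, triggers execution, and
 * then polls the vector pipe and the work-queue in-flight counter until the
 * kernel has retired.
 */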
8355static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8356                                u32 tpc_id)
8357{
8358        struct gaudi_device *gaudi = hdev->asic_specific;
8359        u64 kernel_timeout;
8360        u32 status, offset;
8361        int rc;
8362
8363        offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8364
8365        if (hdev->pldm)
8366                kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8367        else
8368                kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8369
8370        mutex_lock(&gaudi->clk_gate_mutex);
8371
8372        hdev->asic_funcs->disable_clock_gating(hdev);
8373
8374        WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8375                        lower_32_bits(tpc_kernel));
8376        WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8377                        upper_32_bits(tpc_kernel));
8378
8379        WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8380                        lower_32_bits(tpc_kernel));
8381        WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8382                        upper_32_bits(tpc_kernel));
8383        /* set a valid LUT pointer, content is of no significance */
8384        WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8385                        lower_32_bits(tpc_kernel));
8386        WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8387                        upper_32_bits(tpc_kernel));
8388
8389        WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8390                        lower_32_bits(CFG_BASE +
8391                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8392
8393        WREG32(mmTPC0_CFG_TPC_CMD + offset,
8394                        (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8395                        1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8396        /* wait a bit for the engine to start executing */
8397        usleep_range(1000, 1500);
8398
8399        /* wait until engine has finished executing */
8400        rc = hl_poll_timeout(
8401                hdev,
8402                mmTPC0_CFG_STATUS + offset,
8403                status,
8404                (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8405                                TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8406                1000,
8407                kernel_timeout);
8408
8409        if (rc) {
8410                dev_err(hdev->dev,
8411                        "Timeout while waiting for TPC%d icache prefetch\n",
8412                        tpc_id);
8413                hdev->asic_funcs->set_clock_gating(hdev);
8414                mutex_unlock(&gaudi->clk_gate_mutex);
8415                return -EIO;
8416        }
8417
8418        WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8419                        1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8420
8421        /* wait a bit for the engine to start executing */
8422        usleep_range(1000, 1500);
8423
8424        /* wait until engine has finished executing */
8425        rc = hl_poll_timeout(
8426                hdev,
8427                mmTPC0_CFG_STATUS + offset,
8428                status,
8429                (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8430                                TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8431                1000,
8432                kernel_timeout);
8433
8434        if (rc) {
8435                dev_err(hdev->dev,
8436                        "Timeout while waiting for TPC%d vector pipe\n",
8437                        tpc_id);
8438                hdev->asic_funcs->set_clock_gating(hdev);
8439                mutex_unlock(&gaudi->clk_gate_mutex);
8440                return -EIO;
8441        }
8442
8443        rc = hl_poll_timeout(
8444                hdev,
8445                mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8446                status,
8447                (status == 0),
8448                1000,
8449                kernel_timeout);
8450
8451        hdev->asic_funcs->set_clock_gating(hdev);
8452        mutex_unlock(&gaudi->clk_gate_mutex);
8453
8454        if (rc) {
8455                dev_err(hdev->dev,
8456                        "Timeout while waiting for TPC%d kernel to execute\n",
8457                        tpc_id);
8458                return -EIO;
8459        }
8460
8461        return 0;
8462}
8463
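/*
 * Allocate a host-resident pool of internal command buffers (the allocation
 * order is sized for the collective-operation packets), expose it through a
 * gen_pool allocator and map it into the device MMU at a reserved host VA
 * block.
 */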
8464static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8465                struct hl_ctx *ctx)
8466{
8467        struct gaudi_device *gaudi = hdev->asic_specific;
8468        int min_alloc_order, rc, collective_cb_size;
8469
8470        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8471                return 0;
8472
8473        hdev->internal_cb_pool_virt_addr =
8474                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8475                                        HOST_SPACE_INTERNAL_CB_SZ,
8476                                        &hdev->internal_cb_pool_dma_addr,
8477                                        GFP_KERNEL | __GFP_ZERO);
8478
8479        if (!hdev->internal_cb_pool_virt_addr)
8480                return -ENOMEM;
8481
8482        collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8483                        sizeof(struct packet_fence);
8484        min_alloc_order = ilog2(collective_cb_size);
8485
8486        hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8487        if (!hdev->internal_cb_pool) {
8488                dev_err(hdev->dev,
8489                        "Failed to create internal CB pool\n");
8490                rc = -ENOMEM;
8491                goto free_internal_cb_pool;
8492        }
8493
8494        rc = gen_pool_add(hdev->internal_cb_pool,
8495                                (uintptr_t) hdev->internal_cb_pool_virt_addr,
8496                                HOST_SPACE_INTERNAL_CB_SZ, -1);
8497        if (rc) {
8498                dev_err(hdev->dev,
8499                        "Failed to add memory to internal CB pool\n");
8500                rc = -EFAULT;
8501                goto destroy_internal_cb_pool;
8502        }
8503
8504        hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8505                        HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8506                        HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8507
8508        if (!hdev->internal_cb_va_base) {
8509                rc = -ENOMEM;
8510                goto destroy_internal_cb_pool;
8511        }
8512
8513        mutex_lock(&ctx->mmu_lock);
8514        rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8515                        hdev->internal_cb_pool_dma_addr,
8516                        HOST_SPACE_INTERNAL_CB_SZ);
8517
8518        hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8519        mutex_unlock(&ctx->mmu_lock);
8520
8521        if (rc)
8522                goto unreserve_internal_cb_pool;
8523
8524        return 0;
8525
8526unreserve_internal_cb_pool:
8527        hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8528                        HOST_SPACE_INTERNAL_CB_SZ);
8529destroy_internal_cb_pool:
8530        gen_pool_destroy(hdev->internal_cb_pool);
8531free_internal_cb_pool:
8532        hdev->asic_funcs->asic_dma_free_coherent(hdev,
8533                        HOST_SPACE_INTERNAL_CB_SZ,
8534                        hdev->internal_cb_pool_virt_addr,
8535                        hdev->internal_cb_pool_dma_addr);
8536
8537        return rc;
8538}
8539
8540static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8541                struct hl_ctx *ctx)
8542{
8543        struct gaudi_device *gaudi = hdev->asic_specific;
8544
8545        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8546                return;
8547
8548        mutex_lock(&ctx->mmu_lock);
8549        hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8550                        HOST_SPACE_INTERNAL_CB_SZ);
8551        hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8552                        HOST_SPACE_INTERNAL_CB_SZ);
8553        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8554        mutex_unlock(&ctx->mmu_lock);
8555
8556        gen_pool_destroy(hdev->internal_cb_pool);
8557
8558        hdev->asic_funcs->asic_dma_free_coherent(hdev,
8559                        HOST_SPACE_INTERNAL_CB_SZ,
8560                        hdev->internal_cb_pool_virt_addr,
8561                        hdev->internal_cb_pool_dma_addr);
8562}
8563
8564static int gaudi_ctx_init(struct hl_ctx *ctx)
8565{
8566        if (ctx->asid == HL_KERNEL_ASID_ID)
8567                return 0;
8568
8569        gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8570        return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8571}
8572
8573static void gaudi_ctx_fini(struct hl_ctx *ctx)
8574{
8575        if (ctx->asid == HL_KERNEL_ASID_ID)
8576                return;
8577
8578        gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8579}
8580
8581static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8582{
8583        return gaudi_cq_assignment[cq_idx];
8584}
8585
8586static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8587{
8588        return sizeof(struct packet_msg_short) +
8589                        sizeof(struct packet_msg_prot) * 2;
8590}
8591
8592static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8593{
8594        return sizeof(struct packet_msg_short) * 4 +
8595                        sizeof(struct packet_fence) +
8596                        sizeof(struct packet_msg_prot) * 2;
8597}
8598
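/*
 * Append a MSG_SHORT packet that increments the given sync object (SOB) by 1
 * to the signal CB and return the updated CB size.
 */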
8599static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8600                                u32 size, bool eb)
8601{
8602        struct hl_cb *cb = (struct hl_cb *) data;
8603        struct packet_msg_short *pkt;
8604        u32 value, ctl, pkt_size = sizeof(*pkt);
8605
8606        pkt = cb->kernel_address + size;
8607        memset(pkt, 0, pkt_size);
8608
8609        /* Inc by 1, Mode ADD */
8610        value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8611        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8612
8613        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8614        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8615        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8616        ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8617        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8618        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8619        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8620
8621        pkt->value = cpu_to_le32(value);
8622        pkt->ctl = cpu_to_le32(ctl);
8623
8624        return size + pkt_size;
8625}
8626
8627static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8628                                        u16 addr)
8629{
8630        u32 ctl, pkt_size = sizeof(*pkt);
8631
8632        memset(pkt, 0, pkt_size);
8633
8634        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8635        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8636        ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8637        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8638        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8639        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8640
8641        pkt->value = cpu_to_le32(value);
8642        pkt->ctl = cpu_to_le32(ctl);
8643
8644        return pkt_size;
8645}
8646
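/*
 * Build the MSG_SHORT packet that arms a sync manager monitor: it binds the
 * monitor to the SOB group and mask and to the target value, using the
 * "greater or equal" comparison mode.
 */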
8647static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8648                struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8649                u16 sob_val, u16 mon_id)
8650{
8651        u64 monitor_base;
8652        u32 ctl, value, pkt_size = sizeof(*pkt);
8653        u16 msg_addr_offset;
8654        u8 mask;
8655
8656        if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8657                dev_err(hdev->dev,
8658                        "sob_base %u (mask %#x) is not valid\n",
8659                        sob_base, sob_mask);
8660                return 0;
8661        }
8662
8663        /*
8664         * monitor_base should be the content of the base0 address registers,
8665         * so it will be added to the msg short offsets
8666         */
8667        monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8668
8669        msg_addr_offset =
8670                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8671                                monitor_base;
8672
8673        memset(pkt, 0, pkt_size);
8674
8675        /* Monitor config packet: bind the monitor to a sync object */
8676        value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8677        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8678        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8679                        0); /* GREATER OR EQUAL */
8680        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8681
8682        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8683        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8684        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8685        ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8686        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8687        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8688        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8689
8690        pkt->value = cpu_to_le32(value);
8691        pkt->ctl = cpu_to_le32(ctl);
8692
8693        return pkt_size;
8694}
8695
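/* Build a FENCE packet on fence counter 2: target value 1, decrement value 1 */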
8696static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8697{
8698        u32 ctl, cfg, pkt_size = sizeof(*pkt);
8699
8700        memset(pkt, 0, pkt_size);
8701
8702        cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8703        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8704        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8705
8706        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8707        ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8708        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8709        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8710
8711        pkt->cfg = cpu_to_le32(cfg);
8712        pkt->ctl = cpu_to_le32(ctl);
8713
8714        return pkt_size;
8715}
8716
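/*
 * Translate a queue ID to the config-space address of that queue's CP FENCE2
 * read-data register; the wait CB points the monitor payload at this address.
 */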
8717static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8718{
8719        u32 offset, nic_index;
8720
8721        switch (queue_id) {
8722        case GAUDI_QUEUE_ID_DMA_0_0:
8723                offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8724                break;
8725        case GAUDI_QUEUE_ID_DMA_0_1:
8726                offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8727                break;
8728        case GAUDI_QUEUE_ID_DMA_0_2:
8729                offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8730                break;
8731        case GAUDI_QUEUE_ID_DMA_0_3:
8732                offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8733                break;
8734        case GAUDI_QUEUE_ID_DMA_1_0:
8735                offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8736                break;
8737        case GAUDI_QUEUE_ID_DMA_1_1:
8738                offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8739                break;
8740        case GAUDI_QUEUE_ID_DMA_1_2:
8741                offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8742                break;
8743        case GAUDI_QUEUE_ID_DMA_1_3:
8744                offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8745                break;
8746        case GAUDI_QUEUE_ID_DMA_5_0:
8747                offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8748                break;
8749        case GAUDI_QUEUE_ID_DMA_5_1:
8750                offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8751                break;
8752        case GAUDI_QUEUE_ID_DMA_5_2:
8753                offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8754                break;
8755        case GAUDI_QUEUE_ID_DMA_5_3:
8756                offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8757                break;
8758        case GAUDI_QUEUE_ID_TPC_7_0:
8759                offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8760                break;
8761        case GAUDI_QUEUE_ID_TPC_7_1:
8762                offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8763                break;
8764        case GAUDI_QUEUE_ID_TPC_7_2:
8765                offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8766                break;
8767        case GAUDI_QUEUE_ID_TPC_7_3:
8768                offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8769                break;
8770        case GAUDI_QUEUE_ID_NIC_0_0:
8771        case GAUDI_QUEUE_ID_NIC_1_0:
8772        case GAUDI_QUEUE_ID_NIC_2_0:
8773        case GAUDI_QUEUE_ID_NIC_3_0:
8774        case GAUDI_QUEUE_ID_NIC_4_0:
8775        case GAUDI_QUEUE_ID_NIC_5_0:
8776        case GAUDI_QUEUE_ID_NIC_6_0:
8777        case GAUDI_QUEUE_ID_NIC_7_0:
8778        case GAUDI_QUEUE_ID_NIC_8_0:
8779        case GAUDI_QUEUE_ID_NIC_9_0:
8780                nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8781                offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8782                                (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8783                                (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8784                break;
8785        case GAUDI_QUEUE_ID_NIC_0_1:
8786        case GAUDI_QUEUE_ID_NIC_1_1:
8787        case GAUDI_QUEUE_ID_NIC_2_1:
8788        case GAUDI_QUEUE_ID_NIC_3_1:
8789        case GAUDI_QUEUE_ID_NIC_4_1:
8790        case GAUDI_QUEUE_ID_NIC_5_1:
8791        case GAUDI_QUEUE_ID_NIC_6_1:
8792        case GAUDI_QUEUE_ID_NIC_7_1:
8793        case GAUDI_QUEUE_ID_NIC_8_1:
8794        case GAUDI_QUEUE_ID_NIC_9_1:
8795                nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8796                offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8797                                (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8798                                (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8799                break;
8800        case GAUDI_QUEUE_ID_NIC_0_2:
8801        case GAUDI_QUEUE_ID_NIC_1_2:
8802        case GAUDI_QUEUE_ID_NIC_2_2:
8803        case GAUDI_QUEUE_ID_NIC_3_2:
8804        case GAUDI_QUEUE_ID_NIC_4_2:
8805        case GAUDI_QUEUE_ID_NIC_5_2:
8806        case GAUDI_QUEUE_ID_NIC_6_2:
8807        case GAUDI_QUEUE_ID_NIC_7_2:
8808        case GAUDI_QUEUE_ID_NIC_8_2:
8809        case GAUDI_QUEUE_ID_NIC_9_2:
8810                nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8811                offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8812                                (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8813                                (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8814                break;
8815        case GAUDI_QUEUE_ID_NIC_0_3:
8816        case GAUDI_QUEUE_ID_NIC_1_3:
8817        case GAUDI_QUEUE_ID_NIC_2_3:
8818        case GAUDI_QUEUE_ID_NIC_3_3:
8819        case GAUDI_QUEUE_ID_NIC_4_3:
8820        case GAUDI_QUEUE_ID_NIC_5_3:
8821        case GAUDI_QUEUE_ID_NIC_6_3:
8822        case GAUDI_QUEUE_ID_NIC_7_3:
8823        case GAUDI_QUEUE_ID_NIC_8_3:
8824        case GAUDI_QUEUE_ID_NIC_9_3:
8825                nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8826                offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8827                                (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8828                                (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8829                break;
8830        default:
8831                return -EINVAL;
8832        }
8833
8834        *addr = CFG_BASE + offset;
8835
8836        return 0;
8837}
8838
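/*
 * Emit the three monitor configuration packets: low and high halves of the
 * payload address (the fence register) and the payload value itself (1).
 */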
8839static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8840{
8841        u64 monitor_base;
8842        u32 size = 0;
8843        u16 msg_addr_offset;
8844
8845        /*
8846         * monitor_base should be the content of the base0 address registers,
8847         * so it will be added to the msg short offsets
8848         */
8849        monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8850
8851        /* First monitor config packet: low address of the sync */
8852        msg_addr_offset =
8853                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8854                                monitor_base;
8855
8856        size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8857                                        msg_addr_offset);
8858
8859        /* Second monitor config packet: high address of the sync */
8860        msg_addr_offset =
8861                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8862                                monitor_base;
8863
8864        size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8865                                        msg_addr_offset);
8866
8867        /*
8868         * Third monitor config packet: the payload, i.e. what to write when the
8869         * sync triggers
8870         */
8871        msg_addr_offset =
8872                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8873                                monitor_base;
8874
8875        size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8876
8877        return size;
8878}
8879
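/*
 * Compose a wait CB. The resulting packet sequence is roughly:
 *
 *   MSG_SHORT  MON_PAY_ADDRL[mon_id] = lower 32 bits of the fence register
 *   MSG_SHORT  MON_PAY_ADDRH[mon_id] = upper 32 bits of the fence register
 *   MSG_SHORT  MON_PAY_DATA[mon_id]  = 1
 *   MSG_SHORT  MON_ARM[mon_id]       = SOB group/mask/value, ">=" mode
 *   FENCE      wait on fence counter 2 until it reaches 1
 *
 * so the queue stalls until the armed monitor fires and writes to its fence
 * register.
 */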
8880static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8881                                struct hl_gen_wait_properties *prop)
8882{
8883        struct hl_cb *cb = (struct hl_cb *) prop->data;
8884        void *buf = cb->kernel_address;
8885        u64 fence_addr = 0;
8886        u32 size = prop->size;
8887
8888        if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8889                dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8890                                prop->q_idx);
8891                return 0;
8892        }
8893
8894        size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8895        size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8896                        prop->sob_mask, prop->sob_val, prop->mon_id);
8897        size += gaudi_add_fence_pkt(buf + size);
8898
8899        return size;
8900}
8901
8902static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8903{
8904        struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8905        int rc;
8906
8907        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8908                hw_sob->sob_id);
8909
8910        rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8911                        CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8912                        hw_sob->sob_id * 4, 1, 0);
8913        if (rc)
8914                dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
8915
8916        kref_init(&hw_sob->kref);
8917}
8918
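/*
 * A magic value in the non-reset scratchpad register marks a POWER9 host; in
 * that case a full 64-bit DMA mask is used, otherwise it is limited to 48
 * bits.
 */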
8919static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8920{
8921        if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8922                                                        HL_POWER9_HOST_MAGIC) {
8923                hdev->power9_64bit_dma_enable = 1;
8924                hdev->dma_mask = 64;
8925        } else {
8926                hdev->power9_64bit_dma_enable = 0;
8927                hdev->dma_mask = 48;
8928        }
8929}
8930
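/*
 * Read the free-running PSOC timestamp counter, upper half first. Note the
 * two 32-bit halves are not read atomically.
 */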
8931static u64 gaudi_get_device_time(struct hl_device *hdev)
8932{
8933        u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8934
8935        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8936}
8937
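/* HW blocks are not exposed for direct mmap on Gaudi, hence the -EPERM stubs */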
8938static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8939                                u32 *block_size, u32 *block_id)
8940{
8941        return -EPERM;
8942}
8943
8944static int gaudi_block_mmap(struct hl_device *hdev,
8945                                struct vm_area_struct *vma,
8946                                u32 block_id, u32 block_size)
8947{
8948        return -EPERM;
8949}
8950
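/*
 * Tell the firmware to start delivering events by writing the registration
 * event ID to the host interrupt register (legacy GIC register or the
 * dynamic register advertised by the firmware).
 */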
8951static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8952{
8953        struct cpu_dyn_regs *dyn_regs =
8954                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8955        u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8956                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8957                        le32_to_cpu(dyn_regs->gic_host_ints_irq);
8958
8959        WREG32(irq_handler_offset,
8960                gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8961}
8962
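/* Map the driver-facing HL_GAUDI_* PLL index to the firmware PLL enumeration */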
8963static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8964{
8965        switch (pll_idx) {
8966        case HL_GAUDI_CPU_PLL: return CPU_PLL;
8967        case HL_GAUDI_PCI_PLL: return PCI_PLL;
8968        case HL_GAUDI_NIC_PLL: return NIC_PLL;
8969        case HL_GAUDI_DMA_PLL: return DMA_PLL;
8970        case HL_GAUDI_MESH_PLL: return MESH_PLL;
8971        case HL_GAUDI_MME_PLL: return MME_PLL;
8972        case HL_GAUDI_TPC_PLL: return TPC_PLL;
8973        case HL_GAUDI_IF_PLL: return IF_PLL;
8974        case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8975        case HL_GAUDI_HBM_PLL: return HBM_PLL;
8976        default: return -EINVAL;
8977        }
8978}
8979
8980static const struct hl_asic_funcs gaudi_funcs = {
8981        .early_init = gaudi_early_init,
8982        .early_fini = gaudi_early_fini,
8983        .late_init = gaudi_late_init,
8984        .late_fini = gaudi_late_fini,
8985        .sw_init = gaudi_sw_init,
8986        .sw_fini = gaudi_sw_fini,
8987        .hw_init = gaudi_hw_init,
8988        .hw_fini = gaudi_hw_fini,
8989        .halt_engines = gaudi_halt_engines,
8990        .suspend = gaudi_suspend,
8991        .resume = gaudi_resume,
8992        .cb_mmap = gaudi_cb_mmap,
8993        .ring_doorbell = gaudi_ring_doorbell,
8994        .pqe_write = gaudi_pqe_write,
8995        .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8996        .asic_dma_free_coherent = gaudi_dma_free_coherent,
8997        .scrub_device_mem = gaudi_scrub_device_mem,
8998        .get_int_queue_base = gaudi_get_int_queue_base,
8999        .test_queues = gaudi_test_queues,
9000        .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9001        .asic_dma_pool_free = gaudi_dma_pool_free,
9002        .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9003        .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9004        .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9005        .cs_parser = gaudi_cs_parser,
9006        .asic_dma_map_sg = gaudi_dma_map_sg,
9007        .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9008        .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9009        .update_eq_ci = gaudi_update_eq_ci,
9010        .context_switch = gaudi_context_switch,
9011        .restore_phase_topology = gaudi_restore_phase_topology,
9012        .debugfs_read32 = gaudi_debugfs_read32,
9013        .debugfs_write32 = gaudi_debugfs_write32,
9014        .debugfs_read64 = gaudi_debugfs_read64,
9015        .debugfs_write64 = gaudi_debugfs_write64,
9016        .debugfs_read_dma = gaudi_debugfs_read_dma,
9017        .add_device_attr = gaudi_add_device_attr,
9018        .handle_eqe = gaudi_handle_eqe,
9019        .set_pll_profile = gaudi_set_pll_profile,
9020        .get_events_stat = gaudi_get_events_stat,
9021        .read_pte = gaudi_read_pte,
9022        .write_pte = gaudi_write_pte,
9023        .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9024        .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9025        .send_heartbeat = gaudi_send_heartbeat,
9026        .set_clock_gating = gaudi_set_clock_gating,
9027        .disable_clock_gating = gaudi_disable_clock_gating,
9028        .debug_coresight = gaudi_debug_coresight,
9029        .is_device_idle = gaudi_is_device_idle,
9030        .soft_reset_late_init = gaudi_soft_reset_late_init,
9031        .hw_queues_lock = gaudi_hw_queues_lock,
9032        .hw_queues_unlock = gaudi_hw_queues_unlock,
9033        .get_pci_id = gaudi_get_pci_id,
9034        .get_eeprom_data = gaudi_get_eeprom_data,
9035        .send_cpu_message = gaudi_send_cpu_message,
9036        .pci_bars_map = gaudi_pci_bars_map,
9037        .init_iatu = gaudi_init_iatu,
9038        .rreg = hl_rreg,
9039        .wreg = hl_wreg,
9040        .halt_coresight = gaudi_halt_coresight,
9041        .ctx_init = gaudi_ctx_init,
9042        .ctx_fini = gaudi_ctx_fini,
9043        .get_clk_rate = gaudi_get_clk_rate,
9044        .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9045        .load_firmware_to_device = gaudi_load_firmware_to_device,
9046        .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9047        .get_signal_cb_size = gaudi_get_signal_cb_size,
9048        .get_wait_cb_size = gaudi_get_wait_cb_size,
9049        .gen_signal_cb = gaudi_gen_signal_cb,
9050        .gen_wait_cb = gaudi_gen_wait_cb,
9051        .reset_sob = gaudi_reset_sob,
9052        .reset_sob_group = gaudi_reset_sob_group,
9053        .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9054        .get_device_time = gaudi_get_device_time,
9055        .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9056        .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9057        .scramble_addr = hl_mmu_scramble_addr,
9058        .descramble_addr = hl_mmu_descramble_addr,
9059        .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9060        .get_hw_block_id = gaudi_get_hw_block_id,
9061        .hw_block_mmap = gaudi_block_mmap,
9062        .enable_events_from_fw = gaudi_enable_events_from_fw,
9063        .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9064        .init_firmware_loader = gaudi_init_firmware_loader,
9065        .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
9066};
9067
9068/**
9069 * gaudi_set_asic_funcs - set GAUDI function pointers
9070 *
9071 * @hdev: pointer to hl_device structure
9072 *
9073 */
9074void gaudi_set_asic_funcs(struct hl_device *hdev)
9075{
9076        hdev->asic_funcs = &gaudi_funcs;
9077}
9078