linux/drivers/misc/habanalabs/gaudi/gaudi.c
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2020 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 */
   7
   8#include "gaudiP.h"
   9#include "../include/hw_ip/mmu/mmu_general.h"
  10#include "../include/hw_ip/mmu/mmu_v1_1.h"
  11#include "../include/gaudi/gaudi_masks.h"
  12#include "../include/gaudi/gaudi_fw_if.h"
  13#include "../include/gaudi/gaudi_reg_map.h"
  14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
  15
  16#include <linux/module.h>
  17#include <linux/pci.h>
  18#include <linux/firmware.h>
  19#include <linux/hwmon.h>
  20#include <linux/genalloc.h>
  21#include <linux/io-64-nonatomic-lo-hi.h>
  22#include <linux/iommu.h>
  23#include <linux/seq_file.h>
  24
  25/*
  26 * Gaudi security scheme:
  27 *
  28 * 1. Host is protected by:
  29 *        - Range registers
  30 *        - MMU
  31 *
  32 * 2. DDR is protected by:
  33 *        - Range registers (protect the first 512MB)
  34 *
  35 * 3. Configuration is protected by:
  36 *        - Range registers
  37 *        - Protection bits
  38 *
  39 * MMU is always enabled.
  40 *
  41 * QMAN DMA channels 0,1,5 (PCI DMA):
  42 *     - DMA is not secured.
  43 *     - PQ and CQ are secured.
  44 *     - CP is secured: the driver needs to parse the CB, but WREG must be
  45 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
  46 *                      never secured.
  47 *
  48 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
  49 * channel 0 to secured, executes the DMA and changes it back to non-secured.
  50 * Currently, the driver doesn't use DMA while there are compute jobs
  51 * running.
  52 *
  53 * The current use cases for driver-initiated DMA are:
  54 *     - Clear SRAM on context switch (happens on context switch when device is
  55 *       idle)
  56 *     - MMU page tables area clear (happens on init)
  57 *
  58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
  59 *     - PQ is secured and is located on the host (HBM CON TPC3 bug).
  60 *     - CQ, CP and the engine are not secured.
  61 *
  62 */
  63
  64#define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
  65#define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
  66#define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
  67
  68#define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
  69
  70#define GAUDI_RESET_TIMEOUT_MSEC        1000            /* 1000ms */
  71#define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
  72#define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
  73#define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
  74
  75#define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
  76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
  77#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
  78#define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
  79#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
  80#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
  81#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
  82#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
  83
  84#define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
  85
  86#define GAUDI_MAX_STRING_LEN            20
  87
  88#define GAUDI_CB_POOL_CB_CNT            512
  89#define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
  90
  91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
  92
  93#define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
  94
  95#define GAUDI_NUM_OF_QM_ERR_CAUSE       16
  96
  97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
  98
  99#define GAUDI_ARB_WDT_TIMEOUT           0x1000000
 100
 101#define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
 102                BIT(GAUDI_ENGINE_ID_MME_0) |\
 103                BIT(GAUDI_ENGINE_ID_MME_2) |\
 104                GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
 105
 106static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 107                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 108                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
 109                "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
 110                "gaudi cpu eq"
 111};
 112
 113static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
 114        [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
 115        [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
 116        [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
 117        [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
 118        [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
 119        [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
 120        [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
 121        [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
 122};
 123
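    /*
     * Completion queues are exposed by the PCI DMA engines (DMA 0, 1 and 5);
     * each contributes its four streams, giving the twelve entries below.
     */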
 124static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
 125        [0] = GAUDI_QUEUE_ID_DMA_0_0,
 126        [1] = GAUDI_QUEUE_ID_DMA_0_1,
 127        [2] = GAUDI_QUEUE_ID_DMA_0_2,
 128        [3] = GAUDI_QUEUE_ID_DMA_0_3,
 129        [4] = GAUDI_QUEUE_ID_DMA_1_0,
 130        [5] = GAUDI_QUEUE_ID_DMA_1_1,
 131        [6] = GAUDI_QUEUE_ID_DMA_1_2,
 132        [7] = GAUDI_QUEUE_ID_DMA_1_3,
 133        [8] = GAUDI_QUEUE_ID_DMA_5_0,
 134        [9] = GAUDI_QUEUE_ID_DMA_5_1,
 135        [10] = GAUDI_QUEUE_ID_DMA_5_2,
 136        [11] = GAUDI_QUEUE_ID_DMA_5_3
 137};
 138
 139static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 140        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
 141        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
 142        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
 143        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
 144        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
 145        [PACKET_REPEAT]         = sizeof(struct packet_repeat),
 146        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
 147        [PACKET_FENCE]          = sizeof(struct packet_fence),
 148        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
 149        [PACKET_NOP]            = sizeof(struct packet_nop),
 150        [PACKET_STOP]           = sizeof(struct packet_stop),
 151        [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
 152        [PACKET_WAIT]           = sizeof(struct packet_wait),
 153        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
 154};
 155
 156static inline bool validate_packet_id(enum packet_id id)
 157{
 158        switch (id) {
 159        case PACKET_WREG_32:
 160        case PACKET_WREG_BULK:
 161        case PACKET_MSG_LONG:
 162        case PACKET_MSG_SHORT:
 163        case PACKET_CP_DMA:
 164        case PACKET_REPEAT:
 165        case PACKET_MSG_PROT:
 166        case PACKET_FENCE:
 167        case PACKET_LIN_DMA:
 168        case PACKET_NOP:
 169        case PACKET_STOP:
 170        case PACKET_ARB_POINT:
 171        case PACKET_WAIT:
 172        case PACKET_LOAD_AND_EXE:
 173                return true;
 174        default:
 175                return false;
 176        }
 177}
 178
 179static const char * const
 180gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
 181        "tpc_address_exceed_slm",
 182        "tpc_div_by_0",
 183        "tpc_spu_mac_overflow",
 184        "tpc_spu_addsub_overflow",
 185        "tpc_spu_abs_overflow",
 186        "tpc_spu_fp_dst_nan_inf",
 187        "tpc_spu_fp_dst_denorm",
 188        "tpc_vpu_mac_overflow",
 189        "tpc_vpu_addsub_overflow",
 190        "tpc_vpu_abs_overflow",
 191        "tpc_vpu_fp_dst_nan_inf",
 192        "tpc_vpu_fp_dst_denorm",
 193        "tpc_assertions",
 194        "tpc_illegal_instruction",
 195        "tpc_pc_wrap_around",
 196        "tpc_qm_sw_err",
 197        "tpc_hbw_rresp_err",
 198        "tpc_hbw_bresp_err",
 199        "tpc_lbw_rresp_err",
 200        "tpc_lbw_bresp_err"
 201};
 202
 203static const char * const
 204gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
 205        "PQ AXI HBW error",
 206        "CQ AXI HBW error",
 207        "CP AXI HBW error",
 208        "CP error due to undefined OPCODE",
 209        "CP encountered STOP OPCODE",
 210        "CP AXI LBW error",
 211        "CP WRREG32 or WRBULK returned error",
 212        "N/A",
 213        "FENCE 0 inc over max value and clipped",
 214        "FENCE 1 inc over max value and clipped",
 215        "FENCE 2 inc over max value and clipped",
 216        "FENCE 3 inc over max value and clipped",
 217        "FENCE 0 dec under min value and clipped",
 218        "FENCE 1 dec under min value and clipped",
 219        "FENCE 2 dec under min value and clipped",
 220        "FENCE 3 dec under min value and clipped"
 221};
 222
 223static const char * const
 224gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
 225        "Choice push while full error",
 226        "Choice Q watchdog error",
 227        "MSG AXI LBW returned with error"
 228};
 229
 230static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 231        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
 232        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
 233        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
 234        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
 235        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
 236        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
 237        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
 238        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
 239        QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
 240        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
 241        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
 242        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
 243        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
 244        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
 245        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
 246        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
 247        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
 248        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
 249        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
 250        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
 251        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
 252        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
 253        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
 254        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
 255        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
 256        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
 257        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
 258        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
 259        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
 260        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
 261        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
 262        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
 263        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
 264        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
 265        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
 266        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
 267        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
 268        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
 269        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
 270        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
 271        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
 272        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
 273        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
 274        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
 275        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
 276        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
 277        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
 278        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
 279        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
 280        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
 281        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
 282        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
 283        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
 284        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
 285        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
 286        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
 287        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
 288        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
 289        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
 290        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
 291        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
 292        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
 293        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
 294        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
 295        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
 296        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
 297        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
 298        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
 299        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
 300        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
 301        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
 302        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
 303        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
 304        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
 305        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
 306        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
 307        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
 308        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
 309        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
 310        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
 311        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
 312        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
 313        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
 314        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
 315        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
 316        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
 317        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
 318        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
 319        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
 320        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
 321        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
 322        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
 323        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
 324        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
 325        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
 326        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
 327        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
 328        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
 329        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
 330        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
 331        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
 332        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
 333        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
 334        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
 335        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
 336        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
 337        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
 338        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
 339        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
 340        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
 341        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
 342        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
 343        QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
 344};
 345
 346struct ecc_info_extract_params {
 347        u64 block_address;
 348        u32 num_memories;
 349        bool derr;
 350        bool disable_clock_gating;
 351};
 352
 353static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
 354                                                                u64 phys_addr);
 355static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 356                                        struct hl_cs_job *job);
 357static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 358                                        u32 size, u64 val);
 359static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 360                                u32 tpc_id);
 361static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
 362static int gaudi_cpucp_info_get(struct hl_device *hdev);
 363static void gaudi_disable_clock_gating(struct hl_device *hdev);
 364static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 365
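    /*
     * gaudi_get_fixed_properties - fill hdev->asic_prop with the queue,
     * DRAM/SRAM, MMU and misc. properties that do not change at runtime.
     * The hw_queues_props array allocated here is freed in gaudi_early_fini()
     * (or on the error path of gaudi_early_init()).
     */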
 366static int gaudi_get_fixed_properties(struct hl_device *hdev)
 367{
 368        struct asic_fixed_properties *prop = &hdev->asic_prop;
 369        u32 num_sync_stream_queues = 0;
 370        int i;
 371
 372        prop->max_queues = GAUDI_QUEUE_ID_SIZE;
 373        prop->hw_queues_props = kcalloc(prop->max_queues,
 374                        sizeof(struct hw_queue_properties),
 375                        GFP_KERNEL);
 376
 377        if (!prop->hw_queues_props)
 378                return -ENOMEM;
 379
 380        for (i = 0 ; i < prop->max_queues ; i++) {
 381                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
 382                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 383                        prop->hw_queues_props[i].driver_only = 0;
 384                        prop->hw_queues_props[i].requires_kernel_cb = 1;
 385                        prop->hw_queues_props[i].supports_sync_stream = 1;
 386                        num_sync_stream_queues++;
 387                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 388                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 389                        prop->hw_queues_props[i].driver_only = 1;
 390                        prop->hw_queues_props[i].requires_kernel_cb = 0;
 391                        prop->hw_queues_props[i].supports_sync_stream = 0;
 392                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
 393                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 394                        prop->hw_queues_props[i].driver_only = 0;
 395                        prop->hw_queues_props[i].requires_kernel_cb = 0;
 396                } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
 397                        prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 398                        prop->hw_queues_props[i].driver_only = 0;
 399                        prop->hw_queues_props[i].requires_kernel_cb = 0;
 400                        prop->hw_queues_props[i].supports_sync_stream = 0;
 401                }
 402        }
 403
 404        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 405        prop->sync_stream_first_sob = 0;
 406        prop->sync_stream_first_mon = 0;
 407        prop->dram_base_address = DRAM_PHYS_BASE;
 408        prop->dram_size = GAUDI_HBM_SIZE_32GB;
 409        prop->dram_end_address = prop->dram_base_address +
 410                                        prop->dram_size;
 411        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
 412
 413        prop->sram_base_address = SRAM_BASE_ADDR;
 414        prop->sram_size = SRAM_SIZE;
 415        prop->sram_end_address = prop->sram_base_address +
 416                                        prop->sram_size;
 417        prop->sram_user_base_address = prop->sram_base_address +
 418                                        SRAM_USER_BASE_OFFSET;
 419
 420        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
 421        if (hdev->pldm)
 422                prop->mmu_pgt_size = 0x800000; /* 8MB */
 423        else
 424                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
 425        prop->mmu_pte_size = HL_PTE_SIZE;
 426        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
 427        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 428        prop->dram_page_size = PAGE_SIZE_2MB;
 429
 430        prop->pmmu.hop0_shift = HOP0_SHIFT;
 431        prop->pmmu.hop1_shift = HOP1_SHIFT;
 432        prop->pmmu.hop2_shift = HOP2_SHIFT;
 433        prop->pmmu.hop3_shift = HOP3_SHIFT;
 434        prop->pmmu.hop4_shift = HOP4_SHIFT;
 435        prop->pmmu.hop0_mask = HOP0_MASK;
 436        prop->pmmu.hop1_mask = HOP1_MASK;
 437        prop->pmmu.hop2_mask = HOP2_MASK;
 438        prop->pmmu.hop3_mask = HOP3_MASK;
 439        prop->pmmu.hop4_mask = HOP4_MASK;
 440        prop->pmmu.start_addr = VA_HOST_SPACE_START;
 441        prop->pmmu.end_addr =
 442                        (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
 443        prop->pmmu.page_size = PAGE_SIZE_4KB;
 444        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
 445
 446        /* PMMU and HPMMU are the same except for the page size */
 447        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
 448        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
 449
 450        /* shifts and masks are the same in PMMU and DMMU */
 451        memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
 452        prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
 453        prop->dmmu.end_addr = VA_HOST_SPACE_END;
 454        prop->dmmu.page_size = PAGE_SIZE_2MB;
 455
 456        prop->cfg_size = CFG_SIZE;
 457        prop->max_asid = MAX_ASID;
 458        prop->num_of_events = GAUDI_EVENT_SIZE;
 459        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 460
 461        prop->max_power_default = MAX_POWER_DEFAULT_PCI;
 462
 463        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
 464        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
 465
 466        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 467        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 468
 469        strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 470                                        CARD_NAME_MAX_LEN);
 471
 472        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
 473
 474        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
 475                        num_sync_stream_queues * HL_RSVD_SOBS;
 476        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
 477                        num_sync_stream_queues * HL_RSVD_MONS;
 478
 479        return 0;
 480}
 481
 482static int gaudi_pci_bars_map(struct hl_device *hdev)
 483{
 484        static const char * const name[] = {"SRAM", "CFG", "HBM"};
 485        bool is_wc[3] = {false, false, true};
 486        int rc;
 487
 488        rc = hl_pci_bars_map(hdev, name, is_wc);
 489        if (rc)
 490                return rc;
 491
 492        hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
 493                        (CFG_BASE - SPI_FLASH_BASE_ADDR);
 494
 495        return 0;
 496}
 497
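    /*
     * Re-point inbound PCI region 2 (the HBM BAR) at @addr inside the device
     * DRAM. Returns the previous BAR base address, or U64_MAX on failure.
     */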
 498static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 499{
 500        struct gaudi_device *gaudi = hdev->asic_specific;
 501        struct hl_inbound_pci_region pci_region;
 502        u64 old_addr = addr;
 503        int rc;
 504
 505        if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
 506                return old_addr;
 507
 508        /* Inbound Region 2 - Bar 4 - Point to HBM */
 509        pci_region.mode = PCI_BAR_MATCH_MODE;
 510        pci_region.bar = HBM_BAR_ID;
 511        pci_region.addr = addr;
 512        rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
 513        if (rc)
 514                return U64_MAX;
 515
 516        if (gaudi) {
 517                old_addr = gaudi->hbm_bar_cur_addr;
 518                gaudi->hbm_bar_cur_addr = addr;
 519        }
 520
 521        return old_addr;
 522}
 523
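    /*
     * Program the PCIe iATU: three BAR-match inbound regions (SRAM + CFG,
     * SPI flash, HBM) and a single outbound region that covers host memory.
     */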
 524static int gaudi_init_iatu(struct hl_device *hdev)
 525{
 526        struct hl_inbound_pci_region inbound_region;
 527        struct hl_outbound_pci_region outbound_region;
 528        int rc;
 529
 530        /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
 531        inbound_region.mode = PCI_BAR_MATCH_MODE;
 532        inbound_region.bar = SRAM_BAR_ID;
 533        inbound_region.addr = SRAM_BASE_ADDR;
 534        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
 535        if (rc)
 536                goto done;
 537
 538        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
 539        inbound_region.mode = PCI_BAR_MATCH_MODE;
 540        inbound_region.bar = CFG_BAR_ID;
 541        inbound_region.addr = SPI_FLASH_BASE_ADDR;
 542        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
 543        if (rc)
 544                goto done;
 545
 546        /* Inbound Region 2 - Bar 4 - Point to HBM */
 547        inbound_region.mode = PCI_BAR_MATCH_MODE;
 548        inbound_region.bar = HBM_BAR_ID;
 549        inbound_region.addr = DRAM_PHYS_BASE;
 550        rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
 551        if (rc)
 552                goto done;
 553
 554        hdev->asic_funcs->set_dma_mask_from_fw(hdev);
 555
 556        /* Outbound Region 0 - Point to Host */
 557        outbound_region.addr = HOST_PHYS_BASE;
 558        outbound_region.size = HOST_PHYS_SIZE;
 559        rc = hl_pci_set_outbound_region(hdev, &outbound_region);
 560
 561done:
 562        return rc;
 563}
 564
 565static int gaudi_early_init(struct hl_device *hdev)
 566{
 567        struct asic_fixed_properties *prop = &hdev->asic_prop;
 568        struct pci_dev *pdev = hdev->pdev;
 569        int rc;
 570
 571        rc = gaudi_get_fixed_properties(hdev);
 572        if (rc) {
 573                dev_err(hdev->dev, "Failed to get fixed properties\n");
 574                return rc;
 575        }
 576
 577        /* Check BAR sizes */
 578        if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
 579                dev_err(hdev->dev,
 580                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 581                        SRAM_BAR_ID,
 582                        (unsigned long long) pci_resource_len(pdev,
 583                                                        SRAM_BAR_ID),
 584                        SRAM_BAR_SIZE);
 585                rc = -ENODEV;
 586                goto free_queue_props;
 587        }
 588
 589        if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
 590                dev_err(hdev->dev,
 591                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 592                        CFG_BAR_ID,
 593                        (unsigned long long) pci_resource_len(pdev,
 594                                                                CFG_BAR_ID),
 595                        CFG_BAR_SIZE);
 596                rc = -ENODEV;
 597                goto free_queue_props;
 598        }
 599
 600        prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 601
 602        rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 603                        mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 604        if (rc)
 605                goto free_queue_props;
 606
 607        /* GAUDI Firmware does not yet support security */
 608        prop->fw_security_disabled = true;
 609        dev_info(hdev->dev, "firmware-level security is disabled\n");
 610
 611        return 0;
 612
 613free_queue_props:
 614        kfree(hdev->asic_prop.hw_queues_props);
 615        return rc;
 616}
 617
 618static int gaudi_early_fini(struct hl_device *hdev)
 619{
 620        kfree(hdev->asic_prop.hw_queues_props);
 621        hl_pci_fini(hdev);
 622
 623        return 0;
 624}
 625
 626/**
 627 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 628 *
 629 * @hdev: pointer to hl_device structure
 630 *
 631 */
 632static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 633{
 634        struct asic_fixed_properties *prop = &hdev->asic_prop;
 635        u32 trace_freq = 0;
 636        u32 pll_clk = 0;
 637        u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
 638        u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
 639        u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
 640        u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
 641        u32 od = RREG32(mmPSOC_CPU_PLL_OD);
 642
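            /*
             * Derive the trace clock from the div select: either the PLL
             * reference clock (optionally divided by div_fctr + 1), or the
             * PLL output:
             *     pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1))
             * again optionally divided by (div_fctr + 1).
             */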
 643        if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
 644                if (div_sel == DIV_SEL_REF_CLK)
 645                        trace_freq = PLL_REF_CLK;
 646                else
 647                        trace_freq = PLL_REF_CLK / (div_fctr + 1);
 648        } else if (div_sel == DIV_SEL_PLL_CLK ||
 649                                        div_sel == DIV_SEL_DIVIDED_PLL) {
 650                pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
 651                if (div_sel == DIV_SEL_PLL_CLK)
 652                        trace_freq = pll_clk;
 653                else
 654                        trace_freq = pll_clk / (div_fctr + 1);
 655        } else {
 656                dev_warn(hdev->dev,
 657                        "Received invalid div select value: %d", div_sel);
 658        }
 659
 660        prop->psoc_timestamp_frequency = trace_freq;
 661        prop->psoc_pci_pll_nr = nr;
 662        prop->psoc_pci_pll_nf = nf;
 663        prop->psoc_pci_pll_od = od;
 664        prop->psoc_pci_pll_div_factor = div_fctr;
 665}
 666
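    /*
     * _gaudi_init_tpc_mem - build a single LIN_DMA packet on a kernel CB that
     * copies the TPC kernel image from host memory to the SRAM user area,
     * submit it on QMAN0, and then run the kernel on every TPC engine.
     */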
 667static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 668                dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
 669{
 670        struct asic_fixed_properties *prop = &hdev->asic_prop;
 671        struct packet_lin_dma *init_tpc_mem_pkt;
 672        struct hl_cs_job *job;
 673        struct hl_cb *cb;
 674        u64 dst_addr;
 675        u32 cb_size, ctl;
 676        u8 tpc_id;
 677        int rc;
 678
 679        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 680        if (!cb)
 681                return -EFAULT;
 682
 683        init_tpc_mem_pkt = cb->kernel_address;
 684        cb_size = sizeof(*init_tpc_mem_pkt);
 685        memset(init_tpc_mem_pkt, 0, cb_size);
 686
 687        init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
 688
 689        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
 690        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
 691        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
 692        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 693
 694        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
 695
 696        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
 697        dst_addr = (prop->sram_user_base_address &
 698                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
 699                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
 700        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
 701
 702        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
 703        if (!job) {
 704                dev_err(hdev->dev, "Failed to allocate a new job\n");
 705                rc = -ENOMEM;
 706                goto release_cb;
 707        }
 708
 709        job->id = 0;
 710        job->user_cb = cb;
 711        job->user_cb->cs_cnt++;
 712        job->user_cb_size = cb_size;
 713        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
 714        job->patched_cb = job->user_cb;
 715        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
 716
 717        hl_debugfs_add_job(hdev, job);
 718
 719        rc = gaudi_send_job_on_qman0(hdev, job);
 720
 721        if (rc)
 722                goto free_job;
 723
 724        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
 725                rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
 726                if (rc)
 727                        break;
 728        }
 729
 730free_job:
 731        hl_userptr_delete_list(hdev, &job->userptr_list);
 732        hl_debugfs_remove_job(hdev, job);
 733        kfree(job);
 734        cb->cs_cnt--;
 735
 736release_cb:
 737        hl_cb_put(cb);
 738        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 739
 740        return rc;
 741}
 742
 743/*
 744 * gaudi_init_tpc_mem() - Initialize TPC memories.
 745 * @hdev: Pointer to hl_device structure.
 746 *
 747 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 748 *
 749 * Return: 0 for success, negative value for error.
 750 */
 751static int gaudi_init_tpc_mem(struct hl_device *hdev)
 752{
 753        const struct firmware *fw;
 754        size_t fw_size;
 755        void *cpu_addr;
 756        dma_addr_t dma_handle;
 757        int rc;
 758
 759        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
 760        if (rc) {
 761                dev_err(hdev->dev, "Firmware file %s is not found!\n",
 762                                GAUDI_TPC_FW_FILE);
 763                goto out;
 764        }
 765
 766        fw_size = fw->size;
 767        cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
 768                        &dma_handle, GFP_KERNEL | __GFP_ZERO);
 769        if (!cpu_addr) {
 770                dev_err(hdev->dev,
 771                        "Failed to allocate %zu of dma memory for TPC kernel\n",
 772                        fw_size);
 773                rc = -ENOMEM;
 774                goto out;
 775        }
 776
 777        memcpy(cpu_addr, fw->data, fw_size);
 778
 779        rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
 780
 781        hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
 782                        dma_handle);
 783
 784out:
 785        release_firmware(fw);
 786        return rc;
 787}
 788
 789static int gaudi_late_init(struct hl_device *hdev)
 790{
 791        struct gaudi_device *gaudi = hdev->asic_specific;
 792        int rc;
 793
 794        rc = gaudi->cpucp_info_get(hdev);
 795        if (rc) {
 796                dev_err(hdev->dev, "Failed to get cpucp info\n");
 797                return rc;
 798        }
 799
 800        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
 801        if (rc) {
 802                dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
 803                return rc;
 804        }
 805
 806        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
 807
 808        gaudi_fetch_psoc_frequency(hdev);
 809
 810        rc = gaudi_mmu_clear_pgt_range(hdev);
 811        if (rc) {
 812                dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
 813                goto disable_pci_access;
 814        }
 815
 816        rc = gaudi_init_tpc_mem(hdev);
 817        if (rc) {
 818                dev_err(hdev->dev, "Failed to initialize TPC memories\n");
 819                goto disable_pci_access;
 820        }
 821
 822        return 0;
 823
 824disable_pci_access:
 825        hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
 826
 827        return rc;
 828}
 829
 830static void gaudi_late_fini(struct hl_device *hdev)
 831{
 832        const struct hwmon_channel_info **channel_info_arr;
 833        int i = 0;
 834
 835        if (!hdev->hl_chip_info->info)
 836                return;
 837
 838        channel_info_arr = hdev->hl_chip_info->info;
 839
 840        while (channel_info_arr[i]) {
 841                kfree(channel_info_arr[i]->config);
 842                kfree(channel_info_arr[i]);
 843                i++;
 844        }
 845
 846        kfree(channel_info_arr);
 847
 848        hdev->hl_chip_info->info = NULL;
 849}
 850
 851static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
 852{
 853        dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
 854        void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
 855        int i, j, rc = 0;
 856
 857        /*
 858         * The device CPU works with 40-bit addresses, and bit 39 must be set
 859         * to '1' when accessing the host.
 860         * Bits 49:39 of the full host address are saved for a later
 861         * configuration of the HW, which extends the address to 50 bits.
 862         * Because a single HW register holds the extension bits, these bits
 863         * must be identical across the entire allocated range.
 864         */
 865
 866        for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
 867                virt_addr_arr[i] =
 868                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
 869                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
 870                                                &dma_addr_arr[i],
 871                                                GFP_KERNEL | __GFP_ZERO);
 872                if (!virt_addr_arr[i]) {
 873                        rc = -ENOMEM;
 874                        goto free_dma_mem_arr;
 875                }
 876
 877                end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
 878                if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
 879                                GAUDI_CPU_PCI_MSB_ADDR(end_addr))
 880                        break;
 881        }
 882
 883        if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
 884                dev_err(hdev->dev,
 885                        "MSB of CPU accessible DMA memory are not identical in all range\n");
 886                rc = -EFAULT;
 887                goto free_dma_mem_arr;
 888        }
 889
 890        hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
 891        hdev->cpu_accessible_dma_address = dma_addr_arr[i];
 892        hdev->cpu_pci_msb_addr =
 893                GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
 894
 895        GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
 896
 897free_dma_mem_arr:
 898        for (j = 0 ; j < i ; j++)
 899                hdev->asic_funcs->asic_dma_free_coherent(hdev,
 900                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
 901                                                virt_addr_arr[j],
 902                                                dma_addr_arr[j]);
 903
 904        return rc;
 905}
 906
 907static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
 908{
 909        struct gaudi_device *gaudi = hdev->asic_specific;
 910        struct gaudi_internal_qman_info *q;
 911        u32 i;
 912
 913        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
 914                q = &gaudi->internal_qmans[i];
 915                if (!q->pq_kernel_addr)
 916                        continue;
 917                hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
 918                                                        q->pq_kernel_addr,
 919                                                        q->pq_dma_addr);
 920        }
 921}
 922
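    /*
     * Allocate a kernel-resident PQ buffer for every internal (on-device)
     * queue; the buffer size depends on whether the queue belongs to an HBM
     * DMA, MME or TPC QMAN.
     */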
 923static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
 924{
 925        struct gaudi_device *gaudi = hdev->asic_specific;
 926        struct gaudi_internal_qman_info *q;
 927        int rc, i;
 928
 929        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
 930                if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
 931                        continue;
 932
 933                q = &gaudi->internal_qmans[i];
 934
 935                switch (i) {
 936                case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
 937                case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
 938                        q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
 939                        break;
 940                case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
 941                        q->pq_size = MME_QMAN_SIZE_IN_BYTES;
 942                        break;
 943                case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
 944                        q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
 945                        break;
 946                default:
 947                        dev_err(hdev->dev, "Bad internal queue index %d", i);
 948                        rc = -EINVAL;
 949                        goto free_internal_qmans_pq_mem;
 950                }
 951
 952                q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
 953                                                hdev, q->pq_size,
 954                                                &q->pq_dma_addr,
 955                                                GFP_KERNEL | __GFP_ZERO);
 956                if (!q->pq_kernel_addr) {
 957                        rc = -ENOMEM;
 958                        goto free_internal_qmans_pq_mem;
 959                }
 960        }
 961
 962        return 0;
 963
 964free_internal_qmans_pq_mem:
 965        gaudi_free_internal_qmans_pq_mem(hdev);
 966        return rc;
 967}
 968
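    /*
     * gaudi_sw_init - allocate the gaudi_device structure, build the event ID
     * table from the IRQ map, and create the DMA pool, the CPU-accessible
     * memory pool and the internal QMANs' PQ buffers.
     */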
 969static int gaudi_sw_init(struct hl_device *hdev)
 970{
 971        struct gaudi_device *gaudi;
 972        u32 i, event_id = 0;
 973        int rc;
 974
 975        /* Allocate device structure */
 976        gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
 977        if (!gaudi)
 978                return -ENOMEM;
 979
 980        for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
 981                if (gaudi_irq_map_table[i].valid) {
 982                        if (event_id == GAUDI_EVENT_SIZE) {
 983                                dev_err(hdev->dev,
 984                                        "Event array exceeds the limit of %u events\n",
 985                                        GAUDI_EVENT_SIZE);
 986                                rc = -EINVAL;
 987                                goto free_gaudi_device;
 988                        }
 989
 990                        gaudi->events[event_id++] =
 991                                        gaudi_irq_map_table[i].fc_id;
 992                }
 993        }
 994
 995        gaudi->cpucp_info_get = gaudi_cpucp_info_get;
 996
 997        gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
 998
 999        hdev->asic_specific = gaudi;
1000
1001        /* Create DMA pool for small allocations */
1002        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1003                        &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1004        if (!hdev->dma_pool) {
1005                dev_err(hdev->dev, "failed to create DMA pool\n");
1006                rc = -ENOMEM;
1007                goto free_gaudi_device;
1008        }
1009
1010        rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1011        if (rc)
1012                goto free_dma_pool;
1013
1014        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1015        if (!hdev->cpu_accessible_dma_pool) {
1016                dev_err(hdev->dev,
1017                        "Failed to create CPU accessible DMA pool\n");
1018                rc = -ENOMEM;
1019                goto free_cpu_dma_mem;
1020        }
1021
1022        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1023                                (uintptr_t) hdev->cpu_accessible_dma_mem,
1024                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1025        if (rc) {
1026                dev_err(hdev->dev,
1027                        "Failed to add memory to CPU accessible DMA pool\n");
1028                rc = -EFAULT;
1029                goto free_cpu_accessible_dma_pool;
1030        }
1031
1032        rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1033        if (rc)
1034                goto free_cpu_accessible_dma_pool;
1035
1036        spin_lock_init(&gaudi->hw_queues_lock);
1037        mutex_init(&gaudi->clk_gate_mutex);
1038
1039        hdev->supports_sync_stream = true;
1040        hdev->supports_coresight = true;
1041
1042        return 0;
1043
1044free_cpu_accessible_dma_pool:
1045        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1046free_cpu_dma_mem:
1047        GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1048                                hdev->cpu_pci_msb_addr);
1049        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1050                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1051                        hdev->cpu_accessible_dma_mem,
1052                        hdev->cpu_accessible_dma_address);
1053free_dma_pool:
1054        dma_pool_destroy(hdev->dma_pool);
1055free_gaudi_device:
1056        kfree(gaudi);
1057        return rc;
1058}
1059
1060static int gaudi_sw_fini(struct hl_device *hdev)
1061{
1062        struct gaudi_device *gaudi = hdev->asic_specific;
1063
1064        gaudi_free_internal_qmans_pq_mem(hdev);
1065
1066        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1067
1068        GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1069                                        hdev->cpu_pci_msb_addr);
1070        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1071                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1072                        hdev->cpu_accessible_dma_mem,
1073                        hdev->cpu_accessible_dma_address);
1074
1075        dma_pool_destroy(hdev->dma_pool);
1076
1077        mutex_destroy(&gaudi->clk_gate_mutex);
1078
1079        kfree(gaudi);
1080
1081        return 0;
1082}
1083
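    /*
     * Single-MSI mode interrupt handler: one vector serves all completion
     * queues and the event queue, so each of them is polled on every
     * interrupt.
     */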
1084static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1085{
1086        struct hl_device *hdev = arg;
1087        int i;
1088
1089        if (hdev->disabled)
1090                return IRQ_HANDLED;
1091
1092        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1093                hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1094
1095        hl_irq_handler_eq(irq, &hdev->event_queue);
1096
1097        return IRQ_HANDLED;
1098}
1099
1100/*
1101 * For backward compatibility, new MSI interrupts should be set after the
1102 * existing CPU and NIC interrupts.
1103 */
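    /*
     * CQ vectors keep their legacy indices (below GAUDI_EVENT_QUEUE_MSI_IDX)
     * and the CPU EQ uses GAUDI_EVENT_QUEUE_MSI_IDX itself; any newer index
     * is shifted past the NIC vectors (by NIC_NUMBER_OF_ENGINES + 1).
     */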
1104static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1105                                bool cpu_eq)
1106{
1107        int msi_vec;
1108
1109        if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1110                dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1111                                GAUDI_EVENT_QUEUE_MSI_IDX);
1112
1113        msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1114                        (nr + NIC_NUMBER_OF_ENGINES + 1);
1115
1116        return pci_irq_vector(hdev->pdev, msi_vec);
1117}
1118
1119static int gaudi_enable_msi_single(struct hl_device *hdev)
1120{
1121        int rc, irq;
1122
1123        dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1124
1125        irq = gaudi_pci_irq_vector(hdev, 0, false);
1126        rc = request_irq(irq, gaudi_irq_handler_single, 0,
1127                        "gaudi single msi", hdev);
1128        if (rc)
1129                dev_err(hdev->dev,
1130                        "Failed to request single MSI IRQ\n");
1131
1132        return rc;
1133}
1134
1135static int gaudi_enable_msi_multi(struct hl_device *hdev)
1136{
1137        int cq_cnt = hdev->asic_prop.completion_queues_count;
1138        int rc, i, irq_cnt_init, irq;
1139
1140        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1141                irq = gaudi_pci_irq_vector(hdev, i, false);
1142                rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1143                                &hdev->completion_queue[i]);
1144                if (rc) {
1145                        dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1146                        goto free_irqs;
1147                }
1148        }
1149
1150        irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1151        rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1152                                &hdev->event_queue);
1153        if (rc) {
1154                dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1155                goto free_irqs;
1156        }
1157
1158        return 0;
1159
1160free_irqs:
1161        for (i = 0 ; i < irq_cnt_init ; i++)
1162                free_irq(gaudi_pci_irq_vector(hdev, i, false),
1163                                &hdev->completion_queue[i]);
1164        return rc;
1165}
1166
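    /*
     * Enable MSI: request up to GAUDI_MSI_ENTRIES vectors; if fewer than
     * NUMBER_OF_INTERRUPTS are granted, fall back to a single shared vector.
     */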
1167static int gaudi_enable_msi(struct hl_device *hdev)
1168{
1169        struct gaudi_device *gaudi = hdev->asic_specific;
1170        int rc;
1171
1172        if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1173                return 0;
1174
1175        rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1176                                        PCI_IRQ_MSI);
1177        if (rc < 0) {
1178                dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1179                return rc;
1180        }
1181
1182        if (rc < NUMBER_OF_INTERRUPTS) {
1183                gaudi->multi_msi_mode = false;
1184                rc = gaudi_enable_msi_single(hdev);
1185        } else {
1186                gaudi->multi_msi_mode = true;
1187                rc = gaudi_enable_msi_multi(hdev);
1188        }
1189
1190        if (rc)
1191                goto free_pci_irq_vectors;
1192
1193        gaudi->hw_cap_initialized |= HW_CAP_MSI;
1194
1195        return 0;
1196
1197free_pci_irq_vectors:
1198        pci_free_irq_vectors(hdev->pdev);
1199        return rc;
1200}
1201
1202static void gaudi_sync_irqs(struct hl_device *hdev)
1203{
1204        struct gaudi_device *gaudi = hdev->asic_specific;
1205        int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1206
1207        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1208                return;
1209
1210        /* Wait for all pending IRQs to be finished */
1211        if (gaudi->multi_msi_mode) {
1212                for (i = 0 ; i < cq_cnt ; i++)
1213                        synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1214
1215                synchronize_irq(gaudi_pci_irq_vector(hdev,
1216                                                GAUDI_EVENT_QUEUE_MSI_IDX,
1217                                                true));
1218        } else {
1219                synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1220        }
1221}
1222
1223static void gaudi_disable_msi(struct hl_device *hdev)
1224{
1225        struct gaudi_device *gaudi = hdev->asic_specific;
1226        int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1227
1228        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1229                return;
1230
1231        gaudi_sync_irqs(hdev);
1232
1233        if (gaudi->multi_msi_mode) {
1234                irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1235                                                true);
1236                free_irq(irq, &hdev->event_queue);
1237
1238                for (i = 0 ; i < cq_cnt ; i++) {
1239                        irq = gaudi_pci_irq_vector(hdev, i, false);
1240                        free_irq(irq, &hdev->completion_queue[i]);
1241                }
1242        } else {
1243                free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1244        }
1245
1246        pci_free_irq_vectors(hdev->pdev);
1247
1248        gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1249}
1250
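    /*
     * Enable the SRAM scrambler in every SIF/NIF router and DMA interface,
     * unless SRAM scrambling is disabled for this device or was already
     * configured.
     */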
1251static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1252{
1253        struct gaudi_device *gaudi = hdev->asic_specific;
1254
1255        if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1256                return;
1257
1258        if (!hdev->sram_scrambler_enable)
1259                return;
1260
1261        WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1262                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263        WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1264                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265        WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1266                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267        WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1268                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269        WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1270                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271        WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1272                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273        WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1274                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275        WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1276                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277
1278        WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1279                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280        WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1281                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282        WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1283                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284        WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1285                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286        WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1287                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288        WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1289                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290        WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1291                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292        WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1293                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294
1295        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1296                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1298                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1300                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1302                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1304                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1306                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1308                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1310                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311
1312        gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1313}
1314
1315static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1316{
1317        struct gaudi_device *gaudi = hdev->asic_specific;
1318
1319        if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1320                return;
1321
1322        if (!hdev->dram_scrambler_enable)
1323                return;
1324
1325        WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1326                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327        WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1328                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329        WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1330                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331        WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1332                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333        WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1334                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335        WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1336                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337        WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1338                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339        WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1340                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341
1342        WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1343                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344        WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1345                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346        WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1347                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348        WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1349                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350        WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1351                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352        WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1353                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354        WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1355                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356        WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1357                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358
1359        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1360                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1362                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1364                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1366                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1368                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1370                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1372                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1374                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375
1376        gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1377}
1378
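/*
 * Program the end-to-end (E2E) HBM and PCI credit sizes of every SIF/NIF
 * router and DMA interface channel, adjust the NL_HBM_* selection registers
 * when DRAM scrambling is off, and finally enable the E2E credits.
 */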
1379static void gaudi_init_e2e(struct hl_device *hdev)
1380{
1381        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1382        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1383        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1384        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1385
1386        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1387        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1388        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1389        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1390
1391        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1392        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1393        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1394        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1395
1396        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1397        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1398        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1399        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1400
1401        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1402        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1403        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1404        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1405
1406        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1407        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1408        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1409        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1410
1411        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1412        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1413        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1414        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1415
1416        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1417        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1418        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1419        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1420
1421        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1422        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1423        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1424        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1425
1426        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1427        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1428        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1429        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1430
1431        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1432        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1433        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1434        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1435
1436        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1437        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1438        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1439        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1440
1441        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1442        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1443        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1444        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1445
1446        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1447        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1448        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1449        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1450
1451        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1452        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1453        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1454        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1455
1456        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1457        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1458        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1459        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1460
1461        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1462        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1463        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1464        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1465
1466        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1467        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1468        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1469        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1470
1471        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1472        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1473        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1474        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1475
1476        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1477        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1478        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1479        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1480
1481        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1482        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1483        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1484        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1485
1486        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1487        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1488        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1489        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1490
1491        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1492        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1493        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1494        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1495
1496        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1497        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1498        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1499        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1500
1501        if (!hdev->dram_scrambler_enable) {
1502                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1503                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1504                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1505                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1506
1507                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1508                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1509                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1510                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1511
1512                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1513                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1514                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1515                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1516
1517                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1518                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1519                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1520                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1521
1522                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1523                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1524                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1525                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1526
1527                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1528                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1529                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1530                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1531
1532                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1533                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1534                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1535                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1536
1537                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1538                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1539                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1540                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1541
1542                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1543                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1544                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1545                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1546
1547                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1548                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1549                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1550                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1551
1552                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1553                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1554                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1555                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1556
1557                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1558                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1559                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1560                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1561
1562                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1563                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1564                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1565                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1566
1567                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1568                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1569                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1570                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1571
1572                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1573                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1574                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1575                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1576
1577                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1578                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1579                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1580                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1581
1582                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1583                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1584                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1585                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1586
1587                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1588                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1589                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1590                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1591
1592                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1593                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1594                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1595                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1596
1597                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1598                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1599                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1600                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1601
1602                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1603                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1604                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1605                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1606
1607                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1608                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1609                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1610                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1611
1612                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1613                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1614                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1615                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1616
1617                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1618                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1619                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1620                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1621        }
1622
1623        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1624                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1625        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1626                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1627
1628        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1629                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1630        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1631                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1632
1633        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1634                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1635        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1636                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1637
1638        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1639                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1640        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1641                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1642
1643        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1644                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1645        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1646                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1647
1648        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1649                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1650        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1651                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1652
1653        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1654                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1655        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1656                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1657
1658        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1659                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1660        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1661                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1662
1663        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1664                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1665        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1666                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1667
1668        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1669                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1670        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1671                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1672
1673        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1674                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1675        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1676                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1677
1678        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1679                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1680        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1681                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1682
1683        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1684                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1685        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1686                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1687
1688        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1689                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1690        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1691                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1692
1693        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1694                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1695        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1696                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1697
1698        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1699                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1700        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1701                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1702
1703        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1704                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1705        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1706                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1707
1708        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1709                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1710        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1711                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1712
1713        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1714                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1715        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1716                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1717
1718        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1719                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1720        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1721                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1722
1723        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1724                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1725        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1726                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1727
1728        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1729                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1730        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1731                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1732
1733        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1734                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1735        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1736                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1737
1738        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1739                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1740        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1741                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1742}
1743
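/*
 * Program the HBM0/HBM1 read and write credit counters of the four DMA
 * interfaces and enable read/write crediting on both HBM channels.
 */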
1744static void gaudi_init_hbm_cred(struct hl_device *hdev)
1745{
1746        u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1747
1748        hbm0_wr = 0x33333333;
1749        hbm0_rd = 0x77777777;
1750        hbm1_wr = 0x55555555;
1751        hbm1_rd = 0xDDDDDDDD;
1752
1753        WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1754        WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1755        WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1756        WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1757
1758        WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1759        WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1760        WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1761        WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1762
1763        WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1764        WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1765        WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1766        WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1767
1768        WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1769        WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1770        WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1771        WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1772
1773        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1774                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1775                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1776        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1777                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1778                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1779        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1780                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1783                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785
1786        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1787                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1788                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1789        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1790                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1791                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1792        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1793                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1796                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798}
1799
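/*
 * One-time "golden" register setup: E2E and HBM credits, TPC arithmetic
 * interrupt masking and i-cache fetch width, clearing of the first SRAM
 * bytes and the MME roll-up counters. Clock gating is disabled here.
 */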
1800static void gaudi_init_golden_registers(struct hl_device *hdev)
1801{
1802        u32 tpc_offset;
1803        int tpc_id, i;
1804
1805        gaudi_init_e2e(hdev);
1806
1807        gaudi_init_hbm_cred(hdev);
1808
1809        hdev->asic_funcs->disable_clock_gating(hdev);
1810
1811        for (tpc_id = 0, tpc_offset = 0;
1812                                tpc_id < TPC_NUMBER_OF_ENGINES;
1813                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1814                /* Mask all arithmetic interrupts from TPC */
1815                WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1816                /* Set 16 cache lines */
1817                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1818                                ICACHE_FETCH_LINE_NUM, 2);
1819        }
1820
1821        /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1822        for (i = 0 ; i < 128 ; i += 8)
1823                writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1824
1825        WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1826        WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1827        WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1828        WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1829}
1830
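/*
 * Initialize one stream of a PCI DMA QMAN: PQ base and size, LDMA offsets,
 * sync manager message base addresses and the CP barrier. Stream 0 also
 * carries the per-QMAN RAZWI/error reporting and arbitration configuration.
 */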
1831static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1832                                        int qman_id, dma_addr_t qman_pq_addr)
1833{
1834        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1835        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1836        u32 q_off, dma_qm_offset;
1837        u32 dma_qm_err_cfg;
1838
1839        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1840
1841        mtr_base_en_lo = lower_32_bits(CFG_BASE +
1842                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1843        mtr_base_en_hi = upper_32_bits(CFG_BASE +
1844                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1845        so_base_en_lo = lower_32_bits(CFG_BASE +
1846                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1847        so_base_en_hi = upper_32_bits(CFG_BASE +
1848                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1849        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1850                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1852                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1853        so_base_ws_lo = lower_32_bits(CFG_BASE +
1854                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855        so_base_ws_hi = upper_32_bits(CFG_BASE +
1856                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1857
1858        q_off = dma_qm_offset + qman_id * 4;
1859
1860        WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1861        WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1862
1863        WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1864        WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1865        WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1866
1867        WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1868        WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1869                                                        QMAN_LDMA_SRC_OFFSET);
1870        WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1871                                                        QMAN_LDMA_DST_OFFSET);
1872
1873        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1874        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1875        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1876        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1877        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1878        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1879        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1880        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1881
1882        WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1883
1884        /* The following configuration is needed only once per QMAN */
1885        if (qman_id == 0) {
1886                /* Configure RAZWI IRQ */
1887                dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1888                if (hdev->stop_on_err) {
1889                        dma_qm_err_cfg |=
1890                                PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1891                }
1892
1893                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1894                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1895                        lower_32_bits(CFG_BASE +
1896                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1897                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1898                        upper_32_bits(CFG_BASE +
1899                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1900                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1901                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1902                                                                        dma_id);
1903
1904                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1905                                QM_ARB_ERR_MSG_EN_MASK);
1906
1907                /* Increase ARB WDT to support streams architecture */
1908                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1909                                GAUDI_ARB_WDT_TIMEOUT);
1910
1911                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1912                                QMAN_EXTERNAL_MAKE_TRUSTED);
1913
1914                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1915        }
1916}
1917
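/*
 * Configure a DMA core: read outstanding/size limits, error reporting
 * towards the GIC, protection and MMU-bypass bits, and finally enable the
 * engine.
 */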
1918static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1919{
1920        u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1921        u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1922
1923        /* Set to maximum possible according to physical size */
1924        WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1925        WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1926
1927        /* WA for H/W bug H3-2116 */
1928        WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1929
1930        /* STOP_ON bit implies no completion to operation in case of RAZWI */
1931        if (hdev->stop_on_err)
1932                dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1933
1934        WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1935        WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1936                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1937        WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1938                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1939        WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1940                gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1941        WREG32(mmDMA0_CORE_PROT + dma_offset,
1942                        1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1943        /* If the channel is secured, it should be in MMU bypass mode */
1944        WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1945                        1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1946        WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1947}
1948
1949static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1950                                u32 enable_mask)
1951{
1952        u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1953
1954        WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1955}
1956
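/*
 * Bring up all PCI DMA channels: assign CQ and MSI vector indices to their
 * kernel queues, initialize every QMAN stream and the DMA core, and enable
 * the QMANs.
 */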
1957static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1958{
1959        struct gaudi_device *gaudi = hdev->asic_specific;
1960        struct hl_hw_queue *q;
1961        int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1962
1963        if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1964                return;
1965
1966        for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1967                dma_id = gaudi_dma_assignment[i];
1968                /*
1969                 * For queues after the CPU Q, we need to add 1 to get the
1970                 * correct queue index. We also account for the CPU EQ and
1971                 * the NIC IRQs in order to get the correct MSI vector.
1972                 */
1973                if (dma_id > 1) {
1974                        cpu_skip = 1;
1975                        nic_skip = NIC_NUMBER_OF_ENGINES;
1976                } else {
1977                        cpu_skip = 0;
1978                        nic_skip = 0;
1979                }
1980
1981                for (j = 0 ; j < QMAN_STREAMS ; j++) {
1982                        q_idx = 4 * dma_id + j + cpu_skip;
1983                        q = &hdev->kernel_queues[q_idx];
1984                        q->cq_id = cq_id++;
1985                        q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1986                        gaudi_init_pci_dma_qman(hdev, dma_id, j,
1987                                                q->bus_address);
1988                }
1989
1990                gaudi_init_dma_core(hdev, dma_id);
1991
1992                gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1993        }
1994
1995        gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1996}
1997
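/*
 * Initialize one stream of an HBM DMA QMAN. Streams 0-3 get a PQ at
 * qman_base_addr with CPDMA offsets; stream 4 is the lower CP and also
 * carries the error reporting and arbitration configuration.
 */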
1998static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1999                                        int qman_id, u64 qman_base_addr)
2000{
2001        u32 mtr_base_lo, mtr_base_hi;
2002        u32 so_base_lo, so_base_hi;
2003        u32 q_off, dma_qm_offset;
2004        u32 dma_qm_err_cfg;
2005
2006        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2007
2008        mtr_base_lo = lower_32_bits(CFG_BASE +
2009                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2010        mtr_base_hi = upper_32_bits(CFG_BASE +
2011                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2012        so_base_lo = lower_32_bits(CFG_BASE +
2013                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2014        so_base_hi = upper_32_bits(CFG_BASE +
2015                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2016
2017        q_off = dma_qm_offset + qman_id * 4;
2018
2019        if (qman_id < 4) {
2020                WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2021                                        lower_32_bits(qman_base_addr));
2022                WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2023                                        upper_32_bits(qman_base_addr));
2024
2025                WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2026                WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2027                WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2028
2029                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2030                                                        QMAN_CPDMA_SIZE_OFFSET);
2031                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2032                                                        QMAN_CPDMA_SRC_OFFSET);
2033                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2034                                                        QMAN_CPDMA_DST_OFFSET);
2035        } else {
2036                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2037                                                        QMAN_LDMA_SIZE_OFFSET);
2038                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2039                                                        QMAN_LDMA_SRC_OFFSET);
2040                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2041                                                        QMAN_LDMA_DST_OFFSET);
2042
2043                /* Configure RAZWI IRQ */
2044                dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2045                if (hdev->stop_on_err) {
2046                        dma_qm_err_cfg |=
2047                                HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2048                }
2049                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2050
2051                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2052                        lower_32_bits(CFG_BASE +
2053                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2054                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2055                        upper_32_bits(CFG_BASE +
2056                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2057                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2058                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2059                                                                        dma_id);
2060
2061                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2062                                QM_ARB_ERR_MSG_EN_MASK);
2063
2064                /* Increase ARB WDT to support streams architecture */
2065                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2066                                GAUDI_ARB_WDT_TIMEOUT);
2067
2068                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2069                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2070                                QMAN_INTERNAL_MAKE_TRUSTED);
2071        }
2072
2073        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2074        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2075        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2076        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2077}
2078
2079static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2080{
2081        struct gaudi_device *gaudi = hdev->asic_specific;
2082        struct gaudi_internal_qman_info *q;
2083        u64 qman_base_addr;
2084        int i, j, dma_id, internal_q_index;
2085
2086        if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2087                return;
2088
2089        for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2090                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2091
2092                for (j = 0 ; j < QMAN_STREAMS ; j++) {
2093                         /*
2094                          * Add the CPU queue in order to get the correct queue
2095                          * number, as all internal queues are placed after it
2096                          */
2097                        internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2098
2099                        q = &gaudi->internal_qmans[internal_q_index];
2100                        qman_base_addr = (u64) q->pq_dma_addr;
2101                        gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2102                                                qman_base_addr);
2103                }
2104
2105                /* Initializing lower CP for HBM DMA QMAN */
2106                gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2107
2108                gaudi_init_dma_core(hdev, dma_id);
2109
2110                gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2111        }
2112
2113        gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2114}
2115
2116static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2117                                        int qman_id, u64 qman_base_addr)
2118{
2119        u32 mtr_base_lo, mtr_base_hi;
2120        u32 so_base_lo, so_base_hi;
2121        u32 q_off, mme_id;
2122        u32 mme_qm_err_cfg;
2123
2124        mtr_base_lo = lower_32_bits(CFG_BASE +
2125                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2126        mtr_base_hi = upper_32_bits(CFG_BASE +
2127                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2128        so_base_lo = lower_32_bits(CFG_BASE +
2129                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2130        so_base_hi = upper_32_bits(CFG_BASE +
2131                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2132
2133        q_off = mme_offset + qman_id * 4;
2134
2135        if (qman_id < 4) {
2136                WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2137                                        lower_32_bits(qman_base_addr));
2138                WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2139                                        upper_32_bits(qman_base_addr));
2140
2141                WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2142                WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2143                WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2144
2145                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2146                                                        QMAN_CPDMA_SIZE_OFFSET);
2147                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2148                                                        QMAN_CPDMA_SRC_OFFSET);
2149                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2150                                                        QMAN_CPDMA_DST_OFFSET);
2151        } else {
2152                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2153                                                        QMAN_LDMA_SIZE_OFFSET);
2154                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2155                                                        QMAN_LDMA_SRC_OFFSET);
2156                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2157                                                        QMAN_LDMA_DST_OFFSET);
2158
2159                /* Configure RAZWI IRQ */
2160                mme_id = mme_offset /
2161                                (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2162
2163                mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2164                if (hdev->stop_on_err) {
2165                        mme_qm_err_cfg |=
2166                                MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2167                }
2168                WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2169                WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2170                        lower_32_bits(CFG_BASE +
2171                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2172                WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2173                        upper_32_bits(CFG_BASE +
2174                                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2175                WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2176                        gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2177                                                                        mme_id);
2178
2179                WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2180                                QM_ARB_ERR_MSG_EN_MASK);
2181
2182                /* Increase ARB WDT to support streams architecture */
2183                WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2184                                GAUDI_ARB_WDT_TIMEOUT);
2185
2186                WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2187                WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2188                                QMAN_INTERNAL_MAKE_TRUSTED);
2189        }
2190
2191        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2192        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2193        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2194        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2195}
2196
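/*
 * Set up the QMANs of the two MME masters (mmMME0_QM and mmMME2_QM): four
 * upper-CP streams each plus a lower CP, then enable both QMANs.
 */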
2197static void gaudi_init_mme_qmans(struct hl_device *hdev)
2198{
2199        struct gaudi_device *gaudi = hdev->asic_specific;
2200        struct gaudi_internal_qman_info *q;
2201        u64 qman_base_addr;
2202        u32 mme_offset;
2203        int i, internal_q_index;
2204
2205        if (gaudi->hw_cap_initialized & HW_CAP_MME)
2206                return;
2207
2208        /*
2209         * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2210         * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2211         */
2212
2213        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2214
2215        for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2216                internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2217                q = &gaudi->internal_qmans[internal_q_index];
2218                qman_base_addr = (u64) q->pq_dma_addr;
2219                gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2220                                        qman_base_addr);
2221                if (i == 3)
2222                        mme_offset = 0;
2223        }
2224
2225        /* Initializing lower CP for MME QMANs */
2226        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2227        gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2228        gaudi_init_mme_qman(hdev, 0, 4, 0);
2229
2230        WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2231        WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2232
2233        gaudi->hw_cap_initialized |= HW_CAP_MME;
2234}
2235
2236static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2237                                int qman_id, u64 qman_base_addr)
2238{
2239        u32 mtr_base_lo, mtr_base_hi;
2240        u32 so_base_lo, so_base_hi;
2241        u32 q_off, tpc_id;
2242        u32 tpc_qm_err_cfg;
2243
2244        mtr_base_lo = lower_32_bits(CFG_BASE +
2245                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2246        mtr_base_hi = upper_32_bits(CFG_BASE +
2247                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2248        so_base_lo = lower_32_bits(CFG_BASE +
2249                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2250        so_base_hi = upper_32_bits(CFG_BASE +
2251                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2252
2253        q_off = tpc_offset + qman_id * 4;
2254
2255        if (qman_id < 4) {
2256                WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2257                                        lower_32_bits(qman_base_addr));
2258                WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2259                                        upper_32_bits(qman_base_addr));
2260
2261                WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2262                WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2263                WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2264
2265                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2266                                                        QMAN_CPDMA_SIZE_OFFSET);
2267                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2268                                                        QMAN_CPDMA_SRC_OFFSET);
2269                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2270                                                        QMAN_CPDMA_DST_OFFSET);
2271        } else {
2272                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2273                                                        QMAN_LDMA_SIZE_OFFSET);
2274                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2275                                                        QMAN_LDMA_SRC_OFFSET);
2276                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2277                                                        QMAN_LDMA_DST_OFFSET);
2278
2279                /* Configure RAZWI IRQ */
2280                tpc_id = tpc_offset /
2281                                (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2282
2283                tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2284                if (hdev->stop_on_err) {
2285                        tpc_qm_err_cfg |=
2286                                TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2287                }
2288
2289                WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2290                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2291                        lower_32_bits(CFG_BASE +
2292                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2293                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2294                        upper_32_bits(CFG_BASE +
2295                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2296                WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2297                        gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2298                                                                        tpc_id);
2299
2300                WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2301                                QM_ARB_ERR_MSG_EN_MASK);
2302
2303                /* Increase ARB WDT to support streams architecture */
2304                WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2305                                GAUDI_ARB_WDT_TIMEOUT);
2306
2307                WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2308                WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2309                                QMAN_INTERNAL_MAKE_TRUSTED);
2310        }
2311
2312        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2313        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2314        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2315        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2316}
2317
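/*
 * Set up the QMANs of all TPC engines: four upper-CP streams plus a lower
 * CP per TPC, program each TPC's sync manager base address and enable the
 * QMAN.
 */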
2318static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2319{
2320        struct gaudi_device *gaudi = hdev->asic_specific;
2321        struct gaudi_internal_qman_info *q;
2322        u64 qman_base_addr;
2323        u32 so_base_hi, tpc_offset = 0;
2324        u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2325                        mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2326        int i, tpc_id, internal_q_index;
2327
2328        if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2329                return;
2330
2331        so_base_hi = upper_32_bits(CFG_BASE +
2332                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2333
2334        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2335                for (i = 0 ; i < QMAN_STREAMS ; i++) {
2336                        internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2337                                                tpc_id * QMAN_STREAMS + i;
2338                        q = &gaudi->internal_qmans[internal_q_index];
2339                        qman_base_addr = (u64) q->pq_dma_addr;
2340                        gaudi_init_tpc_qman(hdev, tpc_offset, i,
2341                                                qman_base_addr);
2342
2343                        if (i == 3) {
2344                                /* Initializing lower CP for TPC QMAN */
2345                                gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2346
2347                                /* Enable the QMAN and TPC channel */
2348                                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2349                                                QMAN_TPC_ENABLE);
2350                        }
2351                }
2352
2353                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2354                                so_base_hi);
2355
2356                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2357
2358                gaudi->hw_cap_initialized |=
2359                                FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2360        }
2361}
2362
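/*
 * The helpers below quiesce the device: the disable routines clear
 * GLBL_CFG0 to turn off the QMANs, the stop routines set the CP_STOP bits
 * in GLBL_CFG1, and the stall routines halt the DMA, MME and TPC engines.
 */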
2363static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2364{
2365        struct gaudi_device *gaudi = hdev->asic_specific;
2366
2367        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2368                return;
2369
2370        WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2371        WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2372        WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2373}
2374
2375static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2376{
2377        struct gaudi_device *gaudi = hdev->asic_specific;
2378
2379        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2380                return;
2381
2382        WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2383        WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2384        WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2385        WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2386        WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2387}
2388
2389static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2390{
2391        struct gaudi_device *gaudi = hdev->asic_specific;
2392
2393        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2394                return;
2395
2396        WREG32(mmMME2_QM_GLBL_CFG0, 0);
2397        WREG32(mmMME0_QM_GLBL_CFG0, 0);
2398}
2399
2400static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2401{
2402        struct gaudi_device *gaudi = hdev->asic_specific;
2403        u32 tpc_offset = 0;
2404        int tpc_id;
2405
2406        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2407                return;
2408
2409        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2410                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2411                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2412        }
2413}
2414
2415static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2416{
2417        struct gaudi_device *gaudi = hdev->asic_specific;
2418
2419        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2420                return;
2421
2422        /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2423        WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2424        WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2425        WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426}
2427
2428static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2429{
2430        struct gaudi_device *gaudi = hdev->asic_specific;
2431
2432        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2433                return;
2434
2435        /* Stop CPs of HBM DMA QMANs */
2436
2437        WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438        WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439        WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440        WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441        WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442}
2443
2444static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2445{
2446        struct gaudi_device *gaudi = hdev->asic_specific;
2447
2448        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2449                return;
2450
2451        /* Stop CPs of MME QMANs */
2452        WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2453        WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2454}
2455
2456static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2457{
2458        struct gaudi_device *gaudi = hdev->asic_specific;
2459
2460        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2461                return;
2462
2463        WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2464        WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2465        WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2466        WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2467        WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2468        WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2469        WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470        WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471}
2472
2473static void gaudi_pci_dma_stall(struct hl_device *hdev)
2474{
2475        struct gaudi_device *gaudi = hdev->asic_specific;
2476
2477        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2478                return;
2479
2480        WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2481        WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2482        WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2483}
2484
2485static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2486{
2487        struct gaudi_device *gaudi = hdev->asic_specific;
2488
2489        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2490                return;
2491
2492        WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2493        WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2494        WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2495        WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2496        WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2497}
2498
2499static void gaudi_mme_stall(struct hl_device *hdev)
2500{
2501        struct gaudi_device *gaudi = hdev->asic_specific;
2502
2503        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2504                return;
2505
2506        /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2507        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2508        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2509        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2510        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2511        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2512        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2513        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2514        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2515        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2516        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2517        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2518        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2519        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2520        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2521        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2522        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2523}
2524
2525static void gaudi_tpc_stall(struct hl_device *hdev)
2526{
2527        struct gaudi_device *gaudi = hdev->asic_specific;
2528
2529        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2530                return;
2531
2532        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2533        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2534        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2535        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2536        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2537        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2538        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540}
2541
2542static void gaudi_set_clock_gating(struct hl_device *hdev)
2543{
2544        struct gaudi_device *gaudi = hdev->asic_specific;
2545        u32 qman_offset;
2546        bool enable;
2547        int i;
2548
2549        /* If a debug session is in progress, don't enable clock gating
2550         * as it may interfere with the debugger
2551         */
2552        if (hdev->in_debug)
2553                return;
2554
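            /*
             * enable is taken per engine from the user-controlled
             * clock_gating_mask. Note that the PCI DMA QMANs only get the
             * upper-CP gating value (QMAN_UPPER_CP_CGM_PWR_GATE_EN),
             * presumably because their lower CP is reserved for driver use.
             */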
2555        for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2556                enable = !!(hdev->clock_gating_mask &
2557                                (BIT_ULL(gaudi_dma_assignment[i])));
2558
2559                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2560                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2561                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2562                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2563                                enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2564        }
2565
2566        for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2567                enable = !!(hdev->clock_gating_mask &
2568                                (BIT_ULL(gaudi_dma_assignment[i])));
2569
2570                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2571                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2572                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2573                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2574                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2575        }
2576
2577        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2578        WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579        WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2580
2581        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2582        WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2583        WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2584
2585        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2586                enable = !!(hdev->clock_gating_mask &
2587                                (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2588
2589                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2590                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2591                WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2592                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2593
2594                qman_offset += TPC_QMAN_OFFSET;
2595        }
2596
2597        gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2598}
2599
2600static void gaudi_disable_clock_gating(struct hl_device *hdev)
2601{
2602        struct gaudi_device *gaudi = hdev->asic_specific;
2603        u32 qman_offset;
2604        int i;
2605
2606        if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2607                return;
2608
2609        for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2610                WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2611                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2612
2613                qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2614        }
2615
2616        WREG32(mmMME0_QM_CGM_CFG, 0);
2617        WREG32(mmMME0_QM_CGM_CFG1, 0);
2618        WREG32(mmMME2_QM_CGM_CFG, 0);
2619        WREG32(mmMME2_QM_CGM_CFG1, 0);
2620
2621        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2622                WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2623                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2624
2625                qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2626        }
2627
2628        gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2629}
2630
2631static void gaudi_enable_timestamp(struct hl_device *hdev)
2632{
2633        /* Disable the timestamp counter */
2634        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2635
2636        /* Zero the lower/upper parts of the 64-bit counter */
2637        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2638        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2639
2640        /* Enable the counter */
2641        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2642}
2643
2644static void gaudi_disable_timestamp(struct hl_device *hdev)
2645{
2646        /* Disable the timestamp counter */
2647        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2648}
2649
2650static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2651{
2652        u32 wait_timeout_ms;
2653
2654        dev_info(hdev->dev,
2655                "Halting compute engines and disabling interrupts\n");
2656
2657        if (hdev->pldm)
2658                wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2659        else
2660                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2661
2662
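            /*
             * Ordering: first stop the QMANs so no new work is dispatched,
             * then stall the engines themselves, and only then disable the
             * QMANs, the timestamp counter and MSI.
             */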
2663        gaudi_stop_mme_qmans(hdev);
2664        gaudi_stop_tpc_qmans(hdev);
2665        gaudi_stop_hbm_dma_qmans(hdev);
2666        gaudi_stop_pci_dma_qmans(hdev);
2667
2668        hdev->asic_funcs->disable_clock_gating(hdev);
2669
2670        msleep(wait_timeout_ms);
2671
2672        gaudi_pci_dma_stall(hdev);
2673        gaudi_hbm_dma_stall(hdev);
2674        gaudi_tpc_stall(hdev);
2675        gaudi_mme_stall(hdev);
2676
2677        msleep(wait_timeout_ms);
2678
2679        gaudi_disable_mme_qmans(hdev);
2680        gaudi_disable_tpc_qmans(hdev);
2681        gaudi_disable_hbm_dma_qmans(hdev);
2682        gaudi_disable_pci_dma_qmans(hdev);
2683
2684        gaudi_disable_timestamp(hdev);
2685
2686        gaudi_disable_msi(hdev);
2687}
2688
2689static int gaudi_mmu_init(struct hl_device *hdev)
2690{
2691        struct asic_fixed_properties *prop = &hdev->asic_prop;
2692        struct gaudi_device *gaudi = hdev->asic_specific;
2693        u64 hop0_addr;
2694        int rc, i;
2695
2696        if (!hdev->mmu_enable)
2697                return 0;
2698
2699        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2700                return 0;
2701
2702        hdev->dram_supports_virtual_memory = false;
2703
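            /*
             * Hop0 page tables of all ASIDs are laid out consecutively in the
             * page-tables area, one mmu_hop_table_size slot per ASID.
             */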
2704        for (i = 0 ; i < prop->max_asid ; i++) {
2705                hop0_addr = prop->mmu_pgt_addr +
2706                                (i * prop->mmu_hop_table_size);
2707
2708                rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2709                if (rc) {
2710                        dev_err(hdev->dev,
2711                                "failed to set hop0 addr for asid %d\n", i);
2712                        goto err;
2713                }
2714        }
2715
2716        /* init MMU cache manage page */
2717        WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2718        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2719
2720        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2721
2722        WREG32(mmMMU_UP_MMU_ENABLE, 1);
2723        WREG32(mmMMU_UP_SPI_MASK, 0xF);
2724
2725        WREG32(mmSTLB_HOP_CONFIGURATION,
2726                        hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2727
2728        /*
2729         * The H/W expects the first PI after init to be 1. After wraparound
2730         * we'll write 0.
2731         */
2732        gaudi->mmu_cache_inv_pi = 1;
2733
2734        gaudi->hw_cap_initialized |= HW_CAP_MMU;
2735
2736        return 0;
2737
2738err:
2739        return rc;
2740}
2741
2742static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2743{
2744        void __iomem *dst;
2745
2746        /* HBM scrambler must be initialized before pushing F/W to HBM */
2747        gaudi_init_scrambler_hbm(hdev);
2748
2749        dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2750
2751        return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2752}
2753
2754static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2755{
2756        void __iomem *dst;
2757
2758        dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2759
2760        return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2761}
2762
2763static void gaudi_read_device_fw_version(struct hl_device *hdev,
2764                                        enum hl_fw_component fwc)
2765{
2766        const char *name;
2767        u32 ver_off;
2768        char *dest;
2769
2770        switch (fwc) {
2771        case FW_COMP_UBOOT:
2772                ver_off = RREG32(mmUBOOT_VER_OFFSET);
2773                dest = hdev->asic_prop.uboot_ver;
2774                name = "U-Boot";
2775                break;
2776        case FW_COMP_PREBOOT:
2777                ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2778                dest = hdev->asic_prop.preboot_ver;
2779                name = "Preboot";
2780                break;
2781        default:
2782                dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2783                return;
2784        }
2785
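            /*
             * The version-offset register holds an absolute SRAM address;
             * strip the SRAM base so it can be used below as an offset into
             * the SRAM BAR mapping.
             */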
2786        ver_off &= ~((u32)SRAM_BASE_ADDR);
2787
2788        if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2789                memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2790                                                        VERSION_MAX_LEN);
2791        } else {
2792                dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2793                                                                name, ver_off);
2794                strcpy(dest, "unavailable");
2795        }
2796}
2797
2798static int gaudi_init_cpu(struct hl_device *hdev)
2799{
2800        struct gaudi_device *gaudi = hdev->asic_specific;
2801        int rc;
2802
2803        if (!hdev->cpu_enable)
2804                return 0;
2805
2806        if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2807                return 0;
2808
2809        /*
2810         * The device CPU works with 40-bit addresses.
2811         * This register sets the address extension to 50 bits.
2812         */
2813        WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2814
2815        rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2816                        mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2817                        mmCPU_CMD_STATUS_TO_HOST,
2818                        mmCPU_BOOT_ERR0,
2819                        !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2820                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2821
2822        if (rc)
2823                return rc;
2824
2825        gaudi->hw_cap_initialized |= HW_CAP_CPU;
2826
2827        return 0;
2828}
2829
2830static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2831{
2832        struct gaudi_device *gaudi = hdev->asic_specific;
2833        struct hl_eq *eq;
2834        u32 status;
2835        struct hl_hw_queue *cpu_pq =
2836                        &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2837        int err;
2838
2839        if (!hdev->cpu_queues_enable)
2840                return 0;
2841
2842        if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2843                return 0;
2844
2845        eq = &hdev->event_queue;
2846
2847        WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2848        WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2849
2850        WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2851        WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2852
2853        WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2854                        lower_32_bits(hdev->cpu_accessible_dma_address));
2855        WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2856                        upper_32_bits(hdev->cpu_accessible_dma_address));
2857
2858        WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2859        WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2860        WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2861
2862        /* Used for EQ CI */
2863        WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2864
2865        WREG32(mmCPU_IF_PF_PQ_PI, 0);
2866
2867        if (gaudi->multi_msi_mode)
2868                WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2869        else
2870                WREG32(mmCPU_IF_QUEUE_INIT,
2871                        PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2872
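            /*
             * Signal the device CPU (through its GIC) that the queue
             * configuration above is ready; the CPU should respond by setting
             * READY_FOR_HOST, which we poll for below.
             */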
2873        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2874
2875        err = hl_poll_timeout(
2876                hdev,
2877                mmCPU_IF_QUEUE_INIT,
2878                status,
2879                (status == PQ_INIT_STATUS_READY_FOR_HOST),
2880                1000,
2881                cpu_timeout);
2882
2883        if (err) {
2884                dev_err(hdev->dev,
2885                        "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2886                return -EIO;
2887        }
2888
2889        gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2890        return 0;
2891}
2892
2893static void gaudi_pre_hw_init(struct hl_device *hdev)
2894{
2895        /* Perform read from the device to make sure device is up */
2896        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2897
2898        /* Set the access through PCI bars (Linux driver only) as
2899         * secured
2900         */
2901        WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2902                        (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2903                        PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2904
2905        /* Perform read to flush the waiting writes to ensure
2906         * configuration was set in the device
2907         */
2908        RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2909
2910        /*
2911         * Let's mark in the H/W that we have reached this point. We check
2912         * this value in the reset_before_init function to understand whether
2913         * we need to reset the chip before doing H/W init. This register is
2914         * cleared by the H/W upon H/W reset
2915         */
2916        WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2917
2918        /* Configure the reset registers. Must be done as early as possible
2919         * in case we fail during H/W initialization
2920         */
2921        WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2922                                        (CFG_RST_H_DMA_MASK |
2923                                        CFG_RST_H_MME_MASK |
2924                                        CFG_RST_H_SM_MASK |
2925                                        CFG_RST_H_TPC_7_MASK));
2926
2927        WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2928
2929        WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2930                                        (CFG_RST_H_HBM_MASK |
2931                                        CFG_RST_H_TPC_7_MASK |
2932                                        CFG_RST_H_NIC_MASK |
2933                                        CFG_RST_H_SM_MASK |
2934                                        CFG_RST_H_DMA_MASK |
2935                                        CFG_RST_H_MME_MASK |
2936                                        CFG_RST_H_CPU_MASK |
2937                                        CFG_RST_H_MMU_MASK));
2938
2939        WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2940                                        (CFG_RST_L_IF_MASK |
2941                                        CFG_RST_L_PSOC_MASK |
2942                                        CFG_RST_L_TPC_MASK));
2943}
2944
2945static int gaudi_hw_init(struct hl_device *hdev)
2946{
2947        int rc;
2948
2949        dev_info(hdev->dev, "Starting initialization of H/W\n");
2950
2951        gaudi_pre_hw_init(hdev);
2952
2953        gaudi_init_pci_dma_qmans(hdev);
2954
2955        gaudi_init_hbm_dma_qmans(hdev);
2956
2957        rc = gaudi_init_cpu(hdev);
2958        if (rc) {
2959                dev_err(hdev->dev, "failed to initialize CPU\n");
2960                return rc;
2961        }
2962
2963        /* SRAM scrambler must be initialized after CPU is running from HBM */
2964        gaudi_init_scrambler_sram(hdev);
2965
2966        /* This is here just in case we are working without CPU */
2967        gaudi_init_scrambler_hbm(hdev);
2968
2969        gaudi_init_golden_registers(hdev);
2970
2971        rc = gaudi_mmu_init(hdev);
2972        if (rc)
2973                return rc;
2974
2975        gaudi_init_security(hdev);
2976
2977        gaudi_init_mme_qmans(hdev);
2978
2979        gaudi_init_tpc_qmans(hdev);
2980
2981        hdev->asic_funcs->set_clock_gating(hdev);
2982
2983        gaudi_enable_timestamp(hdev);
2984
2985        /* MSI must be enabled before CPU queues are initialized */
2986        rc = gaudi_enable_msi(hdev);
2987        if (rc)
2988                goto disable_queues;
2989
2990        /* must be called after MSI was enabled */
2991        rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2992        if (rc) {
2993                dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2994                        rc);
2995                goto disable_msi;
2996        }
2997
2998        /* Perform read from the device to flush all configuration */
2999        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3000
3001        return 0;
3002
3003disable_msi:
3004        gaudi_disable_msi(hdev);
3005disable_queues:
3006        gaudi_disable_mme_qmans(hdev);
3007        gaudi_disable_pci_dma_qmans(hdev);
3008
3009        return rc;
3010}
3011
3012static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3013{
3014        struct gaudi_device *gaudi = hdev->asic_specific;
3015        u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3016
3017        if (!hard_reset) {
3018                dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3019                return;
3020        }
3021
3022        if (hdev->pldm) {
3023                reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3024                cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3025        } else {
3026                reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3027                cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3028        }
3029
3030        /* Set device to handle FLR by H/W as we will put the device CPU to
3031         * halt mode
3032         */
3033        WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3034                                        PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3035
3036        /* The state of the device CPU is unknown at this point, so make
3037         * sure it is stopped by any means necessary
3038         */
3039        WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3040        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3041
3042        msleep(cpu_timeout_ms);
3043
3044        /* Tell ASIC not to re-initialize PCIe */
3045        WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3046
3047        boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3048
3049        /* H/W bug WA:
3050         * rdata[31:0] = strap_read_val;
3051         * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3052         */
3053        boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3054                        (boot_strap & 0x001FFFFF));
3055        WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3056
3057        /* Restart BTL/BLR upon hard-reset */
3058        WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3059
3060        WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3061                        1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3062        dev_info(hdev->dev,
3063                "Issued HARD reset command, going to wait %dms\n",
3064                reset_timeout_ms);
3065
3066        /*
3067         * After hard reset, we can't poll the BTM_FSM register because the PSOC
3068         * itself is in reset. Need to wait until the reset is deasserted
3069         */
3070        msleep(reset_timeout_ms);
3071
3072        status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3073        if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3074                dev_err(hdev->dev,
3075                        "Timeout while waiting for device to reset 0x%x\n",
3076                        status);
3077
3078        WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3079
3080        gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3081                                        HW_CAP_HBM | HW_CAP_PCI_DMA |
3082                                        HW_CAP_MME | HW_CAP_TPC_MASK |
3083                                        HW_CAP_HBM_DMA | HW_CAP_PLL |
3084                                        HW_CAP_MMU |
3085                                        HW_CAP_SRAM_SCRAMBLER |
3086                                        HW_CAP_HBM_SCRAMBLER |
3087                                        HW_CAP_CLK_GATE);
3088
3089        memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3090}
3091
3092static int gaudi_suspend(struct hl_device *hdev)
3093{
3094        int rc;
3095
3096        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3097        if (rc)
3098                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3099
3100        return rc;
3101}
3102
3103static int gaudi_resume(struct hl_device *hdev)
3104{
3105        return gaudi_init_iatu(hdev);
3106}
3107
3108static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3109                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
3110{
3111        int rc;
3112
3113        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3114                        VM_DONTCOPY | VM_NORESERVE;
3115
3116        rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3117        if (rc)
3118                dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3119
3120        return rc;
3121}
3122
3123static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3124{
3125        struct gaudi_device *gaudi = hdev->asic_specific;
3126        u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3127        int dma_id;
3128        bool invalid_queue = false;
3129
3130        switch (hw_queue_id) {
3131        case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3132                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3133                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134                q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3135                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136                break;
3137
3138        case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3139                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3140                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141                q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143                break;
3144
3145        case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3146                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3147                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
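                    /*
                     * The "- 1" compensates for the CPU PQ queue ID, which
                     * (per the queue enumeration) sits between DMA_1_3 and
                     * DMA_2_0, so the per-stream index is extracted correctly
                     * for DMA channels 2-7.
                     */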
3148                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150                break;
3151
3152        case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3153                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3154                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157                break;
3158
3159        case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3160                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3161                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164                break;
3165
3166        case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3167                dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3168                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171                break;
3172
3173        case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3174                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3175                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178                break;
3179
3180        case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3181                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3182                dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183                q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184                db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185                break;
3186
3187        case GAUDI_QUEUE_ID_CPU_PQ:
3188                if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3189                        db_reg_offset = mmCPU_IF_PF_PQ_PI;
3190                else
3191                        invalid_queue = true;
3192                break;
3193
3194        case GAUDI_QUEUE_ID_MME_0_0:
3195                db_reg_offset = mmMME2_QM_PQ_PI_0;
3196                break;
3197
3198        case GAUDI_QUEUE_ID_MME_0_1:
3199                db_reg_offset = mmMME2_QM_PQ_PI_1;
3200                break;
3201
3202        case GAUDI_QUEUE_ID_MME_0_2:
3203                db_reg_offset = mmMME2_QM_PQ_PI_2;
3204                break;
3205
3206        case GAUDI_QUEUE_ID_MME_0_3:
3207                db_reg_offset = mmMME2_QM_PQ_PI_3;
3208                break;
3209
3210        case GAUDI_QUEUE_ID_MME_1_0:
3211                db_reg_offset = mmMME0_QM_PQ_PI_0;
3212                break;
3213
3214        case GAUDI_QUEUE_ID_MME_1_1:
3215                db_reg_offset = mmMME0_QM_PQ_PI_1;
3216                break;
3217
3218        case GAUDI_QUEUE_ID_MME_1_2:
3219                db_reg_offset = mmMME0_QM_PQ_PI_2;
3220                break;
3221
3222        case GAUDI_QUEUE_ID_MME_1_3:
3223                db_reg_offset = mmMME0_QM_PQ_PI_3;
3224                break;
3225
3226        case GAUDI_QUEUE_ID_TPC_0_0:
3227                db_reg_offset = mmTPC0_QM_PQ_PI_0;
3228                break;
3229
3230        case GAUDI_QUEUE_ID_TPC_0_1:
3231                db_reg_offset = mmTPC0_QM_PQ_PI_1;
3232                break;
3233
3234        case GAUDI_QUEUE_ID_TPC_0_2:
3235                db_reg_offset = mmTPC0_QM_PQ_PI_2;
3236                break;
3237
3238        case GAUDI_QUEUE_ID_TPC_0_3:
3239                db_reg_offset = mmTPC0_QM_PQ_PI_3;
3240                break;
3241
3242        case GAUDI_QUEUE_ID_TPC_1_0:
3243                db_reg_offset = mmTPC1_QM_PQ_PI_0;
3244                break;
3245
3246        case GAUDI_QUEUE_ID_TPC_1_1:
3247                db_reg_offset = mmTPC1_QM_PQ_PI_1;
3248                break;
3249
3250        case GAUDI_QUEUE_ID_TPC_1_2:
3251                db_reg_offset = mmTPC1_QM_PQ_PI_2;
3252                break;
3253
3254        case GAUDI_QUEUE_ID_TPC_1_3:
3255                db_reg_offset = mmTPC1_QM_PQ_PI_3;
3256                break;
3257
3258        case GAUDI_QUEUE_ID_TPC_2_0:
3259                db_reg_offset = mmTPC2_QM_PQ_PI_0;
3260                break;
3261
3262        case GAUDI_QUEUE_ID_TPC_2_1:
3263                db_reg_offset = mmTPC2_QM_PQ_PI_1;
3264                break;
3265
3266        case GAUDI_QUEUE_ID_TPC_2_2:
3267                db_reg_offset = mmTPC2_QM_PQ_PI_2;
3268                break;
3269
3270        case GAUDI_QUEUE_ID_TPC_2_3:
3271                db_reg_offset = mmTPC2_QM_PQ_PI_3;
3272                break;
3273
3274        case GAUDI_QUEUE_ID_TPC_3_0:
3275                db_reg_offset = mmTPC3_QM_PQ_PI_0;
3276                break;
3277
3278        case GAUDI_QUEUE_ID_TPC_3_1:
3279                db_reg_offset = mmTPC3_QM_PQ_PI_1;
3280                break;
3281
3282        case GAUDI_QUEUE_ID_TPC_3_2:
3283                db_reg_offset = mmTPC3_QM_PQ_PI_2;
3284                break;
3285
3286        case GAUDI_QUEUE_ID_TPC_3_3:
3287                db_reg_offset = mmTPC3_QM_PQ_PI_3;
3288                break;
3289
3290        case GAUDI_QUEUE_ID_TPC_4_0:
3291                db_reg_offset = mmTPC4_QM_PQ_PI_0;
3292                break;
3293
3294        case GAUDI_QUEUE_ID_TPC_4_1:
3295                db_reg_offset = mmTPC4_QM_PQ_PI_1;
3296                break;
3297
3298        case GAUDI_QUEUE_ID_TPC_4_2:
3299                db_reg_offset = mmTPC4_QM_PQ_PI_2;
3300                break;
3301
3302        case GAUDI_QUEUE_ID_TPC_4_3:
3303                db_reg_offset = mmTPC4_QM_PQ_PI_3;
3304                break;
3305
3306        case GAUDI_QUEUE_ID_TPC_5_0:
3307                db_reg_offset = mmTPC5_QM_PQ_PI_0;
3308                break;
3309
3310        case GAUDI_QUEUE_ID_TPC_5_1:
3311                db_reg_offset = mmTPC5_QM_PQ_PI_1;
3312                break;
3313
3314        case GAUDI_QUEUE_ID_TPC_5_2:
3315                db_reg_offset = mmTPC5_QM_PQ_PI_2;
3316                break;
3317
3318        case GAUDI_QUEUE_ID_TPC_5_3:
3319                db_reg_offset = mmTPC5_QM_PQ_PI_3;
3320                break;
3321
3322        case GAUDI_QUEUE_ID_TPC_6_0:
3323                db_reg_offset = mmTPC6_QM_PQ_PI_0;
3324                break;
3325
3326        case GAUDI_QUEUE_ID_TPC_6_1:
3327                db_reg_offset = mmTPC6_QM_PQ_PI_1;
3328                break;
3329
3330        case GAUDI_QUEUE_ID_TPC_6_2:
3331                db_reg_offset = mmTPC6_QM_PQ_PI_2;
3332                break;
3333
3334        case GAUDI_QUEUE_ID_TPC_6_3:
3335                db_reg_offset = mmTPC6_QM_PQ_PI_3;
3336                break;
3337
3338        case GAUDI_QUEUE_ID_TPC_7_0:
3339                db_reg_offset = mmTPC7_QM_PQ_PI_0;
3340                break;
3341
3342        case GAUDI_QUEUE_ID_TPC_7_1:
3343                db_reg_offset = mmTPC7_QM_PQ_PI_1;
3344                break;
3345
3346        case GAUDI_QUEUE_ID_TPC_7_2:
3347                db_reg_offset = mmTPC7_QM_PQ_PI_2;
3348                break;
3349
3350        case GAUDI_QUEUE_ID_TPC_7_3:
3351                db_reg_offset = mmTPC7_QM_PQ_PI_3;
3352                break;
3353
3354        default:
3355                invalid_queue = true;
3356        }
3357
3358        if (invalid_queue) {
3359                /* Should never get here */
3360                dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3361                        hw_queue_id);
3362                return;
3363        }
3364
3365        db_value = pi;
3366
3367        /* ring the doorbell */
3368        WREG32(db_reg_offset, db_value);
3369
3370        if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3371                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3372                                GAUDI_EVENT_PI_UPDATE);
3373}
3374
3375static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3376                                struct hl_bd *bd)
3377{
3378        __le64 *pbd = (__le64 *) bd;
3379
3380        /* The QMANs are on host memory so a simple copy suffices */
3381        pqe[0] = pbd[0];
3382        pqe[1] = pbd[1];
3383}
3384
3385static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3386                                        dma_addr_t *dma_handle, gfp_t flags)
3387{
3388        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3389                                                dma_handle, flags);
3390
3391        /* Shift to the device's base physical address of host memory */
3392        if (kernel_addr)
3393                *dma_handle += HOST_PHYS_BASE;
3394
3395        return kernel_addr;
3396}
3397
3398static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3399                void *cpu_addr, dma_addr_t dma_handle)
3400{
3401        /* Cancel the device's base physical address of host memory */
3402        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3403
3404        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3405}
3406
3407static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3408                                u32 queue_id, dma_addr_t *dma_handle,
3409                                u16 *queue_len)
3410{
3411        struct gaudi_device *gaudi = hdev->asic_specific;
3412        struct gaudi_internal_qman_info *q;
3413
3414        if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3415                        gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3416                dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3417                return NULL;
3418        }
3419
3420        q = &gaudi->internal_qmans[queue_id];
3421        *dma_handle = q->pq_dma_addr;
3422        *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3423
3424        return q->pq_kernel_addr;
3425}
3426
3427static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3428                                u16 len, u32 timeout, long *result)
3429{
3430        struct gaudi_device *gaudi = hdev->asic_specific;
3431
3432        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3433                if (result)
3434                        *result = 0;
3435                return 0;
3436        }
3437
3438        if (!timeout)
3439                timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3440
3441        return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3442                                                timeout, result);
3443}
3444
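    /*
     * Queue sanity test: submit a single MSG_PROT packet that writes a known
     * fence value to a scratch buffer in host memory, then poll that buffer
     * until the value shows up or the timeout expires.
     */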
3445static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3446{
3447        struct packet_msg_prot *fence_pkt;
3448        dma_addr_t pkt_dma_addr;
3449        u32 fence_val, tmp, timeout_usec;
3450        dma_addr_t fence_dma_addr;
3451        u32 *fence_ptr;
3452        int rc;
3453
3454        if (hdev->pldm)
3455                timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3456        else
3457                timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3458
3459        fence_val = GAUDI_QMAN0_FENCE_VAL;
3460
3461        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3462                                                        &fence_dma_addr);
3463        if (!fence_ptr) {
3464                dev_err(hdev->dev,
3465                        "Failed to allocate memory for H/W queue %d testing\n",
3466                        hw_queue_id);
3467                return -ENOMEM;
3468        }
3469
3470        *fence_ptr = 0;
3471
3472        fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3473                                        sizeof(struct packet_msg_prot),
3474                                        GFP_KERNEL, &pkt_dma_addr);
3475        if (!fence_pkt) {
3476                dev_err(hdev->dev,
3477                        "Failed to allocate packet for H/W queue %d testing\n",
3478                        hw_queue_id);
3479                rc = -ENOMEM;
3480                goto free_fence_ptr;
3481        }
3482
3483        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3484        tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3485        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3486
3487        fence_pkt->ctl = cpu_to_le32(tmp);
3488        fence_pkt->value = cpu_to_le32(fence_val);
3489        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3490
3491        rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3492                                        sizeof(struct packet_msg_prot),
3493                                        pkt_dma_addr);
3494        if (rc) {
3495                dev_err(hdev->dev,
3496                        "Failed to send fence packet to H/W queue %d\n",
3497                        hw_queue_id);
3498                goto free_pkt;
3499        }
3500
3501        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3502                                        1000, timeout_usec, true);
3503
3504        hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3505
3506        if (rc == -ETIMEDOUT) {
3507                dev_err(hdev->dev,
3508                        "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3509                        hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3510                rc = -EIO;
3511        }
3512
3513free_pkt:
3514        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3515                                        pkt_dma_addr);
3516free_fence_ptr:
3517        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3518                                        fence_dma_addr);
3519        return rc;
3520}
3521
3522static int gaudi_test_cpu_queue(struct hl_device *hdev)
3523{
3524        struct gaudi_device *gaudi = hdev->asic_specific;
3525
3526        /*
3527         * check capability here as send_cpu_message() won't update the result
3528         * value if no capability
3529         */
3530        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3531                return 0;
3532
3533        return hl_fw_test_cpu_queue(hdev);
3534}
3535
3536static int gaudi_test_queues(struct hl_device *hdev)
3537{
3538        int i, rc, ret_val = 0;
3539
3540        for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3541                if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3542                        rc = gaudi_test_queue(hdev, i);
3543                        if (rc)
3544                                ret_val = -EINVAL;
3545                }
3546        }
3547
3548        rc = gaudi_test_cpu_queue(hdev);
3549        if (rc)
3550                ret_val = -EINVAL;
3551
3552        return ret_val;
3553}
3554
3555static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3556                gfp_t mem_flags, dma_addr_t *dma_handle)
3557{
3558        void *kernel_addr;
3559
3560        if (size > GAUDI_DMA_POOL_BLK_SIZE)
3561                return NULL;
3562
3563        kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3564
3565        /* Shift to the device's base physical address of host memory */
3566        if (kernel_addr)
3567                *dma_handle += HOST_PHYS_BASE;
3568
3569        return kernel_addr;
3570}
3571
3572static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3573                        dma_addr_t dma_addr)
3574{
3575        /* Cancel the device's base physical address of host memory */
3576        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3577
3578        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3579}
3580
3581static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3582                                        size_t size, dma_addr_t *dma_handle)
3583{
3584        return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3585}
3586
3587static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3588                                                size_t size, void *vaddr)
3589{
3590        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3591}
3592
3593static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3594                        int nents, enum dma_data_direction dir)
3595{
3596        struct scatterlist *sg;
3597        int i;
3598
3599        if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3600                return -ENOMEM;
3601
3602        /* Shift to the device's base physical address of host memory */
3603        for_each_sg(sgl, sg, nents, i)
3604                sg->dma_address += HOST_PHYS_BASE;
3605
3606        return 0;
3607}
3608
3609static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3610                        int nents, enum dma_data_direction dir)
3611{
3612        struct scatterlist *sg;
3613        int i;
3614
3615        /* Cancel the device's base physical address of host memory */
3616        for_each_sg(sgl, sg, nents, i)
3617                sg->dma_address -= HOST_PHYS_BASE;
3618
3619        dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3620}
3621
3622static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3623                                        struct sg_table *sgt)
3624{
3625        struct scatterlist *sg, *sg_next_iter;
3626        u32 count, dma_desc_cnt;
3627        u64 len, len_next;
3628        dma_addr_t addr, addr_next;
3629
3630        dma_desc_cnt = 0;
3631
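            /*
             * Count how many LIN_DMA descriptors are needed: SG entries that
             * are physically contiguous and whose combined length fits in
             * DMA_MAX_TRANSFER_SIZE are merged into a single descriptor.
             */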
3632        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3633
3634                len = sg_dma_len(sg);
3635                addr = sg_dma_address(sg);
3636
3637                if (len == 0)
3638                        break;
3639
3640                while ((count + 1) < sgt->nents) {
3641                        sg_next_iter = sg_next(sg);
3642                        len_next = sg_dma_len(sg_next_iter);
3643                        addr_next = sg_dma_address(sg_next_iter);
3644
3645                        if (len_next == 0)
3646                                break;
3647
3648                        if ((addr + len == addr_next) &&
3649                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3650                                len += len_next;
3651                                count++;
3652                                sg = sg_next_iter;
3653                        } else {
3654                                break;
3655                        }
3656                }
3657
3658                dma_desc_cnt++;
3659        }
3660
3661        return dma_desc_cnt * sizeof(struct packet_lin_dma);
3662}
3663
3664static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3665                                struct hl_cs_parser *parser,
3666                                struct packet_lin_dma *user_dma_pkt,
3667                                u64 addr, enum dma_data_direction dir)
3668{
3669        struct hl_userptr *userptr;
3670        int rc;
3671
3672        if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3673                        parser->job_userptr_list, &userptr))
3674                goto already_pinned;
3675
3676        userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3677        if (!userptr)
3678                return -ENOMEM;
3679
3680        rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3681                                userptr);
3682        if (rc)
3683                goto free_userptr;
3684
3685        list_add_tail(&userptr->job_node, parser->job_userptr_list);
3686
3687        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3688                                        userptr->sgt->nents, dir);
3689        if (rc) {
3690                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3691                goto unpin_memory;
3692        }
3693
3694        userptr->dma_mapped = true;
3695        userptr->dir = dir;
3696
3697already_pinned:
3698        parser->patched_cb_size +=
3699                        gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3700
3701        return 0;
3702
3703unpin_memory:
3704        hl_unpin_host_memory(hdev, userptr);
3705free_userptr:
3706        kfree(userptr);
3707        return rc;
3708}
3709
3710static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3711                                struct hl_cs_parser *parser,
3712                                struct packet_lin_dma *user_dma_pkt,
3713                                bool src_in_host)
3714{
3715        enum dma_data_direction dir;
3716        bool skip_host_mem_pin = false, user_memset;
3717        u64 addr;
3718        int rc = 0;
3719
3720        user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3721                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3722                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3723
3724        if (src_in_host) {
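                    /*
                     * For a memset the "source" is an immediate value rather
                     * than a host buffer, so there is nothing to pin.
                     */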
3725                if (user_memset)
3726                        skip_host_mem_pin = true;
3727
3728                dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3729                dir = DMA_TO_DEVICE;
3730                addr = le64_to_cpu(user_dma_pkt->src_addr);
3731        } else {
3732                dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3733                dir = DMA_FROM_DEVICE;
3734                addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3735                                GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3736                                GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3737        }
3738
3739        if (skip_host_mem_pin)
3740                parser->patched_cb_size += sizeof(*user_dma_pkt);
3741        else
3742                rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3743                                                addr, dir);
3744
3745        return rc;
3746}
3747
3748static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3749                                struct hl_cs_parser *parser,
3750                                struct packet_lin_dma *user_dma_pkt)
3751{
3752        bool src_in_host = false;
3753        u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3754                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3755                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3756
3757        dev_dbg(hdev->dev, "DMA packet details:\n");
3758        dev_dbg(hdev->dev, "source == 0x%llx\n",
3759                                le64_to_cpu(user_dma_pkt->src_addr));
3760        dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3761        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3762
3763        /*
3764         * Special handling for DMA with size 0. Bypass all validations
3765         * because no transactions will be done except for WR_COMP, which
3766         * is not a security issue
3767         */
3768        if (!le32_to_cpu(user_dma_pkt->tsize)) {
3769                parser->patched_cb_size += sizeof(*user_dma_pkt);
3770                return 0;
3771        }
3772
3773        if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3774                src_in_host = true;
3775
3776        return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3777                                                src_in_host);
3778}
3779
3780static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3781                                        struct hl_cs_parser *parser,
3782                                        struct packet_load_and_exe *user_pkt)
3783{
3784        u32 cfg;
3785
3786        cfg = le32_to_cpu(user_pkt->cfg);
3787
3788        if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3789                dev_err(hdev->dev,
3790                        "User not allowed to use Load and Execute\n");
3791                return -EPERM;
3792        }
3793
3794        parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3795
3796        return 0;
3797}
3798
3799static int gaudi_validate_cb(struct hl_device *hdev,
3800                        struct hl_cs_parser *parser, bool is_mmu)
3801{
3802        u32 cb_parsed_length = 0;
3803        int rc = 0;
3804
3805        parser->patched_cb_size = 0;
3806
3807        /* user_cb_size is more than 0 so the loop will always execute */
3808        while (cb_parsed_length < parser->user_cb_size) {
3809                enum packet_id pkt_id;
3810                u16 pkt_size;
3811                struct gaudi_packet *user_pkt;
3812
3813                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3814
3815                pkt_id = (enum packet_id) (
3816                                (le64_to_cpu(user_pkt->header) &
3817                                PACKET_HEADER_PACKET_ID_MASK) >>
3818                                        PACKET_HEADER_PACKET_ID_SHIFT);
3819
3820                if (!validate_packet_id(pkt_id)) {
3821                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3822                        rc = -EINVAL;
3823                        break;
3824                }
3825
3826                pkt_size = gaudi_packet_sizes[pkt_id];
3827                cb_parsed_length += pkt_size;
3828                if (cb_parsed_length > parser->user_cb_size) {
3829                        dev_err(hdev->dev,
3830                                "packet 0x%x is out of CB boundary\n", pkt_id);
3831                        rc = -EINVAL;
3832                        break;
3833                }
3834
3835                switch (pkt_id) {
3836                case PACKET_MSG_PROT:
3837                        dev_err(hdev->dev,
3838                                "User not allowed to use MSG_PROT\n");
3839                        rc = -EPERM;
3840                        break;
3841
3842                case PACKET_CP_DMA:
3843                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3844                        rc = -EPERM;
3845                        break;
3846
3847                case PACKET_STOP:
3848                        dev_err(hdev->dev, "User not allowed to use STOP\n");
3849                        rc = -EPERM;
3850                        break;
3851
3852                case PACKET_WREG_BULK:
3853                        dev_err(hdev->dev,
3854                                "User not allowed to use WREG_BULK\n");
3855                        rc = -EPERM;
3856                        break;
3857
3858                case PACKET_LOAD_AND_EXE:
3859                        rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3860                                (struct packet_load_and_exe *) user_pkt);
3861                        break;
3862
3863                case PACKET_LIN_DMA:
3864                        parser->contains_dma_pkt = true;
3865                        if (is_mmu)
3866                                parser->patched_cb_size += pkt_size;
3867                        else
3868                                rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3869                                        (struct packet_lin_dma *) user_pkt);
3870                        break;
3871
3872                case PACKET_WREG_32:
3873                case PACKET_MSG_LONG:
3874                case PACKET_MSG_SHORT:
3875                case PACKET_REPEAT:
3876                case PACKET_FENCE:
3877                case PACKET_NOP:
3878                case PACKET_ARB_POINT:
3879                        parser->patched_cb_size += pkt_size;
3880                        break;
3881
3882                default:
3883                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3884                                pkt_id);
3885                        rc = -EINVAL;
3886                        break;
3887                }
3888
3889                if (rc)
3890                        break;
3891        }
3892
3893        /*
3894         * The new CB should have space at the end for two MSG_PROT packets:
3895         * 1. A packet that will act as a completion packet
3896         * 2. A packet that will generate MSI-X interrupt
3897         */
3898        parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3899
3900        return rc;
3901}
3902
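    /*
     * Expand a single user LIN_DMA packet into one descriptor per (merged) SG
     * entry of the pinned host memory, replacing the user's host address with
     * the DMA addresses obtained when the memory was pinned.
     */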
3903static int gaudi_patch_dma_packet(struct hl_device *hdev,
3904                                struct hl_cs_parser *parser,
3905                                struct packet_lin_dma *user_dma_pkt,
3906                                struct packet_lin_dma *new_dma_pkt,
3907                                u32 *new_dma_pkt_size)
3908{
3909        struct hl_userptr *userptr;
3910        struct scatterlist *sg, *sg_next_iter;
3911        u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3912        u64 len, len_next;
3913        dma_addr_t dma_addr, dma_addr_next;
3914        u64 device_memory_addr, addr;
3915        enum dma_data_direction dir;
3916        struct sg_table *sgt;
3917        bool src_in_host = false;
3918        bool skip_host_mem_pin = false;
3919        bool user_memset;
3920
3921        ctl = le32_to_cpu(user_dma_pkt->ctl);
3922
3923        if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3924                src_in_host = true;
3925
3926        user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3927                        GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3928
3929        if (src_in_host) {
3930                addr = le64_to_cpu(user_dma_pkt->src_addr);
3931                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3932                dir = DMA_TO_DEVICE;
3933                if (user_memset)
3934                        skip_host_mem_pin = true;
3935        } else {
3936                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3937                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3938                dir = DMA_FROM_DEVICE;
3939        }
3940
3941        if ((!skip_host_mem_pin) &&
3942                (!hl_userptr_is_pinned(hdev, addr,
3943                                        le32_to_cpu(user_dma_pkt->tsize),
3944                                        parser->job_userptr_list, &userptr))) {
3945                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3946                                addr, le32_to_cpu(user_dma_pkt->tsize));
3947                return -EFAULT;
3948        }
3949
3950        if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3951                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3952                *new_dma_pkt_size = sizeof(*user_dma_pkt);
3953                return 0;
3954        }
3955
3956        user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3957
3958        sgt = userptr->sgt;
3959        dma_desc_cnt = 0;
3960
3961        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3962                len = sg_dma_len(sg);
3963                dma_addr = sg_dma_address(sg);
3964
3965                if (len == 0)
3966                        break;
3967
3968                while ((count + 1) < sgt->nents) {
3969                        sg_next_iter = sg_next(sg);
3970                        len_next = sg_dma_len(sg_next_iter);
3971                        dma_addr_next = sg_dma_address(sg_next_iter);
3972
3973                        if (len_next == 0)
3974                                break;
3975
3976                        if ((dma_addr + len == dma_addr_next) &&
3977                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3978                                len += len_next;
3979                                count++;
3980                                sg = sg_next_iter;
3981                        } else {
3982                                break;
3983                        }
3984                }
3985
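                    /*
                     * Only the first descriptor keeps the engine-barrier (EB)
                     * bit, and write-completion is cleared on every descriptor
                     * here; it is restored on the last one after the loop so
                     * that only a single completion write is generated.
                     */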
3986                ctl = le32_to_cpu(user_dma_pkt->ctl);
3987                if (likely(dma_desc_cnt))
3988                        ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3989                ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3990                new_dma_pkt->ctl = cpu_to_le32(ctl);
3991                new_dma_pkt->tsize = cpu_to_le32(len);
3992
3993                if (dir == DMA_TO_DEVICE) {
3994                        new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3995                        new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3996                } else {
3997                        new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3998                        new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3999                }
4000
4001                if (!user_memset)
4002                        device_memory_addr += len;
4003                dma_desc_cnt++;
4004                new_dma_pkt++;
4005        }
4006
4007        if (!dma_desc_cnt) {
4008                dev_err(hdev->dev,
4009                        "Error of 0 SG entries when patching DMA packet\n");
4010                return -EFAULT;
4011        }
4012
4013        /* Fix the last dma packet - wrcomp must be as user set it */
4014        new_dma_pkt--;
4015        new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4016
4017        *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4018
4019        return 0;
4020}
4021
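    /*
     * Walk the user CB packet by packet and build the patched CB: LIN_DMA
     * packets are rewritten by gaudi_patch_dma_packet(), privileged packets
     * (MSG_PROT, CP_DMA, STOP) are rejected and every other packet type is
     * copied as-is.
     */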
4022static int gaudi_patch_cb(struct hl_device *hdev,
4023                                struct hl_cs_parser *parser)
4024{
4025        u32 cb_parsed_length = 0;
4026        u32 cb_patched_cur_length = 0;
4027        int rc = 0;
4028
4029        /* user_cb_size is more than 0 so the loop will always be executed */
4030        while (cb_parsed_length < parser->user_cb_size) {
4031                enum packet_id pkt_id;
4032                u16 pkt_size;
4033                u32 new_pkt_size = 0;
4034                struct gaudi_packet *user_pkt, *kernel_pkt;
4035
4036                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4037                kernel_pkt = parser->patched_cb->kernel_address +
4038                                        cb_patched_cur_length;
4039
4040                pkt_id = (enum packet_id) (
4041                                (le64_to_cpu(user_pkt->header) &
4042                                PACKET_HEADER_PACKET_ID_MASK) >>
4043                                        PACKET_HEADER_PACKET_ID_SHIFT);
4044
4045                if (!validate_packet_id(pkt_id)) {
4046                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4047                        rc = -EINVAL;
4048                        break;
4049                }
4050
4051                pkt_size = gaudi_packet_sizes[pkt_id];
4052                cb_parsed_length += pkt_size;
4053                if (cb_parsed_length > parser->user_cb_size) {
4054                        dev_err(hdev->dev,
4055                                "packet 0x%x is out of CB boundary\n", pkt_id);
4056                        rc = -EINVAL;
4057                        break;
4058                }
4059
4060                switch (pkt_id) {
4061                case PACKET_LIN_DMA:
4062                        rc = gaudi_patch_dma_packet(hdev, parser,
4063                                        (struct packet_lin_dma *) user_pkt,
4064                                        (struct packet_lin_dma *) kernel_pkt,
4065                                        &new_pkt_size);
4066                        cb_patched_cur_length += new_pkt_size;
4067                        break;
4068
4069                case PACKET_MSG_PROT:
4070                        dev_err(hdev->dev,
4071                                "User not allowed to use MSG_PROT\n");
4072                        rc = -EPERM;
4073                        break;
4074
4075                case PACKET_CP_DMA:
4076                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4077                        rc = -EPERM;
4078                        break;
4079
4080                case PACKET_STOP:
4081                        dev_err(hdev->dev, "User not allowed to use STOP\n");
4082                        rc = -EPERM;
4083                        break;
4084
4085                case PACKET_WREG_32:
4086                case PACKET_WREG_BULK:
4087                case PACKET_MSG_LONG:
4088                case PACKET_MSG_SHORT:
4089                case PACKET_REPEAT:
4090                case PACKET_FENCE:
4091                case PACKET_NOP:
4092                case PACKET_ARB_POINT:
4093                case PACKET_LOAD_AND_EXE:
4094                        memcpy(kernel_pkt, user_pkt, pkt_size);
4095                        cb_patched_cur_length += pkt_size;
4096                        break;
4097
4098                default:
4099                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4100                                pkt_id);
4101                        rc = -EINVAL;
4102                        break;
4103                }
4104
4105                if (rc)
4106                        break;
4107        }
4108
4109        return rc;
4110}
4111
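    /*
     * MMU-enabled parsing path: copy the user CB into a kernel-allocated
     * patched CB that has room for the two trailing MSG_PROT packets, then
     * validate the copy in place (no per-packet patching on this path).
     */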
4112static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4113                struct hl_cs_parser *parser)
4114{
4115        u64 patched_cb_handle;
4116        u32 patched_cb_size;
4117        struct hl_cb *user_cb;
4118        int rc;
4119
4120        /*
4121         * The new CB should have space at the end for two MSG_PROT packets:
4122         * 1. A packet that will act as a completion packet
4123         * 2. A packet that will generate an MSI interrupt
4124         */
4125        parser->patched_cb_size = parser->user_cb_size +
4126                        sizeof(struct packet_msg_prot) * 2;
4127
4128        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4129                                parser->patched_cb_size, false, false,
4130                                &patched_cb_handle);
4131
4132        if (rc) {
4133                dev_err(hdev->dev,
4134                        "Failed to allocate patched CB for DMA CS %d\n",
4135                        rc);
4136                return rc;
4137        }
4138
4139        patched_cb_handle >>= PAGE_SHIFT;
4140        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4141                                (u32) patched_cb_handle);
4142        /* hl_cb_get should never fail here so use kernel WARN */
4143        WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4144                        (u32) patched_cb_handle);
4145        if (!parser->patched_cb) {
4146                rc = -EFAULT;
4147                goto out;
4148        }
4149
4150        /*
4151         * The check that parser->user_cb_size <= parser->user_cb->size was done
4152         * in validate_queue_index().
4153         */
4154        memcpy(parser->patched_cb->kernel_address,
4155                parser->user_cb->kernel_address,
4156                parser->user_cb_size);
4157
4158        patched_cb_size = parser->patched_cb_size;
4159
4160        /* Validate patched CB instead of user CB */
4161        user_cb = parser->user_cb;
4162        parser->user_cb = parser->patched_cb;
4163        rc = gaudi_validate_cb(hdev, parser, true);
4164        parser->user_cb = user_cb;
4165
4166        if (rc) {
4167                hl_cb_put(parser->patched_cb);
4168                goto out;
4169        }
4170
4171        if (patched_cb_size != parser->patched_cb_size) {
4172                dev_err(hdev->dev, "user CB size mismatch\n");
4173                hl_cb_put(parser->patched_cb);
4174                rc = -EINVAL;
4175                goto out;
4176        }
4177
4178out:
4179        /*
4180         * Always call cb destroy here because we still have one reference
4181         * to it from the earlier cb_get. After the job is completed,
4182         * cb_put will release it, but here we want to remove it from the
4183         * idr
4184         */
4185        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4186                                        patched_cb_handle << PAGE_SHIFT);
4187
4188        return rc;
4189}
4190
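    /*
     * No-MMU parsing path: validate the user CB first (this also computes
     * parser->patched_cb_size), then allocate the patched CB and let
     * gaudi_patch_cb() rewrite the DMA packets with pinned host addresses.
     */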
4191static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4192                struct hl_cs_parser *parser)
4193{
4194        u64 patched_cb_handle;
4195        int rc;
4196
4197        rc = gaudi_validate_cb(hdev, parser, false);
4198
4199        if (rc)
4200                goto free_userptr;
4201
4202        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4203                                parser->patched_cb_size, false, false,
4204                                &patched_cb_handle);
4205        if (rc) {
4206                dev_err(hdev->dev,
4207                        "Failed to allocate patched CB for DMA CS %d\n", rc);
4208                goto free_userptr;
4209        }
4210
4211        patched_cb_handle >>= PAGE_SHIFT;
4212        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4213                                (u32) patched_cb_handle);
4214        /* hl_cb_get should never fail here so use kernel WARN */
4215        WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4216                        (u32) patched_cb_handle);
4217        if (!parser->patched_cb) {
4218                rc = -EFAULT;
4219                goto out;
4220        }
4221
4222        rc = gaudi_patch_cb(hdev, parser);
4223
4224        if (rc)
4225                hl_cb_put(parser->patched_cb);
4226
4227out:
4228        /*
4229         * Always call cb destroy here because we still have one reference
4230         * to it from the earlier cb_get. After the job is completed,
4231         * cb_put will release it, but here we want to remove it from the
4232         * idr
4233         */
4234        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4235                                patched_cb_handle << PAGE_SHIFT);
4236
4237free_userptr:
4238        if (rc)
4239                hl_userptr_delete_list(hdev, parser->job_userptr_list);
4240        return rc;
4241}
4242
4243static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4244                                        struct hl_cs_parser *parser)
4245{
4246        struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4247
4248        /* For internal queue jobs just check if CB address is valid */
4249        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4250                                        parser->user_cb_size,
4251                                        asic_prop->sram_user_base_address,
4252                                        asic_prop->sram_end_address))
4253                return 0;
4254
4255        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4256                                        parser->user_cb_size,
4257                                        asic_prop->dram_user_base_address,
4258                                        asic_prop->dram_end_address))
4259                return 0;
4260
4261        /* PMMU and HPMMU addresses are equal, check only one of them */
4262        if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4263                                        parser->user_cb_size,
4264                                        asic_prop->pmmu.start_addr,
4265                                        asic_prop->pmmu.end_addr))
4266                return 0;
4267
4268        dev_err(hdev->dev,
4269                "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4270                parser->user_cb, parser->user_cb_size);
4271
4272        return -EFAULT;
4273}
4274
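    /*
     * Top-level CS parser: internal queues only get a CB address check, while
     * external queues are parsed with or without the MMU according to the
     * current HW capability mask.
     */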
4275static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4276{
4277        struct gaudi_device *gaudi = hdev->asic_specific;
4278
4279        if (parser->queue_type == QUEUE_TYPE_INT)
4280                return gaudi_parse_cb_no_ext_queue(hdev, parser);
4281
4282        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4283                return gaudi_parse_cb_mmu(hdev, parser);
4284        else
4285                return gaudi_parse_cb_no_mmu(hdev, parser);
4286}
4287
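    /*
     * Fill the two MSG_PROT packets at the end of the CB: the first writes
     * cq_val to cq_addr as the completion indication, the second writes to
     * the PCIe MSI registers to raise the interrupt (vector 0 unless
     * multi-MSI mode is enabled).
     */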
4288static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4289                                        void *kernel_address, u32 len,
4290                                        u64 cq_addr, u32 cq_val, u32 msi_vec,
4291                                        bool eb)
4292{
4293        struct gaudi_device *gaudi = hdev->asic_specific;
4294        struct packet_msg_prot *cq_pkt;
4295        u32 tmp;
4296
4297        cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4298
4299        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4300        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4301
4302        if (eb)
4303                tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4304
4305        cq_pkt->ctl = cpu_to_le32(tmp);
4306        cq_pkt->value = cpu_to_le32(cq_val);
4307        cq_pkt->addr = cpu_to_le64(cq_addr);
4308
4309        cq_pkt++;
4310
4311        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4312        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4313        cq_pkt->ctl = cpu_to_le32(tmp);
4314        cq_pkt->value = cpu_to_le32(1);
4315
4316        if (!gaudi->multi_msi_mode)
4317                msi_vec = 0;
4318
4319        cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4320}
4321
4322static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4323{
4324        WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4325}
4326
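    /*
     * Fill a device memory region with a 64-bit pattern by building a single
     * memset LIN_DMA packet in a kernel CB and sending it as a driver job on
     * the DMA channel 0 QMAN.
     */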
4327static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4328                                        u32 size, u64 val)
4329{
4330        struct packet_lin_dma *lin_dma_pkt;
4331        struct hl_cs_job *job;
4332        u32 cb_size, ctl, err_cause;
4333        struct hl_cb *cb;
4334        int rc;
4335
4336        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4337        if (!cb)
4338                return -EFAULT;
4339
4340        lin_dma_pkt = cb->kernel_address;
4341        memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4342        cb_size = sizeof(*lin_dma_pkt);
4343
4344        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4345        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4346        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4347        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4348        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4349
4350        lin_dma_pkt->ctl = cpu_to_le32(ctl);
4351        lin_dma_pkt->src_addr = cpu_to_le64(val);
4352        lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4353        lin_dma_pkt->tsize = cpu_to_le32(size);
4354
4355        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4356        if (!job) {
4357                dev_err(hdev->dev, "Failed to allocate a new job\n");
4358                rc = -ENOMEM;
4359                goto release_cb;
4360        }
4361
4362        /* Verify DMA is OK */
4363        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4364        if (err_cause && !hdev->init_done) {
4365                dev_dbg(hdev->dev,
4366                        "Clearing DMA0 engine from errors (cause 0x%x)\n",
4367                        err_cause);
4368                WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4369        }
4370
4371        job->id = 0;
4372        job->user_cb = cb;
4373        job->user_cb->cs_cnt++;
4374        job->user_cb_size = cb_size;
4375        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4376        job->patched_cb = job->user_cb;
4377        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4378
4379        hl_debugfs_add_job(hdev, job);
4380
4381        rc = gaudi_send_job_on_qman0(hdev, job);
4382        hl_debugfs_remove_job(hdev, job);
4383        kfree(job);
4384        cb->cs_cnt--;
4385
4386        /* Verify DMA is OK */
4387        err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4388        if (err_cause) {
4389                dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4390                rc = -EIO;
4391                if (!hdev->init_done) {
4392                        dev_dbg(hdev->dev,
4393                                "Clearing DMA0 engine from errors (cause 0x%x)\n",
4394                                err_cause);
4395                        WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4396                }
4397        }
4398
4399release_cb:
4400        hl_cb_put(cb);
4401        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4402
4403        return rc;
4404}
4405
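    /*
     * Clear the sync manager objects and monitor status registers that user
     * space may have used: all of the E_N, E_S and W_N blocks, and the W_S
     * block only from the first user-available SOB/monitor onwards.
     */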
4406static void gaudi_restore_sm_registers(struct hl_device *hdev)
4407{
4408        int i;
4409
4410        for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4411                WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4412                WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4413                WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4414        }
4415
4416        for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4417                WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4418                WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4419                WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4420        }
4421
4422        i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4423
4424        for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4425                WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4426
4427        i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4428
4429        for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4430                WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4431}
4432
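    /*
     * Re-program each DMA core's write-completion address and data to point
     * at its dedicated sync object, and restore WR_AWUSER_31_11 for DMAs 2-7
     * since user space may modify it for SRAM reduction.
     */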
4433static void gaudi_restore_dma_registers(struct hl_device *hdev)
4434{
4435        u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4436                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4437        int i;
4438
4439        for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4440                u64 sob_addr = CFG_BASE +
4441                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4442                                (i * sob_delta);
4443                u32 dma_offset = i * DMA_CORE_OFFSET;
4444
4445                WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4446                                lower_32_bits(sob_addr));
4447                WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4448                                upper_32_bits(sob_addr));
4449                WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4450
4451                /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4452                 * modified by the user for SRAM reduction
4453                 */
4454                if (i > 1)
4455                        WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4456                                                                0x00000001);
4457        }
4458}
4459
4460static void gaudi_restore_qm_registers(struct hl_device *hdev)
4461{
4462        u32 qman_offset;
4463        int i;
4464
4465        for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4466                qman_offset = i * DMA_QMAN_OFFSET;
4467                WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4468        }
4469
4470        for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4471                qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4472                WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4473        }
4474
4475        for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4476                qman_offset = i * TPC_QMAN_OFFSET;
4477                WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4478        }
4479}
4480
4481static void gaudi_restore_user_registers(struct hl_device *hdev)
4482{
4483        gaudi_restore_sm_registers(hdev);
4484        gaudi_restore_dma_registers(hdev);
4485        gaudi_restore_qm_registers(hdev);
4486}
4487
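    /*
     * Context switch: scrub the user SRAM region with a known pattern (a much
     * smaller region when running on PLDM), re-assign the new ASID to all
     * engines and restore the user-modifiable registers to their defaults.
     */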
4488static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4489{
4490        struct asic_fixed_properties *prop = &hdev->asic_prop;
4491        u64 addr = prop->sram_user_base_address;
4492        u32 size = hdev->pldm ? 0x10000 :
4493                        (prop->sram_size - SRAM_USER_BASE_OFFSET);
4494        u64 val = 0x7777777777777777ull;
4495        int rc;
4496
4497        rc = gaudi_memset_device_memory(hdev, addr, size, val);
4498        if (rc) {
4499                dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4500                return rc;
4501        }
4502
4503        gaudi_mmu_prepare(hdev, asid);
4504
4505        gaudi_restore_user_registers(hdev);
4506
4507        return 0;
4508}
4509
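    /*
     * Zero the device memory region that holds the MMU page tables and the
     * MMU cache management area. Skipped if the MMU capability was never
     * initialized.
     */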
4510static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4511{
4512        struct asic_fixed_properties *prop = &hdev->asic_prop;
4513        struct gaudi_device *gaudi = hdev->asic_specific;
4514        u64 addr = prop->mmu_pgt_addr;
4515        u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4516
4517        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4518                return 0;
4519
4520        return gaudi_memset_device_memory(hdev, addr, size, 0);
4521}
4522
4523static void gaudi_restore_phase_topology(struct hl_device *hdev)
4524{
4525
4526}
4527
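    /*
     * The debugfs accessors below dispatch on the target address:
     * configuration space is accessed with RREG32/WREG32 (rejected while
     * clock gating is enabled for the relevant blocks), SRAM through its PCI
     * BAR, HBM by temporarily re-positioning the HBM BAR, and host physical
     * memory directly when no IOMMU is present.
     */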
4528static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4529{
4530        struct asic_fixed_properties *prop = &hdev->asic_prop;
4531        struct gaudi_device *gaudi = hdev->asic_specific;
4532        u64 hbm_bar_addr;
4533        int rc = 0;
4534
4535        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4536
4537                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4538                                (hdev->clock_gating_mask &
4539                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4540
4541                        dev_err_ratelimited(hdev->dev,
4542                                "Can't read register - clock gating is enabled!\n");
4543                        rc = -EFAULT;
4544                } else {
4545                        *val = RREG32(addr - CFG_BASE);
4546                }
4547
4548        } else if ((addr >= SRAM_BASE_ADDR) &&
4549                        (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4550                *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4551                                (addr - SRAM_BASE_ADDR));
4552        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4553                u64 bar_base_addr = DRAM_PHYS_BASE +
4554                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4555
4556                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4557                if (hbm_bar_addr != U64_MAX) {
4558                        *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4559                                                (addr - bar_base_addr));
4560
4561                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4562                                                hbm_bar_addr);
4563                }
4564                if (hbm_bar_addr == U64_MAX)
4565                        rc = -EIO;
4566        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4567                *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4568        } else {
4569                rc = -EFAULT;
4570        }
4571
4572        return rc;
4573}
4574
4575static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4576{
4577        struct asic_fixed_properties *prop = &hdev->asic_prop;
4578        struct gaudi_device *gaudi = hdev->asic_specific;
4579        u64 hbm_bar_addr;
4580        int rc = 0;
4581
4582        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4583
4584                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4585                                (hdev->clock_gating_mask &
4586                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4587
4588                        dev_err_ratelimited(hdev->dev,
4589                                "Can't write register - clock gating is enabled!\n");
4590                        rc = -EFAULT;
4591                } else {
4592                        WREG32(addr - CFG_BASE, val);
4593                }
4594
4595        } else if ((addr >= SRAM_BASE_ADDR) &&
4596                        (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4597                writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4598                                        (addr - SRAM_BASE_ADDR));
4599        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4600                u64 bar_base_addr = DRAM_PHYS_BASE +
4601                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4602
4603                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4604                if (hbm_bar_addr != U64_MAX) {
4605                        writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4606                                                (addr - bar_base_addr));
4607
4608                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4609                                                hbm_bar_addr);
4610                }
4611                if (hbm_bar_addr == U64_MAX)
4612                        rc = -EIO;
4613        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4614                *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4615        } else {
4616                rc = -EFAULT;
4617        }
4618
4619        return rc;
4620}
4621
4622static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4623{
4624        struct asic_fixed_properties *prop = &hdev->asic_prop;
4625        struct gaudi_device *gaudi = hdev->asic_specific;
4626        u64 hbm_bar_addr;
4627        int rc = 0;
4628
4629        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4630
4631                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4632                                (hdev->clock_gating_mask &
4633                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4634
4635                        dev_err_ratelimited(hdev->dev,
4636                                "Can't read register - clock gating is enabled!\n");
4637                        rc = -EFAULT;
4638                } else {
4639                        u32 val_l = RREG32(addr - CFG_BASE);
4640                        u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4641
4642                        *val = (((u64) val_h) << 32) | val_l;
4643                }
4644
4645        } else if ((addr >= SRAM_BASE_ADDR) &&
4646                   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4647                *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4648                                (addr - SRAM_BASE_ADDR));
4649        } else if (addr <=
4650                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4651                u64 bar_base_addr = DRAM_PHYS_BASE +
4652                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4653
4654                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4655                if (hbm_bar_addr != U64_MAX) {
4656                        *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4657                                                (addr - bar_base_addr));
4658
4659                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4660                                                hbm_bar_addr);
4661                }
4662                if (hbm_bar_addr == U64_MAX)
4663                        rc = -EIO;
4664        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4665                *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4666        } else {
4667                rc = -EFAULT;
4668        }
4669
4670        return rc;
4671}
4672
4673static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4674{
4675        struct asic_fixed_properties *prop = &hdev->asic_prop;
4676        struct gaudi_device *gaudi = hdev->asic_specific;
4677        u64 hbm_bar_addr;
4678        int rc = 0;
4679
4680        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4681
4682                if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4683                                (hdev->clock_gating_mask &
4684                                                GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4685
4686                        dev_err_ratelimited(hdev->dev,
4687                                "Can't write register - clock gating is enabled!\n");
4688                        rc = -EFAULT;
4689                } else {
4690                        WREG32(addr - CFG_BASE, lower_32_bits(val));
4691                        WREG32(addr + sizeof(u32) - CFG_BASE,
4692                                upper_32_bits(val));
4693                }
4694
4695        } else if ((addr >= SRAM_BASE_ADDR) &&
4696                   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4697                writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4698                                        (addr - SRAM_BASE_ADDR));
4699        } else if (addr <=
4700                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4701                u64 bar_base_addr = DRAM_PHYS_BASE +
4702                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4703
4704                hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4705                if (hbm_bar_addr != U64_MAX) {
4706                        writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4707                                                (addr - bar_base_addr));
4708
4709                        hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4710                                                hbm_bar_addr);
4711                }
4712                if (hbm_bar_addr == U64_MAX)
4713                        rc = -EIO;
4714        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4715                *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4716        } else {
4717                rc = -EFAULT;
4718        }
4719
4720        return rc;
4721}
4722
4723static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4724{
4725        struct gaudi_device *gaudi = hdev->asic_specific;
4726
4727        if (hdev->hard_reset_pending)
4728                return U64_MAX;
4729
4730        return readq(hdev->pcie_bar[HBM_BAR_ID] +
4731                        (addr - gaudi->hbm_bar_cur_addr));
4732}
4733
4734static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4735{
4736        struct gaudi_device *gaudi = hdev->asic_specific;
4737
4738        if (hdev->hard_reset_pending)
4739                return;
4740
4741        writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4742                        (addr - gaudi->hbm_bar_cur_addr));
4743}
4744
4745void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4746{
4747        /* mask to zero the MMBP and ASID bits */
4748        WREG32_AND(reg, ~0x7FF);
4749        WREG32_OR(reg, asid);
4750}
4751
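    /*
     * Assign the given ASID to every engine that masters transactions through
     * the MMU: the DMA QMANs and cores, the TPC QMANs and CFG AXI-user
     * registers, and the MME QMANs, SBAB and ACC_WBC. Clock gating is
     * disabled around the register writes.
     */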
4752static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4753{
4754        struct gaudi_device *gaudi = hdev->asic_specific;
4755
4756        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4757                return;
4758
4759        if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4760                WARN(1, "asid %u is too big\n", asid);
4761                return;
4762        }
4763
4764        mutex_lock(&gaudi->clk_gate_mutex);
4765
4766        hdev->asic_funcs->disable_clock_gating(hdev);
4767
4768        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4769        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4770        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4771        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4772        gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4773
4774        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4775        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4776        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4777        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4778        gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4779
4780        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4781        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4782        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4783        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4784        gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4785
4786        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4787        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4788        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4789        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4790        gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4791
4792        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4793        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4794        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4795        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4796        gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4797
4798        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4799        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4800        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4801        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4802        gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4803
4804        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4805        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4806        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4807        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4808        gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4809
4810        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4811        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4812        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4813        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4814        gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4815
4816        gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4817        gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4818        gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4819        gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4820        gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4821        gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4822        gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4823        gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4824
4825        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4826        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4827        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4828        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4829        gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4830        gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4831        gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4832
4833        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837        gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838        gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4839        gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4840
4841        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845        gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846        gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4847        gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4848
4849        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853        gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854        gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4855        gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4856
4857        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861        gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862        gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4863        gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4864
4865        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869        gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870        gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4871        gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4872
4873        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877        gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878        gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4879        gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4880
4881        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885        gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886        gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4887        gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4888
4889        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893        gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4895        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4896        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4897        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4898        gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4899
4900        gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4901        gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4902        gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4903        gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4904        gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4905        gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4906        gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4907        gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4908        gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4909        gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4910        gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4911        gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4912
4913        hdev->asic_funcs->set_clock_gating(hdev);
4914
4915        mutex_unlock(&gaudi->clk_gate_mutex);
4916}
4917
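    /*
     * Send a driver-internal job on QMAN0: fill in the fence MSG_PROT packet
     * reserved at the end of the CB so it writes a known value to a host
     * buffer, raise the PROT bit of the PCI DMA core for the duration of the
     * job, submit the CB without a completion entry and poll the fence until
     * it is signalled or the timeout expires.
     */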
4918static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4919                struct hl_cs_job *job)
4920{
4921        struct packet_msg_prot *fence_pkt;
4922        u32 *fence_ptr;
4923        dma_addr_t fence_dma_addr;
4924        struct hl_cb *cb;
4925        u32 tmp, timeout, dma_offset;
4926        int rc;
4927
4928        if (hdev->pldm)
4929                timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4930        else
4931                timeout = HL_DEVICE_TIMEOUT_USEC;
4932
4933        if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4934                dev_err_ratelimited(hdev->dev,
4935                        "Can't send driver job on QMAN0 because the device is not idle\n");
4936                return -EBUSY;
4937        }
4938
4939        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4940                                                        &fence_dma_addr);
4941        if (!fence_ptr) {
4942                dev_err(hdev->dev,
4943                        "Failed to allocate fence memory for QMAN0\n");
4944                return -ENOMEM;
4945        }
4946
4947        cb = job->patched_cb;
4948
4949        fence_pkt = cb->kernel_address +
4950                        job->job_cb_size - sizeof(struct packet_msg_prot);
4951
4952        tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4953        tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4954        tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4955
4956        fence_pkt->ctl = cpu_to_le32(tmp);
4957        fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4958        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4959
4960        dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4961
4962        WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4963
4964        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4965                                        job->job_cb_size, cb->bus_address);
4966        if (rc) {
4967                dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4968                goto free_fence_ptr;
4969        }
4970
4971        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4972                                (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4973                                timeout, true);
4974
4975        hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4976
4977        if (rc == -ETIMEDOUT) {
4978                dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4979                goto free_fence_ptr;
4980        }
4981
4982free_fence_ptr:
4983        WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4984                        ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4985
4986        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4987                                        fence_dma_addr);
4988        return rc;
4989}
4990
4991static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4992{
4993        if (event_type >= GAUDI_EVENT_SIZE)
4994                goto event_not_supported;
4995
4996        if (!gaudi_irq_map_table[event_type].valid)
4997                goto event_not_supported;
4998
4999        snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5000
5001        return;
5002
5003event_not_supported:
5004        snprintf(desc, size, "N/A");
5005}
5006
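    /*
     * Each DMA_IF RAZWI initiator ID is shared by two DMA cores, so read both
     * cores' ERR_CAUSE registers and use the HBW read/write error bits to
     * tell which of the pair triggered the event; if the bits do not single
     * one out, report both candidates.
     */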
5007static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5008                                                        u32 x_y, bool is_write)
5009{
5010        u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5011
5012        mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5013                                DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5014
5015        switch (x_y) {
5016        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5017        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5018                dma_id[0] = 0;
5019                dma_id[1] = 2;
5020                break;
5021        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5022        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5023                dma_id[0] = 1;
5024                dma_id[1] = 3;
5025                break;
5026        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5027        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5028                dma_id[0] = 4;
5029                dma_id[1] = 6;
5030                break;
5031        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5032        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5033                dma_id[0] = 5;
5034                dma_id[1] = 7;
5035                break;
5036        default:
5037                goto unknown_initiator;
5038        }
5039
5040        for (i = 0 ; i < 2 ; i++) {
5041                dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5042                err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5043        }
5044
5045        switch (x_y) {
5046        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5047        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5048                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5049                        return "DMA0";
5050                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5051                        return "DMA2";
5052                else
5053                        return "DMA0 or DMA2";
5054        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5055        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5056                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057                        return "DMA1";
5058                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059                        return "DMA3";
5060                else
5061                        return "DMA1 or DMA3";
5062        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5063        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5064                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065                        return "DMA4";
5066                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067                        return "DMA6";
5068                else
5069                        return "DMA4 or DMA6";
5070        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5071        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5072                if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073                        return "DMA5";
5074                else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075                        return "DMA7";
5076                else
5077                        return "DMA5 or DMA7";
5078        }
5079
5080unknown_initiator:
5081        return "unknown initiator";
5082}
5083
5084static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5085                                                        bool is_write)
5086{
5087        u32 val, x_y, axi_id;
5088
5089        val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5090                                RREG32(mmMMU_UP_RAZWI_READ_ID);
5091        x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5092                        (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5093        axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5094                        RAZWI_INITIATOR_AXI_ID_SHIFT);
5095
5096        switch (x_y) {
5097        case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5098                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5099                        return "TPC0";
5100                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5101                        return "NIC0";
5102                break;
5103        case RAZWI_INITIATOR_ID_X_Y_TPC1:
5104                return "TPC1";
5105        case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5106        case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5107                return "MME0";
5108        case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5109        case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5110                return "MME1";
5111        case RAZWI_INITIATOR_ID_X_Y_TPC2:
5112                return "TPC2";
5113        case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5114                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5115                        return "TPC3";
5116                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5117                        return "PCI";
5118                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5119                        return "CPU";
5120                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5121                        return "PSOC";
5122                break;
5123        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5124        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5125        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5126        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5127        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5128        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5129        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5130        case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5131                return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5132        case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5133                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5134                        return "TPC4";
5135                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5136                        return "NIC1";
5137                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5138                        return "NIC2";
5139                break;
5140        case RAZWI_INITIATOR_ID_X_Y_TPC5:
5141                return "TPC5";
5142        case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5143        case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5144                return "MME2";
5145        case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5146        case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5147                return "MME3";
5148        case RAZWI_INITIATOR_ID_X_Y_TPC6:
5149                return "TPC6";
5150        case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5151                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5152                        return "TPC7";
5153                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5154                        return "NIC4";
5155                if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5156                        return "NIC5";
5157                break;
5158        default:
5159                break;
5160        }
5161
5162        dev_err(hdev->dev,
5163                "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5164                val,
5165                (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5166                (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5167                (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5168                        RAZWI_INITIATOR_AXI_ID_MASK);
5169
5170        return "unknown initiator";
5171}
5172
5173static void gaudi_print_razwi_info(struct hl_device *hdev)
5174{
5175        if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5176                dev_err_ratelimited(hdev->dev,
5177                        "RAZWI event caused by illegal write of %s\n",
5178                        gaudi_get_razwi_initiator_name(hdev, true));
5179                WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5180        }
5181
5182        if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5183                dev_err_ratelimited(hdev->dev,
5184                        "RAZWI event caused by illegal read of %s\n",
5185                        gaudi_get_razwi_initiator_name(hdev, false));
5186                WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5187        }
5188}
5189
5190static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5191{
5192        struct gaudi_device *gaudi = hdev->asic_specific;
5193        u64 addr;
5194        u32 val;
5195
5196        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5197                return;
5198
5199        val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5200        if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5201                addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5202                addr <<= 32;
5203                addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5204
5205                dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5206                                        addr);
5207
5208                WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5209        }
5210
5211        val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5212        if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5213                addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5214                addr <<= 32;
5215                addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5216
5217                dev_err_ratelimited(hdev->dev,
5218                                "MMU access error on va 0x%llx\n", addr);
5219
5220                WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5221        }
5222}
5223
5224/*
5225 *  +-------------------+------------------------------------------------------+
5226 *  | Configuration Reg |                     Description                      |
5227 *  |      Address      |                                                      |
5228 *  +-------------------+------------------------------------------------------+
5229 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5230 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5231 *  |                   |0xF34 memory wrappers 63:32                           |
5232 *  |                   |0xF38 memory wrappers 95:64                           |
5233 *  |                   |0xF3C memory wrappers 127:96                          |
5234 *  +-------------------+------------------------------------------------------+
5235 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5236 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5237 *  |                   |0xF44 memory wrappers 63:32                           |
5238 *  |                   |0xF48 memory wrappers 95:64                           |
5239 *  |                   |0xF4C memory wrappers 127:96                          |
5240 *  +-------------------+------------------------------------------------------+
5241 */
5242static int gaudi_extract_ecc_info(struct hl_device *hdev,
5243                struct ecc_info_extract_params *params, u64 *ecc_address,
5244                u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5245{
5246        struct gaudi_device *gaudi = hdev->asic_specific;
5247        u32 i, num_mem_regs, reg, err_bit;
5248        u64 err_addr, err_word = 0;
5249        int rc = 0;
5250
5251        num_mem_regs = params->num_memories / 32 +
5252                        ((params->num_memories % 32) ? 1 : 0);
5253
5254        if (params->block_address >= CFG_BASE)
5255                params->block_address -= CFG_BASE;
5256
5257        if (params->derr)
5258                err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5259        else
5260                err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5261
5262        if (params->disable_clock_gating) {
5263                mutex_lock(&gaudi->clk_gate_mutex);
5264                hdev->asic_funcs->disable_clock_gating(hdev);
5265        }
5266
5267        /* Set invalid wrapper index */
5268        *memory_wrapper_idx = 0xFF;
5269
5270        /* Iterate through memory wrappers, a single bit must be set */
5271        for (i = 0 ; i < num_mem_regs ; i++) {
5272                /* Each error indication register is 32 bits apart */
5273                err_word = RREG32(err_addr + i * 4);
5274                if (err_word) {
5275                        err_bit = __ffs(err_word);
5276                        *memory_wrapper_idx = err_bit + (32 * i);
5277                        break;
5278                }
5279        }
5280
5281        if (*memory_wrapper_idx == 0xFF) {
5282                dev_err(hdev->dev, "ECC error information cannot be found\n");
5283                rc = -EINVAL;
5284                goto enable_clk_gate;
5285        }
5286
5287        WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5288                        *memory_wrapper_idx);
5289
5290        *ecc_address =
5291                RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5292        *ecc_syndrom =
5293                RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5294
5295        /* Clear error indication */
5296        reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5297        if (params->derr)
5298                reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5299        else
5300                reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5301
5302        WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5303
5304enable_clk_gate:
5305        if (params->disable_clock_gating) {
5306                hdev->asic_funcs->set_clock_gating(hdev);
5307
5308                mutex_unlock(&gaudi->clk_gate_mutex);
5309        }
5310
5311        return rc;
5312}
5313
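/*
 * Dump the per-stream (and lower CP) GLBL_STS1 error bits of a single QMAN,
 * clear them with a write-1-to-clear, and then report any pending ARB errors.
 */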
5314static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5315                                          const char *qm_name,
5316                                          u64 glbl_sts_addr,
5317                                          u64 arb_err_addr)
5318{
5319        u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5320        char reg_desc[32];
5321
5322        /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5323        for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5324                glbl_sts_clr_val = 0;
5325                glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5326
5327                if (!glbl_sts_val)
5328                        continue;
5329
5330                if (i == QMAN_STREAMS)
5331                        snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5332                else
5333                        snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5334
5335                for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5336                        if (glbl_sts_val & BIT(j)) {
5337                                dev_err_ratelimited(hdev->dev,
5338                                                "%s %s. err cause: %s\n",
5339                                                qm_name, reg_desc,
5340                                                gaudi_qman_error_cause[j]);
5341                                glbl_sts_clr_val |= BIT(j);
5342                        }
5343                }
5344
5345                /* Write 1 to clear errors */
5346                WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5347        }
5348
5349        arb_err_val = RREG32(arb_err_addr);
5350
5351        if (!arb_err_val)
5352                return;
5353
5354        for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5355                if (arb_err_val & BIT(j)) {
5356                        dev_err_ratelimited(hdev->dev,
5357                                        "%s ARB_ERR. err cause: %s\n",
5358                                        qm_name,
5359                                        gaudi_qman_arb_error_cause[j]);
5360                }
5361        }
5362}
5363
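/*
 * ECC details are either taken from the firmware EQ entry or, for TPC/MME
 * events, extracted by the driver from the block's ECC capture registers.
 */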
5364static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5365                struct hl_eq_ecc_data *ecc_data)
5366{
5367        struct ecc_info_extract_params params;
5368        u64 ecc_address = 0, ecc_syndrom = 0;
5369        u8 index, memory_wrapper_idx = 0;
5370        bool extract_info_from_fw;
5371        int rc;
5372
5373        switch (event_type) {
5374        case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5375        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5376                extract_info_from_fw = true;
5377                break;
5378        case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5379                index = event_type - GAUDI_EVENT_TPC0_SERR;
5380                params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5381                params.num_memories = 90;
5382                params.derr = false;
5383                params.disable_clock_gating = true;
5384                extract_info_from_fw = false;
5385                break;
5386        case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5387                index = event_type - GAUDI_EVENT_TPC0_DERR;
5388                params.block_address =
5389                        mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5390                params.num_memories = 90;
5391                params.derr = true;
5392                params.disable_clock_gating = true;
5393                extract_info_from_fw = false;
5394                break;
5395        case GAUDI_EVENT_MME0_ACC_SERR:
5396        case GAUDI_EVENT_MME1_ACC_SERR:
5397        case GAUDI_EVENT_MME2_ACC_SERR:
5398        case GAUDI_EVENT_MME3_ACC_SERR:
5399                index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5400                params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5401                params.num_memories = 128;
5402                params.derr = false;
5403                params.disable_clock_gating = true;
5404                extract_info_from_fw = false;
5405                break;
5406        case GAUDI_EVENT_MME0_ACC_DERR:
5407        case GAUDI_EVENT_MME1_ACC_DERR:
5408        case GAUDI_EVENT_MME2_ACC_DERR:
5409        case GAUDI_EVENT_MME3_ACC_DERR:
5410                index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5411                params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5412                params.num_memories = 128;
5413                params.derr = true;
5414                params.disable_clock_gating = true;
5415                extract_info_from_fw = false;
5416                break;
5417        case GAUDI_EVENT_MME0_SBAB_SERR:
5418        case GAUDI_EVENT_MME1_SBAB_SERR:
5419        case GAUDI_EVENT_MME2_SBAB_SERR:
5420        case GAUDI_EVENT_MME3_SBAB_SERR:
5421                index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5422                params.block_address =
5423                        mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5424                params.num_memories = 33;
5425                params.derr = false;
5426                params.disable_clock_gating = true;
5427                extract_info_from_fw = false;
5428                break;
5429        case GAUDI_EVENT_MME0_SBAB_DERR:
5430        case GAUDI_EVENT_MME1_SBAB_DERR:
5431        case GAUDI_EVENT_MME2_SBAB_DERR:
5432        case GAUDI_EVENT_MME3_SBAB_DERR:
5433                index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5434                params.block_address =
5435                        mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5436                params.num_memories = 33;
5437                params.derr = true;
5438                params.disable_clock_gating = true;
5439                extract_info_from_fw = false;
5440                break;
5441        default:
5442                return;
5443        }
5444
5445        if (extract_info_from_fw) {
5446                ecc_address = le64_to_cpu(ecc_data->ecc_address);
5447                ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5448                memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5449        } else {
5450                rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5451                                &ecc_syndrom, &memory_wrapper_idx);
5452                if (rc)
5453                        return;
5454        }
5455
5456        dev_err(hdev->dev,
5457                "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5458                ecc_address, ecc_syndrom, memory_wrapper_idx);
5459}
5460
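/*
 * Translate a QMAN event to the block's GLBL_STS1 and ARB_ERR_CAUSE register
 * addresses and hand off to the generic QMAN error handler.
 */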
5461static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5462{
5463        u64 glbl_sts_addr, arb_err_addr;
5464        u8 index;
5465        char desc[32];
5466
5467        switch (event_type) {
5468        case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5469                index = event_type - GAUDI_EVENT_TPC0_QM;
5470                glbl_sts_addr =
5471                        mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5472                arb_err_addr =
5473                        mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5474                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5475                break;
5476        case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5477                index = event_type - GAUDI_EVENT_MME0_QM;
5478                glbl_sts_addr =
5479                        mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5480                arb_err_addr =
5481                        mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5482                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5483                break;
5484        case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5485                index = event_type - GAUDI_EVENT_DMA0_QM;
5486                glbl_sts_addr =
5487                        mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5488                arb_err_addr =
5489                        mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5490                snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5491                break;
5492        default:
5493                return;
5494        }
5495
5496        gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5497}
5498
5499static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5500                                        bool razwi)
5501{
5502        char desc[64] = "";
5503
5504        gaudi_get_event_desc(event_type, desc, sizeof(desc));
5505        dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5506                event_type, desc);
5507
5508        if (razwi) {
5509                gaudi_print_razwi_info(hdev);
5510                gaudi_print_mmu_error_info(hdev);
5511        }
5512}
5513
5514static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5515{
5516        struct gaudi_device *gaudi = hdev->asic_specific;
5517
5518        /* Unmask all IRQs since some could have been received
5519         * during the soft reset
5520         */
5521        return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5522}
5523
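/*
 * Read, print and clear the interrupt and ECC information of all channels of
 * a single HBM device. Returns non-zero if any error indication was found.
 */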
5524static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5525{
5526        int ch, err = 0;
5527        u32 base, val, val2;
5528
5529        base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5530        for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5531                val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5532                val = (val & 0xFF) | ((val >> 8) & 0xFF);
5533                if (val) {
5534                        err = 1;
5535                        dev_err(hdev->dev,
5536                                "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5537                                device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5538                                (val >> 2) & 0x1, (val >> 3) & 0x1,
5539                                (val >> 4) & 0x1);
5540
5541                        val2 = RREG32(base + ch * 0x1000 + 0x060);
5542                        dev_err(hdev->dev,
5543                                "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5544                                device, ch * 2,
5545                                RREG32(base + ch * 0x1000 + 0x064),
5546                                (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5547                                (val2 & 0xFF0000) >> 16,
5548                                (val2 & 0xFF000000) >> 24);
5549                }
5550
5551                val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5552                val = (val & 0xFF) | ((val >> 8) & 0xFF);
5553                if (val) {
5554                        err = 1;
5555                        dev_err(hdev->dev,
5556                                "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5557                                device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5558                                (val >> 2) & 0x1, (val >> 3) & 0x1,
5559                                (val >> 4) & 0x1);
5560
5561                        val2 = RREG32(base + ch * 0x1000 + 0x070);
5562                        dev_err(hdev->dev,
5563                                "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5564                                device, ch * 2 + 1,
5565                                RREG32(base + ch * 0x1000 + 0x074),
5566                                (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5567                                (val2 & 0xFF0000) >> 16,
5568                                (val2 & 0xFF000000) >> 24);
5569                }
5570
5571                /* Clear interrupts */
5572                RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5573                RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5574                WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5575                WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5576                RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5577                RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5578        }
5579
5580        val  = RREG32(base + 0x8F30);
5581        val2 = RREG32(base + 0x8F34);
5582        if (val | val2) {
5583                err = 1;
5584                dev_err(hdev->dev,
5585                        "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5586                        device, val, val2);
5587        }
5588        val  = RREG32(base + 0x8F40);
5589        val2 = RREG32(base + 0x8F44);
5590        if (val | val2) {
5591                err = 1;
5592                dev_err(hdev->dev,
5593                        "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5594                        device, val, val2);
5595        }
5596
5597        return err;
5598}
5599
5600static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5601{
5602        switch (hbm_event_type) {
5603        case GAUDI_EVENT_HBM0_SPI_0:
5604        case GAUDI_EVENT_HBM0_SPI_1:
5605                return 0;
5606        case GAUDI_EVENT_HBM1_SPI_0:
5607        case GAUDI_EVENT_HBM1_SPI_1:
5608                return 1;
5609        case GAUDI_EVENT_HBM2_SPI_0:
5610        case GAUDI_EVENT_HBM2_SPI_1:
5611                return 2;
5612        case GAUDI_EVENT_HBM3_SPI_0:
5613        case GAUDI_EVENT_HBM3_SPI_1:
5614                return 3;
5615        default:
5616                break;
5617        }
5618
5619        /* Should never happen */
5620        return 0;
5621}
5622
5623static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5624                                        char *interrupt_name)
5625{
5626        struct gaudi_device *gaudi = hdev->asic_specific;
5627        u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5628        bool soft_reset_required = false;
5629
5630        /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5631         * gating, and thus cannot be done in CPU-CP and should be done instead
5632         * by the driver.
5633         */
5634
5635        mutex_lock(&gaudi->clk_gate_mutex);
5636
5637        hdev->asic_funcs->disable_clock_gating(hdev);
5638
5639        tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5640                                TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5641
5642        for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5643                if (tpc_interrupts_cause & BIT(i)) {
5644                        dev_err_ratelimited(hdev->dev,
5645                                        "TPC%d_%s interrupt cause: %s\n",
5646                                        tpc_id, interrupt_name,
5647                                        gaudi_tpc_interrupts_cause[i]);
5648                        /* If this is a QM error, we need to soft-reset */
5649                        if (i == 15)
5650                                soft_reset_required = true;
5651                }
5652
5653        /* Clear interrupts */
5654        WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5655
5656        hdev->asic_funcs->set_clock_gating(hdev);
5657
5658        mutex_unlock(&gaudi->clk_gate_mutex);
5659
5660        return soft_reset_required;
5661}
5662
5663static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5664{
5665        return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5666}
5667
5668static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5669{
5670        return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5671}
5672
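/*
 * Update the clock throttling reason (power/thermal) according to the event
 * and print a rate-limited notification on every envelope entry/exit.
 */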
5673static void gaudi_print_clk_change_info(struct hl_device *hdev,
5674                                        u16 event_type)
5675{
5676        switch (event_type) {
5677        case GAUDI_EVENT_FIX_POWER_ENV_S:
5678                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5679                dev_info_ratelimited(hdev->dev,
5680                        "Clock throttling due to power consumption\n");
5681                break;
5682
5683        case GAUDI_EVENT_FIX_POWER_ENV_E:
5684                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5685                dev_info_ratelimited(hdev->dev,
5686                        "Power envelope is safe, back to optimal clock\n");
5687                break;
5688
5689        case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5690                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5691                dev_info_ratelimited(hdev->dev,
5692                        "Clock throttling due to overheating\n");
5693                break;
5694
5695        case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5696                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5697                dev_info_ratelimited(hdev->dev,
5698                        "Thermal envelope is safe, back to optimal clock\n");
5699                break;
5700
5701        default:
5702                dev_err(hdev->dev, "Received invalid clock change event %d\n",
5703                        event_type);
5704                break;
5705        }
5706}
5707
5708static void gaudi_handle_eqe(struct hl_device *hdev,
5709                                struct hl_eq_entry *eq_entry)
5710{
5711        struct gaudi_device *gaudi = hdev->asic_specific;
5712        u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5713        u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5714                        >> EQ_CTL_EVENT_TYPE_SHIFT);
5715        u8 cause;
5716        bool reset_required;
5717
5718        gaudi->events_stat[event_type]++;
5719        gaudi->events_stat_aggregate[event_type]++;
5720
5721        switch (event_type) {
5722        case GAUDI_EVENT_PCIE_CORE_DERR:
5723        case GAUDI_EVENT_PCIE_IF_DERR:
5724        case GAUDI_EVENT_PCIE_PHY_DERR:
5725        case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5726        case GAUDI_EVENT_MME0_ACC_DERR:
5727        case GAUDI_EVENT_MME0_SBAB_DERR:
5728        case GAUDI_EVENT_MME1_ACC_DERR:
5729        case GAUDI_EVENT_MME1_SBAB_DERR:
5730        case GAUDI_EVENT_MME2_ACC_DERR:
5731        case GAUDI_EVENT_MME2_SBAB_DERR:
5732        case GAUDI_EVENT_MME3_ACC_DERR:
5733        case GAUDI_EVENT_MME3_SBAB_DERR:
5734        case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5735                fallthrough;
5736        case GAUDI_EVENT_CPU_IF_ECC_DERR:
5737        case GAUDI_EVENT_PSOC_MEM_DERR:
5738        case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5739        case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5740        case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5741        case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5742        case GAUDI_EVENT_MMU_DERR:
5743                gaudi_print_irq_info(hdev, event_type, true);
5744                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5745                if (hdev->hard_reset_on_fw_events)
5746                        hl_device_reset(hdev, true, false);
5747                break;
5748
5749        case GAUDI_EVENT_GIC500:
5750        case GAUDI_EVENT_AXI_ECC:
5751        case GAUDI_EVENT_L2_RAM_ECC:
5752        case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5753                gaudi_print_irq_info(hdev, event_type, false);
5754                if (hdev->hard_reset_on_fw_events)
5755                        hl_device_reset(hdev, true, false);
5756                break;
5757
5758        case GAUDI_EVENT_HBM0_SPI_0:
5759        case GAUDI_EVENT_HBM1_SPI_0:
5760        case GAUDI_EVENT_HBM2_SPI_0:
5761        case GAUDI_EVENT_HBM3_SPI_0:
5762                gaudi_print_irq_info(hdev, event_type, false);
5763                gaudi_hbm_read_interrupts(hdev,
5764                                          gaudi_hbm_event_to_dev(event_type));
5765                if (hdev->hard_reset_on_fw_events)
5766                        hl_device_reset(hdev, true, false);
5767                break;
5768
5769        case GAUDI_EVENT_HBM0_SPI_1:
5770        case GAUDI_EVENT_HBM1_SPI_1:
5771        case GAUDI_EVENT_HBM2_SPI_1:
5772        case GAUDI_EVENT_HBM3_SPI_1:
5773                gaudi_print_irq_info(hdev, event_type, false);
5774                gaudi_hbm_read_interrupts(hdev,
5775                                          gaudi_hbm_event_to_dev(event_type));
5776                break;
5777
5778        case GAUDI_EVENT_TPC0_DEC:
5779        case GAUDI_EVENT_TPC1_DEC:
5780        case GAUDI_EVENT_TPC2_DEC:
5781        case GAUDI_EVENT_TPC3_DEC:
5782        case GAUDI_EVENT_TPC4_DEC:
5783        case GAUDI_EVENT_TPC5_DEC:
5784        case GAUDI_EVENT_TPC6_DEC:
5785        case GAUDI_EVENT_TPC7_DEC:
5786                gaudi_print_irq_info(hdev, event_type, true);
5787                reset_required = gaudi_tpc_read_interrupts(hdev,
5788                                        tpc_dec_event_to_tpc_id(event_type),
5789                                        "AXI_SLV_DEC_Error");
5790                if (reset_required) {
5791                        dev_err(hdev->dev, "hard reset required due to %s\n",
5792                                gaudi_irq_map_table[event_type].name);
5793
5794                        if (hdev->hard_reset_on_fw_events)
5795                                hl_device_reset(hdev, true, false);
5796                } else {
5797                        hl_fw_unmask_irq(hdev, event_type);
5798                }
5799                break;
5800
5801        case GAUDI_EVENT_TPC0_KRN_ERR:
5802        case GAUDI_EVENT_TPC1_KRN_ERR:
5803        case GAUDI_EVENT_TPC2_KRN_ERR:
5804        case GAUDI_EVENT_TPC3_KRN_ERR:
5805        case GAUDI_EVENT_TPC4_KRN_ERR:
5806        case GAUDI_EVENT_TPC5_KRN_ERR:
5807        case GAUDI_EVENT_TPC6_KRN_ERR:
5808        case GAUDI_EVENT_TPC7_KRN_ERR:
5809                gaudi_print_irq_info(hdev, event_type, true);
5810                reset_required = gaudi_tpc_read_interrupts(hdev,
5811                                        tpc_krn_event_to_tpc_id(event_type),
5812                                        "KRN_ERR");
5813                if (reset_required) {
5814                        dev_err(hdev->dev, "hard reset required due to %s\n",
5815                                gaudi_irq_map_table[event_type].name);
5816
5817                        if (hdev->hard_reset_on_fw_events)
5818                                hl_device_reset(hdev, true, false);
5819                } else {
5820                        hl_fw_unmask_irq(hdev, event_type);
5821                }
5822                break;
5823
5824        case GAUDI_EVENT_PCIE_CORE_SERR:
5825        case GAUDI_EVENT_PCIE_IF_SERR:
5826        case GAUDI_EVENT_PCIE_PHY_SERR:
5827        case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5828        case GAUDI_EVENT_MME0_ACC_SERR:
5829        case GAUDI_EVENT_MME0_SBAB_SERR:
5830        case GAUDI_EVENT_MME1_ACC_SERR:
5831        case GAUDI_EVENT_MME1_SBAB_SERR:
5832        case GAUDI_EVENT_MME2_ACC_SERR:
5833        case GAUDI_EVENT_MME2_SBAB_SERR:
5834        case GAUDI_EVENT_MME3_ACC_SERR:
5835        case GAUDI_EVENT_MME3_SBAB_SERR:
5836        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5837        case GAUDI_EVENT_CPU_IF_ECC_SERR:
5838        case GAUDI_EVENT_PSOC_MEM_SERR:
5839        case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5840        case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5841        case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5842        case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5843                fallthrough;
5844        case GAUDI_EVENT_MMU_SERR:
5845                gaudi_print_irq_info(hdev, event_type, true);
5846                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5847                hl_fw_unmask_irq(hdev, event_type);
5848                break;
5849
5850        case GAUDI_EVENT_PCIE_DEC:
5851        case GAUDI_EVENT_MME0_WBC_RSP:
5852        case GAUDI_EVENT_MME0_SBAB0_RSP:
5853        case GAUDI_EVENT_MME1_WBC_RSP:
5854        case GAUDI_EVENT_MME1_SBAB0_RSP:
5855        case GAUDI_EVENT_MME2_WBC_RSP:
5856        case GAUDI_EVENT_MME2_SBAB0_RSP:
5857        case GAUDI_EVENT_MME3_WBC_RSP:
5858        case GAUDI_EVENT_MME3_SBAB0_RSP:
5859        case GAUDI_EVENT_CPU_AXI_SPLITTER:
5860        case GAUDI_EVENT_PSOC_AXI_DEC:
5861        case GAUDI_EVENT_PSOC_PRSTN_FALL:
5862        case GAUDI_EVENT_MMU_PAGE_FAULT:
5863        case GAUDI_EVENT_MMU_WR_PERM:
5864        case GAUDI_EVENT_RAZWI_OR_ADC:
5865        case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5866        case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5867        case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5868                fallthrough;
5869        case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5870                gaudi_print_irq_info(hdev, event_type, true);
5871                gaudi_handle_qman_err(hdev, event_type);
5872                hl_fw_unmask_irq(hdev, event_type);
5873                break;
5874
5875        case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5876                gaudi_print_irq_info(hdev, event_type, true);
5877                if (hdev->hard_reset_on_fw_events)
5878                        hl_device_reset(hdev, true, false);
5879                break;
5880
5881        case GAUDI_EVENT_TPC0_BMON_SPMU:
5882        case GAUDI_EVENT_TPC1_BMON_SPMU:
5883        case GAUDI_EVENT_TPC2_BMON_SPMU:
5884        case GAUDI_EVENT_TPC3_BMON_SPMU:
5885        case GAUDI_EVENT_TPC4_BMON_SPMU:
5886        case GAUDI_EVENT_TPC5_BMON_SPMU:
5887        case GAUDI_EVENT_TPC6_BMON_SPMU:
5888        case GAUDI_EVENT_TPC7_BMON_SPMU:
5889        case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5890                gaudi_print_irq_info(hdev, event_type, false);
5891                hl_fw_unmask_irq(hdev, event_type);
5892                break;
5893
5894        case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5895                gaudi_print_clk_change_info(hdev, event_type);
5896                hl_fw_unmask_irq(hdev, event_type);
5897                break;
5898
5899        case GAUDI_EVENT_PSOC_GPIO_U16_0:
5900                cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5901                dev_err(hdev->dev,
5902                        "Received high temp H/W interrupt %d (cause %d)\n",
5903                        event_type, cause);
5904                break;
5905
5906        default:
5907                dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5908                                event_type);
5909                break;
5910        }
5911}
5912
5913static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5914                                        u32 *size)
5915{
5916        struct gaudi_device *gaudi = hdev->asic_specific;
5917
5918        if (aggregate) {
5919                *size = (u32) sizeof(gaudi->events_stat_aggregate);
5920                return gaudi->events_stat_aggregate;
5921        }
5922
5923        *size = (u32) sizeof(gaudi->events_stat);
5924        return gaudi->events_stat;
5925}
5926
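/*
 * Invalidate the entire L0/L1 MMU cache by kicking the STLB invalidation and
 * polling for completion. A timeout triggers a hard reset of the device.
 */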
5927static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5928                                        u32 flags)
5929{
5930        struct gaudi_device *gaudi = hdev->asic_specific;
5931        u32 status, timeout_usec;
5932        int rc;
5933
5934        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5935                hdev->hard_reset_pending)
5936                return 0;
5937
5938        if (hdev->pldm)
5939                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5940        else
5941                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5942
5943        mutex_lock(&hdev->mmu_cache_lock);
5944
5945        /* L0 & L1 invalidation */
5946        WREG32(mmSTLB_INV_PS, 3);
5947        WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5948        WREG32(mmSTLB_INV_PS, 2);
5949
5950        rc = hl_poll_timeout(
5951                hdev,
5952                mmSTLB_INV_PS,
5953                status,
5954                !status,
5955                1000,
5956                timeout_usec);
5957
5958        WREG32(mmSTLB_INV_SET, 0);
5959
5960        mutex_unlock(&hdev->mmu_cache_lock);
5961
5962        if (rc) {
5963                dev_err_ratelimited(hdev->dev,
5964                                        "MMU cache invalidation timeout\n");
5965                hl_device_reset(hdev, true, false);
5966        }
5967
5968        return rc;
5969}
5970
5971static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5972                                bool is_hard, u32 asid, u64 va, u64 size)
5973{
5974        struct gaudi_device *gaudi = hdev->asic_specific;
5975        u32 status, timeout_usec;
5976        u32 inv_data;
5977        u32 pi;
5978        int rc;
5979
5980        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5981                hdev->hard_reset_pending)
5982                return 0;
5983
5984        mutex_lock(&hdev->mmu_cache_lock);
5985
5986        if (hdev->pldm)
5987                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5988        else
5989                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5990
5991        /*
5992         * TODO: currently invalidate entire L0 & L1 as in regular hard
5993         * invalidation. Need to apply invalidation of specific cache
5994         * lines with mask of ASID & VA & size.
5995         * Note that L1 will be flushed entirely in any case.
5996         */
5997
5998        /* L0 & L1 invalidation */
5999        inv_data = RREG32(mmSTLB_CACHE_INV);
6000        /* PI is 8 bit */
6001        pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6002        WREG32(mmSTLB_CACHE_INV,
6003                (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6004
6005        rc = hl_poll_timeout(
6006                hdev,
6007                mmSTLB_INV_CONSUMER_INDEX,
6008                status,
6009                status == pi,
6010                1000,
6011                timeout_usec);
6012
6013        mutex_unlock(&hdev->mmu_cache_lock);
6014
6015        if (rc) {
6016                dev_err_ratelimited(hdev->dev,
6017                                        "MMU cache invalidation timeout\n");
6018                hl_device_reset(hdev, true, false);
6019        }
6020
6021        return rc;
6022}
6023
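/*
 * Program the hop-0 page-table physical address for the given ASID and poll
 * the MMU busy bit until the configuration is accepted.
 */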
6024static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6025                                        u32 asid, u64 phys_addr)
6026{
6027        u32 status, timeout_usec;
6028        int rc;
6029
6030        if (hdev->pldm)
6031                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6032        else
6033                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6034
6035        WREG32(MMU_ASID, asid);
6036        WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6037        WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6038        WREG32(MMU_BUSY, 0x80000000);
6039
6040        rc = hl_poll_timeout(
6041                hdev,
6042                MMU_BUSY,
6043                status,
6044                !(status & 0x80000000),
6045                1000,
6046                timeout_usec);
6047
6048        if (rc) {
6049                dev_err(hdev->dev,
6050                        "Timeout during MMU hop0 config of asid %d\n", asid);
6051                return rc;
6052        }
6053
6054        return 0;
6055}
6056
6057static int gaudi_send_heartbeat(struct hl_device *hdev)
6058{
6059        struct gaudi_device *gaudi = hdev->asic_specific;
6060
6061        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6062                return 0;
6063
6064        return hl_fw_send_heartbeat(hdev);
6065}
6066
6067static int gaudi_cpucp_info_get(struct hl_device *hdev)
6068{
6069        struct gaudi_device *gaudi = hdev->asic_specific;
6070        struct asic_fixed_properties *prop = &hdev->asic_prop;
6071        int rc;
6072
6073        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6074                return 0;
6075
6076        rc = hl_fw_cpucp_info_get(hdev);
6077        if (rc)
6078                return rc;
6079
6080        if (!strlen(prop->cpucp_info.card_name))
6081                strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6082                                CARD_NAME_MAX_LEN);
6083
6084        hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6085
6086        if (hdev->card_type == cpucp_card_type_pci)
6087                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6088        else if (hdev->card_type == cpucp_card_type_pmc)
6089                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6090
6091        hdev->max_power = prop->max_power_default;
6092
6093        return 0;
6094}
6095
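/*
 * Check whether all DMA, TPC and MME engines are idle. Busy engines are
 * marked in @mask, and the relevant status registers are dumped to @s when
 * provided (debugfs).
 */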
6096static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6097                                        struct seq_file *s)
6098{
6099        struct gaudi_device *gaudi = hdev->asic_specific;
6100        const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6101        const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6102        u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6103        bool is_idle = true, is_eng_idle, is_slave;
6104        u64 offset;
6105        int i, dma_id;
6106
6107        mutex_lock(&gaudi->clk_gate_mutex);
6108
6109        hdev->asic_funcs->disable_clock_gating(hdev);
6110
6111        if (s)
6112                seq_puts(s,
6113                        "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6114                        "---  -------  ------------  ----------  -------------\n");
6115
6116        for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6117                dma_id = gaudi_dma_assignment[i];
6118                offset = dma_id * DMA_QMAN_OFFSET;
6119
6120                qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6121                qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6122                dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6123                is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6124                                IS_DMA_IDLE(dma_core_sts0);
6125                is_idle &= is_eng_idle;
6126
6127                if (mask)
6128                        *mask |= ((u64) !is_eng_idle) <<
6129                                        (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6130                if (s)
6131                        seq_printf(s, fmt, dma_id,
6132                                is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6133                                qm_cgm_sts, dma_core_sts0);
6134        }
6135
6136        if (s)
6137                seq_puts(s,
6138                        "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6139                        "---  -------  ------------  ----------  ----------\n");
6140
6141        for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6142                offset = i * TPC_QMAN_OFFSET;
6143                qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6144                qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6145                tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6146                is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6147                                IS_TPC_IDLE(tpc_cfg_sts);
6148                is_idle &= is_eng_idle;
6149
6150                if (mask)
6151                        *mask |= ((u64) !is_eng_idle) <<
6152                                                (GAUDI_ENGINE_ID_TPC_0 + i);
6153                if (s)
6154                        seq_printf(s, fmt, i,
6155                                is_eng_idle ? "Y" : "N",
6156                                qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6157        }
6158
6159        if (s)
6160                seq_puts(s,
6161                        "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6162                        "---  -------  ------------  ----------  -----------\n");
6163
6164        for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6165                offset = i * MME_QMAN_OFFSET;
6166                mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6167                is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6168
6169                /* MME 1 & 3 are slaves, no need to check their QMANs */
6170                is_slave = i % 2;
6171                if (!is_slave) {
6172                        qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6173                        qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6174                        is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6175                }
6176
6177                is_idle &= is_eng_idle;
6178
6179                if (mask)
6180                        *mask |= ((u64) !is_eng_idle) <<
6181                                                (GAUDI_ENGINE_ID_MME_0 + i);
6182                if (s) {
6183                        if (!is_slave)
6184                                seq_printf(s, fmt, i,
6185                                        is_eng_idle ? "Y" : "N",
6186                                        qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6187                        else
6188                                seq_printf(s, mme_slave_fmt, i,
6189                                        is_eng_idle ? "Y" : "N", "-",
6190                                        "-", mme_arch_sts);
6191                }
6192        }
6193
6194        if (s)
6195                seq_puts(s, "\n");
6196
6197        hdev->asic_funcs->set_clock_gating(hdev);
6198
6199        mutex_unlock(&gaudi->clk_gate_mutex);
6200
6201        return is_idle;
6202}
6203
6204static void gaudi_hw_queues_lock(struct hl_device *hdev)
6205        __acquires(&gaudi->hw_queues_lock)
6206{
6207        struct gaudi_device *gaudi = hdev->asic_specific;
6208
6209        spin_lock(&gaudi->hw_queues_lock);
6210}
6211
6212static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6213        __releases(&gaudi->hw_queues_lock)
6214{
6215        struct gaudi_device *gaudi = hdev->asic_specific;
6216
6217        spin_unlock(&gaudi->hw_queues_lock);
6218}
6219
6220static u32 gaudi_get_pci_id(struct hl_device *hdev)
6221{
6222        return hdev->pdev->device;
6223}
6224
6225static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6226                                size_t max_size)
6227{
6228        struct gaudi_device *gaudi = hdev->asic_specific;
6229
6230        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6231                return 0;
6232
6233        return hl_fw_get_eeprom_data(hdev, data, max_size);
6234}
6235
6236/*
6237 * this function should be used only during initialization and/or after reset,
6238 * when there are no active users.
6239 */
6240static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6241                                u32 tpc_id)
6242{
6243        struct gaudi_device *gaudi = hdev->asic_specific;
6244        u64 kernel_timeout;
6245        u32 status, offset;
6246        int rc;
6247
6248        offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6249
6250        if (hdev->pldm)
6251                kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6252        else
6253                kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6254
6255        mutex_lock(&gaudi->clk_gate_mutex);
6256
6257        hdev->asic_funcs->disable_clock_gating(hdev);
6258
6259        WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6260                        lower_32_bits(tpc_kernel));
6261        WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6262                        upper_32_bits(tpc_kernel));
6263
6264        WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6265                        lower_32_bits(tpc_kernel));
6266        WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6267                        upper_32_bits(tpc_kernel));
6268        /* set a valid LUT pointer, content is of no significance */
6269        WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6270                        lower_32_bits(tpc_kernel));
6271        WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6272                        upper_32_bits(tpc_kernel));
6273
6274        WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6275                        lower_32_bits(CFG_BASE +
6276                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6277
6278        WREG32(mmTPC0_CFG_TPC_CMD + offset,
6279                        (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6280                        1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6281        /* wait a bit for the engine to start executing */
6282        usleep_range(1000, 1500);
6283
6284        /* wait until engine has finished executing */
6285        rc = hl_poll_timeout(
6286                hdev,
6287                mmTPC0_CFG_STATUS + offset,
6288                status,
6289                (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6290                                TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6291                1000,
6292                kernel_timeout);
6293
6294        if (rc) {
6295                dev_err(hdev->dev,
6296                        "Timeout while waiting for TPC%d icache prefetch\n",
6297                        tpc_id);
6298                hdev->asic_funcs->set_clock_gating(hdev);
6299                mutex_unlock(&gaudi->clk_gate_mutex);
6300                return -EIO;
6301        }
6302
6303        WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6304                        1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6305
6306        /* wait a bit for the engine to start executing */
6307        usleep_range(1000, 1500);
6308
6309        /* wait until engine has finished executing */
6310        rc = hl_poll_timeout(
6311                hdev,
6312                mmTPC0_CFG_STATUS + offset,
6313                status,
6314                (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6315                                TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6316                1000,
6317                kernel_timeout);
6318
6319        if (rc) {
6320                dev_err(hdev->dev,
6321                        "Timeout while waiting for TPC%d vector pipe\n",
6322                        tpc_id);
6323                hdev->asic_funcs->set_clock_gating(hdev);
6324                mutex_unlock(&gaudi->clk_gate_mutex);
6325                return -EIO;
6326        }
6327
6328        rc = hl_poll_timeout(
6329                hdev,
6330                mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6331                status,
6332                (status == 0),
6333                1000,
6334                kernel_timeout);
6335
6336        hdev->asic_funcs->set_clock_gating(hdev);
6337        mutex_unlock(&gaudi->clk_gate_mutex);
6338
6339        if (rc) {
6340                dev_err(hdev->dev,
6341                        "Timeout while waiting for TPC%d kernel to execute\n",
6342                        tpc_id);
6343                return -EIO;
6344        }
6345
6346        return 0;
6347}
6348
6349static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6350{
6351        return RREG32(mmHW_STATE);
6352}
6353
6354static int gaudi_ctx_init(struct hl_ctx *ctx)
6355{
6356        return 0;
6357}
6358
6359static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6360{
6361        return gaudi_cq_assignment[cq_idx];
6362}
6363
6364static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6365{
6366        return sizeof(struct packet_msg_short) +
6367                        sizeof(struct packet_msg_prot) * 2;
6368}
6369
6370static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6371{
6372        return sizeof(struct packet_msg_short) * 4 +
6373                        sizeof(struct packet_fence) +
6374                        sizeof(struct packet_msg_prot) * 2;
6375}
6376
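/*
 * Build a signal CB: a single MSG_SHORT packet that increments the given
 * sync object (W_S SOB) by one.
 */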
6377static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6378{
6379        struct hl_cb *cb = (struct hl_cb *) data;
6380        struct packet_msg_short *pkt;
6381        u32 value, ctl;
6382
6383        pkt = cb->kernel_address;
6384        memset(pkt, 0, sizeof(*pkt));
6385
6386        /* Inc by 1, Mode ADD */
6387        value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6388        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6389
6390        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6391        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6392        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6393        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6394        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6395        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6396        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6397
6398        pkt->value = cpu_to_le32(value);
6399        pkt->ctl = cpu_to_le32(ctl);
6400}
6401
6402static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6403                                        u16 addr)
6404{
6405        u32 ctl, pkt_size = sizeof(*pkt);
6406
6407        memset(pkt, 0, pkt_size);
6408
6409        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6410        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6411        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6412        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6413        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6414        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6415
6416        pkt->value = cpu_to_le32(value);
6417        pkt->ctl = cpu_to_le32(ctl);
6418
6419        return pkt_size;
6420}
6421
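/*
 * Build the MSG_SHORT packet that arms a monitor so it fires once the sync
 * object value is greater than or equal to sob_val.
 */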
6422static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6423                                        u16 sob_val, u16 addr)
6424{
6425        u32 ctl, value, pkt_size = sizeof(*pkt);
6426        u8 mask = ~(1 << (sob_id & 0x7));
6427
6428        memset(pkt, 0, pkt_size);
6429
6430        value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6431        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6432        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6433                        0); /* GREATER OR EQUAL */
6434        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6435
6436        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6437        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6438        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6439        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6440        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6441        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6442        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6443
6444        pkt->value = cpu_to_le32(value);
6445        pkt->ctl = cpu_to_le32(ctl);
6446
6447        return pkt_size;
6448}
6449
6450static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6451{
6452        u32 ctl, cfg, pkt_size = sizeof(*pkt);
6453
6454        memset(pkt, 0, pkt_size);
6455
6456        cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6457        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6458        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6459
6460        ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6461        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6462        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6463        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6464
6465        pkt->cfg = cpu_to_le32(cfg);
6466        pkt->ctl = cpu_to_le32(ctl);
6467
6468        return pkt_size;
6469}
6470
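/*
 * Build a wait CB: four MSG_SHORT packets configure a monitor to write the
 * queue's CP FENCE2 counter once the SOB reaches sob_val, followed by a
 * FENCE packet on which the queue blocks.
 */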
6471static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6472                        u16 sob_val, u16 mon_id, u32 q_idx)
6473{
6474        struct hl_cb *cb = (struct hl_cb *) data;
6475        void *buf = cb->kernel_address;
6476        u64 monitor_base, fence_addr = 0;
6477        u32 size = 0;
6478        u16 msg_addr_offset;
6479
6480        switch (q_idx) {
6481        case GAUDI_QUEUE_ID_DMA_0_0:
6482                fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6483                break;
6484        case GAUDI_QUEUE_ID_DMA_0_1:
6485                fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6486                break;
6487        case GAUDI_QUEUE_ID_DMA_0_2:
6488                fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6489                break;
6490        case GAUDI_QUEUE_ID_DMA_0_3:
6491                fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6492                break;
6493        case GAUDI_QUEUE_ID_DMA_1_0:
6494                fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6495                break;
6496        case GAUDI_QUEUE_ID_DMA_1_1:
6497                fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6498                break;
6499        case GAUDI_QUEUE_ID_DMA_1_2:
6500                fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6501                break;
6502        case GAUDI_QUEUE_ID_DMA_1_3:
6503                fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6504                break;
6505        case GAUDI_QUEUE_ID_DMA_5_0:
6506                fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6507                break;
6508        case GAUDI_QUEUE_ID_DMA_5_1:
6509                fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6510                break;
6511        case GAUDI_QUEUE_ID_DMA_5_2:
6512                fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6513                break;
6514        case GAUDI_QUEUE_ID_DMA_5_3:
6515                fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6516                break;
6517        default:
6518                /* queue index should be valid here */
6519                dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6520                                q_idx);
6521                return;
6522        }
6523
6524        fence_addr += CFG_BASE;
6525
6526        /*
6527         * monitor_base should be the content of the base0 address registers,
6528         * so it will be added to the msg short offsets
6529         */
6530        monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6531
6532        /* First monitor config packet: low address of the sync */
6533        msg_addr_offset =
6534                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6535                                monitor_base;
6536
6537        size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6538                                        msg_addr_offset);
6539
6540        /* Second monitor config packet: high address of the sync */
6541        msg_addr_offset =
6542                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6543                                monitor_base;
6544
6545        size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6546                                        msg_addr_offset);
6547
6548        /*
6549         * Third monitor config packet: the payload, i.e. what to write when the
6550         * sync triggers
6551         */
6552        msg_addr_offset =
6553                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6554                                monitor_base;
6555
6556        size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6557
6558        /* Fourth monitor config packet: bind the monitor to a sync object */
6559        msg_addr_offset =
6560                (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6561                                monitor_base;
6562        size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6563                                                msg_addr_offset);
6564
6565        /* Fence packet */
6566        size += gaudi_add_fence_pkt(buf + size);
6567}
6568
6569static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6570{
6571        struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6572
6573        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6574                hw_sob->sob_id);
6575
6576        WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6577                0);
6578
6579        kref_init(&hw_sob->kref);
6580}
6581
6582static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6583{
6584        if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6585                                                        HL_POWER9_HOST_MAGIC) {
6586                hdev->power9_64bit_dma_enable = 1;
6587                hdev->dma_mask = 64;
6588        } else {
6589                hdev->power9_64bit_dma_enable = 0;
6590                hdev->dma_mask = 48;
6591        }
6592}
6593
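/*
 * Return the 64-bit PSOC timestamp, composed of the upper and lower 32-bit
 * counter registers.
 */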
6594static u64 gaudi_get_device_time(struct hl_device *hdev)
6595{
6596        u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6597
6598        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6599}
6600
6601static const struct hl_asic_funcs gaudi_funcs = {
6602        .early_init = gaudi_early_init,
6603        .early_fini = gaudi_early_fini,
6604        .late_init = gaudi_late_init,
6605        .late_fini = gaudi_late_fini,
6606        .sw_init = gaudi_sw_init,
6607        .sw_fini = gaudi_sw_fini,
6608        .hw_init = gaudi_hw_init,
6609        .hw_fini = gaudi_hw_fini,
6610        .halt_engines = gaudi_halt_engines,
6611        .suspend = gaudi_suspend,
6612        .resume = gaudi_resume,
6613        .cb_mmap = gaudi_cb_mmap,
6614        .ring_doorbell = gaudi_ring_doorbell,
6615        .pqe_write = gaudi_pqe_write,
6616        .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6617        .asic_dma_free_coherent = gaudi_dma_free_coherent,
6618        .get_int_queue_base = gaudi_get_int_queue_base,
6619        .test_queues = gaudi_test_queues,
6620        .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6621        .asic_dma_pool_free = gaudi_dma_pool_free,
6622        .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6623        .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6624        .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6625        .cs_parser = gaudi_cs_parser,
6626        .asic_dma_map_sg = gaudi_dma_map_sg,
6627        .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6628        .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6629        .update_eq_ci = gaudi_update_eq_ci,
6630        .context_switch = gaudi_context_switch,
6631        .restore_phase_topology = gaudi_restore_phase_topology,
6632        .debugfs_read32 = gaudi_debugfs_read32,
6633        .debugfs_write32 = gaudi_debugfs_write32,
6634        .debugfs_read64 = gaudi_debugfs_read64,
6635        .debugfs_write64 = gaudi_debugfs_write64,
6636        .add_device_attr = gaudi_add_device_attr,
6637        .handle_eqe = gaudi_handle_eqe,
6638        .set_pll_profile = gaudi_set_pll_profile,
6639        .get_events_stat = gaudi_get_events_stat,
6640        .read_pte = gaudi_read_pte,
6641        .write_pte = gaudi_write_pte,
6642        .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6643        .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6644        .send_heartbeat = gaudi_send_heartbeat,
6645        .set_clock_gating = gaudi_set_clock_gating,
6646        .disable_clock_gating = gaudi_disable_clock_gating,
6647        .debug_coresight = gaudi_debug_coresight,
6648        .is_device_idle = gaudi_is_device_idle,
6649        .soft_reset_late_init = gaudi_soft_reset_late_init,
6650        .hw_queues_lock = gaudi_hw_queues_lock,
6651        .hw_queues_unlock = gaudi_hw_queues_unlock,
6652        .get_pci_id = gaudi_get_pci_id,
6653        .get_eeprom_data = gaudi_get_eeprom_data,
6654        .send_cpu_message = gaudi_send_cpu_message,
6655        .get_hw_state = gaudi_get_hw_state,
6656        .pci_bars_map = gaudi_pci_bars_map,
6657        .init_iatu = gaudi_init_iatu,
6658        .rreg = hl_rreg,
6659        .wreg = hl_wreg,
6660        .halt_coresight = gaudi_halt_coresight,
6661        .ctx_init = gaudi_ctx_init,
6662        .get_clk_rate = gaudi_get_clk_rate,
6663        .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6664        .read_device_fw_version = gaudi_read_device_fw_version,
6665        .load_firmware_to_device = gaudi_load_firmware_to_device,
6666        .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6667        .get_signal_cb_size = gaudi_get_signal_cb_size,
6668        .get_wait_cb_size = gaudi_get_wait_cb_size,
6669        .gen_signal_cb = gaudi_gen_signal_cb,
6670        .gen_wait_cb = gaudi_gen_wait_cb,
6671        .reset_sob = gaudi_reset_sob,
6672        .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6673        .get_device_time = gaudi_get_device_time
6674};
6675
6676/**
6677 * gaudi_set_asic_funcs - set GAUDI function pointers
6678 *
6679 * @hdev: pointer to hl_device structure
6680 *
6681 */
6682void gaudi_set_asic_funcs(struct hl_device *hdev)
6683{
6684        hdev->asic_funcs = &gaudi_funcs;
6685}
6686