linux/drivers/misc/habanalabs/goya/goya.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are in SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because CP is secured, the driver still needs to
 * parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU-disabled
 * mode).
 *
 * DMA RR does NOT protect host because DMA is not secured.
 *
 */

#define GOYA_BOOT_FIT_FILE      "habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE      "habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM               63

#define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
#define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC  1000000         /* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC    4000000         /* 4s */

#define GOYA_QMAN0_FENCE_VAL            0xD169B243

#define GOYA_MAX_STRING_LEN             20

#define GOYA_CB_POOL_CB_CNT             512
#define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */

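/*
 * Engine idle checks: a QMAN/CMDQ is considered idle only when every idle
 * bit in its status word is set, a DMA channel is idle when its BUSY bit is
 * clear, and TPC/MME are idle when their config/arch status idle masks are
 * fully set.
 */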
#define IS_QM_IDLE(engine, qm_glbl_sts0) \
        (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
        (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
                        engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
        !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
        (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
        (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
                "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
                "goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
        switch (id) {
        case PACKET_WREG_32:
        case PACKET_WREG_BULK:
        case PACKET_MSG_LONG:
        case PACKET_MSG_SHORT:
        case PACKET_CP_DMA:
        case PACKET_MSG_PROT:
        case PACKET_FENCE:
        case PACKET_LIN_DMA:
        case PACKET_NOP:
        case PACKET_STOP:
                return true;
        default:
                return false;
        }
}

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
        mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
        mmTPC0_QM_GLBL_SECURE_PROPS,
        mmTPC0_QM_GLBL_NON_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC0_CFG_ARUSER,
        mmTPC0_CFG_AWUSER,
        mmTPC1_QM_GLBL_SECURE_PROPS,
        mmTPC1_QM_GLBL_NON_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC1_CFG_ARUSER,
        mmTPC1_CFG_AWUSER,
        mmTPC2_QM_GLBL_SECURE_PROPS,
        mmTPC2_QM_GLBL_NON_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC2_CFG_ARUSER,
        mmTPC2_CFG_AWUSER,
        mmTPC3_QM_GLBL_SECURE_PROPS,
        mmTPC3_QM_GLBL_NON_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC3_CFG_ARUSER,
        mmTPC3_CFG_AWUSER,
        mmTPC4_QM_GLBL_SECURE_PROPS,
        mmTPC4_QM_GLBL_NON_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC4_CFG_ARUSER,
        mmTPC4_CFG_AWUSER,
        mmTPC5_QM_GLBL_SECURE_PROPS,
        mmTPC5_QM_GLBL_NON_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC5_CFG_ARUSER,
        mmTPC5_CFG_AWUSER,
        mmTPC6_QM_GLBL_SECURE_PROPS,
        mmTPC6_QM_GLBL_NON_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC6_CFG_ARUSER,
        mmTPC6_CFG_AWUSER,
        mmTPC7_QM_GLBL_SECURE_PROPS,
        mmTPC7_QM_GLBL_NON_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC7_CFG_ARUSER,
        mmTPC7_CFG_AWUSER,
        mmMME_QM_GLBL_SECURE_PROPS,
        mmMME_QM_GLBL_NON_SECURE_PROPS,
        mmMME_CMDQ_GLBL_SECURE_PROPS,
        mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
        mmMME_SBA_CONTROL_DATA,
        mmMME_SBB_CONTROL_DATA,
        mmMME_SBC_CONTROL_DATA,
        mmMME_WBC_CONTROL_DATA,
        mmPCIE_WRAP_PSOC_ARUSER,
        mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
        GOYA_ASYNC_EVENT_ID_PCIE_IF,
        GOYA_ASYNC_EVENT_ID_TPC0_ECC,
        GOYA_ASYNC_EVENT_ID_TPC1_ECC,
        GOYA_ASYNC_EVENT_ID_TPC2_ECC,
        GOYA_ASYNC_EVENT_ID_TPC3_ECC,
        GOYA_ASYNC_EVENT_ID_TPC4_ECC,
        GOYA_ASYNC_EVENT_ID_TPC5_ECC,
        GOYA_ASYNC_EVENT_ID_TPC6_ECC,
        GOYA_ASYNC_EVENT_ID_TPC7_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
        GOYA_ASYNC_EVENT_ID_MMU_ECC,
        GOYA_ASYNC_EVENT_ID_DMA_MACRO,
        GOYA_ASYNC_EVENT_ID_DMA_ECC,
        GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_MEM,
        GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
        GOYA_ASYNC_EVENT_ID_SRAM0,
        GOYA_ASYNC_EVENT_ID_SRAM1,
        GOYA_ASYNC_EVENT_ID_SRAM2,
        GOYA_ASYNC_EVENT_ID_SRAM3,
        GOYA_ASYNC_EVENT_ID_SRAM4,
        GOYA_ASYNC_EVENT_ID_SRAM5,
        GOYA_ASYNC_EVENT_ID_SRAM6,
        GOYA_ASYNC_EVENT_ID_SRAM7,
        GOYA_ASYNC_EVENT_ID_SRAM8,
        GOYA_ASYNC_EVENT_ID_SRAM9,
        GOYA_ASYNC_EVENT_ID_SRAM10,
        GOYA_ASYNC_EVENT_ID_SRAM11,
        GOYA_ASYNC_EVENT_ID_SRAM12,
        GOYA_ASYNC_EVENT_ID_SRAM13,
        GOYA_ASYNC_EVENT_ID_SRAM14,
        GOYA_ASYNC_EVENT_ID_SRAM15,
        GOYA_ASYNC_EVENT_ID_SRAM16,
        GOYA_ASYNC_EVENT_ID_SRAM17,
        GOYA_ASYNC_EVENT_ID_SRAM18,
        GOYA_ASYNC_EVENT_ID_SRAM19,
        GOYA_ASYNC_EVENT_ID_SRAM20,
        GOYA_ASYNC_EVENT_ID_SRAM21,
        GOYA_ASYNC_EVENT_ID_SRAM22,
        GOYA_ASYNC_EVENT_ID_SRAM23,
        GOYA_ASYNC_EVENT_ID_SRAM24,
        GOYA_ASYNC_EVENT_ID_SRAM25,
        GOYA_ASYNC_EVENT_ID_SRAM26,
        GOYA_ASYNC_EVENT_ID_SRAM27,
        GOYA_ASYNC_EVENT_ID_SRAM28,
        GOYA_ASYNC_EVENT_ID_SRAM29,
        GOYA_ASYNC_EVENT_ID_GIC500,
        GOYA_ASYNC_EVENT_ID_PLL0,
        GOYA_ASYNC_EVENT_ID_PLL1,
        GOYA_ASYNC_EVENT_ID_PLL3,
        GOYA_ASYNC_EVENT_ID_PLL4,
        GOYA_ASYNC_EVENT_ID_PLL5,
        GOYA_ASYNC_EVENT_ID_PLL6,
        GOYA_ASYNC_EVENT_ID_AXI_ECC,
        GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
        GOYA_ASYNC_EVENT_ID_PCIE_DEC,
        GOYA_ASYNC_EVENT_ID_TPC0_DEC,
        GOYA_ASYNC_EVENT_ID_TPC1_DEC,
        GOYA_ASYNC_EVENT_ID_TPC2_DEC,
        GOYA_ASYNC_EVENT_ID_TPC3_DEC,
        GOYA_ASYNC_EVENT_ID_TPC4_DEC,
        GOYA_ASYNC_EVENT_ID_TPC5_DEC,
        GOYA_ASYNC_EVENT_ID_TPC6_DEC,
        GOYA_ASYNC_EVENT_ID_TPC7_DEC,
        GOYA_ASYNC_EVENT_ID_MME_WACS,
        GOYA_ASYNC_EVENT_ID_MME_WACSD,
        GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
        GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
        GOYA_ASYNC_EVENT_ID_PSOC,
        GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC0_QM,
        GOYA_ASYNC_EVENT_ID_TPC1_QM,
        GOYA_ASYNC_EVENT_ID_TPC2_QM,
        GOYA_ASYNC_EVENT_ID_TPC3_QM,
        GOYA_ASYNC_EVENT_ID_TPC4_QM,
        GOYA_ASYNC_EVENT_ID_TPC5_QM,
        GOYA_ASYNC_EVENT_ID_TPC6_QM,
        GOYA_ASYNC_EVENT_ID_TPC7_QM,
        GOYA_ASYNC_EVENT_ID_MME_QM,
        GOYA_ASYNC_EVENT_ID_MME_CMDQ,
        GOYA_ASYNC_EVENT_ID_DMA0_QM,
        GOYA_ASYNC_EVENT_ID_DMA1_QM,
        GOYA_ASYNC_EVENT_ID_DMA2_QM,
        GOYA_ASYNC_EVENT_ID_DMA3_QM,
        GOYA_ASYNC_EVENT_ID_DMA4_QM,
        GOYA_ASYNC_EVENT_ID_DMA0_CH,
        GOYA_ASYNC_EVENT_ID_DMA1_CH,
        GOYA_ASYNC_EVENT_ID_DMA2_CH,
        GOYA_ASYNC_EVENT_ID_DMA3_CH,
        GOYA_ASYNC_EVENT_ID_DMA4_CH,
        GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
        GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
        GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
        GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
        GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_get_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int i;

        prop->max_queues = GOYA_QUEUE_ID_SIZE;
        prop->hw_queues_props = kcalloc(prop->max_queues,
                        sizeof(struct hw_queue_properties),
                        GFP_KERNEL);

        if (!prop->hw_queues_props)
                return -ENOMEM;

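        /*
         * Queue layout: the external (DMA) queues come first, then the
         * driver-only CPU queues, and finally the internal (TPC/MME) queues.
         */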
        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 1;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                prop->hw_queues_props[i].driver_only = 1;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
                        NUMBER_OF_INT_HW_QUEUES; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
        prop->sram_user_base_address = prop->sram_base_address +
                                                SRAM_USER_BASE_OFFSET;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;

        prop->dmmu.hop0_shift = HOP0_SHIFT;
        prop->dmmu.hop1_shift = HOP1_SHIFT;
        prop->dmmu.hop2_shift = HOP2_SHIFT;
        prop->dmmu.hop3_shift = HOP3_SHIFT;
        prop->dmmu.hop4_shift = HOP4_SHIFT;
        prop->dmmu.hop0_mask = HOP0_MASK;
        prop->dmmu.hop1_mask = HOP1_MASK;
        prop->dmmu.hop2_mask = HOP2_MASK;
        prop->dmmu.hop3_mask = HOP3_MASK;
        prop->dmmu.hop4_mask = HOP4_MASK;
        prop->dmmu.start_addr = VA_DDR_SPACE_START;
        prop->dmmu.end_addr = VA_DDR_SPACE_END;
        prop->dmmu.page_size = PAGE_SIZE_2MB;
        prop->dmmu.num_hops = MMU_ARCH_5_HOPS;

        /* shifts and masks are the same in PMMU and DMMU */
        memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
        prop->pmmu.start_addr = VA_HOST_SPACE_START;
        prop->pmmu.end_addr = VA_HOST_SPACE_END;
        prop->pmmu.page_size = PAGE_SIZE_4KB;
        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

        /* PMMU and HPMMU are the same except for the page size */
        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

        prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
        prop->high_pll = PLL_HIGH_DEFAULT;
        prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
        prop->max_power_default = MAX_POWER_DEFAULT;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

        strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
                CARD_NAME_MAX_LEN);

        prop->max_pending_cs = GOYA_MAX_PENDING_CS;

        return 0;
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
        static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
        bool is_wc[3] = {false, false, true};
        int rc;

        rc = hl_pci_bars_map(hdev, name, is_wc);
        if (rc)
                return rc;

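        /* The CFG space lives inside the SRAM/CFG BAR, so offset the kernel
         * mapping to make rmmio point directly at CFG_BASE.
         */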
        hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
                        (CFG_BASE - SRAM_BASE_ADDR);

        return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_inbound_pci_region pci_region;
        u64 old_addr = addr;
        int rc;

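        /* Retarget the DDR BAR by re-programming inbound iATU region 1 and
         * return the previous base so the caller can restore it later.
         */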
        if ((goya) && (goya->ddr_bar_cur_addr == addr))
                return old_addr;

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        pci_region.mode = PCI_BAR_MATCH_MODE;
        pci_region.bar = DDR_BAR_ID;
        pci_region.addr = addr;
        rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
        if (rc)
                return U64_MAX;

        if (goya) {
                old_addr = goya->ddr_bar_cur_addr;
                goya->ddr_bar_cur_addr = addr;
        }

        return old_addr;
}

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
        struct hl_inbound_pci_region inbound_region;
        struct hl_outbound_pci_region outbound_region;
        int rc;

        /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = SRAM_CFG_BAR_ID;
        inbound_region.addr = SRAM_BASE_ADDR;
        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
        if (rc)
                goto done;

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = DDR_BAR_ID;
        inbound_region.addr = DRAM_PHYS_BASE;
        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
        if (rc)
                goto done;

        hdev->asic_funcs->set_dma_mask_from_fw(hdev);

        /* Outbound Region 0 - Point to Host */
        outbound_region.addr = HOST_PHYS_BASE;
        outbound_region.size = HOST_PHYS_SIZE;
        rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
        return rc;
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        u32 val;
        int rc;

        rc = goya_get_fixed_properties(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to get fixed properties\n");
                return rc;
        }

        /* Check BAR sizes */
        if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        SRAM_CFG_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                        SRAM_CFG_BAR_ID),
                        CFG_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        MSIX_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                                MSIX_BAR_ID),
                        MSIX_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

        rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
                        mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
        if (rc)
                goto free_queue_props;

        /* Goya Firmware does not support security */
        prop->fw_security_disabled = true;
        dev_info(hdev->dev, "firmware-level security is disabled\n");

        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
                        dev_warn(hdev->dev,
                                "PCI strap is not configured correctly, PCI bus errors may occur\n");
        }

        return 0;

free_queue_props:
        kfree(hdev->asic_prop.hw_queues_props);
        return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
        kfree(hdev->asic_prop.hw_queues_props);
        hl_pci_fini(hdev);

        return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
        /* mask to zero the MMBP and ASID bits */
        WREG32_AND(reg, ~0x7FF);
        WREG32_OR(reg, asid);
}

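/*
 * QMAN DMA 0 is reserved for driver-initiated jobs (see the security scheme
 * at the top of this file), so its protection level is only toggled around
 * privileged submissions.
 */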
static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
                return;

        if (secure)
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

        RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 trace_freq = 0;
        u32 pll_clk = 0;
        u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
        u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
        u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
        u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
        u32 od = RREG32(mmPSOC_PCI_PLL_OD);

        if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
                if (div_sel == DIV_SEL_REF_CLK)
                        trace_freq = PLL_REF_CLK;
                else
                        trace_freq = PLL_REF_CLK / (div_fctr + 1);
        } else if (div_sel == DIV_SEL_PLL_CLK ||
                                        div_sel == DIV_SEL_DIVIDED_PLL) {
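                /* Standard PLL relation: f_pll = f_ref * NF / (NR * OD),
                 * where the registers hold each divider value minus one.
                 */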
                pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
                if (div_sel == DIV_SEL_PLL_CLK)
                        trace_freq = pll_clk;
                else
                        trace_freq = pll_clk / (div_fctr + 1);
        } else {
                dev_warn(hdev->dev,
                        "Received invalid div select value: %d", div_sel);
        }

        prop->psoc_timestamp_frequency = trace_freq;
        prop->psoc_pci_pll_nr = nr;
        prop->psoc_pci_pll_nf = nf;
        prop->psoc_pci_pll_od = od;
        prop->psoc_pci_pll_div_factor = div_fctr;
}

int goya_late_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        goya_fetch_psoc_frequency(hdev);

        rc = goya_mmu_clear_pgt_range(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to clear MMU page tables range %d\n", rc);
                return rc;
        }

        rc = goya_mmu_set_dram_default_page(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
                return rc;
        }

        rc = goya_mmu_add_mappings_for_device_cpu(hdev);
        if (rc)
                return rc;

        rc = goya_init_cpu_queues(hdev);
        if (rc)
                return rc;

        rc = goya_test_cpu_queue(hdev);
        if (rc)
                return rc;

        rc = goya_cpucp_info_get(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
                return rc;
        }

        /* Now that we have the DRAM size in ASIC prop, we need to check
         * its size and configure the DMA_IF DDR wrap protection (which is in
         * the MMU block) accordingly. The value is the log2 of the DRAM size.
         */
        WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI access from CPU %d\n", rc);
                return rc;
        }

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

        return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
        const struct hwmon_channel_info **channel_info_arr;
        int i = 0;

        if (!hdev->hl_chip_info->info)
                return;

        channel_info_arr = hdev->hl_chip_info->info;

        while (channel_info_arr[i]) {
                kfree(channel_info_arr[i]->config);
                kfree(channel_info_arr[i]);
                i++;
        }

        kfree(channel_info_arr);

        hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
        struct goya_device *goya;
        int rc;

        /* Allocate device structure */
        goya = kzalloc(sizeof(*goya), GFP_KERNEL);
        if (!goya)
                return -ENOMEM;

        /* according to goya_init_iatu */
        goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

        goya->mme_clk = GOYA_PLL_FREQ_LOW;
        goya->tpc_clk = GOYA_PLL_FREQ_LOW;
        goya->ic_clk = GOYA_PLL_FREQ_LOW;

        hdev->asic_specific = goya;

        /* Create DMA pool for small allocations */
        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
                        &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
        if (!hdev->dma_pool) {
                dev_err(hdev->dev, "failed to create DMA pool\n");
                rc = -ENOMEM;
                goto free_goya_device;
        }

        hdev->cpu_accessible_dma_mem =
                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
                                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                                        &hdev->cpu_accessible_dma_address,
                                        GFP_KERNEL | __GFP_ZERO);

        if (!hdev->cpu_accessible_dma_mem) {
                rc = -ENOMEM;
                goto free_dma_pool;
        }

        dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
                &hdev->cpu_accessible_dma_address);

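        /* Expose the CPU-accessible memory as a gen_pool with a 32-byte
         * minimum allocation order (ilog2(32)), on any NUMA node (-1).
         */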
        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
        if (!hdev->cpu_accessible_dma_pool) {
                dev_err(hdev->dev,
                        "Failed to create CPU accessible DMA pool\n");
                rc = -ENOMEM;
                goto free_cpu_dma_mem;
        }

        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
                                (uintptr_t) hdev->cpu_accessible_dma_mem,
                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to add memory to CPU accessible DMA pool\n");
                rc = -EFAULT;
                goto free_cpu_accessible_dma_pool;
        }

        spin_lock_init(&goya->hw_queues_lock);
        hdev->supports_coresight = true;
        hdev->supports_soft_reset = true;

        return 0;

free_cpu_accessible_dma_pool:
        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);
free_dma_pool:
        dma_pool_destroy(hdev->dma_pool);
free_goya_device:
        kfree(goya);

        return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        gen_pool_destroy(hdev->cpu_accessible_dma_pool);

        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);

        dma_pool_destroy(hdev->dma_pool);

        kfree(goya);

        return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
                dma_addr_t bus_address)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
        u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
        WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

        WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
        WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
        WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

        /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
        WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
        WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

        if (goya->hw_cap_initialized & HW_CAP_MMU)
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

        if (hdev->stop_on_err)
                dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

        WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
        WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
        u32 gic_base_lo, gic_base_hi;
        u64 sob_addr;
        u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

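        /* Each channel reports write completion to its own sync object:
         * channels 1-4 use SOB_OBJ_1000 onwards, channel 0 uses SOB_OBJ_1007.
         */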
        if (dma_id)
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
                                (dma_id - 1) * 4;
        else
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

        WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
        WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
        WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_hw_queue *q;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_DMA)
                return;

        q = &hdev->kernel_queues[0];

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
                q->cq_id = q->msi_vec = i;
                goya_init_dma_qman(hdev, i, q->bus_address);
                goya_init_dma_ch(hdev, i);
        }

        goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return;

        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
        int rc;
        u32 status;

        /* use the values of TPC0 as they are all the same */

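        /* Assert CP_STOP first; if the CP is blocked on a fence, wait for
         * the fence to clear, then poll until the CP reports it has stopped.
         */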
        WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

        status = RREG32(cp_sts_reg);
        if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
                rc = hl_poll_timeout(
                        hdev,
                        cp_sts_reg,
                        status,
                        !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
                        1000,
                        QMAN_FENCE_TIMEOUT_USEC);

                /* if QMAN is stuck in fence no need to check for stop */
                if (rc)
                        return 0;
        }

        rc = hl_poll_timeout(
                hdev,
                glbl_sts0_reg,
                status,
                (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
                1000,
                QMAN_STOP_TIMEOUT_USEC);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout while waiting for QMAN to stop\n");
                return -EINVAL;
        }

        return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
        int rc, retval = 0;

        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return retval;

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_0_GLBL_CFG1,
                        mmDMA_QM_0_CP_STS,
                        mmDMA_QM_0_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_1_GLBL_CFG1,
                        mmDMA_QM_1_CP_STS,
                        mmDMA_QM_1_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_2_GLBL_CFG1,
                        mmDMA_QM_2_CP_STS,
                        mmDMA_QM_2_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_3_GLBL_CFG1,
                        mmDMA_QM_3_CP_STS,
                        mmDMA_QM_3_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_4_GLBL_CFG1,
                        mmDMA_QM_4_CP_STS,
                        mmDMA_QM_4_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
                retval = -EIO;
        }

        return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
        int err;

        if (!hdev->cpu_queues_enable)
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
                return 0;

        eq = &hdev->event_queue;

        WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
        WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

        WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
        WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

        WREG32(mmCPU_CQ_BASE_ADDR_LOW,
                        lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
        WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
                        upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

        WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
        WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
        WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

        /* Used for EQ CI */
        WREG32(mmCPU_EQ_CI, 0);

        WREG32(mmCPU_IF_PF_PQ_PI, 0);

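        /* Handshake with the device CPU: mark the PQ as ready, kick the CPU
         * through a GIC SPI, then wait for it to report READY_FOR_HOST.
         */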
        WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_PI_UPDATE);

        err = hl_poll_timeout(
                hdev,
                mmCPU_PQ_INIT_STATUS,
                status,
                (status == PQ_INIT_STATUS_READY_FOR_HOST),
                1000,
                GOYA_CPU_TIMEOUT_USEC);

        if (err) {
                dev_err(hdev->dev,
                        "Failed to setup communication with device CPU\n");
                return -EIO;
        }

        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
        u64 tpc_eml_address;
        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
        int err, slm_index;

        tpc_offset = tpc_id * 0x40000;
        tpc_eml_offset = tpc_id * 0x200000;
        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
        tpc_slm_offset = tpc_eml_address + 0x100000;

        /*
         * Workaround for Bug H2 #2443:
         * "TPC SB is not initialized on chip reset"
         */

        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
                        tpc_id);

        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

        err = hl_poll_timeout(
                hdev,
                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                val,
                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
                1000,
                HL_DEVICE_TIMEOUT_USEC);

        if (err)
                dev_err(hdev->dev,
                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

        msleep(GOYA_RESET_WAIT_MSEC);

        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

        msleep(GOYA_RESET_WAIT_MSEC);

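        /* Scrub the first 256 words of the TPC SLM through the EML window;
         * the trailing read back presumably flushes the posted writes.
         */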
        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
                WREG32(tpc_slm_offset + (slm_index << 2), 0);

        val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int i;

        if (hdev->pldm)
                return;

        if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
                return;

        /* Workaround for H2 #2443 */

        for (i = 0 ; i < TPC_MAX_NUM ; i++)
                _goya_tpc_mbist_workaround(hdev, i);

        goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 polynom[10], tpc_intr_mask, offset;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
                return;

        polynom[0] = 0x00020080;
        polynom[1] = 0x00401000;
        polynom[2] = 0x00200800;
        polynom[3] = 0x00002000;
        polynom[4] = 0x00080200;
        polynom[5] = 0x00040100;
        polynom[6] = 0x00100400;
        polynom[7] = 0x00004000;
        polynom[8] = 0x00010000;
        polynom[9] = 0x00008000;

        /* Mask all arithmetic interrupts from TPC */
        tpc_intr_mask = 0x7FFF;

        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
        }

        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
        WREG32(mmMME_AGU, 0x0f0f0f10);
        WREG32(mmMME_SEI_MASK, ~0x0);

        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
        WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
        WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
        WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
        WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
        WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
        WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
        WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
        WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
        WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
        WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
        WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
        WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
        WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
        WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
        WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
        WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
        WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
        WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

        WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
        WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

        WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
        WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

        WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1524        WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1525        WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1526        WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1527        WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1528        WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1529        WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1530        WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1531
1532        WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1533        WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1534        WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1535        WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1536        WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1537        WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1538        WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1539        WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1540        WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1541        WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1542        WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1543        WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1544
1545        WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1546        WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1547        WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1548        WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1549        WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1550        WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1551        WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1552        WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1553        WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1554        WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1555        WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1556        WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1557
1558        WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1559        WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1560        WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1561        WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1562        WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1563        WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1564        WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1565        WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1566        WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1567        WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1568        WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1569        WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1570
1571        for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1572                WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1573                WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1574                WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1575                WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1576                WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1577                WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1578
1579                WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1580                WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1581                WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1582                WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1583                WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1584                WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1585                WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1586                WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1587
1588                WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1589                WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1590        }
1591
1592        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1593                WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1594                                1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1595                WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1596                                1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1597        }
1598
1599        for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1600                /*
1601                 * Workaround for Bug H2 #2441 :
1602                 * "ST.NOP set trace event illegal opcode"
1603                 */
1604                WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1605
1606                WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1607                                1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1608                WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1609                                1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1610
1611                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1612                                ICACHE_FETCH_LINE_NUM, 2);
1613        }
1614
1615        WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1616        WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1617                        1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1618
1619        WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1620        WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1621                        1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1622
1623        /*
1624         * Workaround for H2 #HW-23 bug
1625         * Set DMA max outstanding read requests to 240 on DMA CH 1.
1626         * This limitation is still large enough not to affect PCIe Gen4
1627         * bandwidth. We need to limit only that DMA channel because the user
1628         * can only read from host memory using DMA CH 1
1629         */
1630        WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1631
1632        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1633
1634        goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1635}
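
/*
 * Illustrative sketch (not part of the driver): the long WREG32 runs above
 * program "golden" arbitration values supplied by the H/W team. A
 * table-driven form, shown here only as an example with a hypothetical
 * struct and helper, would shrink the code at the cost of losing the
 * greppable raw dump:
 */
struct goya_golden_reg {
        u32 reg;
        u32 val;
};

static void goya_write_golden_regs(struct hl_device *hdev,
                                const struct goya_golden_reg *regs, int num)
{
        int i;

        for (i = 0 ; i < num ; i++)
                WREG32(regs[i].reg, regs[i].val);
}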
1636
1637static void goya_init_mme_qman(struct hl_device *hdev)
1638{
1639        u32 mtr_base_lo, mtr_base_hi;
1640        u32 so_base_lo, so_base_hi;
1641        u32 gic_base_lo, gic_base_hi;
1642        u64 qman_base_addr;
1643
1644        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1645        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1646        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1647        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1648
1649        gic_base_lo =
1650                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1651        gic_base_hi =
1652                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1653
1654        qman_base_addr = hdev->asic_prop.sram_base_address +
1655                                MME_QMAN_BASE_OFFSET;
1656
1657        WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1658        WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1659        WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1660        WREG32(mmMME_QM_PQ_PI, 0);
1661        WREG32(mmMME_QM_PQ_CI, 0);
1662        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1663        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1664        WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1665        WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1666
1667        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1668        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1669        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1670        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1671
1672        /* QMAN CQ has 8 cache lines */
1673        WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1674
1675        WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1676        WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1677
1678        WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1679
1680        WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1681
1682        WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1683
1684        WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1685}
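
/*
 * Illustrative sketch (hypothetical macro, not used above): the CQ_CFG1
 * values written here and in goya_init_mme_cmdq() (0x00080008, 0x00140014)
 * appear to repeat the cache-line count in both 16-bit halves of the
 * register, so they could be built as:
 */
#define QMAN_CQ_CFG1_VAL(lines)         (((lines) << 16) | (lines))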
1686
1687static void goya_init_mme_cmdq(struct hl_device *hdev)
1688{
1689        u32 mtr_base_lo, mtr_base_hi;
1690        u32 so_base_lo, so_base_hi;
1691        u32 gic_base_lo, gic_base_hi;
1692
1693        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1694        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1695        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1696        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1697
1698        gic_base_lo =
1699                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1700        gic_base_hi =
1701                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1702
1703        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1704        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1705        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1706        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1707
1708        /* CMDQ CQ has 20 cache lines */
1709        WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1710
1711        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1712        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1713
1714        WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1715
1716        WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1717
1718        WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1719
1720        WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1721}
1722
1723void goya_init_mme_qmans(struct hl_device *hdev)
1724{
1725        struct goya_device *goya = hdev->asic_specific;
1726        u32 so_base_lo, so_base_hi;
1727
1728        if (goya->hw_cap_initialized & HW_CAP_MME)
1729                return;
1730
1731        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1732        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1733
1734        WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1735        WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1736
1737        goya_init_mme_qman(hdev);
1738        goya_init_mme_cmdq(hdev);
1739
1740        goya->hw_cap_initialized |= HW_CAP_MME;
1741}
1742
1743static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1744{
1745        u32 mtr_base_lo, mtr_base_hi;
1746        u32 so_base_lo, so_base_hi;
1747        u32 gic_base_lo, gic_base_hi;
1748        u64 qman_base_addr;
1749        u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1750
1751        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1752        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1753        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1754        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1755
1756        gic_base_lo =
1757                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1758        gic_base_hi =
1759                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1760
1761        qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1762
1763        WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1764        WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1765        WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1766        WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1767        WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1768        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1769        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1770        WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1771        WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1772
1773        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1774        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1775        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1776        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1777
1778        WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1779
1780        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1781        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1782
1783        WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1784                        GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1785
1786        WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1787
1788        WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1789
1790        WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1791}
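
/*
 * Illustrative sketch (hypothetical helper): the reg_off arithmetic above
 * relies on the per-TPC QMAN register blocks being evenly spaced, so any
 * TPC0 register can be rebased onto engine tpc_id:
 */
static inline u32 goya_tpc_qm_reg_sketch(u32 tpc0_reg, int tpc_id)
{
        return tpc0_reg + tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
}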
1792
1793static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1794{
1795        u32 mtr_base_lo, mtr_base_hi;
1796        u32 so_base_lo, so_base_hi;
1797        u32 gic_base_lo, gic_base_hi;
1798        u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1799
1800        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1801        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1802        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1803        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1804
1805        gic_base_lo =
1806                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1807        gic_base_hi =
1808                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1809
1810        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1811        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1812        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1813        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1814
1815        WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1816
1817        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1818        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1819
1820        WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1821                        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1822
1823        WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1824
1825        WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1826
1827        WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1828}
1829
1830void goya_init_tpc_qmans(struct hl_device *hdev)
1831{
1832        struct goya_device *goya = hdev->asic_specific;
1833        u32 so_base_lo, so_base_hi;
1834        u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1835                        mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1836        int i;
1837
1838        if (goya->hw_cap_initialized & HW_CAP_TPC)
1839                return;
1840
1841        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1842        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1843
1844        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1845                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1846                                so_base_lo);
1847                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1848                                so_base_hi);
1849        }
1850
1851        goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1852        goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1853        goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1854        goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1855        goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1856        goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1857        goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1858        goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1859
1860        for (i = 0 ; i < TPC_MAX_NUM ; i++)
1861                goya_init_tpc_cmdq(hdev, i);
1862
1863        goya->hw_cap_initialized |= HW_CAP_TPC;
1864}
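
/*
 * Illustrative sketch (not part of the driver): the eight unrolled
 * goya_init_tpc_qman() calls above could equally iterate a table of the
 * same base offsets. The array below is hypothetical; the
 * TPCx_QMAN_BASE_OFFSET macros are the real ones used above.
 */
static const u32 goya_tpc_qman_base_sketch[TPC_MAX_NUM] = {
        TPC0_QMAN_BASE_OFFSET, TPC1_QMAN_BASE_OFFSET,
        TPC2_QMAN_BASE_OFFSET, TPC3_QMAN_BASE_OFFSET,
        TPC4_QMAN_BASE_OFFSET, TPC5_QMAN_BASE_OFFSET,
        TPC6_QMAN_BASE_OFFSET, TPC7_QMAN_BASE_OFFSET,
};

/*
 * A loop over the table would then replace the unrolled calls:
 *
 *      for (i = 0 ; i < TPC_MAX_NUM ; i++)
 *              goya_init_tpc_qman(hdev, goya_tpc_qman_base_sketch[i], i);
 */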
1865
1866/*
1867 * goya_disable_internal_queues - Disable internal queues
1868 *
1869 * @hdev: pointer to hl_device structure
1870 *
1871 */
1872static void goya_disable_internal_queues(struct hl_device *hdev)
1873{
1874        struct goya_device *goya = hdev->asic_specific;
1875
1876        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1877                goto disable_tpc;
1878
1879        WREG32(mmMME_QM_GLBL_CFG0, 0);
1880        WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1881
1882disable_tpc:
1883        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1884                return;
1885
1886        WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1887        WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1888
1889        WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1890        WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1891
1892        WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1893        WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1894
1895        WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1896        WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1897
1898        WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1899        WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1900
1901        WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1902        WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1903
1904        WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1905        WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1906
1907        WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1908        WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1909}
1910
1911/*
1912 * goya_stop_internal_queues - Stop internal queues
1913 *
1914 * @hdev: pointer to hl_device structure
1915 *
1916 * Returns 0 on success
1917 *
1918 */
1919static int goya_stop_internal_queues(struct hl_device *hdev)
1920{
1921        struct goya_device *goya = hdev->asic_specific;
1922        int rc, retval = 0;
1923
1924        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1925                goto stop_tpc;
1926
1927        /*
1928         * Each queue (QMAN) is a separate H/W logic. That means that each
1929         * QMAN can be stopped independently and failure to stop one does NOT
1930         * mandate we should not try to stop other QMANs
1931         */
1932
1933        rc = goya_stop_queue(hdev,
1934                        mmMME_QM_GLBL_CFG1,
1935                        mmMME_QM_CP_STS,
1936                        mmMME_QM_GLBL_STS0);
1937
1938        if (rc) {
1939                dev_err(hdev->dev, "failed to stop MME QMAN\n");
1940                retval = -EIO;
1941        }
1942
1943        rc = goya_stop_queue(hdev,
1944                        mmMME_CMDQ_GLBL_CFG1,
1945                        mmMME_CMDQ_CP_STS,
1946                        mmMME_CMDQ_GLBL_STS0);
1947
1948        if (rc) {
1949                dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1950                retval = -EIO;
1951        }
1952
1953stop_tpc:
1954        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1955                return retval;
1956
1957        rc = goya_stop_queue(hdev,
1958                        mmTPC0_QM_GLBL_CFG1,
1959                        mmTPC0_QM_CP_STS,
1960                        mmTPC0_QM_GLBL_STS0);
1961
1962        if (rc) {
1963                dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1964                retval = -EIO;
1965        }
1966
1967        rc = goya_stop_queue(hdev,
1968                        mmTPC0_CMDQ_GLBL_CFG1,
1969                        mmTPC0_CMDQ_CP_STS,
1970                        mmTPC0_CMDQ_GLBL_STS0);
1971
1972        if (rc) {
1973                dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1974                retval = -EIO;
1975        }
1976
1977        rc = goya_stop_queue(hdev,
1978                        mmTPC1_QM_GLBL_CFG1,
1979                        mmTPC1_QM_CP_STS,
1980                        mmTPC1_QM_GLBL_STS0);
1981
1982        if (rc) {
1983                dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1984                retval = -EIO;
1985        }
1986
1987        rc = goya_stop_queue(hdev,
1988                        mmTPC1_CMDQ_GLBL_CFG1,
1989                        mmTPC1_CMDQ_CP_STS,
1990                        mmTPC1_CMDQ_GLBL_STS0);
1991
1992        if (rc) {
1993                dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1994                retval = -EIO;
1995        }
1996
1997        rc = goya_stop_queue(hdev,
1998                        mmTPC2_QM_GLBL_CFG1,
1999                        mmTPC2_QM_CP_STS,
2000                        mmTPC2_QM_GLBL_STS0);
2001
2002        if (rc) {
2003                dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2004                retval = -EIO;
2005        }
2006
2007        rc = goya_stop_queue(hdev,
2008                        mmTPC2_CMDQ_GLBL_CFG1,
2009                        mmTPC2_CMDQ_CP_STS,
2010                        mmTPC2_CMDQ_GLBL_STS0);
2011
2012        if (rc) {
2013                dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2014                retval = -EIO;
2015        }
2016
2017        rc = goya_stop_queue(hdev,
2018                        mmTPC3_QM_GLBL_CFG1,
2019                        mmTPC3_QM_CP_STS,
2020                        mmTPC3_QM_GLBL_STS0);
2021
2022        if (rc) {
2023                dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2024                retval = -EIO;
2025        }
2026
2027        rc = goya_stop_queue(hdev,
2028                        mmTPC3_CMDQ_GLBL_CFG1,
2029                        mmTPC3_CMDQ_CP_STS,
2030                        mmTPC3_CMDQ_GLBL_STS0);
2031
2032        if (rc) {
2033                dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2034                retval = -EIO;
2035        }
2036
2037        rc = goya_stop_queue(hdev,
2038                        mmTPC4_QM_GLBL_CFG1,
2039                        mmTPC4_QM_CP_STS,
2040                        mmTPC4_QM_GLBL_STS0);
2041
2042        if (rc) {
2043                dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2044                retval = -EIO;
2045        }
2046
2047        rc = goya_stop_queue(hdev,
2048                        mmTPC4_CMDQ_GLBL_CFG1,
2049                        mmTPC4_CMDQ_CP_STS,
2050                        mmTPC4_CMDQ_GLBL_STS0);
2051
2052        if (rc) {
2053                dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2054                retval = -EIO;
2055        }
2056
2057        rc = goya_stop_queue(hdev,
2058                        mmTPC5_QM_GLBL_CFG1,
2059                        mmTPC5_QM_CP_STS,
2060                        mmTPC5_QM_GLBL_STS0);
2061
2062        if (rc) {
2063                dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2064                retval = -EIO;
2065        }
2066
2067        rc = goya_stop_queue(hdev,
2068                        mmTPC5_CMDQ_GLBL_CFG1,
2069                        mmTPC5_CMDQ_CP_STS,
2070                        mmTPC5_CMDQ_GLBL_STS0);
2071
2072        if (rc) {
2073                dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2074                retval = -EIO;
2075        }
2076
2077        rc = goya_stop_queue(hdev,
2078                        mmTPC6_QM_GLBL_CFG1,
2079                        mmTPC6_QM_CP_STS,
2080                        mmTPC6_QM_GLBL_STS0);
2081
2082        if (rc) {
2083                dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2084                retval = -EIO;
2085        }
2086
2087        rc = goya_stop_queue(hdev,
2088                        mmTPC6_CMDQ_GLBL_CFG1,
2089                        mmTPC6_CMDQ_CP_STS,
2090                        mmTPC6_CMDQ_GLBL_STS0);
2091
2092        if (rc) {
2093                dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2094                retval = -EIO;
2095        }
2096
2097        rc = goya_stop_queue(hdev,
2098                        mmTPC7_QM_GLBL_CFG1,
2099                        mmTPC7_QM_CP_STS,
2100                        mmTPC7_QM_GLBL_STS0);
2101
2102        if (rc) {
2103                dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2104                retval = -EIO;
2105        }
2106
2107        rc = goya_stop_queue(hdev,
2108                        mmTPC7_CMDQ_GLBL_CFG1,
2109                        mmTPC7_CMDQ_CP_STS,
2110                        mmTPC7_CMDQ_GLBL_STS0);
2111
2112        if (rc) {
2113                dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2114                retval = -EIO;
2115        }
2116
2117        return retval;
2118}
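
/*
 * Illustrative sketch (not part of the driver): because the per-TPC QMAN
 * and CMDQ register blocks are evenly spaced, the unrolled TPC stop
 * sequence above could also be written as a loop. The strides below mirror
 * the reg_off arithmetic in goya_init_tpc_qman()/goya_init_tpc_cmdq().
 */
static int goya_stop_tpc_queues_sketch(struct hl_device *hdev)
{
        u32 qm_off, cmdq_off;
        int i, rc, retval = 0;

        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
                qm_off = i * (mmTPC1_QM_GLBL_CFG1 - mmTPC0_QM_GLBL_CFG1);
                cmdq_off = i * (mmTPC1_CMDQ_GLBL_CFG1 - mmTPC0_CMDQ_GLBL_CFG1);

                rc = goya_stop_queue(hdev,
                                mmTPC0_QM_GLBL_CFG1 + qm_off,
                                mmTPC0_QM_CP_STS + qm_off,
                                mmTPC0_QM_GLBL_STS0 + qm_off);

                if (rc) {
                        dev_err(hdev->dev, "failed to stop TPC %d QMAN\n", i);
                        retval = -EIO;
                }

                rc = goya_stop_queue(hdev,
                                mmTPC0_CMDQ_GLBL_CFG1 + cmdq_off,
                                mmTPC0_CMDQ_CP_STS + cmdq_off,
                                mmTPC0_CMDQ_GLBL_STS0 + cmdq_off);

                if (rc) {
                        dev_err(hdev->dev, "failed to stop TPC %d CMDQ\n", i);
                        retval = -EIO;
                }
        }

        return retval;
}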
2119
2120static void goya_dma_stall(struct hl_device *hdev)
2121{
2122        struct goya_device *goya = hdev->asic_specific;
2123
2124        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2125                return;
2126
2127        WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2128        WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2129        WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2130        WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2131        WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2132}
2133
2134static void goya_tpc_stall(struct hl_device *hdev)
2135{
2136        struct goya_device *goya = hdev->asic_specific;
2137
2138        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2139                return;
2140
2141        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2142        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2143        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2144        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2145        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2146        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2147        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2148        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2149}
2150
2151static void goya_mme_stall(struct hl_device *hdev)
2152{
2153        struct goya_device *goya = hdev->asic_specific;
2154
2155        if (!(goya->hw_cap_initialized & HW_CAP_MME))
2156                return;
2157
2158        WREG32(mmMME_STALL, 0xFFFFFFFF);
2159}
2160
2161static int goya_enable_msix(struct hl_device *hdev)
2162{
2163        struct goya_device *goya = hdev->asic_specific;
2164        int cq_cnt = hdev->asic_prop.completion_queues_count;
2165        int rc, i, irq_cnt_init, irq;
2166
2167        if (goya->hw_cap_initialized & HW_CAP_MSIX)
2168                return 0;
2169
2170        rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2171                                GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2172        if (rc < 0) {
2173                dev_err(hdev->dev,
2174                        "MSI-X: Failed to enable support -- %d/%d\n",
2175                        GOYA_MSIX_ENTRIES, rc);
2176                return rc;
2177        }
2178
2179        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2180                irq = pci_irq_vector(hdev->pdev, i);
2181                rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2182                                &hdev->completion_queue[i]);
2183                if (rc) {
2184                        dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2185                        goto free_irqs;
2186                }
2187        }
2188
2189        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2190
2191        rc = request_irq(irq, hl_irq_handler_eq, 0,
2192                        goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2193                        &hdev->event_queue);
2194        if (rc) {
2195                dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2196                goto free_irqs;
2197        }
2198
2199        goya->hw_cap_initialized |= HW_CAP_MSIX;
2200        return 0;
2201
2202free_irqs:
2203        for (i = 0 ; i < irq_cnt_init ; i++)
2204                free_irq(pci_irq_vector(hdev->pdev, i),
2205                        &hdev->completion_queue[i]);
2206
2207        pci_free_irq_vectors(hdev->pdev);
2208        return rc;
2209}
2210
2211static void goya_sync_irqs(struct hl_device *hdev)
2212{
2213        struct goya_device *goya = hdev->asic_specific;
2214        int i;
2215
2216        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2217                return;
2218
2219        /* Wait for all pending IRQs to be finished */
2220        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2221                synchronize_irq(pci_irq_vector(hdev->pdev, i));
2222
2223        synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2224}
2225
2226static void goya_disable_msix(struct hl_device *hdev)
2227{
2228        struct goya_device *goya = hdev->asic_specific;
2229        int i, irq;
2230
2231        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2232                return;
2233
2234        goya_sync_irqs(hdev);
2235
2236        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2237        free_irq(irq, &hdev->event_queue);
2238
2239        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2240                irq = pci_irq_vector(hdev->pdev, i);
2241                free_irq(irq, &hdev->completion_queue[i]);
2242        }
2243
2244        pci_free_irq_vectors(hdev->pdev);
2245
2246        goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2247}
2248
2249static void goya_enable_timestamp(struct hl_device *hdev)
2250{
2251        /* Disable the timestamp counter */
2252        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2253
2254        /* Zero the upper (0xC) and lower (0x8) words of the 64-bit counter */
2255        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2256        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2257
2258        /* Enable the counter */
2259        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2260}
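
/*
 * Illustrative sketch (hypothetical helper): reading the free-running
 * counter back as one 64-bit value, assuming (as the zeroing order above
 * suggests) that offsets 0xC/0x8 hold the upper/lower words. Re-reading
 * the upper word guards against a carry between the two reads.
 */
static u64 goya_read_timestamp_sketch(struct hl_device *hdev)
{
        u32 hi, lo, tmp;

        do {
                hi = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
                lo = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8);
                tmp = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
        } while (hi != tmp);

        return ((u64) hi << 32) | lo;
}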
2261
2262static void goya_disable_timestamp(struct hl_device *hdev)
2263{
2264        /* Disable the timestamp counter */
2265        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2266}
2267
2268static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2269{
2270        u32 wait_timeout_ms;
2271
2272        dev_info(hdev->dev,
2273                "Halting compute engines and disabling interrupts\n");
2274
2275        if (hdev->pldm)
2276                wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2277        else
2278                wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2279
2280        goya_stop_external_queues(hdev);
2281        goya_stop_internal_queues(hdev);
2282
2283        msleep(wait_timeout_ms);
2284
2285        goya_dma_stall(hdev);
2286        goya_tpc_stall(hdev);
2287        goya_mme_stall(hdev);
2288
2289        msleep(wait_timeout_ms);
2290
2291        goya_disable_external_queues(hdev);
2292        goya_disable_internal_queues(hdev);
2293
2294        goya_disable_timestamp(hdev);
2295
2296        if (hard_reset) {
2297                goya_disable_msix(hdev);
2298                goya_mmu_remove_device_cpu_mappings(hdev);
2299        } else {
2300                goya_sync_irqs(hdev);
2301        }
2302}
2303
2304/*
2305 * goya_load_firmware_to_device() - Load LINUX FW code to device.
2306 * @hdev: Pointer to hl_device structure.
2307 *
2308 * Copy LINUX fw code from firmware file to HBM BAR.
2309 *
2310 * Return: 0 on success, non-zero for failure.
2311 */
2312static int goya_load_firmware_to_device(struct hl_device *hdev)
2313{
2314        void __iomem *dst;
2315
2316        dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2317
2318        return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
2319}
2320
2321/*
2322 * goya_load_boot_fit_to_device() - Load boot fit to device.
2323 * @hdev: Pointer to hl_device structure.
2324 *
2325 * Copy boot fit file to SRAM BAR.
2326 *
2327 * Return: 0 on success, non-zero for failure.
2328 */
2329static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2330{
2331        void __iomem *dst;
2332
2333        dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2334
2335        return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
2336}
2337
2338/*
2339 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2340 * The version string is located at that offset.
2341 */
2342static void goya_read_device_fw_version(struct hl_device *hdev,
2343                                        enum hl_fw_component fwc)
2344{
2345        const char *name;
2346        u32 ver_off;
2347        char *dest;
2348
2349        switch (fwc) {
2350        case FW_COMP_UBOOT:
2351                ver_off = RREG32(mmUBOOT_VER_OFFSET);
2352                dest = hdev->asic_prop.uboot_ver;
2353                name = "U-Boot";
2354                break;
2355        case FW_COMP_PREBOOT:
2356                ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2357                dest = hdev->asic_prop.preboot_ver;
2358                name = "Preboot";
2359                break;
2360        default:
2361                dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2362                return;
2363        }
2364
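        /* The scratchpad value has SRAM_BASE_ADDR folded into it; mask the
         * base away to leave a plain offset into the SRAM BAR
         */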
2365        ver_off &= ~((u32)SRAM_BASE_ADDR);
2366
2367        if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2368                memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2369                                                        VERSION_MAX_LEN);
                /* The string in SRAM is not guaranteed to be NUL-terminated,
                 * so terminate it explicitly
                 */
                dest[VERSION_MAX_LEN - 1] = '\0';
2370        } else {
2371                dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2372                                                                name, ver_off);
2373                strcpy(dest, "unavailable");
2374        }
2375}
2376
2377static int goya_init_cpu(struct hl_device *hdev)
2378{
2379        struct goya_device *goya = hdev->asic_specific;
2380        int rc;
2381
2382        if (!hdev->cpu_enable)
2383                return 0;
2384
2385        if (goya->hw_cap_initialized & HW_CAP_CPU)
2386                return 0;
2387
2388        /*
2389         * Before pushing u-boot/linux to the device, we need to set the DDR
2390         * bar to the base address of the DRAM
2391         */
2392        if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2393                dev_err(hdev->dev,
2394                        "failed to map DDR bar to DRAM base address\n");
2395                return -EIO;
2396        }
2397
2398        rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2399                        mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2400                        mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
2401                        false, GOYA_CPU_TIMEOUT_USEC,
2402                        GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
2403
2404        if (rc)
2405                return rc;
2406
2407        goya->hw_cap_initialized |= HW_CAP_CPU;
2408
2409        return 0;
2410}
2411
2412static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2413                                                u64 phys_addr)
2414{
2415        u32 status, timeout_usec;
2416        int rc;
2417
2418        if (hdev->pldm)
2419                timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2420        else
2421                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2422
2423        WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2424        WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
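        /* Bit 31 is the busy flag; H/W clears it once the new hop0 address
         * has been committed, which is what we poll for below
         */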
2425        WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2426
2427        rc = hl_poll_timeout(
2428                hdev,
2429                MMU_ASID_BUSY,
2430                status,
2431                !(status & 0x80000000),
2432                1000,
2433                timeout_usec);
2434
2435        if (rc) {
2436                dev_err(hdev->dev,
2437                        "Timeout during MMU hop0 config of asid %d\n", asid);
2438                return rc;
2439        }
2440
2441        return 0;
2442}
2443
2444int goya_mmu_init(struct hl_device *hdev)
2445{
2446        struct asic_fixed_properties *prop = &hdev->asic_prop;
2447        struct goya_device *goya = hdev->asic_specific;
2448        u64 hop0_addr;
2449        int rc, i;
2450
2451        if (!hdev->mmu_enable)
2452                return 0;
2453
2454        if (goya->hw_cap_initialized & HW_CAP_MMU)
2455                return 0;
2456
2457        hdev->dram_supports_virtual_memory = true;
2458        hdev->dram_default_page_mapping = true;
2459
2460        for (i = 0 ; i < prop->max_asid ; i++) {
2461                hop0_addr = prop->mmu_pgt_addr +
2462                                (i * prop->mmu_hop_table_size);
2463
2464                rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2465                if (rc) {
2466                        dev_err(hdev->dev,
2467                                "failed to set hop0 addr for asid %d\n", i);
2468                        goto err;
2469                }
2470        }
2471
2472        goya->hw_cap_initialized |= HW_CAP_MMU;
2473
2474        /* init MMU cache manage page */
2475        WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2476                                lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2477        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2478
2479        /* Remove follower feature due to performance bug */
2480        WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2481                        (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2482
2483        hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2484                                        VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2485
2486        WREG32(mmMMU_MMU_ENABLE, 1);
2487        WREG32(mmMMU_SPI_MASK, 0xF);
2488
2489        return 0;
2490
2491err:
2492        return rc;
2493}
2494
2495/*
2496 * goya_hw_init - Goya hardware initialization code
2497 *
2498 * @hdev: pointer to hl_device structure
2499 *
2500 * Returns 0 on success
2501 *
2502 */
2503static int goya_hw_init(struct hl_device *hdev)
2504{
2505        struct asic_fixed_properties *prop = &hdev->asic_prop;
2506        int rc;
2507
2508        dev_info(hdev->dev, "Starting initialization of H/W\n");
2509
2510        /* Perform read from the device to make sure device is up */
2511        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2512
2513        /*
2514         * Let's mark in the H/W that we have reached this point. We check
2515         * this value in the reset_before_init function to understand whether
2516         * we need to reset the chip before doing H/W init. This register is
2517         * cleared by the H/W upon H/W reset
2518         */
2519        WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2520
2521        rc = goya_init_cpu(hdev);
2522        if (rc) {
2523                dev_err(hdev->dev, "failed to initialize CPU\n");
2524                return rc;
2525        }
2526
2527        goya_tpc_mbist_workaround(hdev);
2528
2529        goya_init_golden_registers(hdev);
2530
2531        /*
2532         * After CPU initialization is finished, change DDR bar mapping inside
2533         * iATU to point to the start address of the MMU page tables
2534         */
2535        if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2536                        ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2537                dev_err(hdev->dev,
2538                        "failed to map DDR bar to MMU page tables\n");
2539                return -EIO;
2540        }
2541
2542        rc = goya_mmu_init(hdev);
2543        if (rc)
2544                return rc;
2545
2546        goya_init_security(hdev);
2547
2548        goya_init_dma_qmans(hdev);
2549
2550        goya_init_mme_qmans(hdev);
2551
2552        goya_init_tpc_qmans(hdev);
2553
2554        goya_enable_timestamp(hdev);
2555
2556        /* MSI-X must be enabled before CPU queues are initialized */
2557        rc = goya_enable_msix(hdev);
2558        if (rc)
2559                goto disable_queues;
2560
2561        /* Perform read from the device to flush all MSI-X configuration */
2562        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2563
2564        return 0;
2565
2566disable_queues:
2567        goya_disable_internal_queues(hdev);
2568        goya_disable_external_queues(hdev);
2569
2570        return rc;
2571}
2572
2573/*
2574 * goya_hw_fini - Goya hardware tear-down code
2575 *
2576 * @hdev: pointer to hl_device structure
2577 * @hard_reset: should we do hard reset to all engines or just reset the
2578 *              compute/dma engines
2579 */
2580static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2581{
2582        struct goya_device *goya = hdev->asic_specific;
2583        u32 reset_timeout_ms, cpu_timeout_ms, status;
2584
2585        if (hdev->pldm) {
2586                reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2587                cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2588        } else {
2589                reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2590                cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2591        }
2592
2593        if (hard_reset) {
2594                /* We don't know the state of the CPU, so make sure it is
2595                 * stopped by any means necessary
2596                 */
2597                WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2598                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2599                        GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2600
2601                msleep(cpu_timeout_ms);
2602
2603                goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2604                goya_disable_clk_rlx(hdev);
2605                goya_set_pll_refclk(hdev);
2606
2607                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2608                dev_info(hdev->dev,
2609                        "Issued HARD reset command, going to wait %dms\n",
2610                        reset_timeout_ms);
2611        } else {
2612                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2613                dev_info(hdev->dev,
2614                        "Issued SOFT reset command, going to wait %dms\n",
2615                        reset_timeout_ms);
2616        }
2617
2618        /*
2619         * After a hard reset, we can't poll the BTM_FSM register because the
2620         * PSOC itself is in reset. In either flow, we need to wait until the
2621         * reset is deasserted
2622         */
2623        msleep(reset_timeout_ms);
2624
2625        status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2626        if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2627                dev_err(hdev->dev,
2628                        "Timeout while waiting for device to reset 0x%x\n",
2629                        status);
2630
2631        if (!hard_reset) {
2632                goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2633                                                HW_CAP_GOLDEN | HW_CAP_TPC);
2634                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2635                                GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2636                return;
2637        }
2638
2639        /* Chicken bit to re-initiate boot sequencer flow */
2640        WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2641                1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2642        /* Move boot manager FSM to pre boot sequencer init state */
2643        WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2644                        0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2645
2646        goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2647                                        HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2648                                        HW_CAP_DMA | HW_CAP_MME |
2649                                        HW_CAP_MMU | HW_CAP_TPC_MBIST |
2650                                        HW_CAP_GOLDEN | HW_CAP_TPC);
2651        memset(goya->events_stat, 0, sizeof(goya->events_stat));
2652}
2653
2654int goya_suspend(struct hl_device *hdev)
2655{
2656        int rc;
2657
2658        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
2659        if (rc)
2660                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2661
2662        return rc;
2663}
2664
2665int goya_resume(struct hl_device *hdev)
2666{
2667        return goya_init_iatu(hdev);
2668}
2669
2670static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2671                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
2672{
2673        int rc;
2674
2675        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2676                        VM_DONTCOPY | VM_NORESERVE;
2677
2678        rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
2679        if (rc)
2680                dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
2681
2682        return rc;
2683}
2684
2685void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2686{
2687        u32 db_reg_offset, db_value;
2688
2689        switch (hw_queue_id) {
2690        case GOYA_QUEUE_ID_DMA_0:
2691                db_reg_offset = mmDMA_QM_0_PQ_PI;
2692                break;
2693
2694        case GOYA_QUEUE_ID_DMA_1:
2695                db_reg_offset = mmDMA_QM_1_PQ_PI;
2696                break;
2697
2698        case GOYA_QUEUE_ID_DMA_2:
2699                db_reg_offset = mmDMA_QM_2_PQ_PI;
2700                break;
2701
2702        case GOYA_QUEUE_ID_DMA_3:
2703                db_reg_offset = mmDMA_QM_3_PQ_PI;
2704                break;
2705
2706        case GOYA_QUEUE_ID_DMA_4:
2707                db_reg_offset = mmDMA_QM_4_PQ_PI;
2708                break;
2709
2710        case GOYA_QUEUE_ID_CPU_PQ:
2711                db_reg_offset = mmCPU_IF_PF_PQ_PI;
2712                break;
2713
2714        case GOYA_QUEUE_ID_MME:
2715                db_reg_offset = mmMME_QM_PQ_PI;
2716                break;
2717
2718        case GOYA_QUEUE_ID_TPC0:
2719                db_reg_offset = mmTPC0_QM_PQ_PI;
2720                break;
2721
2722        case GOYA_QUEUE_ID_TPC1:
2723                db_reg_offset = mmTPC1_QM_PQ_PI;
2724                break;
2725
2726        case GOYA_QUEUE_ID_TPC2:
2727                db_reg_offset = mmTPC2_QM_PQ_PI;
2728                break;
2729
2730        case GOYA_QUEUE_ID_TPC3:
2731                db_reg_offset = mmTPC3_QM_PQ_PI;
2732                break;
2733
2734        case GOYA_QUEUE_ID_TPC4:
2735                db_reg_offset = mmTPC4_QM_PQ_PI;
2736                break;
2737
2738        case GOYA_QUEUE_ID_TPC5:
2739                db_reg_offset = mmTPC5_QM_PQ_PI;
2740                break;
2741
2742        case GOYA_QUEUE_ID_TPC6:
2743                db_reg_offset = mmTPC6_QM_PQ_PI;
2744                break;
2745
2746        case GOYA_QUEUE_ID_TPC7:
2747                db_reg_offset = mmTPC7_QM_PQ_PI;
2748                break;
2749
2750        default:
2751                /* Should never get here */
2752                dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2753                        hw_queue_id);
2754                return;
2755        }
2756
2757        db_value = pi;
2758
2759        /* ring the doorbell */
2760        WREG32(db_reg_offset, db_value);
2761
2762        if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2763                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2764                                GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2765}
2766
2767void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2768{
2769        /* The QMANs are on the SRAM so we need to copy to IO space */
2770        memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2771}
2772
2773static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2774                                        dma_addr_t *dma_handle, gfp_t flags)
2775{
2776        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2777                                                dma_handle, flags);
2778
2779        /* Shift to the device's base physical address of host memory */
2780        if (kernel_addr)
2781                *dma_handle += HOST_PHYS_BASE;
2782
2783        return kernel_addr;
2784}
2785
2786static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2787                                        void *cpu_addr, dma_addr_t dma_handle)
2788{
2789        /* Cancel the device's base physical address of host memory */
2790        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2791
2792        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2793}
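
/*
 * The two helpers above are a matched pair: every dma_addr_t returned by
 * goya_dma_alloc_coherent() carries the HOST_PHYS_BASE offset, so it must
 * be released through goya_dma_free_coherent() rather than through
 * dma_free_coherent() directly. A minimal usage sketch:
 *
 *      dma_addr_t dma_addr;
 *      void *va = goya_dma_alloc_coherent(hdev, size, &dma_addr,
 *                                              GFP_KERNEL);
 *
 *      if (va) {
 *              ... program the device with dma_addr ...
 *              goya_dma_free_coherent(hdev, size, va, dma_addr);
 *      }
 */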
2794
2795void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2796                                dma_addr_t *dma_handle, u16 *queue_len)
2797{
2798        void *base;
2799        u32 offset;
2800
2801        *dma_handle = hdev->asic_prop.sram_base_address;
2802
2803        base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2804
2805        switch (queue_id) {
2806        case GOYA_QUEUE_ID_MME:
2807                offset = MME_QMAN_BASE_OFFSET;
2808                *queue_len = MME_QMAN_LENGTH;
2809                break;
2810        case GOYA_QUEUE_ID_TPC0:
2811                offset = TPC0_QMAN_BASE_OFFSET;
2812                *queue_len = TPC_QMAN_LENGTH;
2813                break;
2814        case GOYA_QUEUE_ID_TPC1:
2815                offset = TPC1_QMAN_BASE_OFFSET;
2816                *queue_len = TPC_QMAN_LENGTH;
2817                break;
2818        case GOYA_QUEUE_ID_TPC2:
2819                offset = TPC2_QMAN_BASE_OFFSET;
2820                *queue_len = TPC_QMAN_LENGTH;
2821                break;
2822        case GOYA_QUEUE_ID_TPC3:
2823                offset = TPC3_QMAN_BASE_OFFSET;
2824                *queue_len = TPC_QMAN_LENGTH;
2825                break;
2826        case GOYA_QUEUE_ID_TPC4:
2827                offset = TPC4_QMAN_BASE_OFFSET;
2828                *queue_len = TPC_QMAN_LENGTH;
2829                break;
2830        case GOYA_QUEUE_ID_TPC5:
2831                offset = TPC5_QMAN_BASE_OFFSET;
2832                *queue_len = TPC_QMAN_LENGTH;
2833                break;
2834        case GOYA_QUEUE_ID_TPC6:
2835                offset = TPC6_QMAN_BASE_OFFSET;
2836                *queue_len = TPC_QMAN_LENGTH;
2837                break;
2838        case GOYA_QUEUE_ID_TPC7:
2839                offset = TPC7_QMAN_BASE_OFFSET;
2840                *queue_len = TPC_QMAN_LENGTH;
2841                break;
2842        default:
2843                dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2844                return NULL;
2845        }
2846
2847        base += offset;
2848        *dma_handle += offset;
2849
2850        return base;
2851}
2852
2853static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2854{
2855        struct packet_msg_prot *fence_pkt;
2856        u32 *fence_ptr;
2857        dma_addr_t fence_dma_addr;
2858        struct hl_cb *cb;
2859        u32 tmp, timeout;
2860        int rc;
2861
2862        if (hdev->pldm)
2863                timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2864        else
2865                timeout = HL_DEVICE_TIMEOUT_USEC;
2866
2867        if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
2868                dev_err_ratelimited(hdev->dev,
2869                        "Can't send driver job on QMAN0 because the device is not idle\n");
2870                return -EBUSY;
2871        }
2872
2873        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2874                                                        &fence_dma_addr);
2875        if (!fence_ptr) {
2876                dev_err(hdev->dev,
2877                        "Failed to allocate fence memory for QMAN0\n");
2878                return -ENOMEM;
2879        }
2880
2881        goya_qman0_set_security(hdev, true);
2882
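        /* The tail of the patched CB is reserved for a MSG_PROT fence
         * packet. Fill it in so the QMAN writes the fence value to host
         * memory when the job completes; we poll for that write below.
         */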
2883        cb = job->patched_cb;
2884
2885        fence_pkt = cb->kernel_address +
2886                        job->job_cb_size - sizeof(struct packet_msg_prot);
2887
2888        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2889                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
2890                        (1 << GOYA_PKT_CTL_MB_SHIFT);
2891        fence_pkt->ctl = cpu_to_le32(tmp);
2892        fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2893        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2894
2895        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2896                                        job->job_cb_size, cb->bus_address);
2897        if (rc) {
2898                dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2899                goto free_fence_ptr;
2900        }
2901
2902        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2903                                (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2904                                timeout, true);
2905
2906        hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2907
2908        if (rc == -ETIMEDOUT) {
2909                dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2910                goto free_fence_ptr;
2911        }
2912
2913free_fence_ptr:
2914        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2915                                        fence_dma_addr);
2916
2917        goya_qman0_set_security(hdev, false);
2918
2919        return rc;
2920}
2921
2922int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2923                                u32 timeout, long *result)
2924{
2925        struct goya_device *goya = hdev->asic_specific;
2926
2927        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2928                if (result)
2929                        *result = 0;
2930                return 0;
2931        }
2932
2933        if (!timeout)
2934                timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
2935
2936        return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2937                                        timeout, result);
2938}
2939
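/*
 * goya_test_queue - Sanity-check a single external H/W queue.
 *
 * Sends one MSG_PROT packet that writes a known fence value to a host
 * buffer and polls that buffer; if the value never arrives, the queue is
 * considered broken.
 */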
2940int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2941{
2942        struct packet_msg_prot *fence_pkt;
2943        dma_addr_t pkt_dma_addr;
2944        u32 fence_val, tmp;
2945        dma_addr_t fence_dma_addr;
2946        u32 *fence_ptr;
2947        int rc;
2948
2949        fence_val = GOYA_QMAN0_FENCE_VAL;
2950
2951        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2952                                                        &fence_dma_addr);
2953        if (!fence_ptr) {
2954                dev_err(hdev->dev,
2955                        "Failed to allocate memory for H/W queue %d testing\n",
2956                        hw_queue_id);
2957                return -ENOMEM;
2958        }
2959
2960        *fence_ptr = 0;
2961
2962        fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
2963                                        sizeof(struct packet_msg_prot),
2964                                        GFP_KERNEL, &pkt_dma_addr);
2965        if (!fence_pkt) {
2966                dev_err(hdev->dev,
2967                        "Failed to allocate packet for H/W queue %d testing\n",
2968                        hw_queue_id);
2969                rc = -ENOMEM;
2970                goto free_fence_ptr;
2971        }
2972
2973        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2974                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
2975                        (1 << GOYA_PKT_CTL_MB_SHIFT);
2976        fence_pkt->ctl = cpu_to_le32(tmp);
2977        fence_pkt->value = cpu_to_le32(fence_val);
2978        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2979
2980        rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
2981                                        sizeof(struct packet_msg_prot),
2982                                        pkt_dma_addr);
2983        if (rc) {
2984                dev_err(hdev->dev,
2985                        "Failed to send fence packet to H/W queue %d\n",
2986                        hw_queue_id);
2987                goto free_pkt;
2988        }
2989
2990        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
2991                                        1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
2992
2993        hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
2994
2995        if (rc == -ETIMEDOUT) {
2996                dev_err(hdev->dev,
2997                        "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
2998                        hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
2999                rc = -EIO;
3000        }
3001
3002free_pkt:
3003        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3004                                        pkt_dma_addr);
3005free_fence_ptr:
3006        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3007                                        fence_dma_addr);
3008        return rc;
3009}
3010
3011int goya_test_cpu_queue(struct hl_device *hdev)
3012{
3013        struct goya_device *goya = hdev->asic_specific;
3014
3015        /*
3016         * Check the capability here because goya_send_cpu_message() doesn't
3017         * update the result value when the CPU queue capability is missing
3018         */
3019        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3020                return 0;
3021
3022        return hl_fw_test_cpu_queue(hdev);
3023}
3024
3025int goya_test_queues(struct hl_device *hdev)
3026{
3027        int i, rc, ret_val = 0;
3028
3029        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3030                rc = goya_test_queue(hdev, i);
3031                if (rc)
3032                        ret_val = -EINVAL;
3033        }
3034
3035        return ret_val;
3036}
3037
3038static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3039                                        gfp_t mem_flags, dma_addr_t *dma_handle)
3040{
3041        void *kernel_addr;
3042
3043        if (size > GOYA_DMA_POOL_BLK_SIZE)
3044                return NULL;
3045
3046        kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3047
3048        /* Shift to the device's base physical address of host memory */
3049        if (kernel_addr)
3050                *dma_handle += HOST_PHYS_BASE;
3051
3052        return kernel_addr;
3053}
3054
3055static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3056                                dma_addr_t dma_addr)
3057{
3058        /* Cancel the device's base physical address of host memory */
3059        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3060
3061        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3062}
3063
3064void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3065                                        dma_addr_t *dma_handle)
3066{
3067        void *vaddr;
3068
3069        vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3070        *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3071                        VA_CPU_ACCESSIBLE_MEM_ADDR;
3072
3073        return vaddr;
3074}
3075
3076void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3077                                        void *vaddr)
3078{
3079        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3080}
3081
3082static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3083                                int nents, enum dma_data_direction dir)
3084{
3085        struct scatterlist *sg;
3086        int i;
3087
3088        if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3089                return -ENOMEM;
3090
3091        /* Shift to the device's base physical address of host memory */
3092        for_each_sg(sgl, sg, nents, i)
3093                sg->dma_address += HOST_PHYS_BASE;
3094
3095        return 0;
3096}
3097
3098static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3099                                int nents, enum dma_data_direction dir)
3100{
3101        struct scatterlist *sg;
3102        int i;
3103
3104        /* Cancel the device's base physical address of host memory */
3105        for_each_sg(sgl, sg, nents, i)
3106                sg->dma_address -= HOST_PHYS_BASE;
3107
3108        dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3109}
3110
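/*
 * Compute the size the patched CB will need for one DMA transaction:
 * physically contiguous SG entries are merged as long as the combined
 * length doesn't exceed DMA_MAX_TRANSFER_SIZE, and each resulting chunk
 * costs one LIN_DMA packet.
 */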
3111u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3112{
3113        struct scatterlist *sg, *sg_next_iter;
3114        u32 count, dma_desc_cnt;
3115        u64 len, len_next;
3116        dma_addr_t addr, addr_next;
3117
3118        dma_desc_cnt = 0;
3119
3120        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3121
3122                len = sg_dma_len(sg);
3123                addr = sg_dma_address(sg);
3124
3125                if (len == 0)
3126                        break;
3127
3128                while ((count + 1) < sgt->nents) {
3129                        sg_next_iter = sg_next(sg);
3130                        len_next = sg_dma_len(sg_next_iter);
3131                        addr_next = sg_dma_address(sg_next_iter);
3132
3133                        if (len_next == 0)
3134                                break;
3135
3136                        if ((addr + len == addr_next) &&
3137                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3138                                len += len_next;
3139                                count++;
3140                                sg = sg_next_iter;
3141                        } else {
3142                                break;
3143                        }
3144                }
3145
3146                dma_desc_cnt++;
3147        }
3148
3149        return dma_desc_cnt * sizeof(struct packet_lin_dma);
3150}
3151
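/*
 * Pin the user's host buffer for this CS (unless an earlier packet in the
 * job already pinned it), DMA-map its SG table and account for the
 * LIN_DMA packets the patched CB will need to cover it.
 */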
3152static int goya_pin_memory_before_cs(struct hl_device *hdev,
3153                                struct hl_cs_parser *parser,
3154                                struct packet_lin_dma *user_dma_pkt,
3155                                u64 addr, enum dma_data_direction dir)
3156{
3157        struct hl_userptr *userptr;
3158        int rc;
3159
3160        if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3161                        parser->job_userptr_list, &userptr))
3162                goto already_pinned;
3163
3164        userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3165        if (!userptr)
3166                return -ENOMEM;
3167
3168        rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3169                                userptr);
3170        if (rc)
3171                goto free_userptr;
3172
3173        list_add_tail(&userptr->job_node, parser->job_userptr_list);
3174
3175        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3176                                        userptr->sgt->nents, dir);
3177        if (rc) {
3178                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3179                goto unpin_memory;
3180        }
3181
3182        userptr->dma_mapped = true;
3183        userptr->dir = dir;
3184
3185already_pinned:
3186        parser->patched_cb_size +=
3187                        goya_get_dma_desc_list_size(hdev, userptr->sgt);
3188
3189        return 0;
3190
3191unpin_memory:
3192        hl_unpin_host_memory(hdev, userptr);
3193free_userptr:
3194        kfree(userptr);
3195        return rc;
3196}
3197
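/*
 * Validate a LIN_DMA packet that has host memory on one side: the device
 * side must fall inside the user SRAM/DRAM range, and the host side is
 * pinned unless the transaction is a memset (which carries no real host
 * address).
 */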
3198static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3199                                struct hl_cs_parser *parser,
3200                                struct packet_lin_dma *user_dma_pkt)
3201{
3202        u64 device_memory_addr, addr;
3203        enum dma_data_direction dir;
3204        enum goya_dma_direction user_dir;
3205        bool sram_addr = true;
3206        bool skip_host_mem_pin = false;
3207        bool user_memset;
3208        u32 ctl;
3209        int rc = 0;
3210
3211        ctl = le32_to_cpu(user_dma_pkt->ctl);
3212
3213        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3214                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3215
3216        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3217                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3218
3219        switch (user_dir) {
3220        case DMA_HOST_TO_DRAM:
3221                dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3222                dir = DMA_TO_DEVICE;
3223                sram_addr = false;
3224                addr = le64_to_cpu(user_dma_pkt->src_addr);
3225                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3226                if (user_memset)
3227                        skip_host_mem_pin = true;
3228                break;
3229
3230        case DMA_DRAM_TO_HOST:
3231                dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3232                dir = DMA_FROM_DEVICE;
3233                sram_addr = false;
3234                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3235                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3236                break;
3237
3238        case DMA_HOST_TO_SRAM:
3239                dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3240                dir = DMA_TO_DEVICE;
3241                addr = le64_to_cpu(user_dma_pkt->src_addr);
3242                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3243                if (user_memset)
3244                        skip_host_mem_pin = true;
3245                break;
3246
3247        case DMA_SRAM_TO_HOST:
3248                dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3249                dir = DMA_FROM_DEVICE;
3250                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3251                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3252                break;
3253        default:
3254                dev_err(hdev->dev, "DMA direction is undefined\n");
3255                return -EFAULT;
3256        }
3257
3258        if (sram_addr) {
3259                if (!hl_mem_area_inside_range(device_memory_addr,
3260                                le32_to_cpu(user_dma_pkt->tsize),
3261                                hdev->asic_prop.sram_user_base_address,
3262                                hdev->asic_prop.sram_end_address)) {
3263
3264                        dev_err(hdev->dev,
3265                                "SRAM address 0x%llx + 0x%x is invalid\n",
3266                                device_memory_addr,
3267                                le32_to_cpu(user_dma_pkt->tsize));
3268                        return -EFAULT;
3269                }
3270        } else {
3271                if (!hl_mem_area_inside_range(device_memory_addr,
3272                                le32_to_cpu(user_dma_pkt->tsize),
3273                                hdev->asic_prop.dram_user_base_address,
3274                                hdev->asic_prop.dram_end_address)) {
3275
3276                        dev_err(hdev->dev,
3277                                "DRAM address 0x%llx + 0x%x is invalid\n",
3278                                device_memory_addr,
3279                                le32_to_cpu(user_dma_pkt->tsize));
3280                        return -EFAULT;
3281                }
3282        }
3283
3284        if (skip_host_mem_pin)
3285                parser->patched_cb_size += sizeof(*user_dma_pkt);
3286        else {
3287                if ((dir == DMA_TO_DEVICE) &&
3288                                (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3289                        dev_err(hdev->dev,
3290                                "Can't DMA from host on queue other than 1\n");
3291                        return -EFAULT;
3292                }
3293
3294                rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3295                                                addr, dir);
3296        }
3297
3298        return rc;
3299}
3300
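/*
 * Validate a DRAM <-> SRAM LIN_DMA packet. Both addresses must fall
 * inside the user-accessible ranges; no host memory is involved, so the
 * patched CB grows by one packet only.
 */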
3301static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3302                                struct hl_cs_parser *parser,
3303                                struct packet_lin_dma *user_dma_pkt)
3304{
3305        u64 sram_memory_addr, dram_memory_addr;
3306        enum goya_dma_direction user_dir;
3307        u32 ctl;
3308
3309        ctl = le32_to_cpu(user_dma_pkt->ctl);
3310        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3311                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3312
3313        if (user_dir == DMA_DRAM_TO_SRAM) {
3314                dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3315                dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3316                sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3317        } else {
3318                dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3319                sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3320                dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3321        }
3322
3323        if (!hl_mem_area_inside_range(sram_memory_addr,
3324                                le32_to_cpu(user_dma_pkt->tsize),
3325                                hdev->asic_prop.sram_user_base_address,
3326                                hdev->asic_prop.sram_end_address)) {
3327                dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3328                        sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3329                return -EFAULT;
3330        }
3331
3332        if (!hl_mem_area_inside_range(dram_memory_addr,
3333                                le32_to_cpu(user_dma_pkt->tsize),
3334                                hdev->asic_prop.dram_user_base_address,
3335                                hdev->asic_prop.dram_end_address)) {
3336                dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3337                        dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3338                return -EFAULT;
3339        }
3340
3341        parser->patched_cb_size += sizeof(*user_dma_pkt);
3342
3343        return 0;
3344}
3345
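/*
 * Validate a LIN_DMA packet when the MMU is disabled, dispatching to the
 * host/no-host helpers according to the user-supplied DMA direction.
 */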
3346static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3347                                struct hl_cs_parser *parser,
3348                                struct packet_lin_dma *user_dma_pkt)
3349{
3350        enum goya_dma_direction user_dir;
3351        u32 ctl;
3352        int rc;
3353
3354        dev_dbg(hdev->dev, "DMA packet details:\n");
3355        dev_dbg(hdev->dev, "source == 0x%llx\n",
3356                le64_to_cpu(user_dma_pkt->src_addr));
3357        dev_dbg(hdev->dev, "destination == 0x%llx\n",
3358                le64_to_cpu(user_dma_pkt->dst_addr));
3359        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3360
3361        ctl = le32_to_cpu(user_dma_pkt->ctl);
3362        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3363                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3364
3365        /*
3366         * Special handling for DMA with size 0. The H/W has a bug where
3367         * this can cause the QMAN DMA to get stuck, so block it here.
3368         */
3369        if (user_dma_pkt->tsize == 0) {
3370                dev_err(hdev->dev,
3371                        "Got DMA with size 0, might reset the device\n");
3372                return -EINVAL;
3373        }
3374
3375        if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3376                rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3377        else
3378                rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3379
3380        return rc;
3381}
3382
3383static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3384                                struct hl_cs_parser *parser,
3385                                struct packet_lin_dma *user_dma_pkt)
3386{
3387        dev_dbg(hdev->dev, "DMA packet details:\n");
3388        dev_dbg(hdev->dev, "source == 0x%llx\n",
3389                le64_to_cpu(user_dma_pkt->src_addr));
3390        dev_dbg(hdev->dev, "destination == 0x%llx\n",
3391                le64_to_cpu(user_dma_pkt->dst_addr));
3392        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3393
3394        /*
3395         * WA for HW-23.
3396         * We can't allow the user to read from Host using QMANs other than 1.
3397         * PMMU and HPMMU addresses are equal, check only one of them.
3398         */
3399        if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3400                hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3401                                le32_to_cpu(user_dma_pkt->tsize),
3402                                hdev->asic_prop.pmmu.start_addr,
3403                                hdev->asic_prop.pmmu.end_addr)) {
3404                dev_err(hdev->dev,
3405                        "Can't DMA from host on queue other than 1\n");
3406                return -EFAULT;
3407        }
3408
3409        if (user_dma_pkt->tsize == 0) {
3410                dev_err(hdev->dev,
3411                        "Got DMA with size 0, might reset the device\n");
3412                return -EINVAL;
3413        }
3414
3415        parser->patched_cb_size += sizeof(*user_dma_pkt);
3416
3417        return 0;
3418}
3419
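/*
 * Users may only WREG32 the DMA channel 0 WR_COMP_ADDR_LO register.
 * Without MMU (i.e. when the DMA channels are secured), the written
 * value must also point into the sync manager SOB range.
 */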
3420static int goya_validate_wreg32(struct hl_device *hdev,
3421                                struct hl_cs_parser *parser,
3422                                struct packet_wreg32 *wreg_pkt)
3423{
3424        struct goya_device *goya = hdev->asic_specific;
3425        u32 sob_start_addr, sob_end_addr;
3426        u16 reg_offset;
3427
3428        reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3429                        GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3430
3431        dev_dbg(hdev->dev, "WREG32 packet details:\n");
3432        dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3433        dev_dbg(hdev->dev, "value      == 0x%x\n",
3434                le32_to_cpu(wreg_pkt->value));
3435
3436        if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3437                dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3438                        reg_offset);
3439                return -EPERM;
3440        }
3441
3442        /*
3443         * With MMU, DMA channels are not secured, so it doesn't matter where
3444         * the WR COMP will be written to because it will go out with
3445         * non-secured property
3446         */
3447        if (goya->hw_cap_initialized & HW_CAP_MMU)
3448                return 0;
3449
3450        sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3451        sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3452
3453        if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3454                        (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3455
3456                dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3457                        le32_to_cpu(wreg_pkt->value));
3458                return -EPERM;
3459        }
3460
3461        return 0;
3462}
3463
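/*
 * Walk the user CB packet by packet: reject privileged opcodes
 * (WREG_BULK, MSG_PROT, CP_DMA, STOP), validate WREG32 and LIN_DMA
 * packets, and accumulate the size the patched CB will need.
 */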
3464static int goya_validate_cb(struct hl_device *hdev,
3465                        struct hl_cs_parser *parser, bool is_mmu)
3466{
3467        u32 cb_parsed_length = 0;
3468        int rc = 0;
3469
3470        parser->patched_cb_size = 0;
3471
3472        /* user_cb_size is guaranteed to be > 0 so the loop always executes */
3473        while (cb_parsed_length < parser->user_cb_size) {
3474                enum packet_id pkt_id;
3475                u16 pkt_size;
3476                struct goya_packet *user_pkt;
3477
3478                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3479
3480                pkt_id = (enum packet_id) (
3481                                (le64_to_cpu(user_pkt->header) &
3482                                PACKET_HEADER_PACKET_ID_MASK) >>
3483                                        PACKET_HEADER_PACKET_ID_SHIFT);
3484
3485                if (!validate_packet_id(pkt_id)) {
3486                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3487                        rc = -EINVAL;
3488                        break;
3489                }
3490
3491                pkt_size = goya_packet_sizes[pkt_id];
3492                cb_parsed_length += pkt_size;
3493                if (cb_parsed_length > parser->user_cb_size) {
3494                        dev_err(hdev->dev,
3495                                "packet 0x%x is out of CB boundary\n", pkt_id);
3496                        rc = -EINVAL;
3497                        break;
3498                }
3499
3500                switch (pkt_id) {
3501                case PACKET_WREG_32:
3502                        /*
3503                         * Although it is validated after copy in patch_cb(),
3504                         * need to validate here as well because patch_cb() is
3505                         * not called in MMU path while this function is called
3506                         */
3507                        rc = goya_validate_wreg32(hdev,
3508                                parser, (struct packet_wreg32 *) user_pkt);
3509                        parser->patched_cb_size += pkt_size;
3510                        break;
3511
3512                case PACKET_WREG_BULK:
3513                        dev_err(hdev->dev,
3514                                "User not allowed to use WREG_BULK\n");
3515                        rc = -EPERM;
3516                        break;
3517
3518                case PACKET_MSG_PROT:
3519                        dev_err(hdev->dev,
3520                                "User not allowed to use MSG_PROT\n");
3521                        rc = -EPERM;
3522                        break;
3523
3524                case PACKET_CP_DMA:
3525                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3526                        rc = -EPERM;
3527                        break;
3528
3529                case PACKET_STOP:
3530                        dev_err(hdev->dev, "User not allowed to use STOP\n");
3531                        rc = -EPERM;
3532                        break;
3533
3534                case PACKET_LIN_DMA:
3535                        if (is_mmu)
3536                                rc = goya_validate_dma_pkt_mmu(hdev, parser,
3537                                        (struct packet_lin_dma *) user_pkt);
3538                        else
3539                                rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3540                                        (struct packet_lin_dma *) user_pkt);
3541                        break;
3542
3543                case PACKET_MSG_LONG:
3544                case PACKET_MSG_SHORT:
3545                case PACKET_FENCE:
3546                case PACKET_NOP:
3547                        parser->patched_cb_size += pkt_size;
3548                        break;
3549
3550                default:
3551                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3552                                pkt_id);
3553                        rc = -EINVAL;
3554                        break;
3555                }
3556
3557                if (rc)
3558                        break;
3559        }
3560
3561        /*
3562         * The new CB should have space at the end for two MSG_PROT packets:
3563         * 1. A packet that will act as a completion packet
3564         * 2. A packet that will generate an MSI-X interrupt
3565         */
3566        parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3567
3568        return rc;
3569}
3570
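/*
 * Expand a single user LIN_DMA packet into one packet per merged SG
 * chunk of the pinned host buffer. EB is kept only on the first
 * descriptor and the user's RDCOMP/WRCOMP bits are restored only on the
 * last one, so completion semantics match the original packet.
 */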
3571static int goya_patch_dma_packet(struct hl_device *hdev,
3572                                struct hl_cs_parser *parser,
3573                                struct packet_lin_dma *user_dma_pkt,
3574                                struct packet_lin_dma *new_dma_pkt,
3575                                u32 *new_dma_pkt_size)
3576{
3577        struct hl_userptr *userptr;
3578        struct scatterlist *sg, *sg_next_iter;
3579        u32 count, dma_desc_cnt;
3580        u64 len, len_next;
3581        dma_addr_t dma_addr, dma_addr_next;
3582        enum goya_dma_direction user_dir;
3583        u64 device_memory_addr, addr;
3584        enum dma_data_direction dir;
3585        struct sg_table *sgt;
3586        bool skip_host_mem_pin = false;
3587        bool user_memset;
3588        u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3589
3590        ctl = le32_to_cpu(user_dma_pkt->ctl);
3591
3592        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3593                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3594
3595        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3596                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3597
3598        if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3599                        (user_dma_pkt->tsize == 0)) {
3600                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3601                *new_dma_pkt_size = sizeof(*new_dma_pkt);
3602                return 0;
3603        }
3604
3605        if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3606                addr = le64_to_cpu(user_dma_pkt->src_addr);
3607                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3608                dir = DMA_TO_DEVICE;
3609                if (user_memset)
3610                        skip_host_mem_pin = true;
3611        } else {
3612                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3613                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3614                dir = DMA_FROM_DEVICE;
3615        }
3616
3617        if ((!skip_host_mem_pin) &&
3618                (hl_userptr_is_pinned(hdev, addr,
3619                        le32_to_cpu(user_dma_pkt->tsize),
3620                        parser->job_userptr_list, &userptr) == false)) {
3621                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3622                                addr, le32_to_cpu(user_dma_pkt->tsize));
3623                return -EFAULT;
3624        }
3625
3626        if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3627                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3628                *new_dma_pkt_size = sizeof(*user_dma_pkt);
3629                return 0;
3630        }
3631
3632        user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3633
3634        user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3635
3636        sgt = userptr->sgt;
3637        dma_desc_cnt = 0;
3638
3639        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640                len = sg_dma_len(sg);
3641                dma_addr = sg_dma_address(sg);
3642
3643                if (len == 0)
3644                        break;
3645
3646                while ((count + 1) < sgt->nents) {
3647                        sg_next_iter = sg_next(sg);
3648                        len_next = sg_dma_len(sg_next_iter);
3649                        dma_addr_next = sg_dma_address(sg_next_iter);
3650
3651                        if (len_next == 0)
3652                                break;
3653
3654                        if ((dma_addr + len == dma_addr_next) &&
3655                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3656                                len += len_next;
3657                                count++;
3658                                sg = sg_next_iter;
3659                        } else {
3660                                break;
3661                        }
3662                }
3663
3664                ctl = le32_to_cpu(user_dma_pkt->ctl);
3665                if (likely(dma_desc_cnt))
3666                        ctl &= ~GOYA_PKT_CTL_EB_MASK;
3667                ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3668                                GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3669                new_dma_pkt->ctl = cpu_to_le32(ctl);
3670                new_dma_pkt->tsize = cpu_to_le32((u32) len);
3671
3672                if (dir == DMA_TO_DEVICE) {
3673                        new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3674                        new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3675                } else {
3676                        new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3677                        new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3678                }
3679
3680                if (!user_memset)
3681                        device_memory_addr += len;
3682                dma_desc_cnt++;
3683                new_dma_pkt++;
3684        }
3685
3686        if (!dma_desc_cnt) {
3687                dev_err(hdev->dev,
3688                        "Got 0 SG entries when patching DMA packet\n");
3689                return -EFAULT;
3690        }
3691
3692        /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3693        new_dma_pkt--;
3694        new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3695
3696        *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3697
3698        return 0;
3699}
3700
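/*
 * Copy the user CB into the patched CB, expanding every LIN_DMA packet
 * via goya_patch_dma_packet() and re-validating WREG32 packets after the
 * copy. Called only in the non-MMU path.
 */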
3701static int goya_patch_cb(struct hl_device *hdev,
3702                                struct hl_cs_parser *parser)
3703{
3704        u32 cb_parsed_length = 0;
3705        u32 cb_patched_cur_length = 0;
3706        int rc = 0;
3707
3708        /* user_cb_size is guaranteed to be > 0 so the loop always executes */
3709        while (cb_parsed_length < parser->user_cb_size) {
3710                enum packet_id pkt_id;
3711                u16 pkt_size;
3712                u32 new_pkt_size = 0;
3713                struct goya_packet *user_pkt, *kernel_pkt;
3714
3715                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3716                kernel_pkt = parser->patched_cb->kernel_address +
3717                                        cb_patched_cur_length;
3718
3719                pkt_id = (enum packet_id) (
3720                                (le64_to_cpu(user_pkt->header) &
3721                                PACKET_HEADER_PACKET_ID_MASK) >>
3722                                        PACKET_HEADER_PACKET_ID_SHIFT);
3723
3724                if (!validate_packet_id(pkt_id)) {
3725                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3726                        rc = -EINVAL;
3727                        break;
3728                }
3729
3730                pkt_size = goya_packet_sizes[pkt_id];
3731                cb_parsed_length += pkt_size;
3732                if (cb_parsed_length > parser->user_cb_size) {
3733                        dev_err(hdev->dev,
3734                                "packet 0x%x is out of CB boundary\n", pkt_id);
3735                        rc = -EINVAL;
3736                        break;
3737                }
3738
3739                switch (pkt_id) {
3740                case PACKET_LIN_DMA:
3741                        rc = goya_patch_dma_packet(hdev, parser,
3742                                        (struct packet_lin_dma *) user_pkt,
3743                                        (struct packet_lin_dma *) kernel_pkt,
3744                                        &new_pkt_size);
3745                        cb_patched_cur_length += new_pkt_size;
3746                        break;
3747
3748                case PACKET_WREG_32:
3749                        memcpy(kernel_pkt, user_pkt, pkt_size);
3750                        cb_patched_cur_length += pkt_size;
3751                        rc = goya_validate_wreg32(hdev, parser,
3752                                        (struct packet_wreg32 *) kernel_pkt);
3753                        break;
3754
3755                case PACKET_WREG_BULK:
3756                        dev_err(hdev->dev,
3757                                "User not allowed to use WREG_BULK\n");
3758                        rc = -EPERM;
3759                        break;
3760
3761                case PACKET_MSG_PROT:
3762                        dev_err(hdev->dev,
3763                                "User not allowed to use MSG_PROT\n");
3764                        rc = -EPERM;
3765                        break;
3766
3767                case PACKET_CP_DMA:
3768                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3769                        rc = -EPERM;
3770                        break;
3771
3772                case PACKET_STOP:
3773                        dev_err(hdev->dev, "User not allowed to use STOP\n");
3774                        rc = -EPERM;
3775                        break;
3776
3777                case PACKET_MSG_LONG:
3778                case PACKET_MSG_SHORT:
3779                case PACKET_FENCE:
3780                case PACKET_NOP:
3781                        memcpy(kernel_pkt, user_pkt, pkt_size);
3782                        cb_patched_cur_length += pkt_size;
3783                        break;
3784
3785                default:
3786                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3787                                pkt_id);
3788                        rc = -EINVAL;
3789                        break;
3790                }
3791
3792                if (rc)
3793                        break;
3794        }
3795
3796        return rc;
3797}
3798
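/*
 * With MMU enabled the user CB is copied verbatim into the patched CB
 * (host addresses are virtual and need no patching) and then validated.
 * The size computed during validation must match the size reserved
 * up-front, since nothing is expanded.
 */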
3799static int goya_parse_cb_mmu(struct hl_device *hdev,
3800                struct hl_cs_parser *parser)
3801{
3802        u64 patched_cb_handle;
3803        u32 patched_cb_size;
3804        struct hl_cb *user_cb;
3805        int rc;
3806
3807        /*
3808         * The new CB should have space at the end for two MSG_PROT packets:
3809         * 1. A packet that will act as a completion packet
3810         * 2. A packet that will generate an MSI-X interrupt
3811         */
3812        parser->patched_cb_size = parser->user_cb_size +
3813                        sizeof(struct packet_msg_prot) * 2;
3814
3815        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3816                                parser->patched_cb_size, false, false,
3817                                &patched_cb_handle);
3818
3819        if (rc) {
3820                dev_err(hdev->dev,
3821                        "Failed to allocate patched CB for DMA CS %d\n",
3822                        rc);
3823                return rc;
3824        }
3825
3826        patched_cb_handle >>= PAGE_SHIFT;
3827        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3828                                (u32) patched_cb_handle);
3829        /* hl_cb_get should never fail here so use kernel WARN */
3830        WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3831                        (u32) patched_cb_handle);
3832        if (!parser->patched_cb) {
3833                rc = -EFAULT;
3834                goto out;
3835        }
3836
3837        /*
3838         * The check that parser->user_cb_size <= parser->user_cb->size was done
3839         * in validate_queue_index().
3840         */
3841        memcpy(parser->patched_cb->kernel_address,
3842                parser->user_cb->kernel_address,
3843                parser->user_cb_size);
3844
3845        patched_cb_size = parser->patched_cb_size;
3846
3847        /* validate patched CB instead of user CB */
3848        user_cb = parser->user_cb;
3849        parser->user_cb = parser->patched_cb;
3850        rc = goya_validate_cb(hdev, parser, true);
3851        parser->user_cb = user_cb;
3852
3853        if (rc) {
3854                hl_cb_put(parser->patched_cb);
3855                goto out;
3856        }
3857
3858        if (patched_cb_size != parser->patched_cb_size) {
3859                dev_err(hdev->dev, "user CB size mismatch\n");
3860                hl_cb_put(parser->patched_cb);
3861                rc = -EINVAL;
3862                goto out;
3863        }
3864
3865out:
3866         * Always call cb destroy here because we still hold one reference
3867         * to it from the earlier cb_get call. After the job is completed,
3868         * to it by calling cb_get earlier. After the job will be completed,
3869         * cb_put will release it, but here we want to remove it from the
3870         * idr
3871         */
3872        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3873                                        patched_cb_handle << PAGE_SHIFT);
3874
3875        return rc;
3876}
3877
3878static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3879                                struct hl_cs_parser *parser)
3880{
3881        u64 patched_cb_handle;
3882        int rc;
3883
3884        rc = goya_validate_cb(hdev, parser, false);
3885
3886        if (rc)
3887                goto free_userptr;
3888
3889        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3890                                parser->patched_cb_size, false, false,
3891                                &patched_cb_handle);
3892        if (rc) {
3893                dev_err(hdev->dev,
3894                        "Failed to allocate patched CB for DMA CS %d\n", rc);
3895                goto free_userptr;
3896        }
3897
3898        patched_cb_handle >>= PAGE_SHIFT;
3899        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3900                                (u32) patched_cb_handle);
3901        /* hl_cb_get should never fail here so use kernel WARN */
3902        WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3903                        (u32) patched_cb_handle);
3904        if (!parser->patched_cb) {
3905                rc = -EFAULT;
3906                goto out;
3907        }
3908
3909        rc = goya_patch_cb(hdev, parser);
3910
3911        if (rc)
3912                hl_cb_put(parser->patched_cb);
3913
3914out:
3915        /*
3916         * Always call cb destroy here because we still hold one reference
3917         * to it from the earlier cb_get call. After the job is completed,
3918         * cb_put will release it, but here we want to remove it from the
3919         * idr
3920         */
3921        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3922                                patched_cb_handle << PAGE_SHIFT);
3923
3924free_userptr:
3925        if (rc)
3926                hl_userptr_delete_list(hdev, parser->job_userptr_list);
3927        return rc;
3928}
3929
3930static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3931                                        struct hl_cs_parser *parser)
3932{
3933        struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3934        struct goya_device *goya = hdev->asic_specific;
3935
3936        if (goya->hw_cap_initialized & HW_CAP_MMU)
3937                return 0;
3938
3939        /* For internal queue jobs, just check if CB address is valid */
3940        if (hl_mem_area_inside_range(
3941                        (u64) (uintptr_t) parser->user_cb,
3942                        parser->user_cb_size,
3943                        asic_prop->sram_user_base_address,
3944                        asic_prop->sram_end_address))
3945                return 0;
3946
3947        if (hl_mem_area_inside_range(
3948                        (u64) (uintptr_t) parser->user_cb,
3949                        parser->user_cb_size,
3950                        asic_prop->dram_user_base_address,
3951                        asic_prop->dram_end_address))
3952                return 0;
3953
3954        dev_err(hdev->dev,
3955                "Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
3956                parser->user_cb, parser->user_cb_size);
3957
3958        return -EFAULT;
3959}
3960
3961int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3962{
3963        struct goya_device *goya = hdev->asic_specific;
3964
3965        if (parser->queue_type == QUEUE_TYPE_INT)
3966                return goya_parse_cb_no_ext_queue(hdev, parser);
3967
3968        if (goya->hw_cap_initialized & HW_CAP_MMU)
3969                return goya_parse_cb_mmu(hdev, parser);
3970        else
3971                return goya_parse_cb_no_mmu(hdev, parser);
3972}
3973
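/*
 * Fill the two MSG_PROT packets the parser reserved at the end of the CB:
 * the first writes cq_val to the completion queue address and the second
 * rings the MSI-X doorbell.
 */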
3974void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
3975                                u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
3976                                bool eb)
3977{
3978        struct packet_msg_prot *cq_pkt;
3979        u32 tmp;
3980
3981        cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
3982
3983        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3984                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
3985                        (1 << GOYA_PKT_CTL_MB_SHIFT);
3986        cq_pkt->ctl = cpu_to_le32(tmp);
3987        cq_pkt->value = cpu_to_le32(cq_val);
3988        cq_pkt->addr = cpu_to_le64(cq_addr);
3989
3990        cq_pkt++;
3991
3992        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3993                        (1 << GOYA_PKT_CTL_MB_SHIFT);
3994        cq_pkt->ctl = cpu_to_le32(tmp);
3995        cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3996        cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3997}
3998
3999void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4000{
4001        WREG32(mmCPU_EQ_CI, val);
4002}
4003
4004void goya_restore_phase_topology(struct hl_device *hdev)
4005{
4006
4007}
4008
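/*
 * Zero all sync objects and monitor status registers. The registers are
 * 4 bytes apart, hence the stride; the final read flushes the posted
 * writes.
 */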
4009static void goya_clear_sm_regs(struct hl_device *hdev)
4010{
4011        int i, num_of_sob_in_longs, num_of_mon_in_longs;
4012
4013        num_of_sob_in_longs =
4014                ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4015
4016        num_of_mon_in_longs =
4017                ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4018
4019        for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4020                WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4021
4022        for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4023                WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4024
4025        /* Flush all WREG to prevent race */
4026        i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4027}
4028
4029/*
4030 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
4031 *                       address.
4032 *
4033 * @hdev:       pointer to hl_device structure
4034 * @addr:       device or host mapped address
4035 * @val:        returned value
4036 *
4037 * In case of DDR address that is not mapped into the default aperture that
4038 * the DDR bar exposes, the function will configure the iATU so that the DDR
4039 * bar will be positioned at a base address that allows reading from the
4040 * required address. Configuring the iATU during normal operation can
4041 * lead to undefined behavior and should therefore be done with extreme care
4042 *
4043 */
4044static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4045{
4046        struct asic_fixed_properties *prop = &hdev->asic_prop;
4047        u64 ddr_bar_addr;
4048        int rc = 0;
4049
4050        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4051                *val = RREG32(addr - CFG_BASE);
4052
4053        } else if ((addr >= SRAM_BASE_ADDR) &&
4054                        (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4055
4056                *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4057                                (addr - SRAM_BASE_ADDR));
4058
4059        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4060
4061                u64 bar_base_addr = DRAM_PHYS_BASE +
4062                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4063
4064                ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4065                if (ddr_bar_addr != U64_MAX) {
4066                        *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4067                                                (addr - bar_base_addr));
4068
4069                        ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4070                                                        ddr_bar_addr);
4071                }
4072                if (ddr_bar_addr == U64_MAX)
4073                        rc = -EIO;
4074
4075        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4076                *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4077
4078        } else {
4079                rc = -EFAULT;
4080        }
4081
4082        return rc;
4083}
4084
4085/*
4086 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4087 *                        address.
4088 *
4089 * @hdev:       pointer to hl_device structure
4090 * @addr:       device or host mapped address
4091 * @val:        value to write
4092 *
4093 * In case of DDR address that is not mapped into the default aperture that
4094 * the DDR bar exposes, the function will configure the iATU so that the DDR
4095 * bar will be positioned at a base address that allows writing to the
4096 * required address. Configuring the iATU during normal operation can
4097 * lead to undefined behavior and should therefore be done with extreme care
4098 *
4099 */
4100static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4101{
4102        struct asic_fixed_properties *prop = &hdev->asic_prop;
4103        u64 ddr_bar_addr;
4104        int rc = 0;
4105
4106        if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4107                WREG32(addr - CFG_BASE, val);
4108
4109        } else if ((addr >= SRAM_BASE_ADDR) &&
4110                        (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4111
4112                writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4113                                        (addr - SRAM_BASE_ADDR));
4114
4115        } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4116
4117                u64 bar_base_addr = DRAM_PHYS_BASE +
4118                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4119
4120                ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4121                if (ddr_bar_addr != U64_MAX) {
4122                        writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4123                                                (addr - bar_base_addr));
4124
4125                        ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4126                                                        ddr_bar_addr);
4127                }
4128                if (ddr_bar_addr == U64_MAX)
4129                        rc = -EIO;
4130
4131        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4132                *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4133
4134        } else {
4135                rc = -EFAULT;
4136        }
4137
4138        return rc;
4139}
4140
4141static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4142{
4143        struct asic_fixed_properties *prop = &hdev->asic_prop;
4144        u64 ddr_bar_addr;
4145        int rc = 0;
4146
4147        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4148                u32 val_l = RREG32(addr - CFG_BASE);
4149                u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4150
4151                *val = (((u64) val_h) << 32) | val_l;
4152
4153        } else if ((addr >= SRAM_BASE_ADDR) &&
4154                        (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4155
4156                *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4157                                (addr - SRAM_BASE_ADDR));
4158
4159        } else if (addr <=
4160                   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4161
4162                u64 bar_base_addr = DRAM_PHYS_BASE +
4163                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4164
4165                ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4166                if (ddr_bar_addr != U64_MAX) {
4167                        *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
4168                                                (addr - bar_base_addr));
4169
4170                        ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4171                                                        ddr_bar_addr);
4172                }
4173                if (ddr_bar_addr == U64_MAX)
4174                        rc = -EIO;
4175
4176        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4177                *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4178
4179        } else {
4180                rc = -EFAULT;
4181        }
4182
4183        return rc;
4184}
4185
4186static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4187{
4188        struct asic_fixed_properties *prop = &hdev->asic_prop;
4189        u64 ddr_bar_addr;
4190        int rc = 0;
4191
4192        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4193                WREG32(addr - CFG_BASE, lower_32_bits(val));
4194                WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
4195
4196        } else if ((addr >= SRAM_BASE_ADDR) &&
4197                        (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4198
4199                writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4200                                        (addr - SRAM_BASE_ADDR));
4201
4202        } else if (addr <=
4203                   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4204
4205                u64 bar_base_addr = DRAM_PHYS_BASE +
4206                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4207
4208                ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4209                if (ddr_bar_addr != U64_MAX) {
4210                        writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4211                                                (addr - bar_base_addr));
4212
4213                        ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4214                                                        ddr_bar_addr);
4215                }
4216                if (ddr_bar_addr == U64_MAX)
4217                        rc = -EIO;
4218
4219        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4220                *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4221
4222        } else {
4223                rc = -EFAULT;
4224        }
4225
4226        return rc;
4227}
4228
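/*
 * PTEs live in device DRAM behind the DDR BAR. Accesses go through the
 * window that is currently mapped (ddr_bar_cur_addr); while a hard reset
 * is pending, reads return U64_MAX and writes are dropped.
 */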
4229static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4230{
4231        struct goya_device *goya = hdev->asic_specific;
4232
4233        if (hdev->hard_reset_pending)
4234                return U64_MAX;
4235
4236        return readq(hdev->pcie_bar[DDR_BAR_ID] +
4237                        (addr - goya->ddr_bar_cur_addr));
4238}
4239
4240static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4241{
4242        struct goya_device *goya = hdev->asic_specific;
4243
4244        if (hdev->hard_reset_pending)
4245                return;
4246
4247        writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4248                        (addr - goya->ddr_bar_cur_addr));
4249}
4250
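/*
 * Return a printf-style template for the event description; "%d"
 * placeholders are filled in with the engine index by
 * goya_get_event_desc().
 */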
4251static const char *_goya_get_event_desc(u16 event_type)
4252{
4253        switch (event_type) {
4254        case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4255                return "PCIe_if";
4256        case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4257        case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4258        case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4259        case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4260        case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4261        case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4262        case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4263        case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4264                return "TPC%d_ecc";
4265        case GOYA_ASYNC_EVENT_ID_MME_ECC:
4266                return "MME_ecc";
4267        case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4268                return "MME_ecc_ext";
4269        case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4270                return "MMU_ecc";
4271        case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4272                return "DMA_macro";
4273        case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4274                return "DMA_ecc";
4275        case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4276                return "CPU_if_ecc";
4277        case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4278                return "PSOC_mem";
4279        case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4280                return "PSOC_coresight";
4281        case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4282                return "SRAM%d";
4283        case GOYA_ASYNC_EVENT_ID_GIC500:
4284                return "GIC500";
4285        case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4286                return "PLL%d";
4287        case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4288                return "AXI_ecc";
4289        case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4290                return "L2_ram_ecc";
4291        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4292                return "PSOC_gpio_05_sw_reset";
4293        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4294                return "PSOC_gpio_10_vrhot_icrit";
4295        case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4296                return "PCIe_dec";
4297        case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4298        case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4299        case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4300        case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4301        case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4302        case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4303        case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4304        case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4305                return "TPC%d_dec";
4306        case GOYA_ASYNC_EVENT_ID_MME_WACS:
4307                return "MME_wacs";
4308        case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4309                return "MME_wacsd";
4310        case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4311                return "CPU_axi_splitter";
4312        case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4313                return "PSOC_axi_dec";
4314        case GOYA_ASYNC_EVENT_ID_PSOC:
4315                return "PSOC";
4316        case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4317        case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4318        case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4319        case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4320        case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4321        case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4322        case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4323        case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4324                return "TPC%d_krn_err";
4325        case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4326                return "TPC%d_cq";
4327        case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4328                return "TPC%d_qm";
4329        case GOYA_ASYNC_EVENT_ID_MME_QM:
4330                return "MME_qm";
4331        case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4332                return "MME_cq";
4333        case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4334                return "DMA%d_qm";
4335        case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4336                return "DMA%d_ch";
4337        case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4338        case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4339        case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4340        case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4341        case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4342        case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4343        case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4344        case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4345                return "TPC%d_bmon_spmu";
4346        case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4347                return "DMA_bm_ch%d";
4348        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4349                return "POWER_ENV_S";
4350        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4351                return "POWER_ENV_E";
4352        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4353                return "THERMAL_ENV_S";
4354        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4355                return "THERMAL_ENV_E";
4356        default:
4357                return "N/A";
4358        }
4359}
4360
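/*
 * Editor's note: the "case A ... B:" labels above use the GCC case-range
 * extension (inclusive on both ends), which is common in kernel code but
 * is not standard C. A minimal, hedged illustration of the construct:
 */
static inline const char *goya_demo_case_range(int v)
{
        switch (v) {
        case 0 ... 9:                   /* matches 0 through 9 inclusive */
                return "single digit";
        default:
                return "other";
        }
}
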
4361static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4362{
4363        u8 index;
4364
4365        switch (event_type) {
4366        case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4367        case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4368        case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4369        case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4370        case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4371        case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4372        case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4373        case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4374                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4375                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4376                break;
4377        case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4378                index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4379                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4380                break;
4381        case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4382                index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4383                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4384                break;
4385        case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4386        case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4387        case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4388        case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4389        case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4390        case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4391        case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4392        case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4393                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4394                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4395                break;
4396        case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4397        case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4398        case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4399        case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4400        case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4401        case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4402        case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4403        case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4404                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4405                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4406                break;
4407        case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4408                index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4409                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4410                break;
4411        case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4412                index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4413                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4414                break;
4415        case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4416                index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4417                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4418                break;
4419        case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4420                index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4421                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4422                break;
4423        case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4424        case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4425        case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4426        case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4427        case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4428        case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4429        case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4430        case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4431                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4432                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4433                break;
4434        case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4435                index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4436                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4437                break;
4438        default:
4439                snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4440                break;
4441        }
4442}
4443
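/*
 * Editor's note: a hedged sketch of the two-stage scheme above.
 * _goya_get_event_desc() returns either a finished string ("MME_ecc") or a
 * printf template ("TPC%d_ecc"); the divisors (3 for ECC/DEC, 10 for
 * KRN_ERR/BMON_SPMU) assume a fixed ID stride between consecutive TPC
 * events in the GOYA_ASYNC_EVENT_ID enumeration. The helper below is
 * illustrative only, not part of the driver flow:
 */
static inline void goya_demo_event_desc(char *buf, size_t len,
                                u16 event_type, u16 base_id, u16 stride)
{
        u8 index = (event_type - base_id) / stride;

        /* e.g. base GOYA_ASYNC_EVENT_ID_TPC0_ECC, stride 3 -> "TPC3_ecc" */
        snprintf(buf, len, "TPC%d_ecc", index);
}
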
4444static void goya_print_razwi_info(struct hl_device *hdev)
4445{
4446        if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4447                dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4448                WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4449        }
4450
4451        if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4452                dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4453                WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4454        }
4455
4456        if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4457                dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4458                WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4459        }
4460
4461        if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4462                dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4463                WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4464        }
4465}
4466
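/*
 * Editor's note: each RAZWI capture register above is latched by the
 * hardware and must be cleared by software (write 0) before the next
 * violation can be captured. A hedged, generic form of the same
 * read-report-clear pattern (goya_demo_* is a hypothetical name):
 */
static inline void goya_demo_report_and_clear(struct hl_device *hdev,
                                                u32 reg, const char *msg)
{
        if (RREG32(reg)) {
                dev_err_ratelimited(hdev->dev, "%s\n", msg);
                WREG32(reg, 0);         /* re-arm the capture logic */
        }
}
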
4467static void goya_print_mmu_error_info(struct hl_device *hdev)
4468{
4469        struct goya_device *goya = hdev->asic_specific;
4470        u64 addr;
4471        u32 val;
4472
4473        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4474                return;
4475
4476        val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4477        if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4478                addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4479                addr <<= 32;
4480                addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4481
4482                dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4483                                        addr);
4484
4485                WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4486        }
4487}
4488
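/*
 * Editor's note: the faulting VA is split across two registers: bits
 * 49:32 sit (pre-masked) in MMU_PAGE_ERROR_CAPTURE and bits 31:0 in
 * MMU_PAGE_ERROR_CAPTURE_VA. A hedged sketch of the same composition,
 * factored out with made-up register values in mind:
 */
static inline u64 goya_demo_compose_va(u32 capture, u32 va_lo)
{
        u64 addr = capture & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;

        addr <<= 32;            /* move bits 49:32 into place */
        addr |= va_lo;          /* append bits 31:0 */
        return addr;            /* full 50-bit virtual address */
}
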
4489static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4490                                bool razwi)
4491{
4492        char desc[20] = "";
4493
4494        goya_get_event_desc(event_type, desc, sizeof(desc));
4495        dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4496                event_type, desc);
4497
4498        if (razwi) {
4499                goya_print_razwi_info(hdev);
4500                goya_print_mmu_error_info(hdev);
4501        }
4502}
4503
4504static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4505                size_t irq_arr_size)
4506{
4507        struct cpucp_unmask_irq_arr_packet *pkt;
4508        size_t total_pkt_size;
4509        long result;
4510        int rc;
4511        int irq_num_entries, irq_arr_index;
4512        __le32 *goya_irq_arr;
4513
4514        total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
4515                        irq_arr_size;
4516
4517        /* data should be aligned to 8 bytes so that CPU-CP can copy it */
4518        total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4519
4520        /* total_pkt_size is cast to u16 later on */
4521        if (total_pkt_size > USHRT_MAX) {
4522                dev_err(hdev->dev, "too many elements in IRQ array\n");
4523                return -EINVAL;
4524        }
4525
4526        pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4527        if (!pkt)
4528                return -ENOMEM;
4529
4530        irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4531        pkt->length = cpu_to_le32(irq_num_entries);
4532
4533        /* We must perform any necessary endianness conversion on the irq
4534         * array being passed to the goya hardware.
4535         */
4536        for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4537                        irq_arr_index < irq_num_entries ; irq_arr_index++)
4538                goya_irq_arr[irq_arr_index] =
4539                                cpu_to_le32(irq_arr[irq_arr_index]);
4540
4541        pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4542                                                CPUCP_PKT_CTL_OPCODE_SHIFT);
4543
4544        rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4545                                                total_pkt_size, 0, &result);
4546
4547        if (rc)
4548                dev_err(hdev->dev, "failed to unmask IRQ array\n");
4549
4550        kfree(pkt);
4551
4552        return rc;
4553}
4554
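/*
 * Editor's note: the (x + 0x7) & ~0x7 idiom above rounds a size up to the
 * next multiple of 8 so the packet meets CPU-CP's alignment requirement;
 * it is equivalent to ALIGN(x, 8). Worked example as a hedged sketch:
 */
static inline size_t goya_demo_align8(size_t x)
{
        /* 21 -> 28 & ~7 = 24; 24 -> 31 & ~7 = 24 (already aligned) */
        return (x + 0x7) & ~0x7;
}
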
4555static int goya_soft_reset_late_init(struct hl_device *hdev)
4556{
4557        /*
4558         * Unmask all IRQs since some could have been received
4559         * during the soft reset
4560         */
4561        return goya_unmask_irq_arr(hdev, goya_all_events,
4562                                        sizeof(goya_all_events));
4563}
4564
4565static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4566{
4567        struct cpucp_packet pkt;
4568        long result;
4569        int rc;
4570
4571        memset(&pkt, 0, sizeof(pkt));
4572
4573        pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4574                                CPUCP_PKT_CTL_OPCODE_SHIFT);
4575        pkt.value = cpu_to_le64(event_type);
4576
4577        rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4578                                                0, &result);
4579
4580        if (rc)
4581                dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4582
4583        return rc;
4584}
4585
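/*
 * Editor's note: a hedged sketch of the single-event unmask packet built
 * above. The opcode is placed in the ctl word via
 * CPUCP_PKT_CTL_OPCODE_SHIFT, and all fields are little-endian because
 * that is what the embedded CPU expects:
 */
static inline void goya_demo_build_unmask_pkt(struct cpucp_packet *pkt,
                                                u16 event_type)
{
        memset(pkt, 0, sizeof(*pkt));
        pkt->ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
                                CPUCP_PKT_CTL_OPCODE_SHIFT);
        pkt->value = cpu_to_le64(event_type);
}
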
4586static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4587{
4588        switch (event_type) {
4589        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4590                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
4591                dev_info_ratelimited(hdev->dev,
4592                        "Clock throttling due to power consumption\n");
4593                break;
4594        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4595                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
4596                dev_info_ratelimited(hdev->dev,
4597                        "Power envelope is safe, back to optimal clock\n");
4598                break;
4599        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4600                hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
4601                dev_info_ratelimited(hdev->dev,
4602                        "Clock throttling due to overheating\n");
4603                break;
4604        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4605                hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
4606                dev_info_ratelimited(hdev->dev,
4607                        "Thermal envelope is safe, back to optimal clock\n");
4608                break;
4609
4610        default:
4611                dev_err(hdev->dev, "Received invalid clock change event %d\n",
4612                        event_type);
4613                break;
4614        }
4615}
4616
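/*
 * Editor's note: clk_throttling_reason is a bitmask; the _S ("start")
 * events set a bit and the matching _E ("end") events clear it, so power
 * and thermal throttling are tracked independently. A hedged illustration
 * of how a reader of that mask might test it:
 */
static inline bool goya_demo_is_throttled(u32 reason)
{
        /* true if either power or thermal throttling is still pending */
        return reason & (HL_CLK_THROTTLE_POWER | HL_CLK_THROTTLE_THERMAL);
}
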
4617void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4618{
4619        u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4620        u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4621                                >> EQ_CTL_EVENT_TYPE_SHIFT);
4622        struct goya_device *goya = hdev->asic_specific;
4623
4624        goya->events_stat[event_type]++;
4625        goya->events_stat_aggregate[event_type]++;
4626
4627        switch (event_type) {
4628        case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4629        case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4630        case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4631        case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4632        case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4633        case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4634        case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4635        case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4636        case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4637        case GOYA_ASYNC_EVENT_ID_MME_ECC:
4638        case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4639        case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4640        case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4641        case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4642        case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4643        case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4644        case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4645        case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4646        case GOYA_ASYNC_EVENT_ID_GIC500:
4647        case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4648        case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4649        case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4650        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4651                goya_print_irq_info(hdev, event_type, false);
4652                if (hdev->hard_reset_on_fw_events)
4653                        hl_device_reset(hdev, true, false);
4654                break;
4655
4656        case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4657        case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4658        case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4659        case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4660        case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4661        case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4662        case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4663        case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4664        case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4665        case GOYA_ASYNC_EVENT_ID_MME_WACS:
4666        case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4667        case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4668        case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4669        case GOYA_ASYNC_EVENT_ID_PSOC:
4670        case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4671        case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4672        case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4673        case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4674        case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4675        case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4676        case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4677        case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4678        case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4679        case GOYA_ASYNC_EVENT_ID_MME_QM:
4680        case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4681        case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4682        case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4683                goya_print_irq_info(hdev, event_type, true);
4684                goya_unmask_irq(hdev, event_type);
4685                break;
4686
4687        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4688        case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4689        case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4690        case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4691        case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4692        case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4693        case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4694        case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4695        case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4696        case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4697                goya_print_irq_info(hdev, event_type, false);
4698                goya_unmask_irq(hdev, event_type);
4699                break;
4700
4701        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4702        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4703        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4704        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4705                goya_print_clk_change_info(hdev, event_type);
4706                goya_unmask_irq(hdev, event_type);
4707                break;
4708
4709        default:
4710                dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4711                                event_type);
4712                break;
4713        }
4714}
4715
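/*
 * Editor's note: the event type is a bit-field inside the EQ entry's ctl
 * word, isolated with the mask-then-shift at the top of goya_handle_eqe().
 * A hedged sketch of the same extraction, with the endianness conversion
 * made explicit:
 */
static inline u16 goya_demo_eqe_event_type(__le32 raw_ctl)
{
        u32 ctl = le32_to_cpu(raw_ctl);         /* EQ entries are LE */

        return (ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT;
}
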
4716void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4717{
4718        struct goya_device *goya = hdev->asic_specific;
4719
4720        if (aggregate) {
4721                *size = (u32) sizeof(goya->events_stat_aggregate);
4722                return goya->events_stat_aggregate;
4723        }
4724
4725        *size = (u32) sizeof(goya->events_stat);
4726        return goya->events_stat;
4727}
4728
4729static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4730                                u64 val, bool is_dram)
4731{
4732        struct packet_lin_dma *lin_dma_pkt;
4733        struct hl_cs_job *job;
4734        u32 cb_size, ctl;
4735        struct hl_cb *cb;
4736        int rc, lin_dma_pkts_cnt;
4737
4738        lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4739        cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4740                                                sizeof(struct packet_msg_prot);
4741        cb = hl_cb_kernel_create(hdev, cb_size, false);
4742        if (!cb)
4743                return -ENOMEM;
4744
4745        lin_dma_pkt = cb->kernel_address;
4746
4747        do {
4748                memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4749
4750                ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4751                                (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4752                                (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4753                                (1 << GOYA_PKT_CTL_RB_SHIFT) |
4754                                (1 << GOYA_PKT_CTL_MB_SHIFT));
4755                ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4756                                GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4757                lin_dma_pkt->ctl = cpu_to_le32(ctl);
4758
4759                lin_dma_pkt->src_addr = cpu_to_le64(val);
4760                lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4761                if (lin_dma_pkts_cnt > 1)
4762                        lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4763                else
4764                        lin_dma_pkt->tsize = cpu_to_le32(size);
4765
4766                size -= SZ_2G;
4767                addr += SZ_2G;
4768                lin_dma_pkt++;
4769        } while (--lin_dma_pkts_cnt);
4770
4771        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4772        if (!job) {
4773                dev_err(hdev->dev, "Failed to allocate a new job\n");
4774                rc = -ENOMEM;
4775                goto release_cb;
4776        }
4777
4778        job->id = 0;
4779        job->user_cb = cb;
4780        job->user_cb->cs_cnt++;
4781        job->user_cb_size = cb_size;
4782        job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4783        job->patched_cb = job->user_cb;
4784        job->job_cb_size = job->user_cb_size;
4785
4786        hl_debugfs_add_job(hdev, job);
4787
4788        rc = goya_send_job_on_qman0(hdev, job);
4789
4790        hl_debugfs_remove_job(hdev, job);
4791        kfree(job);
4792        cb->cs_cnt--;
4793
4794release_cb:
4795        hl_cb_put(cb);
4796        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4797
4798        return rc;
4799}
4800
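/*
 * Editor's note: the memset CB above is split into LIN_DMA packets of at
 * most 2GB each; only the final packet carries the residual size. Worked
 * example (hedged): size = 5GB gives three packets of 2GB, 2GB and 1GB.
 */
static inline u32 goya_demo_memset_pkt_count(u64 size)
{
        return DIV_ROUND_UP_ULL(size, SZ_2G);   /* e.g. 5GB -> 3 packets */
}
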
4801int goya_context_switch(struct hl_device *hdev, u32 asid)
4802{
4803        struct asic_fixed_properties *prop = &hdev->asic_prop;
4804        u64 addr = prop->sram_base_address, sob_addr;
4805        u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4806        u64 val = 0x7777777777777777ull;
4807        int rc, dma_id;
4808        u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4809                                        mmDMA_CH_0_WR_COMP_ADDR_LO;
4810
4811        rc = goya_memset_device_memory(hdev, addr, size, val, false);
4812        if (rc) {
4813                dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4814                return rc;
4815        }
4816
4817        /* we need to reset registers that the user is allowed to change */
4818        sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4819        WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4820
4821        for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4822                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4823                                                        (dma_id - 1) * 4;
4824                WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4825                                                lower_32_bits(sob_addr));
4826        }
4827
4828        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4829
4830        goya_mmu_prepare(hdev, asid);
4831
4832        goya_clear_sm_regs(hdev);
4833
4834        return 0;
4835}
4836
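/*
 * Editor's note: per-channel DMA registers are laid out at a fixed
 * stride, so channel N's register is base + N * (ch1_reg - ch0_reg),
 * which is exactly how channel_off is used above. A hedged sketch:
 */
static inline u32 goya_demo_ch_reg(u32 ch0_reg, u32 stride, int dma_id)
{
        return ch0_reg + stride * dma_id;       /* dma_id 0..4 on Goya */
}
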
4837static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4838{
4839        struct asic_fixed_properties *prop = &hdev->asic_prop;
4840        struct goya_device *goya = hdev->asic_specific;
4841        u64 addr = prop->mmu_pgt_addr;
4842        u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4843                        MMU_CACHE_MNG_SIZE;
4844
4845        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4846                return 0;
4847
4848        return goya_memset_device_memory(hdev, addr, size, 0, true);
4849}
4850
4851static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4852{
4853        struct goya_device *goya = hdev->asic_specific;
4854        u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4855        u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4856        u64 val = 0x9999999999999999ull;
4857
4858        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4859                return 0;
4860
4861        return goya_memset_device_memory(hdev, addr, size, val, true);
4862}
4863
4864static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4865{
4866        struct asic_fixed_properties *prop = &hdev->asic_prop;
4867        struct goya_device *goya = hdev->asic_specific;
4868        s64 off, cpu_off;
4869        int rc;
4870
4871        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4872                return 0;
4873
4874        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4875                rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
4876                                prop->dram_base_address + off, PAGE_SIZE_2MB,
4877                                (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
4878                if (rc) {
4879                        dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4880                                prop->dram_base_address + off);
4881                        goto unmap;
4882                }
4883        }
4884
4885        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4886                rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4887                        hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
4888
4889                if (rc) {
4890                        dev_err(hdev->dev,
4891                                "Map failed for CPU accessible memory\n");
4892                        off -= PAGE_SIZE_2MB;
4893                        goto unmap;
4894                }
4895        } else {
4896                for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4897                        rc = hl_mmu_map(hdev->kernel_ctx,
4898                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4899                                hdev->cpu_accessible_dma_address + cpu_off,
4900                                PAGE_SIZE_4KB, true);
4901                        if (rc) {
4902                                dev_err(hdev->dev,
4903                                        "Map failed for CPU accessible memory\n");
4904                                cpu_off -= PAGE_SIZE_4KB;
4905                                goto unmap_cpu;
4906                        }
4907                }
4908        }
4909
4910        goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4911        goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4912        WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4913        WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4914
4915        /* Make sure configuration is flushed to device */
4916        RREG32(mmCPU_IF_AWUSER_OVR_EN);
4917
4918        goya->device_cpu_mmu_mappings_done = true;
4919
4920        return 0;
4921
4922unmap_cpu:
4923        for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4924                if (hl_mmu_unmap(hdev->kernel_ctx,
4925                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4926                                PAGE_SIZE_4KB, true))
4927                        dev_warn_ratelimited(hdev->dev,
4928                                "failed to unmap address 0x%llx\n",
4929                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4930unmap:
4931        for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4932                if (hl_mmu_unmap(hdev->kernel_ctx,
4933                                prop->dram_base_address + off, PAGE_SIZE_2MB,
4934                                true))
4935                        dev_warn_ratelimited(hdev->dev,
4936                                "failed to unmap address 0x%llx\n",
4937                                prop->dram_base_address + off);
4938
4939        return rc;
4940}
4941
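/*
 * Editor's note: the function above chooses the mapping granularity by
 * testing 2MB alignment; for any power-of-two alignment the check is
 * simply addr & (align - 1) == 0. A hedged illustration:
 */
static inline bool goya_demo_is_2mb_aligned(u64 addr)
{
        return !(addr & (PAGE_SIZE_2MB - 1));   /* e.g. 0x200000 -> true */
}
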
4942void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4943{
4944        struct asic_fixed_properties *prop = &hdev->asic_prop;
4945        struct goya_device *goya = hdev->asic_specific;
4946        u32 off, cpu_off;
4947
4948        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4949                return;
4950
4951        if (!goya->device_cpu_mmu_mappings_done)
4952                return;
4953
4954        WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4955        WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4956
4957        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4958                if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4959                                PAGE_SIZE_2MB, true))
4960                        dev_warn(hdev->dev,
4961                                "Failed to unmap CPU accessible memory\n");
4962        } else {
4963                for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
4964                        if (hl_mmu_unmap(hdev->kernel_ctx,
4965                                        VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4966                                        PAGE_SIZE_4KB,
4967                                        (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
4968                                dev_warn_ratelimited(hdev->dev,
4969                                        "failed to unmap address 0x%llx\n",
4970                                        VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4971        }
4972
4973        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
4974                if (hl_mmu_unmap(hdev->kernel_ctx,
4975                                prop->dram_base_address + off, PAGE_SIZE_2MB,
4976                                (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
4977                        dev_warn_ratelimited(hdev->dev,
4978                                        "Failed to unmap address 0x%llx\n",
4979                                        prop->dram_base_address + off);
4980
4981        goya->device_cpu_mmu_mappings_done = false;
4982}
4983
4984static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4985{
4986        struct goya_device *goya = hdev->asic_specific;
4987        int i;
4988
4989        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4990                return;
4991
4992        if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4993                WARN(1, "asid %u is too big\n", asid);
4994                return;
4995        }
4996
4997        /* zero the MMBP and ASID bits and then set the ASID */
4998        for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4999                goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5000}
5001
5002static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5003                                        u32 flags)
5004{
5005        struct goya_device *goya = hdev->asic_specific;
5006        u32 status, timeout_usec;
5007        int rc;
5008
5009        if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5010                hdev->hard_reset_pending)
5011                return 0;
5012
5013        /* no need for L1-only invalidation in Goya */
5014        if (!is_hard)
5015                return 0;
5016
5017        if (hdev->pldm)
5018                timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5019        else
5020                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5021
5022        mutex_lock(&hdev->mmu_cache_lock);
5023
5024        /* L0 & L1 invalidation */
5025        WREG32(mmSTLB_INV_ALL_START, 1);
5026
5027        rc = hl_poll_timeout(
5028                hdev,
5029                mmSTLB_INV_ALL_START,
5030                status,
5031                !status,
5032                1000,
5033                timeout_usec);
5034
5035        mutex_unlock(&hdev->mmu_cache_lock);
5036
5037        if (rc) {
5038                dev_err_ratelimited(hdev->dev,
5039                                        "MMU cache invalidation timeout\n");
5040                hl_device_reset(hdev, true, false);
5041        }
5042
5043        return rc;
5044}
5045
5046static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
5047                                bool is_hard, u32 asid, u64 va, u64 size)
5048{
5049        struct goya_device *goya = hdev->asic_specific;
5050        u32 status, timeout_usec, inv_data, pi;
5051        int rc;
5052
5053        if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5054                hdev->hard_reset_pending)
5055                return 0;
5056
5057        /* no need for L1-only invalidation in Goya */
5058        if (!is_hard)
5059                return 0;
5060
5061        if (hdev->pldm)
5062                timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5063        else
5064                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5065
5066        mutex_lock(&hdev->mmu_cache_lock);
5067
5068        /*
5069         * TODO: currently invalidate entire L0 & L1 as in regular hard
5070         * invalidation. Need to apply invalidation of specific cache lines with
5071         * mask of ASID & VA & size.
5072         * Note that L1 will be flushed entirely in any case.
5073         */
5074
5075        /* L0 & L1 invalidation */
5076        inv_data = RREG32(mmSTLB_CACHE_INV);
5077        /* PI is 8 bit */
5078        pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5079        WREG32(mmSTLB_CACHE_INV,
5080                        (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5081
5082        rc = hl_poll_timeout(
5083                hdev,
5084                mmSTLB_INV_CONSUMER_INDEX,
5085                status,
5086                status == pi,
5087                1000,
5088                timeout_usec);
5089
5090        mutex_unlock(&hdev->mmu_cache_lock);
5091
5092        if (rc) {
5093                dev_err_ratelimited(hdev->dev,
5094                                        "MMU cache invalidation timeout\n");
5095                hl_device_reset(hdev, true, false);
5096        }
5097
5098        return rc;
5099}
5100
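/*
 * Editor's note: the invalidation producer index is an 8-bit counter, so
 * the increment above wraps with & 0xFF; the poll then waits for the
 * consumer index to catch up to the new PI. Worked example (hedged):
 * PI = 255 -> (255 + 1) & 0xFF = 0.
 */
static inline u32 goya_demo_next_pi(u32 pi)
{
        return (pi + 1) & 0xFF;         /* 0xFF -> 0x00 wraparound */
}
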
5101int goya_send_heartbeat(struct hl_device *hdev)
5102{
5103        struct goya_device *goya = hdev->asic_specific;
5104
5105        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5106                return 0;
5107
5108        return hl_fw_send_heartbeat(hdev);
5109}
5110
5111int goya_cpucp_info_get(struct hl_device *hdev)
5112{
5113        struct goya_device *goya = hdev->asic_specific;
5114        struct asic_fixed_properties *prop = &hdev->asic_prop;
5115        u64 dram_size;
5116        int rc;
5117
5118        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5119                return 0;
5120
5121        rc = hl_fw_cpucp_info_get(hdev);
5122        if (rc)
5123                return rc;
5124
5125        dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5126        if (dram_size) {
5127                if ((!is_power_of_2(dram_size)) ||
5128                                (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5129                        dev_err(hdev->dev,
5130                                "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5131                                dram_size);
5132                        dram_size = DRAM_PHYS_DEFAULT_SIZE;
5133                }
5134
5135                prop->dram_size = dram_size;
5136                prop->dram_end_address = prop->dram_base_address + dram_size;
5137        }
5138
5139        if (!strlen(prop->cpucp_info.card_name))
5140                strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5141                                CARD_NAME_MAX_LEN);
5142
5143        return 0;
5144}
5145
5146static void goya_set_clock_gating(struct hl_device *hdev)
5147{
5148        /* clock gating not supported in Goya */
5149}
5150
5151static void goya_disable_clock_gating(struct hl_device *hdev)
5152{
5153        /* clock gating not supported in Goya */
5154}
5155
5156static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
5157                                struct seq_file *s)
5158{
5159        const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5160        const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5161        u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5162                mme_arch_sts;
5163        bool is_idle = true, is_eng_idle;
5164        u64 offset;
5165        int i;
5166
5167        if (s)
5168                seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5169                                "---  -------  ------------  -------------\n");
5170
5171        offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5172
5173        for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5174                qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5175                dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5176                is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5177                                IS_DMA_IDLE(dma_core_sts0);
5178                is_idle &= is_eng_idle;
5179
5180                if (mask)
5181                        *mask |= ((u64) !is_eng_idle) <<
5182                                                (GOYA_ENGINE_ID_DMA_0 + i);
5183                if (s)
5184                        seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5185                                        qm_glbl_sts0, dma_core_sts0);
5186        }
5187
5188        if (s)
5189                seq_puts(s,
5190                        "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5191                        "---  -------  ------------  --------------  ----------\n");
5192
5193        offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5194
5195        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5196                qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5197                cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5198                tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5199                is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5200                                IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5201                                IS_TPC_IDLE(tpc_cfg_sts);
5202                is_idle &= is_eng_idle;
5203
5204                if (mask)
5205                        *mask |= ((u64) !is_eng_idle) <<
5206                                                (GOYA_ENGINE_ID_TPC_0 + i);
5207                if (s)
5208                        seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5209                                qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5210        }
5211
5212        if (s)
5213                seq_puts(s,
5214                        "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5215                        "---  -------  ------------  --------------  -----------\n");
5216
5217        qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5218        cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5219        mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5220        is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5221                        IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5222                        IS_MME_IDLE(mme_arch_sts);
5223        is_idle &= is_eng_idle;
5224
5225        if (mask)
5226                *mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
5227        if (s) {
5228                seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5229                                cmdq_glbl_sts0, mme_arch_sts);
5230                seq_puts(s, "\n");
5231        }
5232
5233        return is_idle;
5234}
5235
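/*
 * Editor's note: the idle mask filled in above carries one bit per
 * engine, with a set bit meaning "busy". A hedged sketch of how a caller
 * might test a single engine afterwards:
 */
static inline bool goya_demo_engine_busy(u64 mask, u32 engine_id)
{
        return mask & (1ULL << engine_id);      /* e.g. GOYA_ENGINE_ID_MME_0 */
}
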
5236static void goya_hw_queues_lock(struct hl_device *hdev)
5237        __acquires(&goya->hw_queues_lock)
5238{
5239        struct goya_device *goya = hdev->asic_specific;
5240
5241        spin_lock(&goya->hw_queues_lock);
5242}
5243
5244static void goya_hw_queues_unlock(struct hl_device *hdev)
5245        __releases(&goya->hw_queues_lock)
5246{
5247        struct goya_device *goya = hdev->asic_specific;
5248
5249        spin_unlock(&goya->hw_queues_lock);
5250}
5251
5252static u32 goya_get_pci_id(struct hl_device *hdev)
5253{
5254        return hdev->pdev->device;
5255}
5256
5257static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5258                                size_t max_size)
5259{
5260        struct goya_device *goya = hdev->asic_specific;
5261
5262        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5263                return 0;
5264
5265        return hl_fw_get_eeprom_data(hdev, data, max_size);
5266}
5267
5268static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5269{
5270        return RREG32(mmHW_STATE);
5271}
5272
5273static int goya_ctx_init(struct hl_ctx *ctx)
5274{
5275        return 0;
5276}
5277
5278u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5279{
5280        return cq_idx;
5281}
5282
5283static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5284{
5285        return 0;
5286}
5287
5288static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5289{
5290        return 0;
5291}
5292
5293static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
5294{
5295
5296}
5297
5298static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
5299                        u16 sob_val, u16 mon_id, u32 q_idx)
5300{
5301
5302}
5303
5304static void goya_reset_sob(struct hl_device *hdev, void *data)
5305{
5306
5307}
5308
5309static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
5310{
5311        if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
5312                                                        HL_POWER9_HOST_MAGIC) {
5313                dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
5314                hdev->power9_64bit_dma_enable = 1;
5315                hdev->dma_mask = 64;
5316        } else {
5317                dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
5318                hdev->power9_64bit_dma_enable = 0;
5319                hdev->dma_mask = 48;
5320        }
5321}
5322
5323u64 goya_get_device_time(struct hl_device *hdev)
5324{
5325        u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5326
5327        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5328}
5329
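/*
 * Editor's note: the device timestamp is a 64-bit counter exposed as two
 * 32-bit registers; the function above composes them as (hi << 32) | lo.
 * Worked example (hedged): hi = 0x1, lo = 0xDEADBEEF -> 0x1DEADBEEF.
 */
static inline u64 goya_demo_compose_time(u32 hi, u32 lo)
{
        return ((u64) hi << 32) | lo;
}
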
5330static const struct hl_asic_funcs goya_funcs = {
5331        .early_init = goya_early_init,
5332        .early_fini = goya_early_fini,
5333        .late_init = goya_late_init,
5334        .late_fini = goya_late_fini,
5335        .sw_init = goya_sw_init,
5336        .sw_fini = goya_sw_fini,
5337        .hw_init = goya_hw_init,
5338        .hw_fini = goya_hw_fini,
5339        .halt_engines = goya_halt_engines,
5340        .suspend = goya_suspend,
5341        .resume = goya_resume,
5342        .cb_mmap = goya_cb_mmap,
5343        .ring_doorbell = goya_ring_doorbell,
5344        .pqe_write = goya_pqe_write,
5345        .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5346        .asic_dma_free_coherent = goya_dma_free_coherent,
5347        .get_int_queue_base = goya_get_int_queue_base,
5348        .test_queues = goya_test_queues,
5349        .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5350        .asic_dma_pool_free = goya_dma_pool_free,
5351        .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5352        .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5353        .hl_dma_unmap_sg = goya_dma_unmap_sg,
5354        .cs_parser = goya_cs_parser,
5355        .asic_dma_map_sg = goya_dma_map_sg,
5356        .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5357        .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5358        .update_eq_ci = goya_update_eq_ci,
5359        .context_switch = goya_context_switch,
5360        .restore_phase_topology = goya_restore_phase_topology,
5361        .debugfs_read32 = goya_debugfs_read32,
5362        .debugfs_write32 = goya_debugfs_write32,
5363        .debugfs_read64 = goya_debugfs_read64,
5364        .debugfs_write64 = goya_debugfs_write64,
5365        .add_device_attr = goya_add_device_attr,
5366        .handle_eqe = goya_handle_eqe,
5367        .set_pll_profile = goya_set_pll_profile,
5368        .get_events_stat = goya_get_events_stat,
5369        .read_pte = goya_read_pte,
5370        .write_pte = goya_write_pte,
5371        .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5372        .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5373        .send_heartbeat = goya_send_heartbeat,
5374        .set_clock_gating = goya_set_clock_gating,
5375        .disable_clock_gating = goya_disable_clock_gating,
5376        .debug_coresight = goya_debug_coresight,
5377        .is_device_idle = goya_is_device_idle,
5378        .soft_reset_late_init = goya_soft_reset_late_init,
5379        .hw_queues_lock = goya_hw_queues_lock,
5380        .hw_queues_unlock = goya_hw_queues_unlock,
5381        .get_pci_id = goya_get_pci_id,
5382        .get_eeprom_data = goya_get_eeprom_data,
5383        .send_cpu_message = goya_send_cpu_message,
5384        .get_hw_state = goya_get_hw_state,
5385        .pci_bars_map = goya_pci_bars_map,
5386        .init_iatu = goya_init_iatu,
5387        .rreg = hl_rreg,
5388        .wreg = hl_wreg,
5389        .halt_coresight = goya_halt_coresight,
5390        .ctx_init = goya_ctx_init,
5391        .get_clk_rate = goya_get_clk_rate,
5392        .get_queue_id_for_cq = goya_get_queue_id_for_cq,
5393        .read_device_fw_version = goya_read_device_fw_version,
5394        .load_firmware_to_device = goya_load_firmware_to_device,
5395        .load_boot_fit_to_device = goya_load_boot_fit_to_device,
5396        .get_signal_cb_size = goya_get_signal_cb_size,
5397        .get_wait_cb_size = goya_get_wait_cb_size,
5398        .gen_signal_cb = goya_gen_signal_cb,
5399        .gen_wait_cb = goya_gen_wait_cb,
5400        .reset_sob = goya_reset_sob,
5401        .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
5402        .get_device_time = goya_get_device_time
5403};
5404
5405/*
5406 * goya_set_asic_funcs - set Goya function pointers
5407 *
5408 * @hdev: pointer to hl_device structure
5409 *
5410 */
5411void goya_set_asic_funcs(struct hl_device *hdev)
5412{
5413        hdev->asic_funcs = &goya_funcs;
5414}
5415