linux/drivers/misc/habanalabs/goya/goya.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2019 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 */
   7
   8#include "goyaP.h"
   9#include "../include/hw_ip/mmu/mmu_general.h"
  10#include "../include/hw_ip/mmu/mmu_v1_0.h"
  11#include "../include/goya/asic_reg/goya_masks.h"
  12#include "../include/goya/goya_reg_map.h"
  13
  14#include <linux/pci.h>
  15#include <linux/genalloc.h>
  16#include <linux/hwmon.h>
  17#include <linux/io-64-nonatomic-lo-hi.h>
  18#include <linux/iommu.h>
  19#include <linux/seq_file.h>
  20
  21/*
  22 * GOYA security scheme:
  23 *
  24 * 1. Host is protected by:
  25 *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
  26 *        - MMU
  27 *
  28 * 2. DRAM is protected by:
  29 *        - Range registers (protect the first 512MB)
  30 *        - MMU (isolation between users)
  31 *
  32 * 3. Configuration is protected by:
  33 *        - Range registers
  34 *        - Protection bits
  35 *
  36 * When MMU is disabled:
  37 *
  38 * QMAN DMA: PQ, CQ, CP, DMA are secured.
  39 * PQ, CB and the data are on the host.
  40 *
  41 * QMAN TPC/MME:
  42 * PQ, CQ and CP are not secured.
  43 * PQ, CB and the data are on the SRAM/DRAM.
  44 *
  45 * Since QMAN DMA is secured, the driver is parsing the DMA CB:
  46 *     - checks DMA pointer
  47 *     - WREG, MSG_PROT are not allowed.
  48 *     - MSG_LONG/SHORT are allowed.
  49 *
  50 * A read/write transaction by the QMAN to a protected area will succeed if
  51 * and only if the QMAN's CP is secured and MSG_PROT is used
  52 *
  53 *
  54 * When MMU is enabled:
  55 *
  56 * QMAN DMA: PQ, CQ and CP are secured.
  57 * MMU is set to bypass on the Secure props register of the QMAN.
  58 * The reasons we don't enable MMU for PQ, CQ and CP are:
  59 *     - PQ entry is in kernel address space and the driver doesn't map it.
  60 *     - CP writes to MSIX register and to kernel address space (completion
  61 *       queue).
  62 *
  63 * DMA is not secured but because CP is secured, the driver still needs to parse
  64 * the CB, but doesn't need to check the DMA addresses.
  65 *
  66 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
  67 * the driver doesn't map memory in MMU.
  68 *
  69 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
  70 *
  71 * DMA RR does NOT protect host because DMA is not secured
  72 *
  73 */
  74
  75#define GOYA_BOOT_FIT_FILE      "habanalabs/goya/goya-boot-fit.itb"
  76#define GOYA_LINUX_FW_FILE      "habanalabs/goya/goya-fit.itb"
  77
  78#define GOYA_MMU_REGS_NUM               63
  79
  80#define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */
  81
  82#define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
  83#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
  84#define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
  85#define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
  86#define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
  87#define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
  88#define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
  89#define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
  90#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC  1000000         /* 1s */
  91#define GOYA_MSG_TO_CPU_TIMEOUT_USEC    4000000         /* 4s */
  92
  93#define GOYA_QMAN0_FENCE_VAL            0xD169B243
  94
  95#define GOYA_MAX_STRING_LEN             20
  96
  97#define GOYA_CB_POOL_CB_CNT             512
  98#define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */
  99
 100#define IS_QM_IDLE(engine, qm_glbl_sts0) \
 101        (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
 102#define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
 103#define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
 104#define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)
 105
 106#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
 107        (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
 108                        engine##_CMDQ_IDLE_MASK)
 109#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
 110        IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
 111#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
 112        IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
 113
 114#define IS_DMA_IDLE(dma_core_sts0) \
 115        !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
 116
 117#define IS_TPC_IDLE(tpc_cfg_sts) \
 118        (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
 119
 120#define IS_MME_IDLE(mme_arch_sts) \
 121        (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
 122
 123
/*
 * Interrupt names for Goya's MSI-X vectors: one per completion queue
 * (cq 0-4) plus the device CPU event queue. Indexed by MSI-X entry.
 */
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};
 128
/*
 * Size in bytes of each QMAN packet type, indexed by packet ID.
 * IDs not listed here implicitly map to 0 (invalid/unknown packet).
 */
static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};
 141
 142static inline bool validate_packet_id(enum packet_id id)
 143{
 144        switch (id) {
 145        case PACKET_WREG_32:
 146        case PACKET_WREG_BULK:
 147        case PACKET_MSG_LONG:
 148        case PACKET_MSG_SHORT:
 149        case PACKET_CP_DMA:
 150        case PACKET_MSG_PROT:
 151        case PACKET_FENCE:
 152        case PACKET_LIN_DMA:
 153        case PACKET_NOP:
 154        case PACKET_STOP:
 155                return true;
 156        default:
 157                return false;
 158        }
 159}
 160
/*
 * QMAN secure/non-secure properties and AxUSER registers that carry MMU
 * bypass (MMBP) and ASID configuration. The array length must stay equal
 * to GOYA_MMU_REGS_NUM.
 * NOTE(review): presumably iterated by goya_mmu_prepare() when binding a
 * process' ASID — that function's body is outside this chunk; confirm.
 */
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
 226
/*
 * All async hardware event IDs of the device: ECC and decoder errors,
 * SRAM/MMU/DMA errors, PLLs, QMAN/CMDQ/DMA-channel events, BMON/SPMU
 * units and power/thermal environment events.
 * NOTE(review): the consumer of this table is outside this chunk.
 */
static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
 354
 355static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
 356static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 357static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
 358static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
 359
 360int goya_get_fixed_properties(struct hl_device *hdev)
 361{
 362        struct asic_fixed_properties *prop = &hdev->asic_prop;
 363        int i;
 364
 365        prop->max_queues = GOYA_QUEUE_ID_SIZE;
 366        prop->hw_queues_props = kcalloc(prop->max_queues,
 367                        sizeof(struct hw_queue_properties),
 368                        GFP_KERNEL);
 369
 370        if (!prop->hw_queues_props)
 371                return -ENOMEM;
 372
 373        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 374                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 375                prop->hw_queues_props[i].driver_only = 0;
 376                prop->hw_queues_props[i].requires_kernel_cb = 1;
 377        }
 378
 379        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
 380                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 381                prop->hw_queues_props[i].driver_only = 1;
 382                prop->hw_queues_props[i].requires_kernel_cb = 0;
 383        }
 384
 385        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
 386                        NUMBER_OF_INT_HW_QUEUES; i++) {
 387                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 388                prop->hw_queues_props[i].driver_only = 0;
 389                prop->hw_queues_props[i].requires_kernel_cb = 0;
 390        }
 391
 392        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 393
 394        prop->dram_base_address = DRAM_PHYS_BASE;
 395        prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
 396        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
 397        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
 398
 399        prop->sram_base_address = SRAM_BASE_ADDR;
 400        prop->sram_size = SRAM_SIZE;
 401        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
 402        prop->sram_user_base_address = prop->sram_base_address +
 403                                                SRAM_USER_BASE_OFFSET;
 404
 405        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
 406        prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
 407        if (hdev->pldm)
 408                prop->mmu_pgt_size = 0x800000; /* 8MB */
 409        else
 410                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
 411        prop->mmu_pte_size = HL_PTE_SIZE;
 412        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
 413        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 414        prop->dram_page_size = PAGE_SIZE_2MB;
 415
 416        prop->dmmu.hop0_shift = HOP0_SHIFT;
 417        prop->dmmu.hop1_shift = HOP1_SHIFT;
 418        prop->dmmu.hop2_shift = HOP2_SHIFT;
 419        prop->dmmu.hop3_shift = HOP3_SHIFT;
 420        prop->dmmu.hop4_shift = HOP4_SHIFT;
 421        prop->dmmu.hop0_mask = HOP0_MASK;
 422        prop->dmmu.hop1_mask = HOP1_MASK;
 423        prop->dmmu.hop2_mask = HOP2_MASK;
 424        prop->dmmu.hop3_mask = HOP3_MASK;
 425        prop->dmmu.hop4_mask = HOP4_MASK;
 426        prop->dmmu.start_addr = VA_DDR_SPACE_START;
 427        prop->dmmu.end_addr = VA_DDR_SPACE_END;
 428        prop->dmmu.page_size = PAGE_SIZE_2MB;
 429
 430        /* shifts and masks are the same in PMMU and DMMU */
 431        memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
 432        prop->pmmu.start_addr = VA_HOST_SPACE_START;
 433        prop->pmmu.end_addr = VA_HOST_SPACE_END;
 434        prop->pmmu.page_size = PAGE_SIZE_4KB;
 435
 436        /* PMMU and HPMMU are the same except of page size */
 437        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
 438        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
 439
 440        prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
 441        prop->cfg_size = CFG_SIZE;
 442        prop->max_asid = MAX_ASID;
 443        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
 444        prop->high_pll = PLL_HIGH_DEFAULT;
 445        prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
 446        prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
 447        prop->max_power_default = MAX_POWER_DEFAULT;
 448        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 449        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 450        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 451
 452        strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 453                CARD_NAME_MAX_LEN);
 454
 455        prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 456
 457        return 0;
 458}
 459
 460/*
 461 * goya_pci_bars_map - Map PCI BARS of Goya device
 462 *
 463 * @hdev: pointer to hl_device structure
 464 *
 465 * Request PCI regions and map them to kernel virtual addresses.
 466 * Returns 0 on success
 467 *
 468 */
 469static int goya_pci_bars_map(struct hl_device *hdev)
 470{
 471        static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
 472        bool is_wc[3] = {false, false, true};
 473        int rc;
 474
 475        rc = hl_pci_bars_map(hdev, name, is_wc);
 476        if (rc)
 477                return rc;
 478
 479        hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
 480                        (CFG_BASE - SRAM_BASE_ADDR);
 481
 482        return 0;
 483}
 484
 485static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 486{
 487        struct goya_device *goya = hdev->asic_specific;
 488        struct hl_inbound_pci_region pci_region;
 489        u64 old_addr = addr;
 490        int rc;
 491
 492        if ((goya) && (goya->ddr_bar_cur_addr == addr))
 493                return old_addr;
 494
 495        /* Inbound Region 1 - Bar 4 - Point to DDR */
 496        pci_region.mode = PCI_BAR_MATCH_MODE;
 497        pci_region.bar = DDR_BAR_ID;
 498        pci_region.addr = addr;
 499        rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
 500        if (rc)
 501                return U64_MAX;
 502
 503        if (goya) {
 504                old_addr = goya->ddr_bar_cur_addr;
 505                goya->ddr_bar_cur_addr = addr;
 506        }
 507
 508        return old_addr;
 509}
 510
 511/*
 512 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 513 *
 514 * @hdev: pointer to hl_device structure
 515 *
 516 * This is needed in case the firmware doesn't initialize the iATU
 517 *
 518 */
 519static int goya_init_iatu(struct hl_device *hdev)
 520{
 521        struct hl_inbound_pci_region inbound_region;
 522        struct hl_outbound_pci_region outbound_region;
 523        int rc;
 524
 525        /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
 526        inbound_region.mode = PCI_BAR_MATCH_MODE;
 527        inbound_region.bar = SRAM_CFG_BAR_ID;
 528        inbound_region.addr = SRAM_BASE_ADDR;
 529        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
 530        if (rc)
 531                goto done;
 532
 533        /* Inbound Region 1 - Bar 4 - Point to DDR */
 534        inbound_region.mode = PCI_BAR_MATCH_MODE;
 535        inbound_region.bar = DDR_BAR_ID;
 536        inbound_region.addr = DRAM_PHYS_BASE;
 537        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
 538        if (rc)
 539                goto done;
 540
 541        hdev->asic_funcs->set_dma_mask_from_fw(hdev);
 542
 543        /* Outbound Region 0 - Point to Host  */
 544        outbound_region.addr = HOST_PHYS_BASE;
 545        outbound_region.size = HOST_PHYS_SIZE;
 546        rc = hl_pci_set_outbound_region(hdev, &outbound_region);
 547
 548done:
 549        return rc;
 550}
 551
 552/*
 553 * goya_early_init - GOYA early initialization code
 554 *
 555 * @hdev: pointer to hl_device structure
 556 *
 557 * Verify PCI bars
 558 * Set DMA masks
 559 * PCI controller initialization
 560 * Map PCI bars
 561 *
 562 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	/* Fill asic_prop first - allocates prop->hw_queues_props */
	rc = goya_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/*
	 * Check BAR sizes - a mismatch means this PCI device is not
	 * actually a Goya, so bail out with -ENODEV
	 */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	/* The DDR BAR size varies, so record whatever the device exposes */
	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* On real devices (not simulation), sanity-check the PCI straps */
	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

free_queue_props:
	/* Undo the allocation done by goya_get_fixed_properties() */
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
 618
 619/*
 620 * goya_early_fini - GOYA early finalization code
 621 *
 622 * @hdev: pointer to hl_device structure
 623 *
 624 * Unmap PCI bars
 625 *
 626 */
static int goya_early_fini(struct hl_device *hdev)
{
	/* Release the queue properties allocated by goya_get_fixed_properties() */
	kfree(hdev->asic_prop.hw_queues_props);

	/* Tear down PCI controller state and unmap the BARs */
	hl_pci_fini(hdev);

	return 0;
}
 634
/*
 * goya_mmu_prepare_reg - program the ASID into a single register
 *
 * @hdev: pointer to hl_device structure
 * @reg: register address to update
 * @asid: address-space ID to set (must fit within the low bits cleared below)
 *
 * Read-modify-write: clears the low 11 bits (MMBP + ASID field) and ORs in
 * the new ASID, leaving the rest of the register untouched.
 */
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
 641
 642static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 643{
 644        struct goya_device *goya = hdev->asic_specific;
 645
 646        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
 647                return;
 648
 649        if (secure)
 650                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
 651        else
 652                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
 653
 654        RREG32(mmDMA_QM_0_GLBL_PROT);
 655}
 656
 657/*
 658 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 659 *
 660 * @hdev: pointer to hl_device structure
 661 *
 662 */
 663static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 664{
 665        struct asic_fixed_properties *prop = &hdev->asic_prop;
 666        u32 trace_freq = 0;
 667        u32 pll_clk = 0;
 668        u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
 669        u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
 670        u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
 671        u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
 672        u32 od = RREG32(mmPSOC_PCI_PLL_OD);
 673
 674        if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
 675                if (div_sel == DIV_SEL_REF_CLK)
 676                        trace_freq = PLL_REF_CLK;
 677                else
 678                        trace_freq = PLL_REF_CLK / (div_fctr + 1);
 679        } else if (div_sel == DIV_SEL_PLL_CLK ||
 680                                        div_sel == DIV_SEL_DIVIDED_PLL) {
 681                pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
 682                if (div_sel == DIV_SEL_PLL_CLK)
 683                        trace_freq = pll_clk;
 684                else
 685                        trace_freq = pll_clk / (div_fctr + 1);
 686        } else {
 687                dev_warn(hdev->dev,
 688                        "Received invalid div select value: %d", div_sel);
 689        }
 690
 691        prop->psoc_timestamp_frequency = trace_freq;
 692        prop->psoc_pci_pll_nr = nr;
 693        prop->psoc_pci_pll_nf = nf;
 694        prop->psoc_pci_pll_od = od;
 695        prop->psoc_pci_pll_div_factor = div_fctr;
 696}
 697
/*
 * goya_late_init - GOYA late initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Runs after the device CPU/firmware is up: configures the MMU page
 * tables and default DRAM page, maps the device-CPU region, brings up
 * and tests the CPU queues, fetches firmware info, and finally enables
 * PCI access from the device CPU.
 *
 * Return: 0 on success, negative errno on the first failing step.
 */
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* Tell the device CPU which interrupt to raise for async events */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}
 754
 755/*
 756 * goya_late_fini - GOYA late tear-down code
 757 *
 758 * @hdev: pointer to hl_device structure
 759 *
 760 * Free sensors allocated structures
 761 */
 762void goya_late_fini(struct hl_device *hdev)
 763{
 764        const struct hwmon_channel_info **channel_info_arr;
 765        int i = 0;
 766
 767        if (!hdev->hl_chip_info->info)
 768                return;
 769
 770        channel_info_arr = hdev->hl_chip_info->info;
 771
 772        while (channel_info_arr[i]) {
 773                kfree(channel_info_arr[i]->config);
 774                kfree(channel_info_arr[i]);
 775                i++;
 776        }
 777
 778        kfree(channel_info_arr);
 779
 780        hdev->hl_chip_info->info = NULL;
 781}
 782
 783/*
 784 * goya_sw_init - Goya software initialization code
 785 *
 786 * @hdev: pointer to hl_device structure
 787 *
 788 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	/* All engine clocks start at the low PLL frequency */
	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	/* Coherent, zeroed buffer shared with the device CPU */
	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	/*
	 * Sub-allocator over the CPU-accessible buffer; ilog2(32) gives a
	 * 32-byte minimum allocation granularity, -1 means any NUMA node
	 */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		/* normalize the genalloc error to -EFAULT for our callers */
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

	/* Error unwind: release resources in reverse order of acquisition */
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
 869
/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Releases everything goya_sw_init() allocated, in reverse order of
 * creation: gen_pool, CPU-accessible coherent memory, DMA pool and the
 * goya_device structure itself. Always returns 0.
 */
static int goya_sw_fini(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        gen_pool_destroy(hdev->cpu_accessible_dma_pool);

        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);

        dma_pool_destroy(hdev->dma_pool);

        kfree(goya);

        return 0;
}
 893
/*
 * goya_init_dma_qman - Initialize the H/W registers of a single DMA QMAN
 *
 * @hdev: pointer to hl_device structure
 * @dma_id: index of the DMA QMAN to initialize
 * @bus_address: DMA (bus) address of the QMAN's PQ, allocated by the driver
 */
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
                dma_addr_t bus_address)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        /* QMAN register blocks are evenly spaced - the distance between
         * QMAN 0 and QMAN 1 is the per-QMAN stride
         */
        u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
        u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        /* PQ base, size (log2 of number of entries) and reset PI/CI */
        WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
        WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

        WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
        WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
        WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

        /* CP message base 0 points at the sync manager monitor payload
         * registers, base 1 at the sync objects
         */
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
        /* On error, the QMAN writes its async event ID to the GIC, which
         * raises an interrupt to the driver
         */
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

        /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
        WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
        WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

        /* With MMU enabled the DMA QMAN is only partly trusted - see the
         * security scheme description at the top of this file
         */
        if (goya->hw_cap_initialized & HW_CAP_MMU)
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

        if (hdev->stop_on_err)
                dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

        WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
        /* Enable the QMAN last, only after it is fully configured */
        WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
 945
 946static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
 947{
 948        u32 gic_base_lo, gic_base_hi;
 949        u64 sob_addr;
 950        u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);
 951
 952        gic_base_lo =
 953                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 954        gic_base_hi =
 955                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 956
 957        WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
 958        WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
 959        WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
 960                        GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);
 961
 962        if (dma_id)
 963                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
 964                                (dma_id - 1) * 4;
 965        else
 966                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
 967
 968        WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
 969        WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
 970}
 971
 972/*
 973 * goya_init_dma_qmans - Initialize QMAN DMA registers
 974 *
 975 * @hdev: pointer to hl_device structure
 976 *
 977 * Initialize the H/W registers of the QMAN DMA channels
 978 *
 979 */
 980void goya_init_dma_qmans(struct hl_device *hdev)
 981{
 982        struct goya_device *goya = hdev->asic_specific;
 983        struct hl_hw_queue *q;
 984        int i;
 985
 986        if (goya->hw_cap_initialized & HW_CAP_DMA)
 987                return;
 988
 989        q = &hdev->kernel_queues[0];
 990
 991        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
 992                q->cq_id = q->msi_vec = i;
 993                goya_init_dma_qman(hdev, i, q->bus_address);
 994                goya_init_dma_ch(hdev, i);
 995        }
 996
 997        goya->hw_cap_initialized |= HW_CAP_DMA;
 998}
 999
/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Clears the global configuration register of every DMA QMAN. No-op when
 * the DMA engines were never initialized.
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return;

        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}
1019
/*
 * goya_stop_queue - Stop a single QMAN
 *
 * @hdev: pointer to hl_device structure
 * @cfg_reg: the QMAN's GLBL_CFG1 register (stop request is written here)
 * @cp_sts_reg: the QMAN's CP_STS register (fence-in-progress indication)
 * @glbl_sts0_reg: the QMAN's GLBL_STS0 register (CP-is-stopped indication)
 *
 * Requests the QMAN's CP to stop, waits for an in-progress fence (if any)
 * to finish, then polls until the CP reports it has stopped.
 *
 * Returns 0 on success (including when the QMAN is stuck on a fence),
 * -EINVAL if the CP did not stop within the timeout.
 */
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
        int rc;
        u32 status;

        /* use the values of TPC0 as they are all the same*/

        WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

        /* If the CP is executing a fence, wait for it to complete first.
         * Note: hl_poll_timeout() re-reads the register into status.
         */
        status = RREG32(cp_sts_reg);
        if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
                rc = hl_poll_timeout(
                        hdev,
                        cp_sts_reg,
                        status,
                        !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
                        1000,
                        QMAN_FENCE_TIMEOUT_USEC);

                /* if QMAN is stuck in fence no need to check for stop */
                if (rc)
                        return 0;
        }

        /* Wait for the CP to acknowledge the stop request */
        rc = hl_poll_timeout(
                hdev,
                glbl_sts0_reg,
                status,
                (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
                1000,
                QMAN_STOP_TIMEOUT_USEC);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout while waiting for QMAN to stop\n");
                return -EINVAL;
        }

        return 0;
}
1061
1062/*
1063 * goya_stop_external_queues - Stop external queues
1064 *
1065 * @hdev: pointer to hl_device structure
1066 *
1067 * Returns 0 on success
1068 *
1069 */
1070static int goya_stop_external_queues(struct hl_device *hdev)
1071{
1072        int rc, retval = 0;
1073
1074        struct goya_device *goya = hdev->asic_specific;
1075
1076        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1077                return retval;
1078
1079        rc = goya_stop_queue(hdev,
1080                        mmDMA_QM_0_GLBL_CFG1,
1081                        mmDMA_QM_0_CP_STS,
1082                        mmDMA_QM_0_GLBL_STS0);
1083
1084        if (rc) {
1085                dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
1086                retval = -EIO;
1087        }
1088
1089        rc = goya_stop_queue(hdev,
1090                        mmDMA_QM_1_GLBL_CFG1,
1091                        mmDMA_QM_1_CP_STS,
1092                        mmDMA_QM_1_GLBL_STS0);
1093
1094        if (rc) {
1095                dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
1096                retval = -EIO;
1097        }
1098
1099        rc = goya_stop_queue(hdev,
1100                        mmDMA_QM_2_GLBL_CFG1,
1101                        mmDMA_QM_2_CP_STS,
1102                        mmDMA_QM_2_GLBL_STS0);
1103
1104        if (rc) {
1105                dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
1106                retval = -EIO;
1107        }
1108
1109        rc = goya_stop_queue(hdev,
1110                        mmDMA_QM_3_GLBL_CFG1,
1111                        mmDMA_QM_3_CP_STS,
1112                        mmDMA_QM_3_GLBL_STS0);
1113
1114        if (rc) {
1115                dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
1116                retval = -EIO;
1117        }
1118
1119        rc = goya_stop_queue(hdev,
1120                        mmDMA_QM_4_GLBL_CFG1,
1121                        mmDMA_QM_4_CP_STS,
1122                        mmDMA_QM_4_GLBL_STS0);
1123
1124        if (rc) {
1125                dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
1126                retval = -EIO;
1127        }
1128
1129        return retval;
1130}
1131
/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Publishes the PQ/EQ/CQ locations and sizes to the device CPU, then
 * performs the readiness handshake (READY_FOR_CP -> READY_FOR_HOST).
 *
 * Returns 0 on success (or when CPU queues are disabled / already set up),
 * -EIO if the device CPU did not respond within the timeout.
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
        int err;

        if (!hdev->cpu_queues_enable)
                return 0;

        /* Already initialized - nothing to do */
        if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
                return 0;

        eq = &hdev->event_queue;

        /* Publish the bus addresses of the PQ and the EQ to the device CPU */
        WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
        WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

        WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
        WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

        /* The CQ region is given as the device VA of the CPU-accessible
         * memory region
         */
        WREG32(mmCPU_CQ_BASE_ADDR_LOW,
                        lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
        WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
                        upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

        WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
        WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
        WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

        /* Used for EQ CI */
        WREG32(mmCPU_EQ_CI, 0);

        WREG32(mmCPU_IF_PF_PQ_PI, 0);

        /* Signal host readiness, then kick the device CPU through the GIC */
        WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_PI_UPDATE);

        /* Wait for the device CPU to acknowledge the handshake */
        err = hl_poll_timeout(
                hdev,
                mmCPU_PQ_INIT_STATUS,
                status,
                (status == PQ_INIT_STATUS_READY_FOR_HOST),
                1000,
                GOYA_CPU_TIMEOUT_USEC);

        if (err) {
                dev_err(hdev->dev,
                        "Failed to setup communication with device CPU\n");
                return -EIO;
        }

        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
}
1198
/*
 * goya_set_pll_refclk - clear the DIV_SEL registers of all PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes 0 to every divider-select register of each PLL (CPU, IC, MC,
 * MME, PCI, EMMC, TPC), switching them to the reference clock selection.
 */
static void goya_set_pll_refclk(struct hl_device *hdev)
{
        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}
1236
/*
 * goya_disable_clk_rlx - disable clock relaxation on the MME and IC PLLs
 *
 * @hdev: pointer to hl_device structure
 */
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
1242
/*
 * _goya_tpc_mbist_workaround - per-TPC part of the H2 #2443 workaround
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC to handle
 *
 * Runs MBIST on the TPC's memories, pulses the TPC core reset and then
 * zero-initializes the TPC's SLM.
 */
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
        u64 tpc_eml_address;
        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
        int err, slm_index;

        /* Per-TPC strides of the CFG (0x40000) and EML (0x200000) register
         * blocks; the SLM sits 0x100000 above the EML block
         */
        tpc_offset = tpc_id * 0x40000;
        tpc_eml_offset = tpc_id * 0x200000;
        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
        tpc_slm_offset = tpc_eml_address + 0x100000;

        /*
         * Workaround for Bug H2 #2443 :
         * "TPC SB is not initialized on chip reset"
         */

        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
                        tpc_id);

        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

        /* MBIST memory configuration values (per H/W workaround recipe) */
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

        /* Kick off MBIST and wait for it to finish */
        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

        err = hl_poll_timeout(
                hdev,
                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                val,
                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
                1000,
                HL_DEVICE_TIMEOUT_USEC);

        if (err)
                dev_err(hdev->dev,
                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

        /* Pulse the TPC core reset: assert, wait, de-assert, wait */
        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

        msleep(GOYA_RESET_WAIT_MSEC);

        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

        msleep(GOYA_RESET_WAIT_MSEC);

        /* Zero-initialize the TPC's SLM (256 32-bit words) */
        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
                WREG32(tpc_slm_offset + (slm_index << 2), 0);

        /* Read back to make sure the writes reached the device */
        val = RREG32(tpc_slm_offset);
}
1307
1308static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1309{
1310        struct goya_device *goya = hdev->asic_specific;
1311        int i;
1312
1313        if (hdev->pldm)
1314                return;
1315
1316        if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1317                return;
1318
1319        /* Workaround for H2 #2443 */
1320
1321        for (i = 0 ; i < TPC_MAX_NUM ; i++)
1322                _goya_tpc_mbist_workaround(hdev, i);
1323
1324        goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1325}
1326
1327/*
1328 * goya_init_golden_registers - Initialize golden registers
1329 *
1330 * @hdev: pointer to hl_device structure
1331 *
1332 * Initialize the H/W registers of the device
1333 *
1334 */
1335static void goya_init_golden_registers(struct hl_device *hdev)
1336{
1337        struct goya_device *goya = hdev->asic_specific;
1338        u32 polynom[10], tpc_intr_mask, offset;
1339        int i;
1340
1341        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1342                return;
1343
1344        polynom[0] = 0x00020080;
1345        polynom[1] = 0x00401000;
1346        polynom[2] = 0x00200800;
1347        polynom[3] = 0x00002000;
1348        polynom[4] = 0x00080200;
1349        polynom[5] = 0x00040100;
1350        polynom[6] = 0x00100400;
1351        polynom[7] = 0x00004000;
1352        polynom[8] = 0x00010000;
1353        polynom[9] = 0x00008000;
1354
1355        /* Mask all arithmetic interrupts from TPC */
1356        tpc_intr_mask = 0x7FFF;
1357
1358        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1359                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1360                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1361                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1362                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1363                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1364
1365                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1366                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1367                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1368                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1369                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1370
1371
1372                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1373                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1374                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1375                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1376                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1377
1378                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1379                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1380                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1381                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1382                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1383
1384                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1385                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1386                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1387                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1388                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1389
1390                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1391                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1392                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1393                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1394                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1395        }
1396
1397        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1398        WREG32(mmMME_AGU, 0x0f0f0f10);
1399        WREG32(mmMME_SEI_MASK, ~0x0);
1400
1401        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1402        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1403        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1404        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1405        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1406        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1407        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1408        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1409        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1410        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1411        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1412        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1413        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1414        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1415        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1416        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1417        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1418        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1419        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1420        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1421        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1422        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1423        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1424        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1425        WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1426        WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1427        WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1428        WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1429        WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1430        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1431        WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1432        WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1433        WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1434        WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1435        WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1436        WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1437        WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1438        WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1439        WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1440        WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1441        WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1442        WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1443        WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1444        WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1445        WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1446        WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1447        WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1448        WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1449        WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1450        WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1451        WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1452        WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1453        WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1454        WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1455        WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1456        WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1457        WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1458        WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1459        WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1460        WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1461        WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1462        WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1463        WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1464        WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1465        WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1466        WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1467        WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1468        WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1469        WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1470        WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1471        WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1472        WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1473        WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1474        WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1475        WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1476        WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1477        WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1478        WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1479        WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1480        WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1481        WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1482        WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1483        WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1484        WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1485
1486        WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1487        WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1488        WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1489        WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1490        WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1491        WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1492        WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1493        WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1494        WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1495        WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1496        WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1497        WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1498
1499        WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1500        WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1501        WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1502        WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1503        WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1504        WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1505        WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1506        WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1507        WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1508        WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1509        WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1510        WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1511
1512        WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1513        WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1514        WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1515        WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1516        WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1517        WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1518        WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1519        WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1520        WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1521        WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1522        WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1523        WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1524
1525        WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1526        WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1527        WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1528        WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1529        WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1530        WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1531        WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1532        WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1533        WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1534        WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1535        WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1536        WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1537
1538        WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1539        WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1540        WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1541        WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1542        WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1543        WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1544        WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1545        WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1546        WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1547        WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1548        WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1549        WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1550
1551        WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1552        WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1553        WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1554        WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1555        WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1556        WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1557        WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1558        WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1559        WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1560        WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1561        WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1562        WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1563
1564        for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1565                WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1566                WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1567                WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1568                WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1569                WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1570                WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1571
1572                WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1573                WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1574                WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1575                WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1576                WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1577                WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1578                WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1579                WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1580
1581                WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1582                WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1583        }
1584
1585        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1586                WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1587                                1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1588                WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1589                                1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1590        }
1591
1592        for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1593                /*
1594                 * Workaround for Bug H2 #2441 :
1595                 * "ST.NOP set trace event illegal opcode"
1596                 */
1597                WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1598
1599                WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1600                                1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1601                WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1602                                1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1603
1604                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1605                                ICACHE_FETCH_LINE_NUM, 2);
1606        }
1607
1608        WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1609        WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1610                        1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1611
1612        WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1613        WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1614                        1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1615
1616        /*
1617         * Workaround for H2 #HW-23 bug
1618         * Set DMA max outstanding read requests to 240 on DMA CH 1.
1619         * This limitation is still large enough to not affect Gen4 bandwidth.
1620         * We need to only limit that DMA channel because the user can only read
1621         * from Host using DMA CH 1
1622         */
1623        WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1624
1625        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1626
1627        goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1628}
1629
1630static void goya_init_mme_qman(struct hl_device *hdev)
1631{
1632        u32 mtr_base_lo, mtr_base_hi;
1633        u32 so_base_lo, so_base_hi;
1634        u32 gic_base_lo, gic_base_hi;
1635        u64 qman_base_addr;
1636
1637        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1638        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1639        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1640        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1641
1642        gic_base_lo =
1643                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1644        gic_base_hi =
1645                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1646
1647        qman_base_addr = hdev->asic_prop.sram_base_address +
1648                                MME_QMAN_BASE_OFFSET;
1649
1650        WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1651        WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1652        WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1653        WREG32(mmMME_QM_PQ_PI, 0);
1654        WREG32(mmMME_QM_PQ_CI, 0);
1655        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1656        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1657        WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1658        WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1659
1660        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1661        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1662        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1663        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1664
1665        /* QMAN CQ has 8 cache lines */
1666        WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1667
1668        WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1669        WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1670
1671        WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1672
1673        WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1674
1675        WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1676
1677        WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1678}
1679
1680static void goya_init_mme_cmdq(struct hl_device *hdev)
1681{
1682        u32 mtr_base_lo, mtr_base_hi;
1683        u32 so_base_lo, so_base_hi;
1684        u32 gic_base_lo, gic_base_hi;
1685
1686        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1687        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1688        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1689        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1690
1691        gic_base_lo =
1692                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1693        gic_base_hi =
1694                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1695
1696        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1697        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1698        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1699        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1700
1701        /* CMDQ CQ has 20 cache lines */
1702        WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1703
1704        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1705        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1706
1707        WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1708
1709        WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1710
1711        WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1712
1713        WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1714}
1715
1716void goya_init_mme_qmans(struct hl_device *hdev)
1717{
1718        struct goya_device *goya = hdev->asic_specific;
1719        u32 so_base_lo, so_base_hi;
1720
1721        if (goya->hw_cap_initialized & HW_CAP_MME)
1722                return;
1723
1724        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1725        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1726
1727        WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1728        WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1729
1730        goya_init_mme_qman(hdev);
1731        goya_init_mme_cmdq(hdev);
1732
1733        goya->hw_cap_initialized |= HW_CAP_MME;
1734}
1735
1736static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1737{
1738        u32 mtr_base_lo, mtr_base_hi;
1739        u32 so_base_lo, so_base_hi;
1740        u32 gic_base_lo, gic_base_hi;
1741        u64 qman_base_addr;
1742        u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1743
1744        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1745        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1746        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1747        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1748
1749        gic_base_lo =
1750                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1751        gic_base_hi =
1752                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1753
1754        qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1755
1756        WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1757        WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1758        WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1759        WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1760        WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1761        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1762        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1763        WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1764        WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1765
1766        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1767        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1768        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1769        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1770
1771        WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1772
1773        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1774        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1775
1776        WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1777                        GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1778
1779        WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1780
1781        WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1782
1783        WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1784}
1785
1786static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1787{
1788        u32 mtr_base_lo, mtr_base_hi;
1789        u32 so_base_lo, so_base_hi;
1790        u32 gic_base_lo, gic_base_hi;
1791        u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1792
1793        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1794        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1795        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1796        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1797
1798        gic_base_lo =
1799                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1800        gic_base_hi =
1801                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1802
1803        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1804        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1805        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1806        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1807
1808        WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1809
1810        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1811        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1812
1813        WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1814                        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1815
1816        WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1817
1818        WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1819
1820        WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1821}
1822
1823void goya_init_tpc_qmans(struct hl_device *hdev)
1824{
1825        struct goya_device *goya = hdev->asic_specific;
1826        u32 so_base_lo, so_base_hi;
1827        u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1828                        mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1829        int i;
1830
1831        if (goya->hw_cap_initialized & HW_CAP_TPC)
1832                return;
1833
1834        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1835        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1836
1837        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1838                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1839                                so_base_lo);
1840                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1841                                so_base_hi);
1842        }
1843
1844        goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1845        goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1846        goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1847        goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1848        goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1849        goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1850        goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1851        goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1852
1853        for (i = 0 ; i < TPC_MAX_NUM ; i++)
1854                goya_init_tpc_cmdq(hdev, i);
1855
1856        goya->hw_cap_initialized |= HW_CAP_TPC;
1857}
1858
1859/*
1860 * goya_disable_internal_queues - Disable internal queues
1861 *
1862 * @hdev: pointer to hl_device structure
1863 *
1864 */
1865static void goya_disable_internal_queues(struct hl_device *hdev)
1866{
1867        struct goya_device *goya = hdev->asic_specific;
1868
1869        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1870                goto disable_tpc;
1871
1872        WREG32(mmMME_QM_GLBL_CFG0, 0);
1873        WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1874
1875disable_tpc:
1876        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1877                return;
1878
1879        WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1880        WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1881
1882        WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1883        WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1884
1885        WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1886        WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1887
1888        WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1889        WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1890
1891        WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1892        WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1893
1894        WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1895        WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1896
1897        WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1898        WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1899
1900        WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1901        WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1902}
1903
1904/*
1905 * goya_stop_internal_queues - Stop internal queues
1906 *
1907 * @hdev: pointer to hl_device structure
1908 *
1909 * Returns 0 on success
1910 *
1911 */
1912static int goya_stop_internal_queues(struct hl_device *hdev)
1913{
1914        struct goya_device *goya = hdev->asic_specific;
1915        int rc, retval = 0;
1916
1917        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1918                goto stop_tpc;
1919
1920        /*
1921         * Each queue (QMAN) is a separate H/W logic. That means that each
1922         * QMAN can be stopped independently and failure to stop one does NOT
1923         * mandate we should not try to stop other QMANs
1924         */
1925
1926        rc = goya_stop_queue(hdev,
1927                        mmMME_QM_GLBL_CFG1,
1928                        mmMME_QM_CP_STS,
1929                        mmMME_QM_GLBL_STS0);
1930
1931        if (rc) {
1932                dev_err(hdev->dev, "failed to stop MME QMAN\n");
1933                retval = -EIO;
1934        }
1935
1936        rc = goya_stop_queue(hdev,
1937                        mmMME_CMDQ_GLBL_CFG1,
1938                        mmMME_CMDQ_CP_STS,
1939                        mmMME_CMDQ_GLBL_STS0);
1940
1941        if (rc) {
1942                dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1943                retval = -EIO;
1944        }
1945
1946stop_tpc:
1947        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1948                return retval;
1949
1950        rc = goya_stop_queue(hdev,
1951                        mmTPC0_QM_GLBL_CFG1,
1952                        mmTPC0_QM_CP_STS,
1953                        mmTPC0_QM_GLBL_STS0);
1954
1955        if (rc) {
1956                dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1957                retval = -EIO;
1958        }
1959
1960        rc = goya_stop_queue(hdev,
1961                        mmTPC0_CMDQ_GLBL_CFG1,
1962                        mmTPC0_CMDQ_CP_STS,
1963                        mmTPC0_CMDQ_GLBL_STS0);
1964
1965        if (rc) {
1966                dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1967                retval = -EIO;
1968        }
1969
1970        rc = goya_stop_queue(hdev,
1971                        mmTPC1_QM_GLBL_CFG1,
1972                        mmTPC1_QM_CP_STS,
1973                        mmTPC1_QM_GLBL_STS0);
1974
1975        if (rc) {
1976                dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1977                retval = -EIO;
1978        }
1979
1980        rc = goya_stop_queue(hdev,
1981                        mmTPC1_CMDQ_GLBL_CFG1,
1982                        mmTPC1_CMDQ_CP_STS,
1983                        mmTPC1_CMDQ_GLBL_STS0);
1984
1985        if (rc) {
1986                dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1987                retval = -EIO;
1988        }
1989
1990        rc = goya_stop_queue(hdev,
1991                        mmTPC2_QM_GLBL_CFG1,
1992                        mmTPC2_QM_CP_STS,
1993                        mmTPC2_QM_GLBL_STS0);
1994
1995        if (rc) {
1996                dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
1997                retval = -EIO;
1998        }
1999
2000        rc = goya_stop_queue(hdev,
2001                        mmTPC2_CMDQ_GLBL_CFG1,
2002                        mmTPC2_CMDQ_CP_STS,
2003                        mmTPC2_CMDQ_GLBL_STS0);
2004
2005        if (rc) {
2006                dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2007                retval = -EIO;
2008        }
2009
2010        rc = goya_stop_queue(hdev,
2011                        mmTPC3_QM_GLBL_CFG1,
2012                        mmTPC3_QM_CP_STS,
2013                        mmTPC3_QM_GLBL_STS0);
2014
2015        if (rc) {
2016                dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2017                retval = -EIO;
2018        }
2019
2020        rc = goya_stop_queue(hdev,
2021                        mmTPC3_CMDQ_GLBL_CFG1,
2022                        mmTPC3_CMDQ_CP_STS,
2023                        mmTPC3_CMDQ_GLBL_STS0);
2024
2025        if (rc) {
2026                dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2027                retval = -EIO;
2028        }
2029
2030        rc = goya_stop_queue(hdev,
2031                        mmTPC4_QM_GLBL_CFG1,
2032                        mmTPC4_QM_CP_STS,
2033                        mmTPC4_QM_GLBL_STS0);
2034
2035        if (rc) {
2036                dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2037                retval = -EIO;
2038        }
2039
2040        rc = goya_stop_queue(hdev,
2041                        mmTPC4_CMDQ_GLBL_CFG1,
2042                        mmTPC4_CMDQ_CP_STS,
2043                        mmTPC4_CMDQ_GLBL_STS0);
2044
2045        if (rc) {
2046                dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2047                retval = -EIO;
2048        }
2049
2050        rc = goya_stop_queue(hdev,
2051                        mmTPC5_QM_GLBL_CFG1,
2052                        mmTPC5_QM_CP_STS,
2053                        mmTPC5_QM_GLBL_STS0);
2054
2055        if (rc) {
2056                dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2057                retval = -EIO;
2058        }
2059
2060        rc = goya_stop_queue(hdev,
2061                        mmTPC5_CMDQ_GLBL_CFG1,
2062                        mmTPC5_CMDQ_CP_STS,
2063                        mmTPC5_CMDQ_GLBL_STS0);
2064
2065        if (rc) {
2066                dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2067                retval = -EIO;
2068        }
2069
2070        rc = goya_stop_queue(hdev,
2071                        mmTPC6_QM_GLBL_CFG1,
2072                        mmTPC6_QM_CP_STS,
2073                        mmTPC6_QM_GLBL_STS0);
2074
2075        if (rc) {
2076                dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2077                retval = -EIO;
2078        }
2079
2080        rc = goya_stop_queue(hdev,
2081                        mmTPC6_CMDQ_GLBL_CFG1,
2082                        mmTPC6_CMDQ_CP_STS,
2083                        mmTPC6_CMDQ_GLBL_STS0);
2084
2085        if (rc) {
2086                dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2087                retval = -EIO;
2088        }
2089
2090        rc = goya_stop_queue(hdev,
2091                        mmTPC7_QM_GLBL_CFG1,
2092                        mmTPC7_QM_CP_STS,
2093                        mmTPC7_QM_GLBL_STS0);
2094
2095        if (rc) {
2096                dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2097                retval = -EIO;
2098        }
2099
2100        rc = goya_stop_queue(hdev,
2101                        mmTPC7_CMDQ_GLBL_CFG1,
2102                        mmTPC7_CMDQ_CP_STS,
2103                        mmTPC7_CMDQ_GLBL_STS0);
2104
2105        if (rc) {
2106                dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2107                retval = -EIO;
2108        }
2109
2110        return retval;
2111}
2112
/*
 * goya_dma_stall - Stall all five DMA channels
 *
 * @hdev: pointer to hl_device structure
 *
 * Sets the DMA_STOP bit in each DMA QMAN's global configuration register.
 * Called as part of halting the device's engines.
 */
static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Nothing to stall if the DMA QMANs were never initialized */
	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}
2126
/*
 * goya_tpc_stall - Stall all eight TPC engines
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes the stall bit of each TPC's configuration block. Called as part
 * of halting the device's engines.
 */
static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Nothing to stall if the TPCs were never initialized */
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}
2143
/*
 * goya_mme_stall - Stall the MME engine
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes all-ones to the MME stall register. Called as part of halting
 * the device's engines.
 */
static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Nothing to stall if the MME was never initialized */
	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME_STALL, 0xFFFFFFFF);
}
2153
2154static int goya_enable_msix(struct hl_device *hdev)
2155{
2156        struct goya_device *goya = hdev->asic_specific;
2157        int cq_cnt = hdev->asic_prop.completion_queues_count;
2158        int rc, i, irq_cnt_init, irq;
2159
2160        if (goya->hw_cap_initialized & HW_CAP_MSIX)
2161                return 0;
2162
2163        rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2164                                GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2165        if (rc < 0) {
2166                dev_err(hdev->dev,
2167                        "MSI-X: Failed to enable support -- %d/%d\n",
2168                        GOYA_MSIX_ENTRIES, rc);
2169                return rc;
2170        }
2171
2172        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2173                irq = pci_irq_vector(hdev->pdev, i);
2174                rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2175                                &hdev->completion_queue[i]);
2176                if (rc) {
2177                        dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2178                        goto free_irqs;
2179                }
2180        }
2181
2182        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2183
2184        rc = request_irq(irq, hl_irq_handler_eq, 0,
2185                        goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2186                        &hdev->event_queue);
2187        if (rc) {
2188                dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2189                goto free_irqs;
2190        }
2191
2192        goya->hw_cap_initialized |= HW_CAP_MSIX;
2193        return 0;
2194
2195free_irqs:
2196        for (i = 0 ; i < irq_cnt_init ; i++)
2197                free_irq(pci_irq_vector(hdev->pdev, i),
2198                        &hdev->completion_queue[i]);
2199
2200        pci_free_irq_vectors(hdev->pdev);
2201        return rc;
2202}
2203
2204static void goya_sync_irqs(struct hl_device *hdev)
2205{
2206        struct goya_device *goya = hdev->asic_specific;
2207        int i;
2208
2209        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2210                return;
2211
2212        /* Wait for all pending IRQs to be finished */
2213        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2214                synchronize_irq(pci_irq_vector(hdev->pdev, i));
2215
2216        synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2217}
2218
2219static void goya_disable_msix(struct hl_device *hdev)
2220{
2221        struct goya_device *goya = hdev->asic_specific;
2222        int i, irq;
2223
2224        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2225                return;
2226
2227        goya_sync_irqs(hdev);
2228
2229        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2230        free_irq(irq, &hdev->event_queue);
2231
2232        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2233                irq = pci_irq_vector(hdev->pdev, i);
2234                free_irq(irq, &hdev->completion_queue[i]);
2235        }
2236
2237        pci_free_irq_vectors(hdev->pdev);
2238
2239        goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2240}
2241
/*
 * goya_enable_timestamp - Restart the PSOC timestamp counter from zero
 *
 * @hdev: pointer to hl_device structure
 *
 * The counter must be disabled before its value registers are cleared,
 * hence the disable -> zero -> enable sequence.
 */
static void goya_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}
2254
/*
 * goya_disable_timestamp - Stop the PSOC timestamp counter
 *
 * @hdev: pointer to hl_device structure
 */
static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
2260
/*
 * goya_halt_engines - Quiesce all compute engines
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if called on the hard-reset path; also tears down
 *              MSI-X and the device CPU's MMU mappings
 *
 * Stop sequence: first stop the queues from fetching new jobs, then stall
 * the engines themselves, then disable the queues entirely. A settle delay
 * separates the stages.
 */
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	/* Simulation/emulation platforms (pldm) need a longer settle time */
	if (hdev->pldm)
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		/* Soft reset keeps MSI-X; just drain in-flight handlers */
		goya_sync_irqs(hdev);
	}
}
2296
2297/*
2298 * goya_load_firmware_to_device() - Load LINUX FW code to device.
2299 * @hdev: Pointer to hl_device structure.
2300 *
2301 * Copy LINUX fw code from firmware file to HBM BAR.
2302 *
2303 * Return: 0 on success, non-zero for failure.
2304 */
2305static int goya_load_firmware_to_device(struct hl_device *hdev)
2306{
2307        void __iomem *dst;
2308
2309        dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2310
2311        return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
2312}
2313
2314/*
2315 * goya_load_boot_fit_to_device() - Load boot fit to device.
2316 * @hdev: Pointer to hl_device structure.
2317 *
2318 * Copy boot fit file to SRAM BAR.
2319 *
2320 * Return: 0 on success, non-zero for failure.
2321 */
2322static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2323{
2324        void __iomem *dst;
2325
2326        dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2327
2328        return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
2329}
2330
2331/*
2332 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2333 * The version string should be located by that offset.
2334 */
2335static void goya_read_device_fw_version(struct hl_device *hdev,
2336                                        enum hl_fw_component fwc)
2337{
2338        const char *name;
2339        u32 ver_off;
2340        char *dest;
2341
2342        switch (fwc) {
2343        case FW_COMP_UBOOT:
2344                ver_off = RREG32(mmUBOOT_VER_OFFSET);
2345                dest = hdev->asic_prop.uboot_ver;
2346                name = "U-Boot";
2347                break;
2348        case FW_COMP_PREBOOT:
2349                ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2350                dest = hdev->asic_prop.preboot_ver;
2351                name = "Preboot";
2352                break;
2353        default:
2354                dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2355                return;
2356        }
2357
2358        ver_off &= ~((u32)SRAM_BASE_ADDR);
2359
2360        if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2361                memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2362                                                        VERSION_MAX_LEN);
2363        } else {
2364                dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2365                                                                name, ver_off);
2366                strcpy(dest, "unavailable");
2367        }
2368}
2369
2370static int goya_init_cpu(struct hl_device *hdev)
2371{
2372        struct goya_device *goya = hdev->asic_specific;
2373        int rc;
2374
2375        if (!hdev->cpu_enable)
2376                return 0;
2377
2378        if (goya->hw_cap_initialized & HW_CAP_CPU)
2379                return 0;
2380
2381        /*
2382         * Before pushing u-boot/linux to device, need to set the ddr bar to
2383         * base address of dram
2384         */
2385        if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2386                dev_err(hdev->dev,
2387                        "failed to map DDR bar to DRAM base address\n");
2388                return -EIO;
2389        }
2390
2391        rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2392                        mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2393                        mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
2394                        false, GOYA_CPU_TIMEOUT_USEC,
2395                        GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
2396
2397        if (rc)
2398                return rc;
2399
2400        goya->hw_cap_initialized |= HW_CAP_CPU;
2401
2402        return 0;
2403}
2404
/*
 * goya_mmu_update_asid_hop0_addr - Program the hop0 page-table address of
 *                                  a single ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID whose hop0 address is being configured
 * @phys_addr: physical address of this ASID's hop0 page table
 *
 * Returns 0 on success, negative errno if the MMU did not acknowledge the
 * update within the timeout.
 */
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
						u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Simulation/emulation platforms (pldm) need a longer timeout */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* The physical address is split between two registers:
	 * bits 43:12 and bits 49:44
	 */
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);

	/* Bit 31 triggers the update for the given ASID; the poll below
	 * waits for it to clear (presumably done by HW when the update is
	 * consumed)
	 */
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
2436
2437int goya_mmu_init(struct hl_device *hdev)
2438{
2439        struct asic_fixed_properties *prop = &hdev->asic_prop;
2440        struct goya_device *goya = hdev->asic_specific;
2441        u64 hop0_addr;
2442        int rc, i;
2443
2444        if (!hdev->mmu_enable)
2445                return 0;
2446
2447        if (goya->hw_cap_initialized & HW_CAP_MMU)
2448                return 0;
2449
2450        hdev->dram_supports_virtual_memory = true;
2451        hdev->dram_default_page_mapping = true;
2452
2453        for (i = 0 ; i < prop->max_asid ; i++) {
2454                hop0_addr = prop->mmu_pgt_addr +
2455                                (i * prop->mmu_hop_table_size);
2456
2457                rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2458                if (rc) {
2459                        dev_err(hdev->dev,
2460                                "failed to set hop0 addr for asid %d\n", i);
2461                        goto err;
2462                }
2463        }
2464
2465        goya->hw_cap_initialized |= HW_CAP_MMU;
2466
2467        /* init MMU cache manage page */
2468        WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2469                                lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2470        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2471
2472        /* Remove follower feature due to performance bug */
2473        WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2474                        (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2475
2476        hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2477                                        VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2478
2479        WREG32(mmMMU_MMU_ENABLE, 1);
2480        WREG32(mmMMU_SPI_MASK, 0xF);
2481
2482        return 0;
2483
2484err:
2485        return rc;
2486}
2487
2488/*
2489 * goya_hw_init - Goya hardware initialization code
2490 *
2491 * @hdev: pointer to hl_device structure
2492 *
2493 * Returns 0 on success
2494 *
2495 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	dev_info(hdev->dev, "Starting initialization of H/W\n");

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	/* CPU (firmware) must come up before anything else is configured */
	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	/* Security must be configured before the QMANs start executing */
	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	/* Undo the QMAN initialization done above; earlier steps (CPU, MMU,
	 * security) are torn down by the reset flow
	 */
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}
2565
2566/*
2567 * goya_hw_fini - Goya hardware tear-down code
2568 *
2569 * @hdev: pointer to hl_device structure
2570 * @hard_reset: should we do hard reset to all engines or just reset the
2571 *              compute/dma engines
2572 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	/* Palladium emulation platform is much slower than real H/W */
	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/* I don't know what is the state of the CPU so make sure it is
		 * stopped in any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		/* Give the embedded CPU time to enter WFE */
		msleep(cpu_timeout_ms);

		/* Restore the DDR bar to point to DRAM base before reset */
		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		/* Soft reset only resets the compute/DMA engines */
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_info(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. In either reset we need to wait until the reset
	 * is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset) {
		/* CPU, MMU, DDR etc. survive a soft reset; only the compute
		 * engines' capabilities are cleared
		 */
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	/* Hard reset wipes everything - clear all H/W capabilities */
	goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
					HW_CAP_DDR_0 | HW_CAP_DDR_1 |
					HW_CAP_DMA | HW_CAP_MME |
					HW_CAP_MMU | HW_CAP_TPC_MBIST |
					HW_CAP_GOLDEN | HW_CAP_TPC);
	memset(goya->events_stat, 0, sizeof(goya->events_stat));
}
2646
2647int goya_suspend(struct hl_device *hdev)
2648{
2649        int rc;
2650
2651        rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2652        if (rc)
2653                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2654
2655        return rc;
2656}
2657
/*
 * goya_resume - bring the device back after system suspend
 *
 * Re-initializes the PCI iATU (inbound address translation) configuration.
 * NOTE(review): presumably the rest of the device configuration survives
 * suspend, since nothing else is restored here - confirm with resume flow.
 */
int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}
2662
2663static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2664                u64 kaddress, phys_addr_t paddress, u32 size)
2665{
2666        int rc;
2667
2668        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2669                        VM_DONTCOPY | VM_NORESERVE;
2670
2671        rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2672                                size, vma->vm_page_prot);
2673        if (rc)
2674                dev_err(hdev->dev, "remap_pfn_range error %d", rc);
2675
2676        return rc;
2677}
2678
/*
 * goya_ring_doorbell - update a H/W queue's producer index register
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which H/W queue to ring
 * @pi: new producer index value
 *
 * Translates the queue ID to its QMAN PQ_PI register and writes the new
 * producer index. For the CPU queue, additionally raises a GIC interrupt
 * so the embedded CPU notices the update.
 */
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	/* Map queue ID to its doorbell (PQ producer index) register */
	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	/* The embedded CPU polls nothing - interrupt it so it sees the
	 * new producer index
	 */
	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
2760
/*
 * goya_pqe_write - copy a buffer descriptor into a PQ entry
 *
 * @hdev: pointer to hl_device structure
 * @pqe: destination PQ entry
 * @bd: buffer descriptor to write
 */
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMANs are on the SRAM so need to copy to IO space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}
2766
2767static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2768                                        dma_addr_t *dma_handle, gfp_t flags)
2769{
2770        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2771                                                dma_handle, flags);
2772
2773        /* Shift to the device's base physical address of host memory */
2774        if (kernel_addr)
2775                *dma_handle += HOST_PHYS_BASE;
2776
2777        return kernel_addr;
2778}
2779
2780static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2781                                        void *cpu_addr, dma_addr_t dma_handle)
2782{
2783        /* Cancel the device's base physical address of host memory */
2784        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2785
2786        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2787}
2788
/*
 * goya_get_int_queue_base - get the base address of an internal H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: the internal (MME/TPC) queue to look up
 * @dma_handle: filled with the device-side (SRAM) address of the queue
 * @queue_len: filled with the queue length in entries
 *
 * Internal queues live in SRAM; this translates the queue ID into both the
 * host-mapped BAR address and the device-side SRAM address.
 *
 * Return: host virtual address of the queue, or NULL for an invalid ID.
 */
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	/* Host access to the SRAM goes through the CFG/SRAM BAR */
	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	/* Apply the same offset on both the host and device views */
	base += offset;
	*dma_handle += offset;

	return base;
}
2846
/*
 * goya_send_job_on_qman0 - execute a driver-internal job on DMA queue 0
 *
 * @hdev: pointer to hl_device structure
 * @job: the job whose patched CB should be executed
 *
 * Appends a MSG_PROT fence packet to the job's CB, temporarily opens
 * QMAN0 security so the packet may write to the host fence address, sends
 * the CB and busy-waits on the fence value to know the job completed.
 * Requires the device to be otherwise idle.
 *
 * Return: 0 on success, negative error code on failure/timeout.
 */
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	/* Palladium emulation platform is much slower than real H/W */
	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	/* Host memory the fence packet will write its completion value to */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	/* Open QMAN0 security so MSG_PROT may reach the host fence address */
	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	/* The fence packet occupies the last slot of the patched CB */
	fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot));

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Poll (every ~1ms) until the fence value lands in host memory */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	/* Always restore QMAN0 security before returning */
	goya_qman0_set_security(hdev, false);

	return rc;
}
2915
2916int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2917                                u32 timeout, long *result)
2918{
2919        struct goya_device *goya = hdev->asic_specific;
2920
2921        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2922                if (result)
2923                        *result = 0;
2924                return 0;
2925        }
2926
2927        if (!timeout)
2928                timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
2929
2930        return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2931                                        timeout, result);
2932}
2933
/*
 * goya_test_queue - sanity-test a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the queue to test
 *
 * Sends a MSG_PROT packet on the queue that writes a known fence value to
 * host memory and polls for it to verify the queue executes packets.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO on timeout.
 */
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	/* Host memory the fence packet will write its value to */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for queue testing\n");
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for queue testing\n");
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet\n");
		goto free_pkt;
	}

	/* Poll (every ~1ms) until the fence value lands in host memory */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
3001
3002int goya_test_cpu_queue(struct hl_device *hdev)
3003{
3004        struct goya_device *goya = hdev->asic_specific;
3005
3006        /*
3007         * check capability here as send_cpu_message() won't update the result
3008         * value if no capability
3009         */
3010        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3011                return 0;
3012
3013        return hl_fw_test_cpu_queue(hdev);
3014}
3015
3016int goya_test_queues(struct hl_device *hdev)
3017{
3018        int i, rc, ret_val = 0;
3019
3020        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3021                rc = goya_test_queue(hdev, i);
3022                if (rc)
3023                        ret_val = -EINVAL;
3024        }
3025
3026        return ret_val;
3027}
3028
3029static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3030                                        gfp_t mem_flags, dma_addr_t *dma_handle)
3031{
3032        void *kernel_addr;
3033
3034        if (size > GOYA_DMA_POOL_BLK_SIZE)
3035                return NULL;
3036
3037        kernel_addr =  dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3038
3039        /* Shift to the device's base physical address of host memory */
3040        if (kernel_addr)
3041                *dma_handle += HOST_PHYS_BASE;
3042
3043        return kernel_addr;
3044}
3045
3046static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3047                                dma_addr_t dma_addr)
3048{
3049        /* Cancel the device's base physical address of host memory */
3050        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3051
3052        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3053}
3054
3055void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3056                                        dma_addr_t *dma_handle)
3057{
3058        void *vaddr;
3059
3060        vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3061        *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3062                        VA_CPU_ACCESSIBLE_MEM_ADDR;
3063
3064        return vaddr;
3065}
3066
/*
 * goya_cpu_accessible_dma_pool_free - return a chunk to the CPU-accessible
 *                                     pool
 *
 * Thin wrapper around the common FW pool free; the pool is keyed by
 * virtual address only, so no handle translation is needed here.
 */
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
3072
3073static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3074                                int nents, enum dma_data_direction dir)
3075{
3076        struct scatterlist *sg;
3077        int i;
3078
3079        if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3080                return -ENOMEM;
3081
3082        /* Shift to the device's base physical address of host memory */
3083        for_each_sg(sgl, sg, nents, i)
3084                sg->dma_address += HOST_PHYS_BASE;
3085
3086        return 0;
3087}
3088
3089static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3090                                int nents, enum dma_data_direction dir)
3091{
3092        struct scatterlist *sg;
3093        int i;
3094
3095        /* Cancel the device's base physical address of host memory */
3096        for_each_sg(sgl, sg, nents, i)
3097                sg->dma_address -= HOST_PHYS_BASE;
3098
3099        dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3100}
3101
/*
 * goya_get_dma_desc_list_size - compute patched-CB size for a DMA of an sgt
 *
 * @hdev: pointer to hl_device structure
 * @sgt: the DMA-mapped scatter-gather table
 *
 * Counts how many LIN_DMA packets are needed to transfer the table:
 * physically contiguous consecutive entries are merged into a single
 * descriptor as long as the merged length stays within the H/W maximum
 * transfer size.
 *
 * Return: total size in bytes of the required LIN_DMA packets.
 */
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry marks the end of the mapped data */
		if (len == 0)
			break;

		/* Greedily merge following entries that are physically
		 * contiguous with the current one, advancing the outer
		 * iterator (sg/count) past each merged entry
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		/* One LIN_DMA packet covers the merged region */
		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
3142
3143static int goya_pin_memory_before_cs(struct hl_device *hdev,
3144                                struct hl_cs_parser *parser,
3145                                struct packet_lin_dma *user_dma_pkt,
3146                                u64 addr, enum dma_data_direction dir)
3147{
3148        struct hl_userptr *userptr;
3149        int rc;
3150
3151        if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3152                        parser->job_userptr_list, &userptr))
3153                goto already_pinned;
3154
3155        userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3156        if (!userptr)
3157                return -ENOMEM;
3158
3159        rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3160                                userptr);
3161        if (rc)
3162                goto free_userptr;
3163
3164        list_add_tail(&userptr->job_node, parser->job_userptr_list);
3165
3166        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3167                                        userptr->sgt->nents, dir);
3168        if (rc) {
3169                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3170                goto unpin_memory;
3171        }
3172
3173        userptr->dma_mapped = true;
3174        userptr->dir = dir;
3175
3176already_pinned:
3177        parser->patched_cb_size +=
3178                        goya_get_dma_desc_list_size(hdev, userptr->sgt);
3179
3180        return 0;
3181
3182unpin_memory:
3183        hl_unpin_host_memory(hdev, userptr);
3184free_userptr:
3185        kfree(userptr);
3186        return rc;
3187}
3188
3189static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3190                                struct hl_cs_parser *parser,
3191                                struct packet_lin_dma *user_dma_pkt)
3192{
3193        u64 device_memory_addr, addr;
3194        enum dma_data_direction dir;
3195        enum goya_dma_direction user_dir;
3196        bool sram_addr = true;
3197        bool skip_host_mem_pin = false;
3198        bool user_memset;
3199        u32 ctl;
3200        int rc = 0;
3201
3202        ctl = le32_to_cpu(user_dma_pkt->ctl);
3203
3204        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3205                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3206
3207        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3208                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3209
3210        switch (user_dir) {
3211        case DMA_HOST_TO_DRAM:
3212                dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3213                dir = DMA_TO_DEVICE;
3214                sram_addr = false;
3215                addr = le64_to_cpu(user_dma_pkt->src_addr);
3216                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3217                if (user_memset)
3218                        skip_host_mem_pin = true;
3219                break;
3220
3221        case DMA_DRAM_TO_HOST:
3222                dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3223                dir = DMA_FROM_DEVICE;
3224                sram_addr = false;
3225                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3226                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3227                break;
3228
3229        case DMA_HOST_TO_SRAM:
3230                dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3231                dir = DMA_TO_DEVICE;
3232                addr = le64_to_cpu(user_dma_pkt->src_addr);
3233                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3234                if (user_memset)
3235                        skip_host_mem_pin = true;
3236                break;
3237
3238        case DMA_SRAM_TO_HOST:
3239                dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3240                dir = DMA_FROM_DEVICE;
3241                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3242                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3243                break;
3244        default:
3245                dev_err(hdev->dev, "DMA direction is undefined\n");
3246                return -EFAULT;
3247        }
3248
3249        if (sram_addr) {
3250                if (!hl_mem_area_inside_range(device_memory_addr,
3251                                le32_to_cpu(user_dma_pkt->tsize),
3252                                hdev->asic_prop.sram_user_base_address,
3253                                hdev->asic_prop.sram_end_address)) {
3254
3255                        dev_err(hdev->dev,
3256                                "SRAM address 0x%llx + 0x%x is invalid\n",
3257                                device_memory_addr,
3258                                user_dma_pkt->tsize);
3259                        return -EFAULT;
3260                }
3261        } else {
3262                if (!hl_mem_area_inside_range(device_memory_addr,
3263                                le32_to_cpu(user_dma_pkt->tsize),
3264                                hdev->asic_prop.dram_user_base_address,
3265                                hdev->asic_prop.dram_end_address)) {
3266
3267                        dev_err(hdev->dev,
3268                                "DRAM address 0x%llx + 0x%x is invalid\n",
3269                                device_memory_addr,
3270                                user_dma_pkt->tsize);
3271                        return -EFAULT;
3272                }
3273        }
3274
3275        if (skip_host_mem_pin)
3276                parser->patched_cb_size += sizeof(*user_dma_pkt);
3277        else {
3278                if ((dir == DMA_TO_DEVICE) &&
3279                                (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3280                        dev_err(hdev->dev,
3281                                "Can't DMA from host on queue other then 1\n");
3282                        return -EFAULT;
3283                }
3284
3285                rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3286                                                addr, dir);
3287        }
3288
3289        return rc;
3290}
3291
3292static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3293                                struct hl_cs_parser *parser,
3294                                struct packet_lin_dma *user_dma_pkt)
3295{
3296        u64 sram_memory_addr, dram_memory_addr;
3297        enum goya_dma_direction user_dir;
3298        u32 ctl;
3299
3300        ctl = le32_to_cpu(user_dma_pkt->ctl);
3301        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3302                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3303
3304        if (user_dir == DMA_DRAM_TO_SRAM) {
3305                dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3306                dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3307                sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3308        } else {
3309                dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3310                sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3311                dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3312        }
3313
3314        if (!hl_mem_area_inside_range(sram_memory_addr,
3315                                le32_to_cpu(user_dma_pkt->tsize),
3316                                hdev->asic_prop.sram_user_base_address,
3317                                hdev->asic_prop.sram_end_address)) {
3318                dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3319                        sram_memory_addr, user_dma_pkt->tsize);
3320                return -EFAULT;
3321        }
3322
3323        if (!hl_mem_area_inside_range(dram_memory_addr,
3324                                le32_to_cpu(user_dma_pkt->tsize),
3325                                hdev->asic_prop.dram_user_base_address,
3326                                hdev->asic_prop.dram_end_address)) {
3327                dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3328                        dram_memory_addr, user_dma_pkt->tsize);
3329                return -EFAULT;
3330        }
3331
3332        parser->patched_cb_size += sizeof(*user_dma_pkt);
3333
3334        return 0;
3335}
3336
/*
 * goya_validate_dma_pkt_no_mmu() - validate a user LIN_DMA packet when the
 * device MMU is disabled.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state, updated by the validation helpers.
 * @user_dma_pkt: the user's LIN_DMA packet.
 *
 * Dispatches to the device-only or the host-involved validation helper based
 * on the DMA direction encoded in the packet's ctl word.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Special handling for DMA with size 0. The H/W has a bug where
	 * this can cause the QMAN DMA to get stuck, so block it here.
	 * (comparing the raw __le32 against 0 is endian-safe)
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	/* Device-internal transfers and host transfers are validated apart */
	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}
3373
/*
 * goya_validate_dma_pkt_mmu() - validate a user LIN_DMA packet when the
 * device MMU is enabled.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state; patched_cb_size is grown by the packet size.
 * @user_dma_pkt: the user's LIN_DMA packet.
 *
 * With the MMU on, addresses are virtual so no range checks against
 * SRAM/DRAM are needed; only the HW-23 host-read restriction and the
 * zero-size H/W bug are enforced.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * WA for HW-23.
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other then 1\n");
		return -EFAULT;
	}

	/* Zero-size DMA can hang the QMAN DMA engine (H/W bug) */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	/* Packet passes through unmodified, account its size */
	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}
3410
3411static int goya_validate_wreg32(struct hl_device *hdev,
3412                                struct hl_cs_parser *parser,
3413                                struct packet_wreg32 *wreg_pkt)
3414{
3415        struct goya_device *goya = hdev->asic_specific;
3416        u32 sob_start_addr, sob_end_addr;
3417        u16 reg_offset;
3418
3419        reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3420                        GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3421
3422        dev_dbg(hdev->dev, "WREG32 packet details:\n");
3423        dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3424        dev_dbg(hdev->dev, "value      == 0x%x\n",
3425                le32_to_cpu(wreg_pkt->value));
3426
3427        if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3428                dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3429                        reg_offset);
3430                return -EPERM;
3431        }
3432
3433        /*
3434         * With MMU, DMA channels are not secured, so it doesn't matter where
3435         * the WR COMP will be written to because it will go out with
3436         * non-secured property
3437         */
3438        if (goya->hw_cap_initialized & HW_CAP_MMU)
3439                return 0;
3440
3441        sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3442        sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3443
3444        if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3445                        (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3446
3447                dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3448                        wreg_pkt->value);
3449                return -EPERM;
3450        }
3451
3452        return 0;
3453}
3454
/*
 * goya_validate_cb() - run security validation on a user command buffer.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state for this job.
 * @is_mmu: true when the device MMU is enabled (selects the LIN_DMA path).
 *
 * Walks the user CB packet by packet, rejecting packet types users may not
 * submit, and accumulates in parser->patched_cb_size the size the patched
 * CB will need (including the two trailing MSG_PROT packets).
 *
 * Return: 0 on success, negative errno if the CB is rejected.
 */
static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = (struct goya_packet *) (uintptr_t)
			(parser->user_cb->kernel_address + cb_parsed_length);

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* Reject a packet whose tail would cross the CB boundary */
		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			/* Allowed as-is; copied verbatim when patching */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
3562
3563static int goya_patch_dma_packet(struct hl_device *hdev,
3564                                struct hl_cs_parser *parser,
3565                                struct packet_lin_dma *user_dma_pkt,
3566                                struct packet_lin_dma *new_dma_pkt,
3567                                u32 *new_dma_pkt_size)
3568{
3569        struct hl_userptr *userptr;
3570        struct scatterlist *sg, *sg_next_iter;
3571        u32 count, dma_desc_cnt;
3572        u64 len, len_next;
3573        dma_addr_t dma_addr, dma_addr_next;
3574        enum goya_dma_direction user_dir;
3575        u64 device_memory_addr, addr;
3576        enum dma_data_direction dir;
3577        struct sg_table *sgt;
3578        bool skip_host_mem_pin = false;
3579        bool user_memset;
3580        u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3581
3582        ctl = le32_to_cpu(user_dma_pkt->ctl);
3583
3584        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3585                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3586
3587        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3588                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3589
3590        if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3591                        (user_dma_pkt->tsize == 0)) {
3592                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3593                *new_dma_pkt_size = sizeof(*new_dma_pkt);
3594                return 0;
3595        }
3596
3597        if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3598                addr = le64_to_cpu(user_dma_pkt->src_addr);
3599                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3600                dir = DMA_TO_DEVICE;
3601                if (user_memset)
3602                        skip_host_mem_pin = true;
3603        } else {
3604                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3605                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3606                dir = DMA_FROM_DEVICE;
3607        }
3608
3609        if ((!skip_host_mem_pin) &&
3610                (hl_userptr_is_pinned(hdev, addr,
3611                        le32_to_cpu(user_dma_pkt->tsize),
3612                        parser->job_userptr_list, &userptr) == false)) {
3613                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3614                                addr, user_dma_pkt->tsize);
3615                return -EFAULT;
3616        }
3617
3618        if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3619                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3620                *new_dma_pkt_size = sizeof(*user_dma_pkt);
3621                return 0;
3622        }
3623
3624        user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3625
3626        user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3627
3628        sgt = userptr->sgt;
3629        dma_desc_cnt = 0;
3630
3631        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3632                len = sg_dma_len(sg);
3633                dma_addr = sg_dma_address(sg);
3634
3635                if (len == 0)
3636                        break;
3637
3638                while ((count + 1) < sgt->nents) {
3639                        sg_next_iter = sg_next(sg);
3640                        len_next = sg_dma_len(sg_next_iter);
3641                        dma_addr_next = sg_dma_address(sg_next_iter);
3642
3643                        if (len_next == 0)
3644                                break;
3645
3646                        if ((dma_addr + len == dma_addr_next) &&
3647                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3648                                len += len_next;
3649                                count++;
3650                                sg = sg_next_iter;
3651                        } else {
3652                                break;
3653                        }
3654                }
3655
3656                ctl = le32_to_cpu(user_dma_pkt->ctl);
3657                if (likely(dma_desc_cnt))
3658                        ctl &= ~GOYA_PKT_CTL_EB_MASK;
3659                ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3660                                GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3661                new_dma_pkt->ctl = cpu_to_le32(ctl);
3662                new_dma_pkt->tsize = cpu_to_le32((u32) len);
3663
3664                if (dir == DMA_TO_DEVICE) {
3665                        new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3666                        new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3667                } else {
3668                        new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3669                        new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3670                }
3671
3672                if (!user_memset)
3673                        device_memory_addr += len;
3674                dma_desc_cnt++;
3675                new_dma_pkt++;
3676        }
3677
3678        if (!dma_desc_cnt) {
3679                dev_err(hdev->dev,
3680                        "Error of 0 SG entries when patching DMA packet\n");
3681                return -EFAULT;
3682        }
3683
3684        /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3685        new_dma_pkt--;
3686        new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3687
3688        *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3689
3690        return 0;
3691}
3692
/*
 * goya_patch_cb() - copy the user CB into the patched CB, rewriting LIN_DMA
 * packets to use pinned host DMA addresses.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state; user_cb is the source, patched_cb the target.
 *
 * Used only in the no-MMU path. Disallowed packet types are rejected even
 * though goya_validate_cb() already rejected them, as defense in depth.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = (struct goya_packet *) (uintptr_t)
			(parser->user_cb->kernel_address + cb_parsed_length);
		kernel_pkt = (struct goya_packet *) (uintptr_t)
			(parser->patched_cb->kernel_address +
					cb_patched_cur_length);

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand into several descriptors (SG list) */
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			/* Validate the copy, not the user buffer (TOCTOU) */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
3792
/*
 * goya_parse_cb_mmu() - parse and validate an external-queue CB with MMU on.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state.
 *
 * Allocates a kernel (patched) CB, copies the user CB into it verbatim and
 * validates the copy, avoiding TOCTOU on the user buffer. Packets are not
 * rewritten in the MMU path, so the validated size must equal the allocated
 * size. On success, parser->patched_cb holds one reference that is released
 * when the job completes.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
			&patched_cb_handle, HL_KERNEL_ASID_ID, false);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
		(void *) (uintptr_t) parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation recomputed the size; it must match the allocation */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
					patched_cb_handle << PAGE_SHIFT);

	return rc;
}
3870
/*
 * goya_parse_cb_no_mmu() - parse and validate an external-queue CB with the
 * MMU disabled.
 * @hdev: pointer to the device structure.
 * @parser: CS parser state.
 *
 * First validates the user CB (which also computes the patched CB size,
 * including expanded LIN_DMA descriptors), then allocates the patched CB
 * and fills it via goya_patch_cb(). On failure, the job's pinned userptr
 * list is released here.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
				patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
3921
3922static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3923                                        struct hl_cs_parser *parser)
3924{
3925        struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3926        struct goya_device *goya = hdev->asic_specific;
3927
3928        if (goya->hw_cap_initialized & HW_CAP_MMU)
3929                return 0;
3930
3931        /* For internal queue jobs, just check if CB address is valid */
3932        if (hl_mem_area_inside_range(
3933                        (u64) (uintptr_t) parser->user_cb,
3934                        parser->user_cb_size,
3935                        asic_prop->sram_user_base_address,
3936                        asic_prop->sram_end_address))
3937                return 0;
3938
3939        if (hl_mem_area_inside_range(
3940                        (u64) (uintptr_t) parser->user_cb,
3941                        parser->user_cb_size,
3942                        asic_prop->dram_user_base_address,
3943                        asic_prop->dram_end_address))
3944                return 0;
3945
3946        dev_err(hdev->dev,
3947                "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
3948                parser->user_cb, parser->user_cb_size);
3949
3950        return -EFAULT;
3951}
3952
3953int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3954{
3955        struct goya_device *goya = hdev->asic_specific;
3956
3957        if (parser->queue_type == QUEUE_TYPE_INT)
3958                return goya_parse_cb_no_ext_queue(hdev, parser);
3959
3960        if (goya->hw_cap_initialized & HW_CAP_MMU)
3961                return goya_parse_cb_mmu(hdev, parser);
3962        else
3963                return goya_parse_cb_no_mmu(hdev, parser);
3964}
3965
3966void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
3967                                u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
3968                                bool eb)
3969{
3970        struct packet_msg_prot *cq_pkt;
3971        u32 tmp;
3972
3973        cq_pkt = (struct packet_msg_prot *) (uintptr_t)
3974                (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
3975
3976        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3977                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
3978                        (1 << GOYA_PKT_CTL_MB_SHIFT);
3979        cq_pkt->ctl = cpu_to_le32(tmp);
3980        cq_pkt->value = cpu_to_le32(cq_val);
3981        cq_pkt->addr = cpu_to_le64(cq_addr);
3982
3983        cq_pkt++;
3984
3985        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3986                        (1 << GOYA_PKT_CTL_MB_SHIFT);
3987        cq_pkt->ctl = cpu_to_le32(tmp);
3988        cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3989        cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3990}
3991
/* Publish the event queue consumer index to the device CPU's register */
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}
3996
/* Intentionally empty: Goya needs no phase-topology restore (ASIC op stub) */
void goya_restore_phase_topology(struct hl_device *hdev)
{

}
4001
4002static void goya_clear_sm_regs(struct hl_device *hdev)
4003{
4004        int i, num_of_sob_in_longs, num_of_mon_in_longs;
4005
4006        num_of_sob_in_longs =
4007                ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4008
4009        num_of_mon_in_longs =
4010                ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4011
4012        for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4013                WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4014
4015        for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4016                WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4017
4018        /* Flush all WREG to prevent race */
4019        i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4020}
4021
4022/*
4023 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
4024 *                       address.
4025 *
4026 * @hdev:       pointer to hl_device structure
4027 * @addr:       device or host mapped address
4028 * @val:        returned value
4029 *
4030 * In case of DDR address that is not mapped into the default aperture that
4031 * the DDR bar exposes, the function will configure the iATU so that the DDR
4032 * bar will be positioned at a base address that allows reading from the
4033 * required address. Configuring the iATU during normal operation can
4034 * lead to undefined behavior and therefore, should be done with extreme care
4035 *
4036 */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space - read through the register macros */
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is always reachable through its PCI BAR mapping */
		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		/* Align down to BAR size so addr falls inside the window */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* Move the DDR BAR to cover addr; returns old base or U64_MAX */
		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			/* Restore the previous BAR base */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* Either the move or the restore of the BAR failed */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical memory; only valid with no IOMMU in the way */
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4077
4078/*
4079 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4080 *                        address.
4081 *
4082 * @hdev:       pointer to hl_device structure
4083 * @addr:       device or host mapped address
 * @val:        value to write
4085 *
4086 * In case of DDR address that is not mapped into the default aperture that
4087 * the DDR bar exposes, the function will configure the iATU so that the DDR
4088 * bar will be positioned at a base address that allows writing to the
4089 * required address. Configuring the iATU during normal operation can
4090 * lead to undefined behavior and therefore, should be done with extreme care
4091 *
4092 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space is written through the register macro */
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is always reachable through its dedicated PCI BAR */
		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		/* Move the DDR BAR so its aperture covers addr, write, then
		 * restore the previous BAR base. A U64_MAX return from
		 * goya_set_ddr_bar_base() means the iATU change failed.
		 */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical memory; direct access is only valid when no
		 * IOMMU translation is in the way
		 */
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4133
/*
 * goya_debugfs_read64 - read a 64bit value from a given device or a host
 *                       mapped address. Same regions and DDR BAR handling as
 *                       goya_debugfs_read32(), with upper bounds tightened by
 *                       sizeof(u64) so the full quadword stays in range.
 */
static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		/* Config registers are 32 bits wide - compose the quadword
		 * from two consecutive reads, low dword first
		 */
		u32 val_l = RREG32(addr - CFG_BASE);
		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) val_h) << 32) | val_l;

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr <=
		   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		/* Temporarily reposition the DDR BAR over addr, then restore
		 * it; U64_MAX indicates the iATU reconfiguration failed.
		 */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical memory; direct access is only valid when no
		 * IOMMU translation is in the way
		 */
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4178
4179static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4180{
4181        struct asic_fixed_properties *prop = &hdev->asic_prop;
4182        u64 ddr_bar_addr;
4183        int rc = 0;
4184
4185        if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4186                WREG32(addr - CFG_BASE, lower_32_bits(val));
4187                WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
4188
4189        } else if ((addr >= SRAM_BASE_ADDR) &&
4190                        (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4191
4192                writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4193                                        (addr - SRAM_BASE_ADDR));
4194
4195        } else if (addr <=
4196                   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4197
4198                u64 bar_base_addr = DRAM_PHYS_BASE +
4199                                (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4200
4201                ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4202                if (ddr_bar_addr != U64_MAX) {
4203                        writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4204                                                (addr - bar_base_addr));
4205
4206                        ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4207                                                        ddr_bar_addr);
4208                }
4209                if (ddr_bar_addr == U64_MAX)
4210                        rc = -EIO;
4211
4212        } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4213                *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4214
4215        } else {
4216                rc = -EFAULT;
4217        }
4218
4219        return rc;
4220}
4221
4222static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4223{
4224        struct goya_device *goya = hdev->asic_specific;
4225
4226        if (hdev->hard_reset_pending)
4227                return U64_MAX;
4228
4229        return readq(hdev->pcie_bar[DDR_BAR_ID] +
4230                        (addr - goya->ddr_bar_cur_addr));
4231}
4232
4233static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4234{
4235        struct goya_device *goya = hdev->asic_specific;
4236
4237        if (hdev->hard_reset_pending)
4238                return;
4239
4240        writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4241                        (addr - goya->ddr_bar_cur_addr));
4242}
4243
/*
 * _goya_get_event_desc - map an async event id to its description string.
 *
 * Strings containing a %d are printf templates; goya_get_event_desc()
 * fills in the engine/instance index. Unknown events map to "N/A".
 */
static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
}
4353
4354static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4355{
4356        u8 index;
4357
4358        switch (event_type) {
4359        case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4360        case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4361        case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4362        case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4363        case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4364        case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4365        case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4366        case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4367                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4368                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4369                break;
4370        case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4371                index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4372                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4373                break;
4374        case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4375                index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4376                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4377                break;
4378        case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4379        case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4380        case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4381        case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4382        case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4383        case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4384        case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4385        case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4386                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4387                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4388                break;
4389        case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4390        case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4391        case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4392        case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4393        case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4394        case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4395        case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4396        case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4397                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4398                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4399                break;
4400        case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4401                index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4402                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4403                break;
4404        case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4405                index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4406                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4407                break;
4408        case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4409                index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4410                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4411                break;
4412        case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4413                index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4414                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4415                break;
4416        case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4417        case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4418        case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4419        case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4420        case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4421        case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4422        case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4423        case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4424                index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4425                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4426                break;
4427        case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4428                index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4429                snprintf(desc, size, _goya_get_event_desc(event_type), index);
4430                break;
4431        default:
4432                snprintf(desc, size, _goya_get_event_desc(event_type));
4433                break;
4434        }
4435}
4436
/*
 * goya_print_razwi_info - report and re-arm latched RAZWI captures
 *
 * Each DMA macro capture register latches a valid bit when an illegal
 * (RAZWI) read or write was observed on the LBW/HBW path. Report every
 * latched event (rate-limited) and write 0 to re-arm the capture.
 */
static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}
4459
/*
 * goya_print_mmu_error_info - report a captured MMU page fault, if any
 *
 * The MMU latches the faulting VA across two registers: bits 49:32 plus a
 * valid bit in mmMMU_PAGE_ERROR_CAPTURE, and bits 31:0 in
 * mmMMU_PAGE_ERROR_CAPTURE_VA. If the valid bit is set, print the
 * reconstructed address and clear the capture so the next fault can latch.
 */
static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	/* Nothing to report if the MMU was never brought up */
	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		/* Rebuild the 50-bit VA: high bits from the capture
		 * register, low 32 bits from the VA register
		 */
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}
4481
4482static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4483                                bool razwi)
4484{
4485        char desc[20] = "";
4486
4487        goya_get_event_desc(event_type, desc, sizeof(desc));
4488        dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4489                event_type, desc);
4490
4491        if (razwi) {
4492                goya_print_razwi_info(hdev);
4493                goya_print_mmu_error_info(hdev);
4494        }
4495}
4496
/*
 * goya_unmask_irq_arr - ask ArmCP to unmask a set of event IRQs
 *
 * @hdev:         pointer to hl_device structure
 * @irq_arr:      array of event IRQ numbers to unmask
 * @irq_arr_size: size of @irq_arr in bytes
 *
 * Builds a variable-length ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet
 * carrying the IRQ numbers (converted to little-endian) and sends it to
 * the device CPU.
 */
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
		size_t irq_arr_size)
{
	struct armcp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	long result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes so that ArmCP can copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is casted to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						ARMCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}
4547
/* Late init after a soft reset; returns 0 on success, negative errno on
 * failure.
 */
static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}
4557
4558static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4559{
4560        struct armcp_packet pkt;
4561        long result;
4562        int rc;
4563
4564        memset(&pkt, 0, sizeof(pkt));
4565
4566        pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
4567                                ARMCP_PKT_CTL_OPCODE_SHIFT);
4568        pkt.value = cpu_to_le64(event_type);
4569
4570        rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4571                                                0, &result);
4572
4573        if (rc)
4574                dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4575
4576        return rc;
4577}
4578
/*
 * goya_print_clk_change_info - log a clock-throttling state change
 *
 * Translates the four FIX_*_ENV start/end async events into rate-limited
 * info messages; any other event id reaching here is a driver bug and is
 * logged as an error.
 */
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
4605
4606void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4607{
4608        u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4609        u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4610                                >> EQ_CTL_EVENT_TYPE_SHIFT);
4611        struct goya_device *goya = hdev->asic_specific;
4612
4613        goya->events_stat[event_type]++;
4614        goya->events_stat_aggregate[event_type]++;
4615
4616        switch (event_type) {
4617        case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4618        case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4619        case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4620        case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4621        case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4622        case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4623        case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4624        case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4625        case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4626        case GOYA_ASYNC_EVENT_ID_MME_ECC:
4627        case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4628        case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4629        case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4630        case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4631        case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4632        case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4633        case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4634        case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4635        case GOYA_ASYNC_EVENT_ID_GIC500:
4636        case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4637        case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4638        case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4639        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4640                goya_print_irq_info(hdev, event_type, false);
4641                hl_device_reset(hdev, true, false);
4642                break;
4643
4644        case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4645        case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4646        case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4647        case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4648        case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4649        case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4650        case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4651        case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4652        case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4653        case GOYA_ASYNC_EVENT_ID_MME_WACS:
4654        case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4655        case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4656        case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4657        case GOYA_ASYNC_EVENT_ID_PSOC:
4658        case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4659        case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4660        case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4661        case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4662        case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4663        case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4664        case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4665        case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4666        case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4667        case GOYA_ASYNC_EVENT_ID_MME_QM:
4668        case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4669        case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4670        case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4671                goya_print_irq_info(hdev, event_type, true);
4672                goya_unmask_irq(hdev, event_type);
4673                break;
4674
4675        case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4676        case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4677        case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4678        case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4679        case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4680        case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4681        case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4682        case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4683        case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4684        case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4685                goya_print_irq_info(hdev, event_type, false);
4686                goya_unmask_irq(hdev, event_type);
4687                break;
4688
4689        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4690        case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4691        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4692        case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4693                goya_print_clk_change_info(hdev, event_type);
4694                goya_unmask_irq(hdev, event_type);
4695                break;
4696
4697        default:
4698                dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4699                                event_type);
4700                break;
4701        }
4702}
4703
4704void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4705{
4706        struct goya_device *goya = hdev->asic_specific;
4707
4708        if (aggregate) {
4709                *size = (u32) sizeof(goya->events_stat_aggregate);
4710                return goya->events_stat_aggregate;
4711        }
4712
4713        *size = (u32) sizeof(goya->events_stat);
4714        return goya->events_stat;
4715}
4716
/*
 * goya_memset_device_memory - fill a device memory range via QMAN0 DMA
 *
 * @hdev:    pointer to hl_device structure
 * @addr:    device address to start filling at
 * @size:    number of bytes to fill
 * @val:     64-bit fill pattern
 * @is_dram: true for a DRAM destination, false for SRAM
 *
 * Builds a kernel command buffer of LIN_DMA memset packets, each covering
 * at most 2GB, and executes it synchronously on QMAN0.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
				u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	/* One LIN_DMA packet per 2GB chunk, plus room for the MSG_PROT
	 * packet appended when the job is submitted
	 */
	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		/* MEMSET mode: src_addr carries the fill pattern itself,
		 * not a source address
		 */
		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		/* Only the final packet may carry a partial (< 2GB) size */
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	/* Keep the CB alive while the job references it */
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	/* Synchronous execution on the privileged QMAN0 queue */
	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
4788
/*
 * goya_context_switch - prepare the device for a new user context
 *
 * @hdev: pointer to hl_device structure
 * @asid: address space ID of the incoming context
 *
 * Scrubs the SRAM, restores user-modifiable registers to their defaults,
 * prepares the MMU for the new ASID and clears the sync manager registers.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	/* On pldm platforms only the first 0x10000 bytes are scrubbed -
	 * presumably to keep simulation time bounded; TODO confirm
	 */
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
					mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	/* Channels 1..N complete to consecutive sync objects starting at
	 * SOB_OBJ_1000
	 */
	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
						lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}
4824
4825static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4826{
4827        struct asic_fixed_properties *prop = &hdev->asic_prop;
4828        struct goya_device *goya = hdev->asic_specific;
4829        u64 addr = prop->mmu_pgt_addr;
4830        u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4831                        MMU_CACHE_MNG_SIZE;
4832
4833        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4834                return 0;
4835
4836        return goya_memset_device_memory(hdev, addr, size, 0, true);
4837}
4838
4839static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4840{
4841        struct goya_device *goya = hdev->asic_specific;
4842        u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4843        u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4844        u64 val = 0x9999999999999999ull;
4845
4846        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4847                return 0;
4848
4849        return goya_memset_device_memory(hdev, addr, size, val, true);
4850}
4851
/*
 * goya_mmu_add_mappings_for_device_cpu - create the kernel-context MMU
 * mappings the on-device CPU needs:
 *   1. an identity (VA == PA) mapping of the CPU F/W image in DRAM, in 2MB
 *      pages
 *   2. a mapping of the host CPU-accessible DMA region at
 *      VA_CPU_ACCESSIBLE_MEM_ADDR - one 2MB page if the physical address is
 *      2MB aligned, otherwise 4KB pages covering 2MB
 * and then route the CPU interface AXI user bits through the kernel ASID.
 *
 * On failure, already-created mappings are torn down in reverse. Note that
 * off/cpu_off are s64 so the rollback loops can decrement past zero and
 * terminate.
 *
 * Return: 0 on success, error code of the first failed hl_mmu_map otherwise.
 */
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	/* Identity-map the F/W image, flushing the cache on the last page */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* 2MB-aligned - a single huge-page mapping suffices */
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			/* step back so the unmap loop skips the failed page */
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		/* Unaligned - map the 2MB region with 4KB pages */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				/* step back past the failed page */
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	/* Route CPU interface reads/writes through the kernel ASID */
	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	/* Roll back the 4KB CPU-accessible mappings created so far */
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	/* Roll back the F/W image identity mappings */
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
4929
/*
 * goya_mmu_remove_device_cpu_mappings - undo everything done by
 * goya_mmu_add_mappings_for_device_cpu: disable the CPU interface AXI user
 * overrides, unmap the CPU-accessible region (2MB page or 4KB pages,
 * mirroring the alignment decision made at map time) and unmap the F/W image.
 *
 * Unmap failures are only warned about - teardown continues regardless.
 */
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* Nothing to undo if the mappings were never established */
	if (!goya->device_cpu_mmu_mappings_done)
		return;

	/* Stop routing CPU interface traffic through the MMU first */
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		/* Flush the cache only on the last 4KB page */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	/* Unmap the F/W image identity mappings, flushing on the last page */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}
4971
4972static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4973{
4974        struct goya_device *goya = hdev->asic_specific;
4975        int i;
4976
4977        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4978                return;
4979
4980        if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4981                WARN(1, "asid %u is too big\n", asid);
4982                return;
4983        }
4984
4985        /* zero the MMBP and ASID bits and then set the ASID */
4986        for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4987                goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
4988}
4989
/*
 * goya_mmu_invalidate_cache - invalidate the whole MMU STLB cache (L0 & L1).
 *
 * @hdev: pointer to hl_device structure
 * @is_hard: only hard invalidation is meaningful on Goya; soft requests
 *           return immediately
 * @flags: unused on Goya
 *
 * Kicks mmSTLB_INV_ALL_START and polls it until the H/W clears it. On
 * timeout, triggers a hard device reset.
 *
 * Return: 0 on success or when skipped, poll-timeout error code otherwise.
 */
static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	/* Skip if MMU is down or a hard reset is already on its way */
	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	/* Simulation platforms are much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	/* Wait for the H/W to clear the start bit, signaling completion */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
5033
/*
 * goya_mmu_invalidate_cache_range - invalidate the MMU STLB cache for a
 * specific range. Currently falls back to a full L0 & L1 invalidation (see
 * TODO below); @asid, @va and @size are therefore unused for now.
 *
 * The invalidation is requested by bumping the 8-bit producer index in
 * mmSTLB_CACHE_INV and waiting for the consumer index to catch up. On
 * timeout, triggers a hard device reset.
 *
 * Return: 0 on success or when skipped, poll-timeout error code otherwise.
 */
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	/* Skip if MMU is down or a hard reset is already on its way */
	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	/* Simulation platforms are much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines with
	 * mask of ASID & VA & size.
	 * Note that L1 with be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	/* Keep the index mask bits, write the advanced producer index */
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	/* Wait until the H/W consumer index reaches our producer index */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
5088
5089int goya_send_heartbeat(struct hl_device *hdev)
5090{
5091        struct goya_device *goya = hdev->asic_specific;
5092
5093        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5094                return 0;
5095
5096        return hl_fw_send_heartbeat(hdev);
5097}
5098
5099int goya_armcp_info_get(struct hl_device *hdev)
5100{
5101        struct goya_device *goya = hdev->asic_specific;
5102        struct asic_fixed_properties *prop = &hdev->asic_prop;
5103        u64 dram_size;
5104        int rc;
5105
5106        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5107                return 0;
5108
5109        rc = hl_fw_armcp_info_get(hdev);
5110        if (rc)
5111                return rc;
5112
5113        dram_size = le64_to_cpu(prop->armcp_info.dram_size);
5114        if (dram_size) {
5115                if ((!is_power_of_2(dram_size)) ||
5116                                (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5117                        dev_err(hdev->dev,
5118                                "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5119                                dram_size);
5120                        dram_size = DRAM_PHYS_DEFAULT_SIZE;
5121                }
5122
5123                prop->dram_size = dram_size;
5124                prop->dram_end_address = prop->dram_base_address + dram_size;
5125        }
5126
5127        if (!strlen(prop->armcp_info.card_name))
5128                strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5129                                CARD_NAME_MAX_LEN);
5130
5131        return 0;
5132}
5133
/* Intentional no-op: clock gating is not supported on Goya H/W */
static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5138
/* Intentional no-op: clock gating is not supported on Goya H/W */
static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5143
/*
 * goya_is_device_idle - check whether all engines (DMA, TPC, MME) are idle.
 *
 * @hdev: pointer to hl_device structure
 * @mask: optional out bitmask - a set bit marks a busy engine, indexed by
 *        GOYA_ENGINE_ID_*
 * @s: optional seq_file; when given, a human-readable status table is printed
 *
 * Return: true if every engine is idle, false otherwise.
 */
static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	/* Stride between consecutive DMA engine register files */
	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		/* A DMA engine is idle only if both its QM and core are idle */
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	/* Stride between consecutive TPC register files */
	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		/* TPC is idle only when QM, CMDQ and the core all report idle */
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	/* Goya has a single MME engine */
	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}
5221
5222static void goya_hw_queues_lock(struct hl_device *hdev)
5223        __acquires(&goya->hw_queues_lock)
5224{
5225        struct goya_device *goya = hdev->asic_specific;
5226
5227        spin_lock(&goya->hw_queues_lock);
5228}
5229
5230static void goya_hw_queues_unlock(struct hl_device *hdev)
5231        __releases(&goya->hw_queues_lock)
5232{
5233        struct goya_device *goya = hdev->asic_specific;
5234
5235        spin_unlock(&goya->hw_queues_lock);
5236}
5237
5238static u32 goya_get_pci_id(struct hl_device *hdev)
5239{
5240        return hdev->pdev->device;
5241}
5242
5243static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5244                                size_t max_size)
5245{
5246        struct goya_device *goya = hdev->asic_specific;
5247
5248        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5249                return 0;
5250
5251        return hl_fw_get_eeprom_data(hdev, data, max_size);
5252}
5253
5254static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5255{
5256        return RREG32(mmHW_STATE);
5257}
5258
/* No per-context ASIC initialization is needed on Goya */
static int goya_ctx_init(struct hl_ctx *ctx)
{
	return 0;
}
5263
/* On Goya, completion queues map 1:1 to H/W queues */
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}
5268
/* Signal CBs are not supported on Goya - report zero size */
static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}
5273
/* Wait CBs are not supported on Goya - report zero size */
static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}
5278
/* Intentional no-op: signal/wait CS flows are not supported on Goya */
static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
{

}
5283
/* Intentional no-op: signal/wait CS flows are not supported on Goya */
static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
			u16 sob_val, u16 mon_id, u32 q_idx)
{

}
5289
/* Intentional no-op: sync objects are not used on Goya */
static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}
5294
5295static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
5296{
5297        if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
5298                                                        HL_POWER9_HOST_MAGIC) {
5299                dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
5300                hdev->power9_64bit_dma_enable = 1;
5301                hdev->dma_mask = 64;
5302        } else {
5303                dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
5304                hdev->power9_64bit_dma_enable = 0;
5305                hdev->dma_mask = 48;
5306        }
5307}
5308
5309u64 goya_get_device_time(struct hl_device *hdev)
5310{
5311        u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5312
5313        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5314}
5315
/* ASIC function-pointer table registered with the common habanalabs code */
static const struct hl_asic_funcs goya_funcs = {
	/* Init/fini and power management */
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	/* Command buffers and queues */
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	/* DMA memory services */
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	/* Command submission */
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	/* Debugfs access */
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	/* MMU */
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	/* F/W communication and misc services */
	.send_heartbeat = goya_send_heartbeat,
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.get_hw_state = goya_get_hw_state,
	/* PCI setup */
	.pci_bars_map = goya_pci_bars_map,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	/* Signal/wait CS support (stubs - not supported on Goya) */
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time
};
5391
/**
 * goya_set_asic_funcs - set the Goya function pointers
 * @hdev: pointer to hl_device structure
 *
 * Installs the Goya ASIC function-pointer table so the common habanalabs
 * code dispatches through the Goya-specific implementations.
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}
5402