/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 *
 * Copyright 2016-2018 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef HABANALABS_H_
#define HABANALABS_H_

#include <linux/types.h>
#include <linux/ioctl.h>

/*
 * Defines that are ASIC-specific but constitute an ABI between the kernel
 * driver and userspace
 */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */

/*
 * Queue Numbering
 *
 * The external queues (PCI DMA channels) MUST be before the internal queues
 * and each group (PCI DMA channels and internal) must be contiguous inside
 * itself but there can be a gap between the two groups (although not
 * recommended)
 */

enum goya_queue_id {
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1,
	GOYA_QUEUE_ID_DMA_2,
	GOYA_QUEUE_ID_DMA_3,
	GOYA_QUEUE_ID_DMA_4,
	GOYA_QUEUE_ID_CPU_PQ,
	GOYA_QUEUE_ID_MME,	/* Internal queues start here */
	GOYA_QUEUE_ID_TPC0,
	GOYA_QUEUE_ID_TPC1,
	GOYA_QUEUE_ID_TPC2,
	GOYA_QUEUE_ID_TPC3,
	GOYA_QUEUE_ID_TPC4,
	GOYA_QUEUE_ID_TPC5,
	GOYA_QUEUE_ID_TPC6,
	GOYA_QUEUE_ID_TPC7,
	GOYA_QUEUE_ID_SIZE	/* Number of queue IDs, not a queue itself */
};

/*
 * Engine Numbering
 *
 * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
 */

enum goya_engine_id {
	GOYA_ENGINE_ID_DMA_0 = 0,
	GOYA_ENGINE_ID_DMA_1,
	GOYA_ENGINE_ID_DMA_2,
	GOYA_ENGINE_ID_DMA_3,
	GOYA_ENGINE_ID_DMA_4,
	GOYA_ENGINE_ID_MME_0,
	GOYA_ENGINE_ID_TPC_0,
	GOYA_ENGINE_ID_TPC_1,
	GOYA_ENGINE_ID_TPC_2,
	GOYA_ENGINE_ID_TPC_3,
	GOYA_ENGINE_ID_TPC_4,
	GOYA_ENGINE_ID_TPC_5,
	GOYA_ENGINE_ID_TPC_6,
	GOYA_ENGINE_ID_TPC_7,
	GOYA_ENGINE_ID_SIZE	/* Number of engine IDs, not an engine itself */
};

/*
 * Device status values.
 *
 * NOTE(review): presumably the values reported in
 * `struct hl_info_device_status' - confirm against the driver.
 * The first enumerator is pinned to 0 explicitly, like the other enums in
 * this header, so the numbering cannot drift if entries are reordered.
 */
enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL = 0,
	HL_DEVICE_STATUS_IN_RESET,
	HL_DEVICE_STATUS_MALFUNCTION
};

/* Opcode for management ioctl */
#define HL_INFO_HW_IP_INFO	0
#define HL_INFO_HW_EVENTS	1
#define HL_INFO_DRAM_USAGE	2
#define HL_INFO_HW_IDLE		3
#define HL_INFO_DEVICE_STATUS	4

/* Size in bytes of the armcp_version buffer in struct hl_info_hw_ip_info */
#define HL_INFO_VERSION_MAX_LEN	128

/*
 * H/W IP information.
 *
 * NOTE(review): presumably the structure filled for HL_INFO_HW_IP_INFO -
 * confirm against the driver.
 */
struct hl_info_hw_ip_info {
	__u64 sram_base_address;
	__u64 dram_base_address;
	__u64 dram_size;
	__u32 sram_size;
	__u32 num_of_events;
	__u32 device_id; /* PCI Device ID */
	__u32 reserved[3];
	__u32 armcp_cpld_version;
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;
	__u32 psoc_pci_pll_od;
	__u32 psoc_pci_pll_div_factor;
	__u8 tpc_enabled_mask;
	__u8 dram_enabled;
	/* Rounds the two __u8 fields above up to 4 bytes */
	__u8 pad[2];
	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
};

/*
 * DRAM usage information.
 *
 * NOTE(review): presumably the structure filled for HL_INFO_DRAM_USAGE -
 * confirm against the driver.
 */
struct hl_info_dram_usage {
	__u64 dram_free_mem;
	__u64 ctx_dram_mem;
};

struct hl_info_hw_idle {
	__u32 is_idle;
	/*
	 * Bitmask of busy engines.
	 * Bits definition is according to `enum <chip>_engine_id'.
	 */
	__u32 busy_engines_mask;
};

struct hl_info_device_status {
	/* NOTE(review): presumably one of enum hl_device_status - confirm */
	__u32 status;
	__u32 pad;
};

struct hl_info_args {
	/* Location of relevant struct in userspace */
	__u64 return_pointer;
	/*
	 * The size of the return value. Just like "size" in "snprintf",
	 * it limits how many bytes the kernel can write
	 *
	 * For hw_events array, the size should be
	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
	 */
	__u32 return_size;

	/* HL_INFO_* */
	__u32 op;

	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

/* Opcode to create a new command buffer */
#define HL_CB_OP_CREATE		0
/* Opcode to destroy previously created command buffer */
#define HL_CB_OP_DESTROY	1

struct hl_cb_in {
	/* Handle of CB or 0 if we want to create one */
	__u64 cb_handle;
	/* HL_CB_OP_* */
	__u32 op;
	/* Size of CB. Maximum size is 2MB. The minimum size that will be
	 * allocated, regardless of this parameter's value, is PAGE_SIZE
	 */
	__u32 cb_size;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

struct hl_cb_out {
	/* Handle of CB */
	__u64 cb_handle;
};

/* Argument of HL_IOCTL_CB */
union hl_cb_args {
	struct hl_cb_in in;
	struct hl_cb_out out;
};

/*
 * This structure size must always be fixed to 64-bytes for backward
 * compatibility
 */
struct hl_cs_chunk {
	/*
	 * For external queue, this represents a Handle of CB on the Host
	 * For internal queue, this represents an SRAM or DRAM address of the
	 * internal CB
	 */
	__u64 cb_handle;
	/* Index of queue to put the CB on */
	__u32 queue_index;
	/*
	 * Size of command buffer with valid packets
	 * Can be smaller than actual CB size
	 */
	__u32 cb_size;
	/*
	 * HL_CS_CHUNK_FLAGS_*
	 * (no HL_CS_CHUNK_FLAGS_* values are defined in this header)
	 */
	__u32 cs_chunk_flags;
	/* Align structure to 64 bytes */
	__u32 pad[11];
};

#define HL_CS_FLAGS_FORCE_RESTORE	0x1

#define HL_CS_STATUS_SUCCESS		0

/*
 * NOTE(review): unlike the other argument structures there is no explicit
 * trailing pad after ctx_id (the members sum to 44 bytes). It is left
 * untouched on purpose: adding one could change the ioctl argument size on
 * ABIs that align __u64 to 4 bytes.
 */
struct hl_cs_in {
	/* this holds address of array of hl_cs_chunk for restore phase */
	__u64 chunks_restore;
	/* this holds address of array of hl_cs_chunk for execution phase */
	__u64 chunks_execute;
	/* this holds address of array of hl_cs_chunk for store phase -
	 * Currently not in use
	 */
	__u64 chunks_store;
	/* Number of chunks in restore phase array */
	__u32 num_chunks_restore;
	/* Number of chunks in execution array */
	__u32 num_chunks_execute;
	/* Number of chunks in store phase array - Currently not in use */
	__u32 num_chunks_store;
	/* HL_CS_FLAGS_* */
	__u32 cs_flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};

struct hl_cs_out {
	/*
	 * seq holds the sequence number of the CS to pass to wait ioctl. All
	 * values are valid except for 0 and ULLONG_MAX
	 */
	__u64 seq;
	/* HL_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

/* Argument of HL_IOCTL_CS */
union hl_cs_args {
	struct hl_cs_in in;
	struct hl_cs_out out;
};

struct hl_wait_cs_in {
	/* Command submission sequence number */
	__u64 seq;
	/* Absolute timeout to wait in microseconds */
	__u64 timeout_us;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

#define HL_WAIT_CS_STATUS_COMPLETED	0
#define HL_WAIT_CS_STATUS_BUSY		1
#define HL_WAIT_CS_STATUS_TIMEDOUT	2
#define HL_WAIT_CS_STATUS_ABORTED	3
#define HL_WAIT_CS_STATUS_INTERRUPTED	4

struct hl_wait_cs_out {
	/* HL_WAIT_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

/* Argument of HL_IOCTL_WAIT_CS */
union hl_wait_cs_args {
	struct hl_wait_cs_in in;
	struct hl_wait_cs_out out;
};

/* Opcode to alloc device memory */
#define HL_MEM_OP_ALLOC			0
/* Opcode to free previously allocated device memory */
#define HL_MEM_OP_FREE			1
/* Opcode to map host memory */
#define HL_MEM_OP_MAP			2
/* Opcode to unmap previously mapped host memory */
#define HL_MEM_OP_UNMAP			3

/* Memory flags */
#define HL_MEM_CONTIGUOUS	0x1
#define HL_MEM_SHARED		0x2
#define HL_MEM_USERPTR		0x4

struct hl_mem_in {
	/* Per-opcode parameters; which member is valid depends on "op" */
	union {
		/* HL_MEM_OP_ALLOC - allocate device memory */
		struct {
			/* Size to alloc */
			__u64 mem_size;
		} alloc;

		/* HL_MEM_OP_FREE - free device memory */
		struct {
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} free;

		/* HL_MEM_OP_MAP - map device memory */
		struct {
			/*
			 * Requested virtual address of mapped memory.
			 * KMD will try to map the requested region to this
			 * hint address, as long as the address is valid and
			 * not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that KMD
			 * will choose the address itself.
			 */
			__u64 hint_addr;
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} map_device;

		/* HL_MEM_OP_MAP - map host memory */
		struct {
			/* Address of allocated host memory */
			__u64 host_virt_addr;
			/*
			 * Requested virtual address of mapped memory.
			 * KMD will try to map the requested region to this
			 * hint address, as long as the address is valid and
			 * not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that KMD
			 * will choose the address itself.
			 */
			__u64 hint_addr;
			/* Size of allocated host memory */
			__u64 mem_size;
		} map_host;

		/* HL_MEM_OP_UNMAP - unmap host memory */
		struct {
			/* Virtual address returned from HL_MEM_OP_MAP */
			__u64 device_virt_addr;
		} unmap;
	};

	/* HL_MEM_OP_* */
	__u32 op;
	/* HL_MEM_* flags */
	__u32 flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

struct hl_mem_out {
	union {
		/*
		 * Used for HL_MEM_OP_MAP as the virtual address that was
		 * assigned in the device VA space.
		 * A value of 0 means the requested operation failed.
		 */
		__u64 device_virt_addr;

		/*
		 * Used for HL_MEM_OP_ALLOC. This is the assigned
		 * handle for the allocated memory
		 */
		__u64 handle;
	};
};

/* Argument of HL_IOCTL_MEMORY */
union hl_mem_args {
	struct hl_mem_in in;
	struct hl_mem_out out;
};

/* Number of entries in hl_debug_params_spmu.event_types */
#define HL_DEBUG_MAX_AUX_VALUES		10

struct hl_debug_params_etr {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};

struct hl_debug_params_etf {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};

struct hl_debug_params_stm {
	/* Two bit masks for HW event and Stimulus Port */
	__u64 he_mask;
	__u64 sp_mask;

	/* Trace source ID */
	__u32 id;

	/* Frequency for the timestamp register */
	__u32 frequency;
};

struct hl_debug_params_bmon {
	/* Two address ranges that the user can request to filter */
	__u64 start_addr0;
	__u64 addr_mask0;

	__u64 start_addr1;
	__u64 addr_mask1;

	/* Capture window configuration */
	__u32 bw_win;
	__u32 win_capture;

	/* Trace source ID */
	__u32 id;
	__u32 pad;
};

struct hl_debug_params_spmu {
	/* Event types selection */
	__u64 event_types[HL_DEBUG_MAX_AUX_VALUES];

	/* Number of event types selection */
	__u32 event_types_num;
	__u32 pad;
};

/* Opcode for ETR component */
#define HL_DEBUG_OP_ETR		0
/* Opcode for ETF component */
#define HL_DEBUG_OP_ETF		1
/* Opcode for STM component */
#define HL_DEBUG_OP_STM		2
/* Opcode for FUNNEL component */
#define HL_DEBUG_OP_FUNNEL	3
/* Opcode for BMON component */
#define HL_DEBUG_OP_BMON	4
/* Opcode for SPMU component */
#define HL_DEBUG_OP_SPMU	5
/* Opcode for timestamp */
#define HL_DEBUG_OP_TIMESTAMP	6
/* Opcode for setting the device into or out of debug mode. The enable
 * variable should be 1 for enabling debug mode and 0 for disabling it
 */
#define HL_DEBUG_OP_SET_MODE	7

struct hl_debug_args {
	/*
	 * Pointer to user input structure.
	 * This field is relevant to specific opcodes.
	 */
	__u64 input_ptr;
	/* Pointer to user output structure */
	__u64 output_ptr;
	/* Size of user input structure */
	__u32 input_size;
	/* Size of user output structure */
	__u32 output_size;
	/* HL_DEBUG_OP_* */
	__u32 op;
	/*
	 * Register index in the component, taken from the debug_regs_index enum
	 * in the various ASIC header files
	 */
	__u32 reg_idx;
	/* Enable/disable */
	__u32 enable;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};

/*
 * Various information operations such as:
 * - H/W IP information
 * - Current dram usage
 *
 * The user calls this IOCTL with an opcode that describes the required
 * information. The user should supply a pointer to a user-allocated memory
 * chunk, which will be filled by the driver with the requested information.
 *
 * The user supplies the maximum amount of size to copy into the user's memory,
 * in order to prevent data corruption in case of differences between the
 * definitions of structures in kernel and userspace, e.g. in case of old
 * userspace and new kernel driver
 */
#define HL_IOCTL_INFO	\
		_IOWR('H', 0x01, struct hl_info_args)

/*
 * Command Buffer
 * - Request a Command Buffer
 * - Destroy a Command Buffer
 *
 * The command buffers are memory blocks that reside in DMA-able address
 * space and are physically contiguous so they can be accessed by the device
 * directly. They are allocated using the coherent DMA API.
 *
 * When creating a new CB, the IOCTL returns a handle of it, and the user-space
 * process needs to use that handle to mmap the buffer so it can access it.
 *
 */
#define HL_IOCTL_CB		\
		_IOWR('H', 0x02, union hl_cb_args)

/*
 * Command Submission
 *
 * To submit work to the device, the user needs to call this IOCTL with a set
 * of JOBS. That set of JOBS constitutes a CS object.
 * Each JOB will be enqueued on a specific queue, according to the user's input.
 * There can be more than one JOB per queue.
 *
 * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
 * a second set is for "execution" phase and a third set is for "store" phase.
 * The JOBS on the "restore" phase are enqueued only after context-switch
 * (or if it's the first CS for this context). The user can also order the
 * driver to run the "restore" phase explicitly
 *
 * There are two types of queues - external and internal. External queues
 * are DMA queues which transfer data from/to the Host. All other queues are
 * internal. The driver will get completion notifications from the device only
 * on JOBS which are enqueued in the external queues.
 *
 * For jobs on external queues, the user needs to create command buffers
 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 * internal queues, the user needs to prepare a "command buffer" with packets
 * on either the SRAM or DRAM, and give the device address of that buffer to
 * the CS ioctl.
 *
 * This IOCTL is asynchronous in regard to the actual execution of the CS. This
 * means it returns immediately after ALL the JOBS were enqueued on their
 * relevant queues. Therefore, the user mustn't assume the CS has been completed
 * or has even started to execute.
 *
 * Upon successful enqueue, the IOCTL returns a sequence number which the user
 * can use with the "Wait for CS" IOCTL to check whether the handle's CS
 * external JOBS have been completed. Note that if the CS has internal JOBS
 * which can execute AFTER the external JOBS have finished, the driver might
 * report that the CS has finished executing BEFORE the internal JOBS have
 * actually finished executing.
 *
 * Even though the sequence number increments per CS, the user can NOT
 * automatically assume that if CS with sequence number N finished, then CS
 * with sequence number N-1 also finished. The user can make this assumption if
 * and only if CS N and CS N-1 are exactly the same (same CBs for the same
 * queues).
 */
#define HL_IOCTL_CS			\
		_IOWR('H', 0x03, union hl_cs_args)

/*
 * Wait for Command Submission
 *
 * The user can call this IOCTL with a handle it received from the CS IOCTL
 * to wait until the handle's CS has finished executing. The user will wait
 * inside the kernel until the CS has finished or until the user-requested
 * timeout has expired.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *
 * EINTR     - Kernel waiting has been interrupted, e.g. due to OS signal
 *             that the user process received
 * ETIMEDOUT - The CS has caused a timeout on the device
 * EIO       - The CS was aborted (usually because the device was reset)
 * ENODEV    - The device wants to do hard-reset (so user needs to close FD)
 *
 * The driver also returns a custom define inside the IOCTL which can be:
 *
 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 *                                 (ETIMEDOUT)
 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 *                                 device was reset (EIO)
 * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
 *
 */

#define HL_IOCTL_WAIT_CS			\
		_IOWR('H', 0x04, union hl_wait_cs_args)

/*
 * Memory
 * - Map host memory to device MMU
 * - Unmap host memory from device MMU
 *
 * This IOCTL allows the user to map host memory to the device MMU
 * (see HL_MEM_OP_* for the complete list of opcodes it accepts)
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the allowed limit by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * There is an option for the user to specify the requested virtual address.
 *
 */
#define HL_IOCTL_MEMORY		\
		_IOWR('H', 0x05, union hl_mem_args)

/*
 * Debug
 * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
 *
 * This IOCTL allows the user to get debug traces from the chip.
 *
 * Before the user can send configuration requests of the various
 * debug/profile engines, it needs to set the device into debug mode.
 * This is because the debug/profile infrastructure is a shared component in
 * the device and we can't allow multiple users to access it at the same time.
 *
 * Once a user sets the device into debug mode, the driver won't allow other
 * users to "work" with the device, i.e. open a FD. If there are multiple users
 * opened on the device, the driver won't allow any user to debug the device.
 *
 * For each configuration request, the user needs to provide the register index
 * and essential data such as buffer address and size.
 *
 * Once the user has finished using the debug/profile engines, he should
 * set the device into non-debug mode, i.e. disable debug mode.
 *
 * The driver can decide to "kick out" the user if he abuses this interface.
 *
 */
#define HL_IOCTL_DEBUG		\
		_IOWR('H', 0x06, struct hl_debug_args)

/*
 * Range of ioctl command numbers used above: the HL_IOCTL_* definitions use
 * 0x01 through 0x06, so HL_COMMAND_END is one past the last command.
 */
#define HL_COMMAND_START	0x01
#define HL_COMMAND_END		0x07

#endif /* HABANALABS_H_ */