linux/include/uapi/misc/habanalabs.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
   2 *
   3 * Copyright 2016-2018 HabanaLabs, Ltd.
   4 * All Rights Reserved.
   5 *
   6 */
   7
   8#ifndef HABANALABS_H_
   9#define HABANALABS_H_
  10
  11#include <linux/types.h>
  12#include <linux/ioctl.h>
  13
  14/*
  15 * Defines that are ASIC-specific but constitute an ABI between kernel driver
  16 * and userspace
  17 */
  18#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START  0x8000  /* 32KB */
  19
  20/*
  21 * Queue Numbering
  22 *
  23 * The external queues (PCI DMA channels) MUST be before the internal queues
  24 * and each group (PCI DMA channels and internal) must be contiguous inside
  25 * itself but there can be a gap between the two groups (although not
  26 * recommended)
  27 */
  28
  29enum goya_queue_id {
  30        GOYA_QUEUE_ID_DMA_0 = 0,
  31        GOYA_QUEUE_ID_DMA_1,
  32        GOYA_QUEUE_ID_DMA_2,
  33        GOYA_QUEUE_ID_DMA_3,
  34        GOYA_QUEUE_ID_DMA_4,
  35        GOYA_QUEUE_ID_CPU_PQ,
  36        GOYA_QUEUE_ID_MME,      /* Internal queues start here */
  37        GOYA_QUEUE_ID_TPC0,
  38        GOYA_QUEUE_ID_TPC1,
  39        GOYA_QUEUE_ID_TPC2,
  40        GOYA_QUEUE_ID_TPC3,
  41        GOYA_QUEUE_ID_TPC4,
  42        GOYA_QUEUE_ID_TPC5,
  43        GOYA_QUEUE_ID_TPC6,
  44        GOYA_QUEUE_ID_TPC7,
  45        GOYA_QUEUE_ID_SIZE      /* Number of queue IDs defined above (15) */
  46};
  47
  48/*
  49 * Engine Numbering
  50 *
  51 * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
  52 */
  53
  54enum goya_engine_id {
  55        GOYA_ENGINE_ID_DMA_0 = 0,
  56        GOYA_ENGINE_ID_DMA_1,
  57        GOYA_ENGINE_ID_DMA_2,
  58        GOYA_ENGINE_ID_DMA_3,
  59        GOYA_ENGINE_ID_DMA_4,
  60        GOYA_ENGINE_ID_MME_0,
  61        GOYA_ENGINE_ID_TPC_0,
  62        GOYA_ENGINE_ID_TPC_1,
  63        GOYA_ENGINE_ID_TPC_2,
  64        GOYA_ENGINE_ID_TPC_3,
  65        GOYA_ENGINE_ID_TPC_4,
  66        GOYA_ENGINE_ID_TPC_5,
  67        GOYA_ENGINE_ID_TPC_6,
  68        GOYA_ENGINE_ID_TPC_7,
  69        GOYA_ENGINE_ID_SIZE     /* Number of engine IDs defined above (14) */
  70};
  71
  72enum hl_device_status {
  73        HL_DEVICE_STATUS_OPERATIONAL,   /* = 0 */
  74        HL_DEVICE_STATUS_IN_RESET,      /* = 1 */
  75        HL_DEVICE_STATUS_MALFUNCTION    /* = 2 */
  76};
  77
  78/* Opcode for management ioctl (passed in hl_info_args.op) */
  79#define HL_INFO_HW_IP_INFO      0
  80#define HL_INFO_HW_EVENTS       1
  81#define HL_INFO_DRAM_USAGE      2
  82#define HL_INFO_HW_IDLE         3
  83#define HL_INFO_DEVICE_STATUS   4
  84
  85#define HL_INFO_VERSION_MAX_LEN 128 /* Bytes in hl_info_hw_ip_info.armcp_version */
  86
  87struct hl_info_hw_ip_info {
  88        __u64 sram_base_address;
  89        __u64 dram_base_address;
  90        __u64 dram_size;
  91        __u32 sram_size;
  92        __u32 num_of_events; /* Entry count of the hw_events array (see hl_info_args.return_size) */
  93        __u32 device_id; /* PCI Device ID */
  94        __u32 reserved[3];
  95        __u32 armcp_cpld_version;
  96        __u32 psoc_pci_pll_nr;
  97        __u32 psoc_pci_pll_nf;
  98        __u32 psoc_pci_pll_od;
  99        __u32 psoc_pci_pll_div_factor;
 100        __u8 tpc_enabled_mask;
 101        __u8 dram_enabled;
 102        __u8 pad[2]; /* Keeps the struct size (200 bytes) a multiple of 8 */
 103        __u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
 104};
 105
 106struct hl_info_dram_usage { /* presumably the HL_INFO_DRAM_USAGE result - confirm */
 107        __u64 dram_free_mem;
 108        __u64 ctx_dram_mem;
 109};
 110
 111struct hl_info_hw_idle {
 112        __u32 is_idle;
 113        /*
 114         * Bitmask of busy engines.
 115         * Bits definition is according to `enum <chip>_engine_id'.
 116         */
 117        __u32 busy_engines_mask;
 118};
 119
 120struct hl_info_device_status {
 121        __u32 status; /* presumably an enum hl_device_status value - confirm */
 122        __u32 pad;
 123};
 124
 125struct hl_info_args { /* Argument of HL_IOCTL_INFO */
 126        /* Location of relevant struct in userspace */
 127        __u64 return_pointer;
 128        /*
 129         * The size of the return value. Just like "size" in "snprintf",
 130         * it limits how many bytes the kernel can write
 131         *
 132         * For hw_events array, the size should be
 133         * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
 134         */
 135        __u32 return_size;
 136
 137        /* HL_INFO_* */
 138        __u32 op;
 139
 140        /* Context ID - Currently not in use */
 141        __u32 ctx_id;
 142        __u32 pad;
 143};
 144
 145/* Opcode (hl_cb_in.op) to create a new command buffer */
 146#define HL_CB_OP_CREATE         0
 147/* Opcode (hl_cb_in.op) to destroy previously created command buffer */
 148#define HL_CB_OP_DESTROY        1
 149
 150struct hl_cb_in { /* Input member of union hl_cb_args */
 151        /* Handle of CB or 0 if we want to create one */
 152        __u64 cb_handle;
 153        /* HL_CB_OP_* */
 154        __u32 op;
 155        /* Size of CB. Maximum size is 2MB. The minimum size that will be
 156         * allocated, regardless of this parameter's value, is PAGE_SIZE
 157         */
 158        __u32 cb_size;
 159        /* Context ID - Currently not in use */
 160        __u32 ctx_id;
 161        __u32 pad;
 162};
 163
 164struct hl_cb_out { /* Output member of union hl_cb_args */
 165        /* Handle of CB */
 166        __u64 cb_handle;
 167};
 168
 169union hl_cb_args { /* Argument of HL_IOCTL_CB; in and out share storage */
 170        struct hl_cb_in in;
 171        struct hl_cb_out out;
 172};
 173
 174/*
 175 * This structure size must always be fixed to 64-bytes for backward
 176 * compatibility
 177 */
 178struct hl_cs_chunk {
 179        /*
 180         * For external queue, this represents a Handle of CB on the Host
 181         * For internal queue, this represents an SRAM or DRAM address of the
 182         * internal CB
 183         */
 184        __u64 cb_handle;
 185        /* Index of queue to put the CB on */
 186        __u32 queue_index;
 187        /*
 188         * Size of command buffer with valid packets
 189         * Can be smaller than actual CB size
 190         */
 191        __u32 cb_size;
 192        /* HL_CS_CHUNK_FLAGS_* */
 193        __u32 cs_chunk_flags;
 194        /* Pad structure to 64 bytes (20 bytes of fields + 11 * 4 bytes) */
 195        __u32 pad[11];
 196};
 197
 198#define HL_CS_FLAGS_FORCE_RESTORE       0x1     /* hl_cs_in.cs_flags bit */
 199
 200#define HL_CS_STATUS_SUCCESS            0       /* hl_cs_out.status value */
 201
 202struct hl_cs_in {
 203        /* this holds address of array of hl_cs_chunk for restore phase */
 204        __u64 chunks_restore;
 205        /* this holds address of array of hl_cs_chunk for execution phase */
 206        __u64 chunks_execute;
 207        /* this holds address of array of hl_cs_chunk for store phase -
 208         * Currently not in use
 209         */
 210        __u64 chunks_store;
 211        /* Number of chunks in restore phase array */
 212        __u32 num_chunks_restore;
 213        /* Number of chunks in execution array */
 214        __u32 num_chunks_execute;
 215        /* Number of chunks in store phase array - Currently not in use */
 216        __u32 num_chunks_store;
 217        /* HL_CS_FLAGS_* */
 218        __u32 cs_flags;
 219        /* Context ID - Currently not in use */
 220        __u32 ctx_id;
            /*
             * NOTE(review): there is no explicit trailing pad here, unlike the
             * other argument structs. The members total 44 bytes, so ABIs that
             * align __u64 to 8 bytes add 4 bytes of implicit tail padding.
             */
 221};
 222
 223struct hl_cs_out { /* Output member of union hl_cs_args */
 224        /*
 225         * seq holds the sequence number of the CS to pass to wait ioctl. All
 226         * values are valid except for 0 and ULLONG_MAX
 227         */
 228        __u64 seq;
 229        /* HL_CS_STATUS_* */
 230        __u32 status;
 231        __u32 pad;
 232};
 233
 234union hl_cs_args { /* Argument of HL_IOCTL_CS; in and out share storage */
 235        struct hl_cs_in in;
 236        struct hl_cs_out out;
 237};
 238
 239struct hl_wait_cs_in { /* Input member of union hl_wait_cs_args */
 240        /* Command submission sequence number (hl_cs_out.seq) */
 241        __u64 seq;
 242        /* Absolute timeout to wait in microseconds */
 243        __u64 timeout_us;
 244        /* Context ID - Currently not in use */
 245        __u32 ctx_id;
 246        __u32 pad;
 247};
 248
 249#define HL_WAIT_CS_STATUS_COMPLETED     0       /* hl_wait_cs_out.status values */
 250#define HL_WAIT_CS_STATUS_BUSY          1
 251#define HL_WAIT_CS_STATUS_TIMEDOUT      2
 252#define HL_WAIT_CS_STATUS_ABORTED       3
 253#define HL_WAIT_CS_STATUS_INTERRUPTED   4
 254
 255struct hl_wait_cs_out { /* Output member of union hl_wait_cs_args */
 256        /* HL_WAIT_CS_STATUS_* */
 257        __u32 status;
 258        __u32 pad;
 259};
 260
 261union hl_wait_cs_args { /* Argument of HL_IOCTL_WAIT_CS; in and out share storage */
 262        struct hl_wait_cs_in in;
 263        struct hl_wait_cs_out out;
 264};
 265
 266/* Opcode to alloc device memory */
 267#define HL_MEM_OP_ALLOC                 0
 268/* Opcode to free previously allocated device memory */
 269#define HL_MEM_OP_FREE                  1
 270/* Opcode to map host or device memory (map_host / map_device in hl_mem_in) */
 271#define HL_MEM_OP_MAP                   2
 272/* Opcode to unmap previously mapped host memory */
 273#define HL_MEM_OP_UNMAP                 3
 274
 275/* Memory flags (hl_mem_in.flags) */
 276#define HL_MEM_CONTIGUOUS       0x1
 277#define HL_MEM_SHARED           0x2
 278#define HL_MEM_USERPTR          0x4
 279
 280struct hl_mem_in { /* Input member of union hl_mem_args */
 281        union {
 282                /* HL_MEM_OP_ALLOC - allocate device memory */
 283                struct {
 284                        /* Size to alloc */
 285                        __u64 mem_size;
 286                } alloc;
 287
 288                /* HL_MEM_OP_FREE - free device memory */
 289                struct {
 290                        /* Handle returned from HL_MEM_OP_ALLOC */
 291                        __u64 handle;
 292                } free;
 293
 294                /*
                     * HL_MEM_OP_MAP - map device memory.
                     * NOTE(review): map_device and map_host share one opcode;
                     * presumably HL_MEM_USERPTR in flags selects map_host -
                     * confirm against the driver.
                     */
 295                struct {
 296                        /*
 297                         * Requested virtual address of mapped memory.
 298                         * KMD will try to map the requested region to this
 299                         * hint address, as long as the address is valid and
 300                         * not already mapped. The user should check the
 301                         * returned address of the IOCTL to make sure he got
 302                         * the hint address. Passing 0 here means that KMD
 303                         * will choose the address itself.
 304                         */
 305                        __u64 hint_addr;
 306                        /* Handle returned from HL_MEM_OP_ALLOC */
 307                        __u64 handle;
 308                } map_device;
 309
 310                /* HL_MEM_OP_MAP - map host memory */
 311                struct {
 312                        /* Address of allocated host memory */
 313                        __u64 host_virt_addr;
 314                        /*
 315                         * Requested virtual address of mapped memory.
 316                         * KMD will try to map the requested region to this
 317                         * hint address, as long as the address is valid and
 318                         * not already mapped. The user should check the
 319                         * returned address of the IOCTL to make sure he got
 320                         * the hint address. Passing 0 here means that KMD
 321                         * will choose the address itself.
 322                         */
 323                        __u64 hint_addr;
 324                        /* Size of allocated host memory */
 325                        __u64 mem_size;
 326                } map_host;
 327
 328                /* HL_MEM_OP_UNMAP - unmap host memory */
 329                struct {
 330                        /* Virtual address returned from HL_MEM_OP_MAP */
 331                        __u64 device_virt_addr;
 332                } unmap;
 333        };
 334
 335        /* HL_MEM_OP_* */
 336        __u32 op;
 337        /* HL_MEM_* flags */
 338        __u32 flags;
 339        /* Context ID - Currently not in use */
 340        __u32 ctx_id;
 341        __u32 pad;
 342};
 343
 344struct hl_mem_out { /* Output member of union hl_mem_args */
 345        union {
 346                /*
 347                 * Used for HL_MEM_OP_MAP as the virtual address that was
 348                 * assigned in the device VA space.
 349                 * A value of 0 means the requested operation failed.
 350                 */
 351                __u64 device_virt_addr;
 352
 353                /*
 354                 * Used for HL_MEM_OP_ALLOC. This is the assigned
 355                 * handle for the allocated memory
 356                 */
 357                __u64 handle;
 358        };
 359};
 360
 361union hl_mem_args { /* Argument of HL_IOCTL_MEMORY; in and out share storage */
 362        struct hl_mem_in in;
 363        struct hl_mem_out out;
 364};
 365
 366#define HL_DEBUG_MAX_AUX_VALUES         10      /* Length of hl_debug_params_spmu.event_types[] */
 367
 368struct hl_debug_params_etr { /* presumably the HL_DEBUG_OP_ETR input - confirm */
 369        /* Address in memory to allocate buffer */
 370        __u64 buffer_address;
 371
 372        /* Size of buffer to allocate */
 373        __u64 buffer_size;
 374
 375        /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
 376        __u32 sink_mode;
 377        __u32 pad;
 378};
 379
 380struct hl_debug_params_etf { /* presumably the HL_DEBUG_OP_ETF input - confirm */
 381        /* Address in memory to allocate buffer */
 382        __u64 buffer_address;
 383
 384        /* Size of buffer to allocate */
 385        __u64 buffer_size;
 386
 387        /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
 388        __u32 sink_mode;
 389        __u32 pad;
 390};
 391
 392struct hl_debug_params_stm { /* presumably the HL_DEBUG_OP_STM input - confirm */
 393        /* Two bit masks: he_mask for HW events, sp_mask for Stimulus Ports */
 394        __u64 he_mask;
 395        __u64 sp_mask;
 396
 397        /* Trace source ID */
 398        __u32 id;
 399
 400        /* Frequency for the timestamp register */
 401        __u32 frequency;
 402};
 403
 404struct hl_debug_params_bmon { /* presumably the HL_DEBUG_OP_BMON input - confirm */
 405        /* Two address ranges that the user can request to filter */
 406        __u64 start_addr0;
 407        __u64 addr_mask0;
 408
 409        __u64 start_addr1;
 410        __u64 addr_mask1;
 411
 412        /* Capture window configuration */
 413        __u32 bw_win;
 414        __u32 win_capture;
 415
 416        /* Trace source ID */
 417        __u32 id;
 418        __u32 pad;
 419};
 420
 421struct hl_debug_params_spmu {
 422        /* Event types selection (up to HL_DEBUG_MAX_AUX_VALUES entries) */
 423        __u64 event_types[HL_DEBUG_MAX_AUX_VALUES];
 424
 425        /* Number of event types selected */
 426        __u32 event_types_num;
 427        __u32 pad;
 428};
 429
 430/* Opcode for ETR component (opcodes are passed in hl_debug_args.op) */
 431#define HL_DEBUG_OP_ETR         0
 432/* Opcode for ETF component */
 433#define HL_DEBUG_OP_ETF         1
 434/* Opcode for STM component */
 435#define HL_DEBUG_OP_STM         2
 436/* Opcode for FUNNEL component */
 437#define HL_DEBUG_OP_FUNNEL      3
 438/* Opcode for BMON component */
 439#define HL_DEBUG_OP_BMON        4
 440/* Opcode for SPMU component */
 441#define HL_DEBUG_OP_SPMU        5
 442/* Opcode for timestamp */
 443#define HL_DEBUG_OP_TIMESTAMP   6
 444/* Opcode for setting the device into or out of debug mode. The enable
 445 * field of hl_debug_args should be 1 for enabling debug mode and 0 for
     * disabling it
 446 */
 447#define HL_DEBUG_OP_SET_MODE    7
 448
 449struct hl_debug_args { /* Argument of HL_IOCTL_DEBUG */
 450        /*
 451         * Pointer to user input structure.
 452         * This field is relevant to specific opcodes.
 453         */
 454        __u64 input_ptr;
 455        /* Pointer to user output structure */
 456        __u64 output_ptr;
 457        /* Size of user input structure */
 458        __u32 input_size;
 459        /* Size of user output structure */
 460        __u32 output_size;
 461        /* HL_DEBUG_OP_* */
 462        __u32 op;
 463        /*
 464         * Register index in the component, taken from the debug_regs_index enum
 465         * in the various ASIC header files
 466         */
 467        __u32 reg_idx;
 468        /* Enable/disable (1 enables, 0 disables for HL_DEBUG_OP_SET_MODE) */
 469        __u32 enable;
 470        /* Context ID - Currently not in use */
 471        __u32 ctx_id;
 472};
 473
 474/*
 475 * Various information operations such as:
 476 * - H/W IP information
 477 * - Current dram usage
 478 *
 479 * The user calls this IOCTL with an opcode that describes the required
 480 * information. The user should supply a pointer to a user-allocated memory
 481 * chunk, which will be filled by the driver with the requested information.
 482 *
 483 * The user supplies the maximum number of bytes to copy into the user's
 484 * memory, in order to prevent data corruption in case of differences between
 485 * the definitions of structures in kernel and userspace, e.g. in case of old
 486 * userspace and new kernel driver
 487 */
 488#define HL_IOCTL_INFO   \
 489                _IOWR('H', 0x01, struct hl_info_args)
 490
 491/*
 492 * Command Buffer
 493 * - Request a Command Buffer
 494 * - Destroy a Command Buffer
 495 *
 496 * The command buffers are memory blocks that reside in DMA-able address
 497 * space and are physically contiguous so they can be accessed by the device
 498 * directly. They are allocated using the coherent DMA API.
 499 *
 500 * When creating a new CB, the IOCTL returns a handle of it, and the user-space
 501 * process needs to use that handle to mmap the buffer so it can access it.
 502 *
 503 */
 504#define HL_IOCTL_CB             \
 505                _IOWR('H', 0x02, union hl_cb_args)
 506
 507/*
 508 * Command Submission
 509 *
 510 * To submit work to the device, the user needs to call this IOCTL with a set
 511 * of JOBS. That set of JOBS constitutes a CS object.
 512 * Each JOB will be enqueued on a specific queue, according to the user's input.
 513 * There can be more than one JOB per queue.
 514 *
 515 * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
 516 * a second set is for "execution" phase and a third set is for "store" phase.
 517 * The JOBS on the "restore" phase are enqueued only after context-switch
 518 * (or if it's the first CS for this context). The user can also order the
 519 * driver to run the "restore" phase explicitly
 520 *
 521 * There are two types of queues - external and internal. External queues
 522 * are DMA queues which transfer data from/to the Host. All other queues are
 523 * internal. The driver will get completion notifications from the device only
 524 * on JOBS which are enqueued in the external queues.
 525 *
 526 * For jobs on external queues, the user needs to create command buffers
 527 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 528 * internal queues, the user needs to prepare a "command buffer" with packets
 529 * on either the SRAM or DRAM, and give the device address of that buffer to
 530 * the CS ioctl.
 531 *
 532 * This IOCTL is asynchronous in regard to the actual execution of the CS. This
 533 * means it returns immediately after ALL the JOBS were enqueued on their
 534 * relevant queues. Therefore, the user mustn't assume the CS has been completed
 535 * or has even started to execute.
 536 *
 537 * Upon successful enqueue, the IOCTL returns a sequence number which the user
 538 * can use with the "Wait for CS" IOCTL to check whether the handle's CS
 539 * external JOBS have been completed. Note that if the CS has internal JOBS
 540 * which can execute AFTER the external JOBS have finished, the driver might
 541 * report that the CS has finished executing BEFORE the internal JOBS have
 542 * actually finished executing.
 543 *
 544 * Even though the sequence number increments per CS, the user can NOT
 545 * automatically assume that if CS with sequence number N finished, then CS
 546 * with sequence number N-1 also finished. The user can make this assumption if
 547 * and only if CS N and CS N-1 are exactly the same (same CBs for the same
 548 * queues).
 549 */
 550#define HL_IOCTL_CS                     \
 551                _IOWR('H', 0x03, union hl_cs_args)
 552
 553/*
 554 * Wait for Command Submission
 555 *
 556 * The user can call this IOCTL with a handle it received from the CS IOCTL
 557 * to wait until the handle's CS has finished executing. The user will wait
 558 * inside the kernel until the CS has finished or until the user-requested
 559 * timeout has expired.
 560 *
 561 * The return value of the IOCTL is a standard Linux error code. The possible
 562 * values are:
 563 *
 564 * EINTR     - Kernel waiting has been interrupted, e.g. due to OS signal
 565 *             that the user process received
 566 * ETIMEDOUT - The CS has caused a timeout on the device
 567 * EIO       - The CS was aborted (usually because the device was reset)
 568 * ENODEV    - The device wants to do hard-reset (so user needs to close FD)
 569 *
 570 * The driver also returns a custom define inside the IOCTL which can be:
 571 *
 572 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 573 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 574 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 575 *                                 (ETIMEDOUT)
 576 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 577 *                                 device was reset (EIO)
 578 * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
 579 *
 580 */
 581
 582#define HL_IOCTL_WAIT_CS                        \
 583                _IOWR('H', 0x04, union hl_wait_cs_args)
 584
 585/*
 586 * Memory
 587 * - Map host memory to device MMU
 588 * - Unmap host memory from device MMU
     * - Allocate, free and map device memory (HL_MEM_OP_ALLOC, HL_MEM_OP_FREE
     *   and the map_device form of HL_MEM_OP_MAP in struct hl_mem_in)
 589 *
 590 * This IOCTL allows the user to map host memory to the device MMU
 591 *
 592 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 593 * to allocate the memory in user-space (malloc/new). The driver pins the
 594 * physical pages (up to the allowed limit by the OS), assigns a virtual
 595 * address in the device VA space and initializes the device MMU.
 596 *
 597 * There is an option for the user to specify the requested virtual address.
 598 *
 599 */
 600#define HL_IOCTL_MEMORY         \
 601                _IOWR('H', 0x05, union hl_mem_args)
 602
 603/*
 604 * Debug
 605 * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
 606 *
 607 * This IOCTL allows the user to get debug traces from the chip.
 608 *
 609 * Before the user can send configuration requests of the various
 610 * debug/profile engines, it needs to set the device into debug mode.
 611 * This is because the debug/profile infrastructure is a shared component in
 612 * the device and we can't allow multiple users to access it at the same time.
 613 *
 614 * Once a user sets the device into debug mode, the driver won't allow other
 615 * users to "work" with the device, i.e. open a FD. If there are multiple users
 616 * opened on the device, the driver won't allow any user to debug the device.
 617 *
 618 * For each configuration request, the user needs to provide the register index
 619 * and essential data such as buffer address and size.
 620 *
 621 * Once the user has finished using the debug/profile engines, he should
 622 * set the device into non-debug mode, i.e. disable debug mode.
 623 *
 624 * The driver can decide to "kick out" the user if he abuses this interface.
 625 *
 626 */
 627#define HL_IOCTL_DEBUG          \
 628                _IOWR('H', 0x06, struct hl_debug_args)
 629
 630#define HL_COMMAND_START        0x01    /* First HL_IOCTL_* command number */
 631#define HL_COMMAND_END          0x07    /* One past the last HL_IOCTL_* command number (0x06) */
 632
 633#endif /* HABANALABS_H_ */
 634