1
2
3
4
5
6
7
8#ifndef HABANALABSP_H_
9#define HABANALABSP_H_
10
11#include "../include/common/cpucp_if.h"
12#include "../include/common/qman_if.h"
13#include "../include/hw_ip/mmu/mmu_general.h"
14#include <uapi/misc/habanalabs.h>
15
16#include <linux/cdev.h>
17#include <linux/iopoll.h>
18#include <linux/irqreturn.h>
19#include <linux/dma-direction.h>
20#include <linux/scatterlist.h>
21#include <linux/hashtable.h>
22#include <linux/debugfs.h>
23#include <linux/bitfield.h>
24#include <linux/genalloc.h>
25#include <linux/sched/signal.h>
26#include <linux/io-64-nonatomic-lo-hi.h>
27#include <linux/coresight.h>
28
/* Driver name, used when registering the device with kernel subsystems */
#define HL_NAME "habanalabs"
30
31
32
33
34
35
36
37
/*
 * mmap offset encoding: the top bits of the (page-shifted) mmap offset
 * select the mapping type (command buffer or HW block), the remaining
 * bits carry the object-specific value (e.g. a handle or block id).
 */
#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)

/*
 * Extract the type-less value from an mmap offset. The argument is fully
 * parenthesized so that expression arguments (ternaries, bitwise ops, ...)
 * expand safely - '&' binds tighter than many operators callers may use.
 */
#define HL_MMAP_OFFSET_VALUE_MASK (0x1FFFFFFFFFFFull >> PAGE_SHIFT)
#define HL_MMAP_OFFSET_VALUE_GET(off) ((off) & HL_MMAP_OFFSET_VALUE_MASK)
45
/*
 * Reset flow polling: checks per second while waiting for user processes
 * to close, maximum number of trials, and the longer per-trial period
 * used when processes need extra time to exit.
 */
#define HL_PENDING_RESET_PER_SEC 10
#define HL_PENDING_RESET_MAX_TRIALS 60
#define HL_PENDING_RESET_LONG_SEC 60

/* Maximum time (seconds) to wait for a hard reset to complete */
#define HL_HARD_RESET_MAX_TIMEOUT 120
/* Palladium (emulation) platforms are much slower - stretch the timeout */
#define HL_PLDM_HARD_RESET_MAX_TIMEOUT (HL_HARD_RESET_MAX_TIMEOUT * 3)

#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */

#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */

#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */

#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10 s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10 s */

#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10 ms */

/* Upper cap applied to stretched timeouts on simulator (no PCI device),
 * see the hl_poll_timeout*() macros below.
 */
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10 s */

/* Interrupt id for completions not tied to a specific user interrupt */
#define HL_COMMON_USER_INTERRUPT_ID 0xFFF

/* Buckets (2^7) of the per-context memory hash (see hl_ctx.mem_hash) */
#define MEM_HASH_TABLE_BITS 7

/* Buckets (2^7) of the per-context MMU shadow hash (hl_ctx.mmu_shadow_hash) */
#define MMU_HASH_TABLE_BITS 7
73
74
75
76
77
78
79
/**
 * enum hl_mmu_page_table_location - where the MMU page tables reside.
 * @MMU_DR_PGT: page tables are device-resident (see struct hl_mmu_dr_priv).
 * @MMU_HR_PGT: page tables are host-resident (see struct hl_mmu_hr_priv).
 * @MMU_NUM_PGT_LOCATIONS: number of page-table locations.
 */
enum hl_mmu_page_table_location {
	MMU_DR_PGT = 0,
	MMU_HR_PGT,
	MMU_NUM_PGT_LOCATIONS
};
85
86
87
88
89
/* Sync objects (SOBs) and monitors reserved per sync-stream queue
 * (see struct hl_sync_stream_properties).
 */
#define HL_RSVD_SOBS 2
#define HL_RSVD_MONS 1

/* Master monitors reserved for collective wait per queue */
#define HL_COLLECTIVE_RSVD_MSTR_MONS 2

/* Maximum value a hardware sync object can hold */
#define HL_MAX_SOB_VAL (1 << 15)

/* Arguments are fully parenthesized so the macros are safe for
 * arbitrary expression arguments (the originals expanded 'n' bare).
 */
#define IS_POWER_OF_2(n) (((n) != 0) && (((n) & ((n) - 1)) == 0))
#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && ((n) > 1))

#define HL_PCI_NUM_BARS 6

/* Maximum number of DCOREs a supported ASIC may have */
#define HL_MAX_DCORES 4
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
 * Device reset flags (bitmask handed to the reset flow). Flag names mirror
 * the reset trigger; exact handling is in the reset code.
 */
#define HL_RESET_HARD (1 << 0) /* full (hard) reset of the chip */
#define HL_RESET_FROM_RESET_THREAD (1 << 1) /* invoked from the reset worker */
#define HL_RESET_HEARTBEAT (1 << 2) /* triggered by heartbeat failure */
#define HL_RESET_TDR (1 << 3) /* triggered by CS timeout (TDR) */
#define HL_RESET_DEVICE_RELEASE (1 << 4) /* triggered on device file release */

/* Maximum number of sync objects one monitor can observe */
#define HL_MAX_SOBS_PER_MONITOR 8
133
134
135
136
137
138
139
140
141
142
143
/**
 * struct hl_gen_wait_properties - parameters for the ASIC-specific
 * gen_wait_cb() callback (see struct hl_asic_funcs).
 * @data: buffer the wait packets are generated into.
 * @q_idx: id of the queue the wait is armed for.
 * @size: current size/offset within the buffer, in bytes.
 * @sob_base: first sync object id to wait on.
 * @sob_val: sync object value to wait for.
 * @mon_id: id of the monitor used for the wait.
 * @sob_mask: mask of consecutive sync objects, relative to @sob_base.
 */
struct hl_gen_wait_properties {
	void *data;
	u32 q_idx;
	u32 size;
	u16 sob_base;
	u16 sob_val;
	u16 mon_id;
	u8 sob_mask;
};

/**
 * struct pgt_info - tracks one MMU hop page table.
 * @node: hash-table linkage (tables are kept in hl_ctx.mmu_shadow_hash).
 * @phys_addr: physical address of the hop table.
 * @shadow_addr: address of the host shadow copy of the hop table.
 * @ctx: context this page table belongs to.
 * @num_of_ptes: number of valid PTEs currently held by this hop table.
 */
struct pgt_info {
	struct hlist_node node;
	u64 phys_addr;
	u64 shadow_addr;
	struct hl_ctx *ctx;
	int num_of_ptes;
};

struct hl_device;
struct hl_fpriv;
177
178
179
180
181
182
/**
 * enum hl_pci_match_mode - matching mode used when configuring an inbound
 * PCI region (see struct hl_inbound_pci_region).
 * @PCI_ADDRESS_MATCH_MODE: match by address.
 * @PCI_BAR_MATCH_MODE: match by BAR.
 */
enum hl_pci_match_mode {
	PCI_ADDRESS_MATCH_MODE,
	PCI_BAR_MATCH_MODE
};

/**
 * enum hl_fw_component - firmware components (e.g. for version queries).
 * @FW_COMP_BOOT_FIT: boot-fit image.
 * @FW_COMP_PREBOOT: preboot image.
 * @FW_COMP_LINUX: Linux image running on the device CPU.
 */
enum hl_fw_component {
	FW_COMP_BOOT_FIT,
	FW_COMP_PREBOOT,
	FW_COMP_LINUX,
};

/**
 * enum hl_fw_types - bitmask of firmware types loaded to the device.
 * @FW_TYPE_LINUX: Linux image for the device CPU.
 * @FW_TYPE_BOOT_CPU: boot image for the device CPU.
 * @FW_TYPE_PREBOOT_CPU: preboot stage of the device CPU.
 * @FW_TYPE_ALL_TYPES: union of all firmware types.
 */
enum hl_fw_types {
	FW_TYPE_LINUX = 0x1,
	FW_TYPE_BOOT_CPU = 0x2,
	FW_TYPE_PREBOOT_CPU = 0x4,
	FW_TYPE_ALL_TYPES =
		(FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU)
};
215
216
217
218
219
220
221
222
223
224
225
226
/**
 * enum hl_queue_type - H/W queue type. Exact submission semantics are
 * ASIC-specific (see the per-ASIC queue property tables).
 * @QUEUE_TYPE_NA: queue not available / placeholder.
 * @QUEUE_TYPE_EXT: external queue (driver-managed, with completion queue).
 * @QUEUE_TYPE_INT: internal queue.
 * @QUEUE_TYPE_CPU: queue towards the device CPU (firmware).
 * @QUEUE_TYPE_HW: queue fed directly to the H/W.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA,
	QUEUE_TYPE_EXT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_CPU,
	QUEUE_TYPE_HW
};

/**
 * enum hl_cs_type - command submission type.
 * @CS_TYPE_DEFAULT: regular command submission.
 * @CS_TYPE_SIGNAL: submission that signals a sync object.
 * @CS_TYPE_WAIT: submission that waits on a sync object.
 * @CS_TYPE_COLLECTIVE_WAIT: wait submission for a collective operation.
 */
enum hl_cs_type {
	CS_TYPE_DEFAULT,
	CS_TYPE_SIGNAL,
	CS_TYPE_WAIT,
	CS_TYPE_COLLECTIVE_WAIT
};
241
242
243
244
245
246
247
248
249
/**
 * struct hl_inbound_pci_region - inbound region (host-visible window into
 * the device) descriptor.
 * @mode: matching mode - by address or by BAR.
 * @addr: region target address.
 * @size: region size.
 * @offset_in_bar: offset within the BAR (BAR-match mode).
 * @bar: BAR number backing this region.
 */
struct hl_inbound_pci_region {
	enum hl_pci_match_mode mode;
	u64 addr;
	u64 size;
	u64 offset_in_bar;
	u8 bar;
};

/**
 * struct hl_outbound_pci_region - outbound region (device access to host
 * memory) descriptor.
 * @addr: region target address.
 * @size: region size.
 */
struct hl_outbound_pci_region {
	u64 addr;
	u64 size;
};

/**
 * enum queue_cb_alloc_flags - allowed CB allocators for a queue: whether
 * submissions may use kernel-allocated and/or user-allocated command
 * buffers.
 */
enum queue_cb_alloc_flags {
	CB_ALLOC_KERNEL = 0x1,
	CB_ALLOC_USER = 0x2
};
278
279
280
281
282
283
284
285
/**
 * struct hl_hw_sob - hardware sync object (SOB).
 * @hdev: device this SOB belongs to.
 * @kref: refcount; the SOB can be reset once no submission uses it.
 * @sob_id: id of this SOB.
 * @q_idx: id of the queue this SOB is associated with.
 */
struct hl_hw_sob {
	struct hl_device *hdev;
	struct kref kref;
	u32 sob_id;
	u32 q_idx;
};

/**
 * enum hl_collective_mode - role of a queue in collective operations.
 * @HL_COLLECTIVE_NOT_SUPPORTED: queue takes no part in collectives.
 * @HL_COLLECTIVE_MASTER: queue acts as a collective master.
 * @HL_COLLECTIVE_SLAVE: queue acts as a collective slave.
 */
enum hl_collective_mode {
	HL_COLLECTIVE_NOT_SUPPORTED = 0x0,
	HL_COLLECTIVE_MASTER = 0x1,
	HL_COLLECTIVE_SLAVE = 0x2
};
298
299
300
301
302
303
304
305
306
307
308
309
310
/**
 * struct hw_queue_properties - fixed properties of a single H/W queue.
 * @type: queue type.
 * @cb_alloc_flags: which CB allocators (kernel/user) this queue accepts.
 * @collective_mode: role in collective operations.
 * @driver_only: non-zero if only the driver may use this queue.
 * @supports_sync_stream: non-zero if the queue supports sync streams.
 */
struct hw_queue_properties {
	enum hl_queue_type type;
	enum queue_cb_alloc_flags cb_alloc_flags;
	enum hl_collective_mode collective_mode;
	u8 driver_only;
	u8 supports_sync_stream;
};

/**
 * enum vm_type_t - type of a virtual-memory mapping.
 * @VM_TYPE_USERPTR: mapping backed by pinned host memory (hl_userptr).
 * @VM_TYPE_PHYS_PACK: mapping backed by device physical pages
 *                     (hl_vm_phys_pg_pack).
 */
enum vm_type_t {
	VM_TYPE_USERPTR = 0x1,
	VM_TYPE_PHYS_PACK = 0x2
};

/**
 * enum hl_device_hw_state - H/W state as reported by the device.
 * @HL_DEVICE_HW_STATE_CLEAN: device is clean (e.g. after reset).
 * @HL_DEVICE_HW_STATE_DIRTY: device was already used and needs a reset.
 */
enum hl_device_hw_state {
	HL_DEVICE_HW_STATE_CLEAN = 0,
	HL_DEVICE_HW_STATE_DIRTY
};

/* Sentinel: no special alignment is required for a VA allocation */
#define HL_MMU_VA_ALIGNMENT_NOT_NEEDED 0
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/**
 * struct hl_mmu_properties - properties of one MMU (device/host/huge-page).
 * @start_addr: first virtual address covered by this MMU.
 * @end_addr: last virtual address covered by this MMU.
 * @hop0_shift..@hop5_shift: bit shift of each hop's index field within a
 *                           virtual address.
 * @hop0_mask..@hop5_mask: mask isolating each hop's index field.
 * @page_size: default page size handled by this MMU.
 * @num_hops: number of translation hops.
 * @host_resident: non-zero if the page tables live in host memory.
 */
struct hl_mmu_properties {
	u64 start_addr;
	u64 end_addr;
	u64 hop0_shift;
	u64 hop1_shift;
	u64 hop2_shift;
	u64 hop3_shift;
	u64 hop4_shift;
	u64 hop5_shift;
	u64 hop0_mask;
	u64 hop1_mask;
	u64 hop2_mask;
	u64 hop3_mask;
	u64 hop4_mask;
	u64 hop5_mask;
	u32 page_size;
	u32 num_hops;
	u8 host_resident;
};
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
/**
 * struct asic_fixed_properties - fixed, per-ASIC properties. Filled once
 * during initialization (partly from firmware) and treated as read-only
 * afterwards. Field meanings follow their names; non-obvious groups are
 * annotated inline.
 */
struct asic_fixed_properties {
	struct hw_queue_properties *hw_queues_props;
	struct cpucp_info cpucp_info; /* info reported by the device CPU */
	char uboot_ver[VERSION_MAX_LEN];
	char preboot_ver[VERSION_MAX_LEN];
	/* MMU descriptors: device MMU, host (PCI) MMU, huge-page host MMU */
	struct hl_mmu_properties dmmu;
	struct hl_mmu_properties pmmu;
	struct hl_mmu_properties pmmu_huge;
	/* memory map: SRAM / DRAM ranges and user-visible bases */
	u64 sram_base_address;
	u64 sram_end_address;
	u64 sram_user_base_address;
	u64 dram_base_address;
	u64 dram_end_address;
	u64 dram_user_base_address;
	u64 dram_size;
	u64 dram_pci_bar_size;
	u64 max_power_default;
	u64 dc_power_default;
	u64 dram_size_for_default_page_mapping;
	u64 pcie_dbi_base_address;
	u64 pcie_aux_dbi_reg_addr;
	/* MMU page-table placement */
	u64 mmu_pgt_addr;
	u64 mmu_dram_default_page_addr;
	/* VA range reserved for mapping command buffers */
	u64 cb_va_start_addr;
	u64 cb_va_end_addr;
	u32 mmu_pgt_size;
	u32 mmu_pte_size;
	u32 mmu_hop_table_size;
	u32 mmu_hop0_tables_total_size;
	u32 dram_page_size;
	u32 cfg_size;
	u32 sram_size;
	u32 max_asid;
	u32 num_of_events;
	/* PSOC PCI PLL parameters (nr/nf/od/divider) */
	u32 psoc_pci_pll_nr;
	u32 psoc_pci_pll_nf;
	u32 psoc_pci_pll_od;
	u32 psoc_pci_pll_div_factor;
	u32 psoc_timestamp_frequency;
	u32 high_pll;
	u32 cb_pool_cb_cnt;
	u32 cb_pool_cb_size;
	u32 max_pending_cs;
	u32 max_queues;
	/* firmware boot device-status registers sampled at each boot stage */
	u32 fw_preboot_cpu_boot_dev_sts0;
	u32 fw_preboot_cpu_boot_dev_sts1;
	u32 fw_bootfit_cpu_boot_dev_sts0;
	u32 fw_bootfit_cpu_boot_dev_sts1;
	u32 fw_app_cpu_boot_dev_sts0;
	u32 fw_app_cpu_boot_dev_sts1;
	/* first SOB/monitor ids reserved for collective / sync-stream use */
	u16 collective_first_sob;
	u16 collective_first_mon;
	u16 sync_stream_first_sob;
	u16 sync_stream_first_mon;
	/* first resources left free for user use, per DCORE */
	u16 first_available_user_sob[HL_MAX_DCORES];
	u16 first_available_user_mon[HL_MAX_DCORES];
	u16 first_available_user_msix_interrupt;
	u16 first_available_cq[HL_MAX_DCORES];
	u16 user_interrupt_count;
	u8 tpc_enabled_mask;
	u8 completion_queues_count;
	/* firmware capability/security flags */
	u8 fw_security_enabled;
	u8 fw_cpu_boot_dev_sts0_valid;
	u8 fw_cpu_boot_dev_sts1_valid;
	u8 dram_supports_virtual_memory;
	u8 hard_reset_done_by_fw;
	u8 num_functional_hbms;
	u8 iatu_done_by_fw;
	u8 dynamic_fw_load;
	u8 gic_interrupts_enable;
};
549
550
551
552
553
554
555
556
557
558
/**
 * struct hl_fence - completion object for a command submission.
 * @completion: signalled when the CS completes.
 * @refcount: fence reference count.
 * @cs_sequence: sequence number of the CS this fence belongs to.
 * @error: 0 on success, negative error code otherwise.
 * @timestamp: time the fence was signalled.
 */
struct hl_fence {
	struct completion completion;
	struct kref refcount;
	u64 cs_sequence;
	int error;
	ktime_t timestamp;
};

/**
 * struct hl_cs_compl - command-submission completion object; embeds the
 * base fence plus sync-object bookkeeping for signal/wait submissions.
 * @sob_reset_work: deferred work used to reset the SOB.
 * @base_fence: the embedded hl_fence.
 * @lock: protects the completion object's state.
 * @hdev: owning device.
 * @hw_sob: SOB used by this CS (signal/wait types).
 * @cs_seq: sequence number of the CS.
 * @type: CS type (default/signal/wait/collective-wait).
 * @sob_val: SOB value associated with this CS.
 * @sob_group: SOB group (collective operations).
 */
struct hl_cs_compl {
	struct work_struct sob_reset_work;
	struct hl_fence base_fence;
	spinlock_t lock;
	struct hl_device *hdev;
	struct hl_hw_sob *hw_sob;
	u64 cs_seq;
	enum hl_cs_type type;
	u16 sob_val;
	u16 sob_group;
};

/**
 * struct hl_cb_mgr - per-process command buffer manager.
 * @cb_lock: protects the handle IDR.
 * @cb_handles: IDR mapping CB handles to hl_cb objects.
 */
struct hl_cb_mgr {
	spinlock_t cb_lock;
	struct idr cb_handles;
};
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
/**
 * struct hl_cb - command buffer.
 * @refcount: reference counter.
 * @hdev: owning device.
 * @ctx: context the CB belongs to.
 * @lock: protects mmap/usage state.
 * @debugfs_list: linkage in the debugfs CB list.
 * @pool_list: linkage when the CB sits in the driver's CB pool.
 * @va_block_list: VA blocks used when the CB is mapped through the MMU.
 * @id: handle of the CB.
 * @kernel_address: kernel virtual address of the buffer.
 * @bus_address: DMA (bus) address of the buffer.
 * @mmap_size: size currently mmapped to user space.
 * @size: buffer size in bytes.
 * @cs_cnt: number of submissions currently using this CB.
 * @mmap: non-zero while the CB is mmapped to user space.
 * @is_pool: non-zero if the CB was taken from the pool.
 * @is_internal: non-zero if allocated from internal (device) memory.
 * @is_mmu_mapped: non-zero if the CB is mapped via the device MMU.
 */
struct hl_cb {
	struct kref refcount;
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	spinlock_t lock;
	struct list_head debugfs_list;
	struct list_head pool_list;
	struct list_head va_block_list;
	u64 id;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 mmap_size;
	u32 size;
	atomic_t cs_cnt;
	u8 mmap;
	u8 is_pool;
	u8 is_internal;
	u8 is_mmu_mapped;
};
645
646
647
648
649
650
struct hl_cs;
struct hl_cs_job;

/* Queue length (in entries) of external and HW queues */
#define HL_QUEUE_LENGTH 4096
#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)

#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
/* The check permits equality, so the requirement is "at least", not
 * "greater than" as the previous message claimed.
 */
#error "HL_QUEUE_LENGTH must be at least HL_MAX_JOBS_PER_CS"
#endif
661
662
/* Completion queue length, in units of completion queue entries */
#define HL_CQ_LENGTH HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)

/* Event queue length, in units of event queue entries */
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)

/* Size of host memory accessible by the device CPU */
#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
/**
 * struct hl_sync_stream_properties - sync-stream state of one H/W queue.
 * @hw_sob: the reserved sync objects rotated through by this queue.
 * @next_sob_val: next value to use for signalling.
 * @base_sob_id: first SOB id reserved for this queue.
 * @base_mon_id: first monitor id reserved for this queue.
 * @collective_mstr_mon_id: reserved master monitors (collective wait).
 * @collective_slave_mon_id: reserved slave monitor (collective wait).
 * @collective_sob_id: reserved SOB id for collective wait.
 * @curr_sob_offset: index of the SOB currently in use within @hw_sob.
 */
struct hl_sync_stream_properties {
	struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
	u16 next_sob_val;
	u16 base_sob_id;
	u16 base_mon_id;
	u16 collective_mstr_mon_id[HL_COLLECTIVE_RSVD_MSTR_MONS];
	u16 collective_slave_mon_id;
	u16 collective_sob_id;
	u8 curr_sob_offset;
};

/**
 * struct hl_hw_queue - driver-side descriptor of one H/W queue.
 * @shadow_queue: shadow array of jobs, indexed like the real queue.
 * @sync_stream_prop: sync-stream related properties.
 * @queue_type: type of the queue.
 * @collective_mode: role in collective operations.
 * @kernel_address: kernel virtual address of the queue ring.
 * @bus_address: DMA address of the queue ring.
 * @pi: producer index.
 * @ci: consumer index (atomic; advanced on completion).
 * @hw_queue_id: id of this queue.
 * @cq_id: id of the matching completion queue.
 * @msi_vec: MSI vector serving this queue.
 * @int_queue_len: length of internal queues, in entries.
 * @valid: non-zero once the queue was initialized.
 * @supports_sync_stream: non-zero if sync streams are supported.
 */
struct hl_hw_queue {
	struct hl_cs_job **shadow_queue;
	struct hl_sync_stream_properties sync_stream_prop;
	enum hl_queue_type queue_type;
	enum hl_collective_mode collective_mode;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 pi;
	atomic_t ci;
	u32 hw_queue_id;
	u32 cq_id;
	u32 msi_vec;
	u16 int_queue_len;
	u8 valid;
	u8 supports_sync_stream;
};
734
735
736
737
738
739
740
741
742
743
744
745
/**
 * struct hl_cq - completion queue.
 * @hdev: owning device.
 * @kernel_address: kernel virtual address of the CQ ring.
 * @bus_address: DMA address of the CQ ring.
 * @cq_idx: index of this CQ.
 * @hw_queue_id: id of the H/W queue this CQ serves.
 * @ci: consumer index.
 * @pi: producer index.
 * @free_slots_cnt: number of free CQ slots.
 */
struct hl_cq {
	struct hl_device *hdev;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 cq_idx;
	u32 hw_queue_id;
	u32 ci;
	u32 pi;
	atomic_t free_slots_cnt;
};

/**
 * struct hl_user_interrupt - a user interrupt line and its waiters.
 * @hdev: owning device.
 * @wait_list_head: list of pending waiters (hl_user_pending_interrupt).
 * @wait_list_lock: protects @wait_list_head.
 * @interrupt_id: id of this interrupt.
 */
struct hl_user_interrupt {
	struct hl_device *hdev;
	struct list_head wait_list_head;
	spinlock_t wait_list_lock;
	u32 interrupt_id;
};

/**
 * struct hl_user_pending_interrupt - a single pending wait on a user
 * interrupt; completed through the embedded fence.
 * @wait_list_node: linkage in hl_user_interrupt.wait_list_head.
 * @fence: fence signalled when the interrupt arrives.
 */
struct hl_user_pending_interrupt {
	struct list_head wait_list_node;
	struct hl_fence fence;
};

/**
 * struct hl_eq - event queue (events coming from the device CPU).
 * @hdev: owning device.
 * @kernel_address: kernel virtual address of the EQ ring.
 * @bus_address: DMA address of the EQ ring.
 * @ci: consumer index.
 * @prev_eqe_index: index of the previously handled event entry, used to
 *                  detect missed/duplicated events.
 * @check_eqe_index: non-zero if entry indices should be validated.
 */
struct hl_eq {
	struct hl_device *hdev;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 ci;
	u32 prev_eqe_index;
	bool check_eqe_index;
};
802
803
804
805
806
807
808
809
810
811
812
813
814
/**
 * enum hl_asic_type - supported ASICs.
 * @ASIC_INVALID: unknown/unsupported device.
 * @ASIC_GOYA: Goya.
 * @ASIC_GAUDI: Gaudi.
 * @ASIC_GAUDI_SEC: secured (secured-firmware) Gaudi.
 */
enum hl_asic_type {
	ASIC_INVALID,
	ASIC_GOYA,
	ASIC_GAUDI,
	ASIC_GAUDI_SEC
};

struct hl_cs_parser;

/**
 * enum hl_pm_mng_profile - power management profile.
 * @PM_AUTO: driver switches frequencies automatically.
 * @PM_MANUAL: frequency is controlled manually (e.g. via sysfs).
 * @PM_LAST: number-of-profiles sentinel.
 */
enum hl_pm_mng_profile {
	PM_AUTO = 1,
	PM_MANUAL,
	PM_LAST
};
835
836
837
838
839
840
841
/**
 * enum hl_pll_frequency - requested PLL frequency profile.
 * @PLL_HIGH: high frequency (e.g. while jobs are running).
 * @PLL_LOW: low frequency (idle).
 * @PLL_LAST: sentinel.
 */
enum hl_pll_frequency {
	PLL_HIGH = 1,
	PLL_LOW,
	PLL_LAST
};

/* PLL reference clock, in MHz */
#define PLL_REF_CLK 50

/* Source selection values for a PLL output divider */
enum div_select_defs {
	DIV_SEL_REF_CLK = 0,
	DIV_SEL_PLL_CLK = 1,
	DIV_SEL_DIVIDED_REF = 2,
	DIV_SEL_DIVIDED_PLL = 3,
};

/* Logical PCI memory regions exposed by the device (see pci_mem_region) */
enum pci_region {
	PCI_REGION_CFG,
	PCI_REGION_SRAM,
	PCI_REGION_DRAM,
	PCI_REGION_SP_SRAM,
	PCI_REGION_NUMBER,
};
864
865
866
867
868
869
870
871
872
873
/**
 * struct pci_mem_region - a device memory region accessible over PCI.
 * @region_base: base address of the region.
 * @region_size: size of the region.
 * @bar_size: size of the BAR backing the region.
 * @offset_in_bar: region offset inside its BAR.
 * @bar_id: id of the backing BAR.
 * @used: non-zero if the region is in use.
 */
struct pci_mem_region {
	u64 region_base;
	u64 region_size;
	u64 bar_size;
	u32 offset_in_bar;
	u8 bar_id;
	u8 used;
};

/**
 * struct static_fw_load_mgr - register map used by the static (legacy)
 * firmware load protocol.
 * @preboot_version_max_off: max offset to the preboot version string.
 * @boot_fit_version_max_off: max offset to the boot-fit version string.
 * @kmd_msg_to_cpu_reg: register for driver->CPU messages.
 * @cpu_cmd_status_to_host_reg: register for CPU command status.
 * @cpu_boot_status_reg: CPU boot status register.
 * @cpu_boot_dev_status0_reg: device status register 0.
 * @cpu_boot_dev_status1_reg: device status register 1.
 * @boot_err0_reg: boot error register 0.
 * @boot_err1_reg: boot error register 1.
 * @preboot_version_offset_reg: register holding the preboot version offset.
 * @boot_fit_version_offset_reg: register holding the boot-fit version offset.
 * @sram_offset_mask: mask applied to SRAM offsets.
 * @cpu_reset_wait_msec: delay (ms) after CPU reset.
 */
struct static_fw_load_mgr {
	u64 preboot_version_max_off;
	u64 boot_fit_version_max_off;
	u32 kmd_msg_to_cpu_reg;
	u32 cpu_cmd_status_to_host_reg;
	u32 cpu_boot_status_reg;
	u32 cpu_boot_dev_status0_reg;
	u32 cpu_boot_dev_status1_reg;
	u32 boot_err0_reg;
	u32 boot_err1_reg;
	u32 preboot_version_offset_reg;
	u32 boot_fit_version_offset_reg;
	u32 sram_offset_mask;
	u32 cpu_reset_wait_msec;
};

/**
 * struct fw_response - response from firmware during dynamic load.
 * @ram_offset: offset into the target RAM.
 * @ram_type: type of target RAM.
 * @status: response status.
 */
struct fw_response {
	u32 ram_offset;
	u8 ram_type;
	u8 status;
};
926
927
928
929
930
931
932
933
934
/**
 * struct dynamic_fw_load_mgr - state of the dynamic firmware load protocol
 * (COMMS based).
 * @response: last response received from firmware.
 * @comm_desc: communication descriptor shared with firmware.
 * @image_region: memory region the firmware image is copied into.
 * @fw_image_size: size of the firmware image to load.
 * @wait_for_bl_timeout: timeout for waiting on the bootloader.
 */
struct dynamic_fw_load_mgr {
	struct fw_response response;
	struct lkd_fw_comms_desc comm_desc;
	struct pci_mem_region *image_region;
	size_t fw_image_size;
	u32 wait_for_bl_timeout;
};

/**
 * struct fw_image_props - properties of one firmware image file.
 * @image_name: file name of the image.
 * @src_off: offset within the file to copy from.
 * @copy_size: number of bytes to copy (0 presumably means "whole image" -
 *             confirm against the loader code).
 */
struct fw_image_props {
	char *image_name;
	u32 src_off;
	u32 copy_size;
};

/**
 * struct fw_load_mgr - common firmware loader state; exactly one of the
 * union members is active, selected by the load protocol in use.
 * @dynamic_loader: dynamic (COMMS) protocol state.
 * @static_loader: static (register based) protocol state.
 * @boot_fit_img: boot-fit image properties.
 * @linux_img: Linux image properties.
 * @cpu_timeout: timeout for device CPU operations.
 * @boot_fit_timeout: timeout for boot-fit load.
 * @skip_bmc: non-zero to skip BMC bring-up.
 * @sram_bar_id: BAR id of the SRAM.
 * @dram_bar_id: BAR id of the DRAM.
 * @linux_loaded: non-zero once the Linux image was loaded to the device.
 */
struct fw_load_mgr {
	union {
		struct dynamic_fw_load_mgr dynamic_loader;
		struct static_fw_load_mgr static_loader;
	};
	struct fw_image_props boot_fit_img;
	struct fw_image_props linux_img;
	u32 cpu_timeout;
	u32 boot_fit_timeout;
	u8 skip_bmc;
	u8 sram_bar_id;
	u8 dram_bar_id;
	u8 linux_loaded;
};
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
/**
 * struct hl_asic_funcs - ASIC-specific operations table, implemented once
 * per supported ASIC (see enum hl_asic_type) and invoked from common code.
 * Callbacks are grouped by area; names follow the common-code entry points
 * that dispatch to them.
 */
struct hl_asic_funcs {
	/* device init / teardown / power transitions */
	int (*early_init)(struct hl_device *hdev);
	int (*early_fini)(struct hl_device *hdev);
	int (*late_init)(struct hl_device *hdev);
	void (*late_fini)(struct hl_device *hdev);
	int (*sw_init)(struct hl_device *hdev);
	int (*sw_fini)(struct hl_device *hdev);
	int (*hw_init)(struct hl_device *hdev);
	void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
	int (*suspend)(struct hl_device *hdev);
	int (*resume)(struct hl_device *hdev);
	/* command buffer mmap to user space */
	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size);
	/* queue submission primitives */
	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
			struct hl_bd *bd);
	/* DMA memory allocation */
	void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flag);
	void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle);
	int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size);
	void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len);
	int (*test_queues)(struct hl_device *hdev);
	void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle);
	void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr);
	void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
				size_t size, dma_addr_t *dma_handle);
	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
				size_t size, void *vaddr);
	void (*hl_dma_unmap_sg)(struct hl_device *hdev,
				struct scatterlist *sgl, int nents,
				enum dma_data_direction dir);
	/* command submission parsing */
	int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
	int (*asic_dma_map_sg)(struct hl_device *hdev,
				struct scatterlist *sgl, int nents,
				enum dma_data_direction dir);
	u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
					struct sg_table *sgt);
	void (*add_end_of_cb_packets)(struct hl_device *hdev,
					void *kernel_address, u32 len,
					u64 cq_addr, u32 cq_val, u32 msix_num,
					bool eb);
	/* event queue and context handling */
	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
	int (*context_switch)(struct hl_device *hdev, u32 asid);
	void (*restore_phase_topology)(struct hl_device *hdev);
	/* debugfs accessors (read/write device memory on behalf of user) */
	int (*debugfs_read32)(struct hl_device *hdev, u64 addr,
				bool user_address, u32 *val);
	int (*debugfs_write32)(struct hl_device *hdev, u64 addr,
				bool user_address, u32 val);
	int (*debugfs_read64)(struct hl_device *hdev, u64 addr,
				bool user_address, u64 *val);
	int (*debugfs_write64)(struct hl_device *hdev, u64 addr,
				bool user_address, u64 val);
	int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr);
	void (*add_device_attr)(struct hl_device *hdev,
				struct attribute_group *dev_attr_grp);
	void (*handle_eqe)(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry);
	void (*set_pll_profile)(struct hl_device *hdev,
			enum hl_pll_frequency freq);
	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
				u32 *size);
	/* MMU page-table access and cache invalidation */
	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
	int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
					u32 flags);
	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
				u32 flags, u32 asid, u64 va, u64 size);
	int (*send_heartbeat)(struct hl_device *hdev);
	void (*set_clock_gating)(struct hl_device *hdev);
	void (*disable_clock_gating)(struct hl_device *hdev);
	int (*debug_coresight)(struct hl_device *hdev, void *data);
	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
				u8 mask_len, struct seq_file *s);
	int (*soft_reset_late_init)(struct hl_device *hdev);
	void (*hw_queues_lock)(struct hl_device *hdev);
	void (*hw_queues_unlock)(struct hl_device *hdev);
	u32 (*get_pci_id)(struct hl_device *hdev);
	int (*get_eeprom_data)(struct hl_device *hdev, void *data,
				size_t max_size);
	/* communication with the device CPU (firmware) */
	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result);
	int (*pci_bars_map)(struct hl_device *hdev);
	int (*init_iatu)(struct hl_device *hdev);
	/* register access primitives used by the RREG32/WREG32 macros */
	u32 (*rreg)(struct hl_device *hdev, u32 reg);
	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
	void (*halt_coresight)(struct hl_device *hdev);
	int (*ctx_init)(struct hl_ctx *ctx);
	void (*ctx_fini)(struct hl_ctx *ctx);
	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
	/* firmware loading */
	int (*load_firmware_to_device)(struct hl_device *hdev);
	int (*load_boot_fit_to_device)(struct hl_device *hdev);
	/* sync stream: signal/wait CB generation and SOB reset */
	u32 (*get_signal_cb_size)(struct hl_device *hdev);
	u32 (*get_wait_cb_size)(struct hl_device *hdev);
	u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
			u32 size, bool eb);
	u32 (*gen_wait_cb)(struct hl_device *hdev,
			struct hl_gen_wait_properties *prop);
	void (*reset_sob)(struct hl_device *hdev, void *data);
	void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
	u64 (*get_device_time)(struct hl_device *hdev);
	void (*collective_wait_init_cs)(struct hl_cs *cs);
	int (*collective_wait_create_jobs)(struct hl_device *hdev,
			struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
			u32 collective_engine_id);
	/* DRAM address (de)scrambling and HW block mmap */
	u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
	u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
	void (*ack_protection_bits_errors)(struct hl_device *hdev);
	int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id);
	int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size);
	void (*enable_events_from_fw)(struct hl_device *hdev);
	void (*get_msi_info)(__le32 *table);
	int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
	void (*init_firmware_loader)(struct hl_device *hdev);
	void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
};
1230
1231
1232
1233
1234
1235
/* ASID 0 is reserved for the kernel driver's own context */
#define HL_KERNEL_ASID_ID 0

/**
 * enum hl_va_range_type - virtual address range types of a context
 * (indices into hl_ctx.va_range).
 * @HL_VA_RANGE_TYPE_HOST: range for host (regular page) mappings.
 * @HL_VA_RANGE_TYPE_HOST_HUGE: range for host huge-page mappings.
 * @HL_VA_RANGE_TYPE_DRAM: range for device DRAM mappings.
 * @HL_VA_RANGE_TYPE_MAX: number of range types.
 */
enum hl_va_range_type {
	HL_VA_RANGE_TYPE_HOST,
	HL_VA_RANGE_TYPE_HOST_HUGE,
	HL_VA_RANGE_TYPE_DRAM,
	HL_VA_RANGE_TYPE_MAX
};

/**
 * struct hl_va_range - a virtual address range and its free-block list.
 * @lock: protects the block list.
 * @list: list of free VA blocks (struct hl_vm_va_block).
 * @start_addr: first address of the range.
 * @end_addr: last address of the range.
 * @page_size: allocation granularity of the range.
 */
struct hl_va_range {
	struct mutex lock;
	struct list_head list;
	u64 start_addr;
	u64 end_addr;
	u32 page_size;
};

/**
 * struct hl_cs_counters_atomic - per-context/device CS drop counters,
 * updated atomically from submission paths.
 * @out_of_mem_drop_cnt: submissions dropped for lack of memory.
 * @parsing_drop_cnt: submissions dropped due to parsing failure.
 * @queue_full_drop_cnt: submissions dropped because a queue was full.
 * @device_in_reset_drop_cnt: submissions dropped while device was in reset.
 * @max_cs_in_flight_drop_cnt: submissions dropped at the in-flight cap.
 * @validation_drop_cnt: submissions dropped due to validation failure.
 */
struct hl_cs_counters_atomic {
	atomic64_t out_of_mem_drop_cnt;
	atomic64_t parsing_drop_cnt;
	atomic64_t queue_full_drop_cnt;
	atomic64_t device_in_reset_drop_cnt;
	atomic64_t max_cs_in_flight_drop_cnt;
	atomic64_t validation_drop_cnt;
};

/**
 * struct hl_pending_cb - a command buffer queued for deferred submission
 * (kept on hl_ctx.pending_cb_list).
 * @cb_node: list linkage.
 * @cb: the command buffer to submit.
 * @cb_size: size of the CB.
 * @hw_queue_id: destination queue id.
 */
struct hl_pending_cb {
	struct list_head cb_node;
	struct hl_cb *cb;
	u32 cb_size;
	u32 hw_queue_id;
};
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
/**
 * struct hl_ctx - user/kernel context.
 * @mem_hash: maps virtual addresses to their mapping descriptors
 *            (hl_vm_hash_node entries).
 * @mmu_shadow_hash: maps shadow hop addresses to pgt_info entries.
 * @hpriv: owning file private data (NULL for the kernel context).
 * @hdev: owning device.
 * @refcount: context reference count.
 * @cs_pending: array of in-flight CS fences, indexed by sequence number
 *              modulo the array size.
 * @va_range: VA ranges, indexed by enum hl_va_range_type.
 * @mem_hash_lock: protects @mem_hash.
 * @mmu_lock: serializes MMU map/unmap operations in this context.
 * @hw_block_list_lock: protects @hw_block_mem_list.
 * @debugfs_list: linkage in the debugfs contexts list.
 * @pending_cb_list: CBs awaiting deferred submission (hl_pending_cb).
 * @hw_block_mem_list: mapped HW blocks (hl_vm_hw_block_list_node).
 * @cs_counters: CS drop statistics for this context.
 * @cb_va_pool: VA pool for MMU-mapped command buffers.
 * @cs_sequence: sequence number to assign to the next CS.
 * @dram_default_hops: hop tables of the DRAM default page mapping.
 * @pending_cb_lock: protects @pending_cb_list.
 * @cs_lock: protects @cs_sequence / @cs_pending.
 * @dram_phys_mem: amount of device physical memory allocated by this
 *                 context.
 * @thread_ctx_switch_token: single-thread token for context switch.
 * @thread_pending_cb_token: single-thread token for pending-CB flush.
 * @thread_ctx_switch_wait_token: set once the context switch completed.
 * @asid: address space id of this context.
 * @handle: context handle (IDR id).
 */
struct hl_ctx {
	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;
	struct kref refcount;
	struct hl_fence **cs_pending;
	struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
	struct mutex mem_hash_lock;
	struct mutex mmu_lock;
	struct mutex hw_block_list_lock;
	struct list_head debugfs_list;
	struct list_head pending_cb_list;
	struct list_head hw_block_mem_list;
	struct hl_cs_counters_atomic cs_counters;
	struct gen_pool *cb_va_pool;
	u64 cs_sequence;
	u64 *dram_default_hops;
	spinlock_t pending_cb_lock;
	spinlock_t cs_lock;
	atomic64_t dram_phys_mem;
	atomic_t thread_ctx_switch_token;
	atomic_t thread_pending_cb_token;
	u32 thread_ctx_switch_wait_token;
	u32 asid;
	u32 handle;
};

/**
 * struct hl_ctx_mgr - per-process context manager.
 * @ctx_lock: protects the handle IDR.
 * @ctx_handles: IDR mapping context handles to hl_ctx objects.
 */
struct hl_ctx_mgr {
	struct mutex ctx_lock;
	struct idr ctx_handles;
};
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
/**
 * struct hl_userptr - pinned host memory descriptor.
 * @vm_type: always VM_TYPE_USERPTR; placed first so the mapping code can
 *           distinguish it from hl_vm_phys_pg_pack by this field.
 * @job_node: linkage in the owning job's userptr list.
 * @pages: array of pinned pages.
 * @npages: number of pinned pages.
 * @sgt: scatter-gather table of the pinned memory.
 * @dir: DMA direction the memory was mapped with.
 * @debugfs_list: linkage in the debugfs userptr list.
 * @addr: user virtual address of the buffer.
 * @size: buffer size in bytes.
 * @dma_mapped: non-zero once @sgt was DMA-mapped.
 */
struct hl_userptr {
	enum vm_type_t vm_type;
	struct list_head job_node;
	struct page **pages;
	unsigned int npages;
	struct sg_table *sgt;
	enum dma_data_direction dir;
	struct list_head debugfs_list;
	u64 addr;
	u32 size;
	u8 dma_mapped;
};
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
/**
 * struct hl_cs - command submission.
 * @jobs_in_queue_cnt: per-queue count of jobs belonging to this CS.
 * @ctx: submitting context.
 * @job_list: jobs of this CS.
 * @job_lock: protects @job_list.
 * @refcount: CS reference count.
 * @fence: fence signalled on completion.
 * @signal_fence: fence of the signal CS a wait CS depends on.
 * @finish_work: deferred completion processing.
 * @work_tdr: delayed work implementing the CS timeout (TDR).
 * @mirror_node: linkage in the device's mirror list of in-flight CSs.
 * @staged_cs_node: linkage in a staged-submission chain.
 * @debugfs_list: linkage in the debugfs CS list.
 * @sequence: sequence number of this CS.
 * @staged_sequence: sequence of the first CS in the staged chain.
 * @timeout_jiffies: absolute timeout of this CS.
 * @submission_time_jiffies: time the CS was submitted.
 * @type: CS type.
 * @submitted: non-zero once the CS was submitted to the queues.
 * @completed: non-zero once the CS completed.
 * @timedout: non-zero if the CS timed out.
 * @tdr_active: non-zero while @work_tdr is scheduled.
 * @aborted: non-zero if the CS was aborted (e.g. during reset).
 * @timestamp: non-zero if a completion timestamp should be recorded.
 * @staged_last: non-zero for the last CS of a staged chain.
 * @staged_first: non-zero for the first CS of a staged chain.
 * @staged_cs: non-zero if this CS is part of a staged submission.
 * @skip_reset_on_timeout: non-zero to avoid device reset on CS timeout.
 */
struct hl_cs {
	u16 *jobs_in_queue_cnt;
	struct hl_ctx *ctx;
	struct list_head job_list;
	spinlock_t job_lock;
	struct kref refcount;
	struct hl_fence *fence;
	struct hl_fence *signal_fence;
	struct work_struct finish_work;
	struct delayed_work work_tdr;
	struct list_head mirror_node;
	struct list_head staged_cs_node;
	struct list_head debugfs_list;
	u64 sequence;
	u64 staged_sequence;
	u64 timeout_jiffies;
	u64 submission_time_jiffies;
	enum hl_cs_type type;
	u8 submitted;
	u8 completed;
	u8 timedout;
	u8 tdr_active;
	u8 aborted;
	u8 timestamp;
	u8 staged_last;
	u8 staged_first;
	u8 staged_cs;
	u8 skip_reset_on_timeout;
};

/**
 * struct hl_cs_job - a single job (command buffer) within a CS.
 * @cs_node: linkage in the CS job list.
 * @cs: owning command submission.
 * @user_cb: CB handed in by the user.
 * @patched_cb: driver-patched copy of @user_cb (external queues).
 * @finish_work: deferred completion processing for this job.
 * @userptr_list: host memory pinned for this job (hl_userptr).
 * @debugfs_list: linkage in the debugfs job list.
 * @refcount: job reference count.
 * @queue_type: type of the destination queue.
 * @id: job id within its CS.
 * @hw_queue_id: destination queue id.
 * @user_cb_size: size of @user_cb.
 * @job_cb_size: size of the CB actually submitted.
 * @is_kernel_allocated_cb: non-zero if the CB was allocated by the driver.
 * @contains_dma_pkt: non-zero if the CB contains a DMA packet.
 */
struct hl_cs_job {
	struct list_head cs_node;
	struct hl_cs *cs;
	struct hl_cb *user_cb;
	struct hl_cb *patched_cb;
	struct work_struct finish_work;
	struct list_head userptr_list;
	struct list_head debugfs_list;
	struct kref refcount;
	enum hl_queue_type queue_type;
	u32 id;
	u32 hw_queue_id;
	u32 user_cb_size;
	u32 job_cb_size;
	u8 is_kernel_allocated_cb;
	u8 contains_dma_pkt;
};
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
/**
 * struct hl_cs_parser - context handed to the ASIC-specific CS parser.
 * @user_cb: CB submitted by the user.
 * @patched_cb: CB produced by the parser (patched copy), if any.
 * @job_userptr_list: list collecting host memory pinned while parsing.
 * @cs_sequence: sequence number of the parsed CS.
 * @queue_type: type of the destination queue.
 * @ctx_id: id of the submitting context.
 * @hw_queue_id: destination queue id.
 * @user_cb_size: size of @user_cb.
 * @patched_cb_size: size of @patched_cb after parsing.
 * @job_id: id of the job being parsed.
 * @is_kernel_allocated_cb: non-zero if the CB was allocated by the driver.
 * @contains_dma_pkt: non-zero if the CB contains a DMA packet.
 * @completion: non-zero if the job should generate a completion.
 */
struct hl_cs_parser {
	struct hl_cb *user_cb;
	struct hl_cb *patched_cb;
	struct list_head *job_userptr_list;
	u64 cs_sequence;
	enum hl_queue_type queue_type;
	u32 ctx_id;
	u32 hw_queue_id;
	u32 user_cb_size;
	u32 patched_cb_size;
	u8 job_id;
	u8 is_kernel_allocated_cb;
	u8 contains_dma_pkt;
	u8 completion;
};
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
/**
 * struct hl_vm_hash_node - entry of the per-context memory hash; maps a
 * device virtual address to its backing descriptor.
 * @node: hash-table linkage (hl_ctx.mem_hash).
 * @vaddr: device virtual address (hash key).
 * @ptr: backing object - hl_userptr or hl_vm_phys_pg_pack, distinguished
 *       by the vm_type field both start with.
 */
struct hl_vm_hash_node {
	struct hlist_node node;
	u64 vaddr;
	void *ptr;
};

/**
 * struct hl_vm_hw_block_list_node - a HW block mmapped by a context.
 * @node: list linkage (hl_ctx.hw_block_mem_list).
 * @ctx: context that mapped the block.
 * @vaddr: user virtual address of the mapping.
 * @size: size of the block.
 * @id: id of the HW block.
 */
struct hl_vm_hw_block_list_node {
	struct list_head node;
	struct hl_ctx *ctx;
	unsigned long vaddr;
	u32 size;
	u32 id;
};
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
/**
 * struct hl_vm_phys_pg_pack - a pack of device physical pages.
 * @vm_type: always VM_TYPE_PHYS_PACK; placed first to distinguish from
 *           hl_userptr (see hl_vm_hash_node.ptr).
 * @pages: array of physical page addresses.
 * @npages: number of pages in @pages.
 * @total_size: total size of the pack, in bytes.
 * @mapping_cnt: number of virtual mappings currently using this pack.
 * @asid: address space id of the allocating context.
 * @page_size: size of each page.
 * @flags: allocation flags (as passed from the memory ioctl).
 * @handle: allocation handle.
 * @offset: offset within the first page (partial-page mappings).
 * @contiguous: non-zero if the pages are physically contiguous.
 * @created_from_userptr: non-zero if built from pinned host memory.
 */
struct hl_vm_phys_pg_pack {
	enum vm_type_t vm_type;
	u64 *pages;
	u64 npages;
	u64 total_size;
	atomic_t mapping_cnt;
	u32 asid;
	u32 page_size;
	u32 flags;
	u32 handle;
	u32 offset;
	u8 contiguous;
	u8 created_from_userptr;
};

/**
 * struct hl_vm_va_block - a free block inside a VA range.
 * @node: list linkage (hl_va_range.list).
 * @start: first address of the block.
 * @end: last address of the block.
 * @size: block size in bytes.
 */
struct hl_vm_va_block {
	struct list_head node;
	u64 start;
	u64 end;
	u64 size;
};

/**
 * struct hl_vm - device virtual memory manager.
 * @dram_pg_pool: pool of free device DRAM pages.
 * @dram_pg_pool_refcount: refcount of the DRAM page pool.
 * @idr_lock: protects @phys_pg_pack_handles.
 * @phys_pg_pack_handles: IDR of allocated physical page packs.
 * @init_done: non-zero once the VM manager was initialized.
 */
struct hl_vm {
	struct gen_pool *dram_pg_pool;
	struct kref dram_pg_pool_refcount;
	spinlock_t idr_lock;
	struct idr phys_pg_pack_handles;
	u8 init_done;
};
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
/**
 * struct hl_debug_params - parameters of a debug (coresight) operation,
 * handed to the ASIC-specific debug_coresight() callback.
 * @input: operation-specific input buffer.
 * @output: operation-specific output buffer.
 * @output_size: size of @output.
 * @reg_idx: index of the debug register/component to operate on.
 * @op: requested debug operation.
 * @enable: true to enable the component, false to disable.
 */
struct hl_debug_params {
	void *input;
	void *output;
	u32 output_size;
	u32 reg_idx;
	u32 op;
	bool enable;
};
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
/**
 * struct hl_fpriv - private data of an open device file.
 * @hdev: owning device.
 * @filp: the open file.
 * @taskpid: pid of the opening process.
 * @ctx: the process's (single) context.
 * @ctx_mgr: context handle manager.
 * @cb_mgr: command buffer handle manager.
 * @debugfs_list: linkage in the debugfs file list.
 * @dev_node: linkage in the device's open-files list.
 * @refcount: file-private reference count.
 * @restore_phase_mutex: serializes the context-restore phase.
 * @is_control: non-zero if this is the control device (no compute).
 */
struct hl_fpriv {
	struct hl_device *hdev;
	struct file *filp;
	struct pid *taskpid;
	struct hl_ctx *ctx;
	struct hl_ctx_mgr ctx_mgr;
	struct hl_cb_mgr cb_mgr;
	struct list_head debugfs_list;
	struct list_head dev_node;
	struct kref refcount;
	struct mutex restore_phase_mutex;
	u8 is_control;
};
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
/**
 * struct hl_info_list - descriptor of one debugfs info file.
 * @name: file name.
 * @show: seq_file show callback.
 * @write: optional write handler.
 */
struct hl_info_list {
	const char *name;
	int (*show)(struct seq_file *s, void *data);
	ssize_t (*write)(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos);
};

/**
 * struct hl_debugfs_entry - instance of a debugfs info file on a device.
 * @info_ent: the file descriptor (name + callbacks).
 * @dev_entry: the owning device debugfs entry.
 */
struct hl_debugfs_entry {
	const struct hl_info_list *info_ent;
	struct hl_dbg_device_entry *dev_entry;
};
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
/**
 * struct hl_dbg_device_entry - per-device debugfs state.
 * @root: debugfs root directory of the device.
 * @hdev: owning device.
 * @entry_arr: array of info-file entries.
 * @file_list: open device files; @file_mutex protects it.
 * @cb_list/@cs_list/@cs_job_list/@userptr_list/@ctx_mem_hash_list: object
 *     lists exposed via debugfs, each protected by its spinlock below.
 * @blob_desc: blob used by the read-DMA debugfs file.
 * @addr: address used by the generic address read/write files.
 * @mmu_addr/@mmu_asid: parameters of the MMU translation file.
 * @i2c_bus/@i2c_addr/@i2c_reg: parameters of the I2C debugfs files.
 */
struct hl_dbg_device_entry {
	struct dentry *root;
	struct hl_device *hdev;
	struct hl_debugfs_entry *entry_arr;
	struct list_head file_list;
	struct mutex file_mutex;
	struct list_head cb_list;
	spinlock_t cb_spinlock;
	struct list_head cs_list;
	spinlock_t cs_spinlock;
	struct list_head cs_job_list;
	spinlock_t cs_job_spinlock;
	struct list_head userptr_list;
	spinlock_t userptr_spinlock;
	struct list_head ctx_mem_hash_list;
	spinlock_t ctx_mem_hash_spinlock;
	struct debugfs_blob_wrapper blob_desc;
	u64 addr;
	u64 mmu_addr;
	u32 mmu_asid;
	u8 i2c_bus;
	u8 i2c_addr;
	u8 i2c_reg;
};

/* Maximum length of device-related strings (e.g. status names) */
#define HL_STR_MAX 32

/* Size of the device-status string table (last status + 1) */
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)

/* Maximum number of char-device minors, i.e. maximum supported devices */
#define HL_MAX_MINORS 256
1807
1808
1809
1810
1811
/* Default register accessors (used when no ASIC override is installed) */
u32 hl_rreg(struct hl_device *hdev, u32 reg);
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);

/*
 * Register access helpers. All of these dispatch through the ASIC-specific
 * rreg/wreg callbacks and expect a local 'hdev' variable to be in scope.
 * NOTE: per kernel convention these macros may evaluate their arguments
 * more than once - do not pass expressions with side effects.
 */
#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
			hdev->asic_funcs->rreg(hdev, (reg)))

/* Read-modify-write: keep the bits in 'mask', OR-in 'val' outside it */
#define WREG32_P(reg, val, mask) \
	do { \
		u32 tmp_ = RREG32(reg); \
		tmp_ &= (mask); \
		tmp_ |= ((val) & ~(mask)); \
		WREG32(reg, tmp_); \
	} while (0)
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))

/* Read-modify-write a field: clear 'mask', insert 'val' at the field LSB */
#define RMWREG32(reg, val, mask) \
	do { \
		u32 tmp_ = RREG32(reg); \
		tmp_ &= ~(mask); \
		tmp_ |= ((val) << __ffs(mask)); \
		WREG32(reg, tmp_); \
	} while (0)

/* Read a register and extract the field selected by 'mask' */
#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))

/* Field helpers built from the auto-generated <reg>_<field>_SHIFT/MASK */
#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
#define WREG32_FIELD(reg, offset, field, val) \
	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
				~REG_FIELD_MASK(reg, field)) | \
				(val) << REG_FIELD_SHIFT(reg, field))
1846
1847
1848
1849
/*
 * Poll a register (via RREG32) until 'cond' becomes true or 'timeout_us'
 * elapses; evaluates to 0 on success or -ETIMEDOUT. On simulator platforms
 * (no PCI device) the timeout is stretched x10 and capped at
 * HL_SIM_MAX_TIMEOUT_US. A final re-read is done after timeout so 'cond'
 * is judged on a fresh value.
 * NOTE(review): 'timeout_us * 10' is computed before the u64 cast and may
 * overflow for very large 32-bit timeouts - confirm callers' ranges.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = RREG32(addr); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = RREG32(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
/*
 * Poll a host memory location until 'cond' is true or 'timeout_us' elapses;
 * evaluates to 0 on success or -ETIMEDOUT. A memory barrier precedes every
 * read so device writes are observed. If 'mem_written_by_device' is set the
 * value is treated as little-endian (device byte order) and converted to
 * CPU order. Simulator timeouts are stretched as in hl_poll_timeout().
 */
#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
				mem_written_by_device) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		/* Verify we read updates done by other cores or by device */ \
		mb(); \
		(val) = *((u32 *)(addr)); \
		if (mem_written_by_device) \
			(val) = le32_to_cpu(*(__le32 *) &(val)); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = *((u32 *)(addr)); \
			if (mem_written_by_device) \
				(val) = le32_to_cpu(*(__le32 *) &(val)); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})

/*
 * Same as above but for device (I/O) memory - reads go through readl(),
 * which provides the required ordering and endianness handling.
 */
#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
					timeout_us) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = readl(addr); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = readl(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
1941
struct hwmon_chip_info;

/**
 * struct hl_device_reset_work - deferred device-reset work item.
 * @wq: workqueue the reset work runs on.
 * @reset_work: the (delayed) reset work.
 * @hdev: device to reset.
 */
struct hl_device_reset_work {
	struct workqueue_struct *wq;
	struct delayed_work reset_work;
	struct hl_device *hdev;
};
1955
1956
1957
1958
1959
1960
1961
1962
/**
 * struct hr_mmu_hop_addrs - the addresses describing one MMU hop.
 * @virt_addr: virtual address of the hop.
 * @phys_addr: physical address of the hop (as seen by the device).
 * @shadow_addr: address of the host shadow copy of the hop
 *               (presumably used to track PTEs from the host — verify
 *               against the MMU implementation).
 */
struct hr_mmu_hop_addrs {
	u64 virt_addr;
	u64 phys_addr;
	u64 shadow_addr;
};
1968
1969
1970
1971
1972
1973
1974
/**
 * struct hl_mmu_hr_priv - MMU host-resident private information.
 * @mmu_pgt_pool: pool from which page-table pages are allocated.
 * @mmu_shadow_hop0: per-ASID array of shadow hop-0 address descriptors.
 */
struct hl_mmu_hr_priv {
	struct gen_pool *mmu_pgt_pool;
	struct hr_mmu_hop_addrs *mmu_shadow_hop0;
};
1979
1980
1981
1982
1983
1984
1985
/**
 * struct hl_mmu_dr_priv - MMU device-resident private information.
 * @mmu_pgt_pool: pool from which page-table pages are allocated.
 * @mmu_shadow_hop0: shadow hop-0 table (opaque; layout is
 *                   implementation-defined in the MMU code).
 */
struct hl_mmu_dr_priv {
	struct gen_pool *mmu_pgt_pool;
	void *mmu_shadow_hop0;
};
1990
1991
1992
1993
1994
1995
/**
 * struct hl_mmu_priv - MMU private information, one sub-struct per
 *                      page-table location (see enum hl_mmu_page_table_location).
 * @dr: information for the device-resident page tables.
 * @hr: information for the host-resident page tables.
 */
struct hl_mmu_priv {
	struct hl_mmu_dr_priv dr;
	struct hl_mmu_hr_priv hr;
};
2000
2001
2002
2003
2004
2005
2006
2007
2008
/**
 * struct hl_mmu_per_hop_info - hop data returned by a TLB-info query.
 * @hop_addr: address of the hop (page-table page).
 * @hop_pte_addr: address of the PTE within the hop.
 * @hop_pte_val: value stored in that PTE.
 */
struct hl_mmu_per_hop_info {
	u64 hop_addr;
	u64 hop_pte_addr;
	u64 hop_pte_val;
};
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
/**
 * struct hl_mmu_hop_info - the result of an MMU translation walk
 *                          (filled by the get_tlb_info op).
 * @scrambled_vaddr: the virtual address after scrambling (the address
 *                   actually walked) — see hl_mmu_scramble_addr().
 * @unscrambled_paddr: the resulting physical address after descrambling.
 * @hop_info: per-hop data, one entry per walked hop, up to the
 *            architectural maximum of MMU_ARCH_5_HOPS.
 * @used_hops: number of valid entries in @hop_info.
 * @range_type: which virtual-address range @scrambled_vaddr belongs to.
 */
struct hl_mmu_hop_info {
	u64 scrambled_vaddr;
	u64 unscrambled_paddr;
	struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
	u32 used_hops;
	enum hl_va_range_type range_type;
};
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
/**
 * struct hl_mmu_funcs - per page-table-location MMU operations
 *                       (one instance per entry of hl_device.mmu_func).
 * @init: allocate device-level MMU resources.
 * @fini: release what @init allocated.
 * @ctx_init: allocate per-context MMU resources.
 * @ctx_fini: release what @ctx_init allocated.
 * @map: map a single page of @page_size at @virt_addr to @phys_addr;
 *       @is_dram_addr selects the DRAM vs. host address range.
 * @unmap: unmap a single page at @virt_addr.
 * @flush: flush all writes to the page tables (make them visible to
 *         the device).
 * @swap_out: called when a context is swapped out of the device.
 * @swap_in: called when a context is swapped back into the device.
 * @get_tlb_info: walk the translation for @virt_addr and fill @hops.
 */
struct hl_mmu_funcs {
	int (*init)(struct hl_device *hdev);
	void (*fini)(struct hl_device *hdev);
	int (*ctx_init)(struct hl_ctx *ctx);
	void (*ctx_fini)(struct hl_ctx *ctx);
	int (*map)(struct hl_ctx *ctx,
			u64 virt_addr, u64 phys_addr, u32 page_size,
			bool is_dram_addr);
	int (*unmap)(struct hl_ctx *ctx,
			u64 virt_addr, bool is_dram_addr);
	void (*flush)(struct hl_ctx *ctx);
	void (*swap_out)(struct hl_ctx *ctx);
	void (*swap_in)(struct hl_ctx *ctx);
	int (*get_tlb_info)(struct hl_ctx *ctx,
			u64 virt_addr, struct hl_mmu_hop_info *hops);
};
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
/**
 * struct hl_device - habanalabs per-accelerator device structure.
 *
 * Field comments below are intentionally brief. Where a field's semantics
 * cannot be deduced from this header alone they are inferred from the
 * name/type and marked "presumably" — verify against the implementation
 * files before relying on them.
 */
struct hl_device {
	/* PCI core. pdev is NULL when there is no real PCI device
	 * (simulator); the hl_poll_timeout_* macros stretch timeouts then. */
	struct pci_dev *pdev;
	u64 pcie_bar_phys[HL_PCI_NUM_BARS];	/* physical BAR addresses */
	void __iomem *pcie_bar[HL_PCI_NUM_BARS];	/* kernel BAR mappings */
	void __iomem *rmmio;
	/* char devices: cdev/dev for compute, cdev_ctrl/dev_ctrl for control */
	struct cdev cdev;
	struct cdev cdev_ctrl;
	struct device *dev;
	struct device *dev_ctrl;
	struct delayed_work work_freq;	/* presumably periodic PLL-profile work */
	struct delayed_work work_heartbeat;	/* periodic device-CPU heartbeat */
	struct hl_device_reset_work device_reset_work;	/* delayed reset request */
	char asic_name[HL_STR_MAX];
	char status[HL_DEV_STS_MAX][HL_STR_MAX];	/* printable status strings */
	enum hl_asic_type asic_type;
	/* completion/event/user-interrupt queues and their workqueues */
	struct hl_cq *completion_queue;
	struct hl_user_interrupt *user_interrupt;
	struct hl_user_interrupt common_user_interrupt;	/* see HL_COMMON_USER_INTERRUPT_ID */
	struct workqueue_struct **cq_wq;
	struct workqueue_struct *eq_wq;
	struct workqueue_struct *sob_reset_wq;
	struct hl_ctx *kernel_ctx;	/* driver-owned context */
	struct hl_hw_queue *kernel_queues;
	/* command submissions currently in flight */
	struct list_head cs_mirror_list;
	spinlock_t cs_mirror_lock;	/* protects cs_mirror_list */
	struct hl_cb_mgr kernel_cb_mgr;	/* command buffers of the kernel ctx */
	struct hl_eq event_queue;
	/* DMA pools / CPU-accessible memory region */
	struct dma_pool *dma_pool;
	void *cpu_accessible_dma_mem;
	dma_addr_t cpu_accessible_dma_address;
	struct gen_pool *cpu_accessible_dma_pool;
	unsigned long *asid_bitmap;	/* allocation state of ASIDs */
	struct mutex asid_mutex;	/* protects asid_bitmap */
	struct mutex send_cpu_message_lock;	/* serializes messages to device CPU */
	struct mutex debug_lock;	/* protects in_debug transitions */
	struct asic_fixed_properties asic_prop;
	const struct hl_asic_funcs *asic_funcs;	/* per-ASIC ops table */
	void *asic_specific;	/* per-ASIC private data */
	struct hl_vm vm;
	/* hwmon / power management */
	struct device *hwmon_dev;
	enum hl_pm_mng_profile pm_mng_profile;
	struct hwmon_chip_info *hl_chip_info;

	struct hl_dbg_device_entry hl_debugfs;	/* debugfs state */

	/* pool of pre-allocated command buffers */
	struct list_head cb_pool;
	spinlock_t cb_pool_lock;	/* protects cb_pool */

	/* internal (driver-use) command-buffer pool */
	void *internal_cb_pool_virt_addr;
	dma_addr_t internal_cb_pool_dma_addr;
	struct gen_pool *internal_cb_pool;
	u64 internal_cb_va_base;

	/* file-private objects of currently open users */
	struct list_head fpriv_list;
	struct mutex fpriv_list_lock;	/* protects fpriv_list */

	struct hl_ctx *compute_ctx;	/* the active compute context, if any */

	struct hl_cs_counters_atomic aggregated_cs_counters;

	/* MMU state, indexed by enum hl_mmu_page_table_location:
	 * [MMU_DR_PGT] device-resident, [MMU_HR_PGT] host-resident */
	struct hl_mmu_priv mmu_priv;
	struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];

	struct fw_load_mgr fw_loader;	/* firmware loading state */

	struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER];

	/* counters and runtime parameters */
	atomic64_t dram_used_mem;
	u64 timeout_jiffies;
	u64 max_power;
	u64 clock_gating_mask;
	u64 boot_error_status_mask;
	u64 last_successful_open_jif;	/* jiffies of last successful open */
	u64 last_open_session_duration_jif;
	u64 open_counter;
	atomic_t in_reset;	/* non-zero while a reset is in progress */
	enum hl_pll_frequency curr_pll_profile;
	enum cpucp_card_types card_type;
	u32 major;	/* char device major number */
	u32 high_pll;
	u32 soft_reset_cnt;
	u32 hard_reset_cnt;
	u32 clk_throttling_reason;
	u16 id;	/* device minor (compute) */
	u16 id_control;	/* device minor (control) */
	u16 cpu_pci_msb_addr;
	/* boolean state/capability flags (u8 for compactness) */
	u8 disabled;
	u8 late_init_done;
	u8 hwmon_initialized;
	u8 hard_reset_pending;
	u8 heartbeat;
	u8 reset_on_lockup;
	u8 dram_default_page_mapping;
	u8 memory_scrub;
	u8 pmmu_huge_range;
	u8 init_done;
	u8 device_cpu_disabled;
	u8 dma_mask;	/* presumably number of DMA address bits — verify */
	u8 in_debug;
	u8 power9_64bit_dma_enable;
	u8 cdev_sysfs_created;
	u8 stop_on_err;
	u8 supports_sync_stream;
	u8 sync_stream_queue_idx;
	u8 collective_mon_idx;
	u8 supports_coresight;
	u8 supports_soft_reset;
	u8 allow_external_soft_reset;
	u8 supports_cb_mapping;
	u8 needs_reset;
	u8 process_kill_trial_cnt;
	u8 device_fini_pending;
	u8 supports_staged_submission;
	u8 curr_reset_cause;
	u8 skip_reset_on_timeout;
	u8 device_cpu_is_halted;

	/* parameters below are presumably load-time knobs, fixed after
	 * driver initialization — verify against module-param handling */
	u64 nic_ports_mask;
	u64 fw_components;
	u8 mmu_enable;
	u8 mmu_huge_page_opt;
	u8 reset_pcilink;
	u8 cpu_queues_enable;
	u8 pldm;
	u8 axi_drain;
	u8 sram_scrambler_enable;
	u8 dram_scrambler_enable;
	u8 hard_reset_on_fw_events;
	u8 bmc_enable;
	u8 rl_enable;
	u8 reset_on_preboot_fail;
	u8 reset_upon_device_release;
	u8 reset_if_device_not_idle;
};
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
/*
 * hl_ioctl_t - signature of an IOCTL handler.
 * @hpriv: the file-private data of the calling process.
 * @data: the already-copied-in IOCTL argument structure.
 * Returns 0 on success or a negative errno.
 */
typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);

/**
 * struct hl_ioctl_desc - describes one entry in the IOCTL dispatch table.
 * @cmd: the IOCTL command number.
 * @func: the handler for @cmd.
 */
struct hl_ioctl_desc {
	unsigned int cmd;
	hl_ioctl_t *func;
};
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384static inline bool hl_mem_area_inside_range(u64 address, u64 size,
2385 u64 range_start_address, u64 range_end_address)
2386{
2387 u64 end_address = address + size;
2388
2389 if ((address >= range_start_address) &&
2390 (end_address <= range_end_address) &&
2391 (end_address > address))
2392 return true;
2393
2394 return false;
2395}
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
2408 u64 range_start_address, u64 range_end_address)
2409{
2410 u64 end_address = address + size;
2411
2412 if ((address >= range_start_address) &&
2413 (address < range_end_address))
2414 return true;
2415
2416 if ((end_address >= range_start_address) &&
2417 (end_address < range_end_address))
2418 return true;
2419
2420 if ((address < range_start_address) &&
2421 (end_address >= range_end_address))
2422 return true;
2423
2424 return false;
2425}
2426
/* Device open/status and hardware-queue control (device.c / hw_queue.c) */
int hl_device_open(struct inode *inode, struct file *filp);
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status);
enum hl_device_status hl_device_status(struct hl_device *hdev);
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
		enum hl_asic_type asic_type, int minor);
void destroy_hdev(struct hl_device *hdev);
int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
		u32 cb_size, u64 cb_ptr);
int hl_hw_queue_schedule_cs(struct hl_cs *cs);
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
void hl_hw_queue_update_ci(struct hl_cs *cs);
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);

/* advance a queue pointer by one / convert a producer index to an offset */
#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
2448
/* Completion queue, event queue, IRQ handlers and ASID management */
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_user_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_default(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);

int hl_asid_init(struct hl_device *hdev);
void hl_asid_fini(struct hl_device *hdev);
unsigned long hl_asid_alloc(struct hl_device *hdev);
void hl_asid_free(struct hl_device *hdev, unsigned long asid);
2465
/* Context lifecycle and refcounting (context.c) */
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
void hl_ctx_do_release(struct kref *ref);
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);

/* Device lifecycle, suspend/resume and reset (device.c) */
int hl_device_init(struct hl_device *hdev, struct class *hclass);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
int hl_device_reset(struct hl_device *hdev, u32 flags);
void hl_hpriv_get(struct hl_fpriv *hpriv);
int hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
int hl_device_utilization(struct hl_device *hdev, u32 *utilization);

int hl_build_hwmon_channel_info(struct hl_device *hdev,
		struct cpucp_sensor *sensors_arr);

/* sysfs and hwmon registration */
int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);

int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
2494
/* Command buffers (command_buffer.c) */
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			u32 handle);
void hl_cb_put(struct hl_cb *cb);
void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);
int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);

/* Command submissions, fences and sync objects (command_submission.c) */
void hl_cs_rollback_all(struct hl_device *hdev);
void hl_pending_cb_list_flush(struct hl_ctx *ctx);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence);
void hl_fence_get(struct hl_fence *fence);
void cs_get(struct hl_cs *cs);
bool cs_needs_completion(struct hl_cs *cs);
bool cs_needs_timeout(struct hl_cs *cs);
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);

/* per-ASIC function-table installers */
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
2529
/* Virtual-memory and user-pointer management (memory.c) */
int hl_vm_ctx_init(struct hl_ctx *ctx);
void hl_vm_ctx_fini(struct hl_ctx *ctx);

int hl_vm_init(struct hl_device *hdev);
void hl_vm_fini(struct hl_device *hdev);

void hl_hw_block_mem_init(struct hl_ctx *ctx);
void hl_hw_block_mem_fini(struct hl_ctx *ctx);

u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		enum hl_va_range_type type, u32 size, u32 alignment);
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		u64 start_addr, u64 size);
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
			struct hl_userptr *userptr);
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
void hl_userptr_delete_list(struct hl_device *hdev,
			struct list_head *userptr_list);
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
			struct list_head *userptr_list,
			struct hl_userptr **userptr);

/* MMU mapping services (mmu/mmu.c) */
int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size, bool flush_pte);
int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
		bool flush_pte);
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
					u64 phys_addr, u32 size);
int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
			struct hl_mmu_hop_info *hops);
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
2573
/* Firmware loading and device-CPU communication (firmware_if.c) */
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
				void __iomem *dst, u32 src_offset, u32 size);
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
				u16 len, u32 timeout, u64 *result);
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
				size_t irq_arr_size);
int hl_fw_test_cpu_queue(struct hl_device *hdev);
void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
						dma_addr_t *dma_handle);
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
			u32 sts_boot_dev_sts0_reg,
			u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
			u32 boot_err1_reg);
int hl_fw_cpucp_handshake(struct hl_device *hdev,
				u32 sts_boot_dev_sts0_reg,
				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
				u32 boot_err1_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
		struct hl_info_pci_counters *counters);
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
			u64 *total_energy);
int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
				enum pll_index *pll_index);
int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
		u16 *pll_freq_arr);
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev);
void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev);
int hl_fw_init_cpu(struct hl_device *hdev);
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
				u32 sts_boot_dev_sts0_reg,
				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
				u32 boot_err1_reg, u32 timeout);
int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
				struct fw_load_mgr *fw_loader,
				enum comms_cmd cmd, unsigned int size,
				bool wait_ok, u32 timeout);
/* PCI helpers: BAR mapping, ELBI access and iATU region setup (pci.c) */
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
			bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
		struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
		struct hl_outbound_pci_region *pci_region);
enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
2628
/* Sensors, frequency and power accessors (hwmon.c / sysfs.c) */
long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
								bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
								u64 freq);
int hl_get_temperature(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
			int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
			long value);
u64 hl_get_max_power(struct hl_device *hdev);
void hl_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev,
			int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
			int sensor_index, u32 attr, long value);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count);
2656
#ifdef CONFIG_DEBUG_FS

/* Debugfs hooks (debugfs.c) */
void hl_debugfs_init(void);
void hl_debugfs_fini(void);
void hl_debugfs_add_device(struct hl_device *hdev);
void hl_debugfs_remove_device(struct hl_device *hdev);
void hl_debugfs_add_file(struct hl_fpriv *hpriv);
void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
void hl_debugfs_add_cb(struct hl_cb *cb);
void hl_debugfs_remove_cb(struct hl_cb *cb);
void hl_debugfs_add_cs(struct hl_cs *cs);
void hl_debugfs_remove_cs(struct hl_cs *cs);
void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
void hl_debugfs_remove_userptr(struct hl_device *hdev,
				struct hl_userptr *userptr);
void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);

#else

/*
 * Empty stubs for !CONFIG_DEBUG_FS builds.
 *
 * Note: no __init annotation on hl_debugfs_init() here — the
 * CONFIG_DEBUG_FS declaration above is not __init, and annotating only
 * the stub would place it in .init.text if the compiler ever emitted it
 * out-of-line, risking a section mismatch.
 */
static inline void hl_debugfs_init(void)
{
}

static inline void hl_debugfs_fini(void)
{
}

static inline void hl_debugfs_add_device(struct hl_device *hdev)
{
}

static inline void hl_debugfs_remove_device(struct hl_device *hdev)
{
}

static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
}

static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
{
}

static inline void hl_debugfs_add_cb(struct hl_cb *cb)
{
}

static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
{
}

static inline void hl_debugfs_add_cs(struct hl_cs *cs)
{
}

static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
{
}

static inline void hl_debugfs_add_job(struct hl_device *hdev,
					struct hl_cs_job *job)
{
}

static inline void hl_debugfs_remove_job(struct hl_device *hdev,
					struct hl_cs_job *job)
{
}

static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
					struct hl_userptr *userptr)
{
}

static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
					struct hl_userptr *userptr)
{
}

static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
					struct hl_ctx *ctx)
{
}

static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
					struct hl_ctx *ctx)
{
}

#endif
2750
2751
/* IOCTL entry points and per-command handlers (habanalabs_ioctl.c) */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
2758
2759#endif
2760