1
2
3
4
5
6
7
8#ifndef HABANALABSP_H_
9#define HABANALABSP_H_
10
11#include "include/armcp_if.h"
12#include "include/qman_if.h"
13
14#include <linux/cdev.h>
15#include <linux/iopoll.h>
16#include <linux/irqreturn.h>
17#include <linux/dma-fence.h>
18#include <linux/dma-direction.h>
19#include <linux/scatterlist.h>
20#include <linux/hashtable.h>
21
22#define HL_NAME "habanalabs"
23
24#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT)
25
26#define HL_PENDING_RESET_PER_SEC 5
27
28#define HL_DEVICE_TIMEOUT_USEC 1000000
29
30#define HL_HEARTBEAT_PER_USEC 5000000
31
32#define HL_PLL_LOW_JOB_FREQ_USEC 5000000
33
34#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000
35#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000
36
37#define HL_PCI_ELBI_TIMEOUT_MSEC 10
38
39#define HL_SIM_MAX_TIMEOUT_US 10000000
40
41#define HL_MAX_QUEUES 128
42
43
44#define HL_MAX_PENDING_CS 64
45
46#define HL_IDLE_BUSY_TS_ARR_SIZE 4096
47
48
49#define MEM_HASH_TABLE_BITS 7
50
51
52#define MMU_HASH_TABLE_BITS 7
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/**
 * struct pgt_info - MMU hop page-table information.
 * @node: hash-table node; links this hop into a per-context MMU hash
 *        (see mmu_phys_hash/mmu_shadow_hash in struct hl_ctx).
 * @phys_addr: physical address of the hop's page table on the device.
 * @shadow_addr: address of the host-side shadow copy of the hop.
 * @ctx: the context this page table belongs to.
 * @num_of_ptes: number of valid PTEs in this hop; presumably the hop can be
 *               freed when this drops to zero — confirm against the MMU code.
 */
struct pgt_info {
	struct hlist_node node;
	u64 phys_addr;
	u64 shadow_addr;
	struct hl_ctx *ctx;
	int num_of_ptes;
};
74
75struct hl_device;
76struct hl_fpriv;
77
78
79
80
81
82
83
84
85
86
87
88
/**
 * enum hl_queue_type - type of a H/W queue.
 * @QUEUE_TYPE_NA: queue is not available.
 * @QUEUE_TYPE_EXT: external queue - the host submits to it and receives
 *                  completions on a completion queue.
 * @QUEUE_TYPE_INT: internal queue - used between device engines only.
 * @QUEUE_TYPE_CPU: queue used to communicate with the device's CPU (ArmCP).
 * @QUEUE_TYPE_HW: queue for direct submission to H/W.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA,
	QUEUE_TYPE_EXT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_CPU,
	QUEUE_TYPE_HW
};
96
97
98
99
100
101
102
103
104
/**
 * struct hw_queue_properties - properties of a single H/W queue.
 * @type: queue type (see enum hl_queue_type).
 * @driver_only: true (non-zero) if only the driver is allowed to send a job
 *               to this queue, false otherwise.
 * @requires_kernel_cb: true if a submission to this queue must use a
 *                      kernel-allocated command buffer.
 */
struct hw_queue_properties {
	enum hl_queue_type type;
	u8 driver_only;
	u8 requires_kernel_cb;
};
110
111
112
113
114
115
/**
 * enum vm_type_t - type of a virtual-memory mapping object.
 * @VM_TYPE_USERPTR: mapping backed by pinned host memory (struct hl_userptr).
 * @VM_TYPE_PHYS_PACK: mapping backed by device physical pages
 *                     (struct hl_vm_phys_pg_pack).
 *
 * Values are distinct bits; both struct hl_userptr and
 * struct hl_vm_phys_pg_pack start with this field so the type can be read
 * through a common pointer.
 */
enum vm_type_t {
	VM_TYPE_USERPTR = 0x1,
	VM_TYPE_PHYS_PACK = 0x2
};
120
121
122
123
124
125
126
127
/**
 * enum hl_device_hw_state - H/W device state reported by the ASIC's
 *                           get_hw_state() op.
 * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean (e.g. after reset).
 * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty and requires a reset before
 *                            re-initialization.
 */
enum hl_device_hw_state {
	HL_DEVICE_HW_STATE_CLEAN = 0,
	HL_DEVICE_HW_STATE_DIRTY
};
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/**
 * struct hl_mmu_properties - properties of one MMU address range
 *                            (five-hop page-table walk).
 * @start_addr: virtual start address of the range.
 * @end_addr: virtual end address of the range.
 * @hop0_shift: shift of hop-0 index in a virtual address.
 * @hop1_shift: shift of hop-1 index in a virtual address.
 * @hop2_shift: shift of hop-2 index in a virtual address.
 * @hop3_shift: shift of hop-3 index in a virtual address.
 * @hop4_shift: shift of hop-4 index in a virtual address.
 * @hop0_mask: mask selecting the hop-0 index bits.
 * @hop1_mask: mask selecting the hop-1 index bits.
 * @hop2_mask: mask selecting the hop-2 index bits.
 * @hop3_mask: mask selecting the hop-3 index bits.
 * @hop4_mask: mask selecting the hop-4 index bits.
 * @page_size: default page size of the range, in bytes.
 */
struct hl_mmu_properties {
	u64 start_addr;
	u64 end_addr;
	u64 hop0_shift;
	u64 hop1_shift;
	u64 hop2_shift;
	u64 hop3_shift;
	u64 hop4_shift;
	u64 hop0_mask;
	u64 hop1_mask;
	u64 hop2_mask;
	u64 hop3_mask;
	u64 hop4_mask;
	u32 page_size;
};
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/**
 * struct asic_fixed_properties - fixed (per-ASIC) device properties, filled
 *                                once at init and read-only afterwards.
 * @hw_queues_props: properties of every H/W queue, indexed by queue id.
 * @armcp_info: device info as reported by the device's CPU (ArmCP) -
 *              serial number, board name, sensors, etc.
 * @uboot_ver: F/W U-Boot version string.
 * @preboot_ver: F/W preboot version string.
 * @dmmu: DRAM MMU address-space properties.
 * @pmmu: PCI (host) MMU address-space properties.
 * @pmmu_huge: PCI MMU properties for huge pages (may equal @pmmu when the
 *             device has no separate huge-page range; see pmmu_huge_range
 *             in struct hl_device).
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address: start of SRAM available to the user.
 * @dram_base_address: DRAM physical start address.
 * @dram_end_address: DRAM physical end address.
 * @dram_user_base_address: start of DRAM available to the user.
 * @dram_size: DRAM total size, in bytes.
 * @dram_pci_bar_size: size of the PCI BAR that exposes DRAM.
 * @max_power_default: default max power of the device, used on init/reset.
 * @dram_size_for_default_page_mapping: DRAM size covered by the default
 *                                      page mapping.
 * @pcie_dbi_base_address: base address of the PCIe DBI registers.
 * @pcie_aux_dbi_reg_addr: AUX DBI register address.
 * @mmu_pgt_addr: base physical address of the MMU page tables in DRAM.
 * @mmu_dram_default_page_addr: physical address of the MMU default page.
 * @mmu_pgt_size: MMU page-tables total size, in bytes.
 * @mmu_pte_size: PTE size, in bytes.
 * @mmu_hop_table_size: size of a single hop table, in bytes.
 * @mmu_hop0_tables_total_size: total size of all hop-0 tables.
 * @dram_page_size: page size for MMU DRAM allocations.
 * @cfg_size: configuration-space size.
 * @sram_size: SRAM total size, in bytes.
 * @max_asid: maximum number of open contexts (ASIDs).
 * @num_of_events: number of possible H/W event ids.
 * @psoc_pci_pll_nr: PCI PLL NR value.
 * @psoc_pci_pll_nf: PCI PLL NF value.
 * @psoc_pci_pll_od: PCI PLL OD value.
 * @psoc_pci_pll_div_factor: PCI PLL DIV factor.
 * @high_pll: high PLL frequency used by the device.
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
 * @tpc_enabled_mask: bit-mask of enabled TPC engines.
 * @completion_queues_count: number of completion queues.
 */
struct asic_fixed_properties {
	struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
	struct armcp_info armcp_info;
	char uboot_ver[VERSION_MAX_LEN];
	char preboot_ver[VERSION_MAX_LEN];
	struct hl_mmu_properties dmmu;
	struct hl_mmu_properties pmmu;
	struct hl_mmu_properties pmmu_huge;
	u64 sram_base_address;
	u64 sram_end_address;
	u64 sram_user_base_address;
	u64 dram_base_address;
	u64 dram_end_address;
	u64 dram_user_base_address;
	u64 dram_size;
	u64 dram_pci_bar_size;
	u64 max_power_default;
	u64 dram_size_for_default_page_mapping;
	u64 pcie_dbi_base_address;
	u64 pcie_aux_dbi_reg_addr;
	u64 mmu_pgt_addr;
	u64 mmu_dram_default_page_addr;
	u32 mmu_pgt_size;
	u32 mmu_pte_size;
	u32 mmu_hop_table_size;
	u32 mmu_hop0_tables_total_size;
	u32 dram_page_size;
	u32 cfg_size;
	u32 sram_size;
	u32 max_asid;
	u32 num_of_events;
	u32 psoc_pci_pll_nr;
	u32 psoc_pci_pll_nf;
	u32 psoc_pci_pll_od;
	u32 psoc_pci_pll_div_factor;
	u32 high_pll;
	u32 cb_pool_cb_cnt;
	u32 cb_pool_cb_size;
	u8 tpc_enabled_mask;
	u8 completion_queues_count;
};
251
252
253
254
255
256
257
258
/**
 * struct hl_dma_fence - wrapper of a dma_fence object used for command
 *                       submission completions.
 * @base_fence: the embedded kernel dma_fence object.
 * @lock: spinlock protecting the fence (passed to the dma_fence core).
 * @hdev: habanalabs device structure.
 * @cs_seq: sequence number of the command submission this fence belongs to.
 */
struct hl_dma_fence {
	struct dma_fence base_fence;
	spinlock_t lock;
	struct hl_device *hdev;
	u64 cs_seq;
};
265
266
267
268
269
270
271
272
273
274
/**
 * struct hl_cb_mgr - command buffer manager.
 * @cb_lock: protects the idr of command-buffer handles.
 * @cb_handles: idr that maps CB handles to struct hl_cb objects.
 */
struct hl_cb_mgr {
	spinlock_t cb_lock;
	struct idr cb_handles;
};
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/**
 * struct hl_cb - command buffer.
 * @refcount: reference counter of the CB's usage.
 * @hdev: habanalabs device the CB belongs to.
 * @lock: spinlock protecting the CB's mutable fields (e.g. mmap state).
 * @debugfs_list: node in the debugfs list of command buffers.
 * @pool_list: node in the CB pool list of free command buffers.
 * @kernel_address: kernel virtual address of the CB memory.
 * @bus_address: DMA (bus) address of the CB memory.
 * @mmap_size: size that was mapped to user space (valid while mmapped).
 * @size: CB size, in bytes.
 * @id: handle of the CB in the CB manager's idr.
 * @cs_cnt: number of command submissions currently using this CB.
 * @ctx_id: id of the context that owns the CB.
 * @mmap: true if the CB is currently mmapped to user space.
 * @is_pool: true if the CB was taken from the CB pool (returned to the pool
 *           on release instead of being freed).
 */
struct hl_cb {
	struct kref refcount;
	struct hl_device *hdev;
	spinlock_t lock;
	struct list_head debugfs_list;
	struct list_head pool_list;
	u64 kernel_address;
	dma_addr_t bus_address;
	u32 mmap_size;
	u32 size;
	u32 id;
	u32 cs_cnt;
	u32 ctx_id;
	u8 mmap;
	u8 is_pool;
};
313
314
315
316
317
318
319struct hl_cs_job;
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340#define HL_PAGE_SIZE 4096
341
342#define HL_QUEUE_LENGTH 256
343#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)
344
345
346
347
348
349#define HL_CQ_LENGTH HL_QUEUE_LENGTH
350#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
351
352
353#define HL_EQ_LENGTH 64
354#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
355
356
357#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/**
 * struct hl_hw_queue - H/W queue descriptor.
 * @shadow_queue: per-slot array of pointers to the jobs currently in the
 *                queue (shadow of the H/W queue contents).
 * @queue_type: type of the queue (see enum hl_queue_type).
 * @kernel_address: kernel virtual address of the queue ring buffer.
 * @bus_address: DMA (bus) address of the queue ring buffer.
 * @pi: producer index (next slot to write).
 * @ci: consumer index (next slot to complete).
 * @hw_queue_id: id of the queue in H/W.
 * @int_queue_len: length of internal queues, in entries.
 * @valid: true if the queue was initialized and can be used.
 */
struct hl_hw_queue {
	struct hl_cs_job **shadow_queue;
	enum hl_queue_type queue_type;
	u64 kernel_address;
	dma_addr_t bus_address;
	u32 pi;
	u32 ci;
	u32 hw_queue_id;
	u16 int_queue_len;
	u8 valid;
};
383
384
385
386
387
388
389
390
391
392
393
/**
 * struct hl_cq - completion queue.
 * @hdev: habanalabs device.
 * @kernel_address: kernel virtual address of the CQ ring buffer.
 * @bus_address: DMA (bus) address of the CQ ring buffer.
 * @hw_queue_id: id of the H/W queue this CQ serves.
 * @ci: consumer index.
 * @pi: producer index.
 * @free_slots_cnt: number of free slots in the CQ; decremented on
 *                  submission, incremented on completion (atomic because it
 *                  is updated from both submission and IRQ paths).
 */
struct hl_cq {
	struct hl_device *hdev;
	u64 kernel_address;
	dma_addr_t bus_address;
	u32 hw_queue_id;
	u32 ci;
	u32 pi;
	atomic_t free_slots_cnt;
};
403
404
405
406
407
408
409
410
/**
 * struct hl_eq - event queue (events coming from the device's CPU).
 * @hdev: habanalabs device.
 * @kernel_address: kernel virtual address of the EQ ring buffer.
 * @bus_address: DMA (bus) address of the EQ ring buffer.
 * @ci: consumer index.
 */
struct hl_eq {
	struct hl_device *hdev;
	u64 kernel_address;
	dma_addr_t bus_address;
	u32 ci;
};
417
418
419
420
421
422
423
424
425
426
427
428
/**
 * enum hl_asic_type - supported ASIC types.
 * @ASIC_INVALID: invalid/unknown ASIC.
 * @ASIC_GOYA: Goya device.
 * @ASIC_GAUDI: Gaudi device.
 */
enum hl_asic_type {
	ASIC_INVALID,
	ASIC_GOYA,
	ASIC_GAUDI
};
434
435struct hl_cs_parser;
436
437
438
439
440
441
442
/**
 * enum hl_pm_mng_profile - power management profile.
 * @PM_AUTO: PLL frequency is managed automatically by the driver.
 * @PM_MANUAL: PLL frequency is set manually (e.g. via sysfs).
 * @PM_LAST: sentinel, must be last.
 */
enum hl_pm_mng_profile {
	PM_AUTO = 1,
	PM_MANUAL,
	PM_LAST
};
448
449
450
451
452
453
454
/**
 * enum hl_pll_frequency - PLL frequency profile.
 * @PLL_HIGH: high frequency.
 * @PLL_LOW: low frequency.
 * @PLL_LAST: sentinel, must be last.
 */
enum hl_pll_frequency {
	PLL_HIGH = 1,
	PLL_LOW,
	PLL_LAST
};
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
/**
 * struct hl_asic_funcs - ASIC-specific operations implemented by each
 *                        supported device (e.g. Goya, Gaudi).
 *
 * Lifecycle: @early_init/@early_fini run before/after PCI setup,
 * @sw_init/@sw_fini handle driver S/W state, @hw_init/@hw_fini bring the
 * H/W up/down (@hw_fini and @halt_engines take a hard-reset indication),
 * and @late_init/@late_fini run after H/W init. @suspend/@resume implement
 * PM callbacks. @soft_reset_late_init re-initializes what a soft reset
 * requires.
 *
 * Queues and submission: @ring_doorbell notifies H/W of a new producer
 * index, @pqe_write writes a queue entry (handles endianness via __le64),
 * @test_queues sanity-checks the queues, @cs_parser validates/patches a
 * command submission, @add_end_of_cb_packets appends the CB's closing
 * packets, and @hw_queues_lock/@hw_queues_unlock protect queue access.
 *
 * DMA/memory: @asic_dma_alloc_coherent/@asic_dma_free_coherent,
 * @asic_dma_pool_zalloc/@asic_dma_pool_free,
 * @cpu_accessible_dma_pool_alloc/@cpu_accessible_dma_pool_free,
 * @asic_dma_map_sg/@hl_dma_unmap_sg and @get_dma_desc_list_size wrap the
 * ASIC-specific DMA handling. @get_int_queue_base returns an internal
 * queue's base address.
 *
 * MMU: @read_pte/@write_pte access page-table entries,
 * @mmu_invalidate_cache/@mmu_invalidate_cache_range flush the MMU cache.
 * @context_switch switches the device to a new ASID and
 * @restore_phase_topology restores the topology after a context switch.
 *
 * Events/monitoring: @handle_eqe processes an event-queue entry,
 * @update_eq_ci updates the EQ consumer index, @get_events_stat returns
 * event statistics, @send_heartbeat and @send_cpu_message talk to the
 * device CPU, @get_eeprom_data reads the EEPROM, @is_device_idle reports
 * engine idleness, and @get_hw_state returns the H/W state.
 *
 * Debug: @debugfs_read32/@debugfs_write32/@debugfs_read64/@debugfs_write64
 * access device memory for debugfs, @debug_coresight configures coresight,
 * @halt_coresight stops it, @add_device_attr adds sysfs attributes.
 *
 * PCI/registers: @pci_bars_map, @set_dram_bar_base and @init_iatu handle
 * PCI setup, @rreg/@wreg are the register accessors used by RREG32/WREG32,
 * @get_pci_id returns the PCI device id, @cb_mmap maps a CB to user space.
 *
 * Power: @set_pll_profile sets the PLL profile and @get_clk_rate returns
 * current/max clock rates.
 */
struct hl_asic_funcs {
	int (*early_init)(struct hl_device *hdev);
	int (*early_fini)(struct hl_device *hdev);
	int (*late_init)(struct hl_device *hdev);
	void (*late_fini)(struct hl_device *hdev);
	int (*sw_init)(struct hl_device *hdev);
	int (*sw_fini)(struct hl_device *hdev);
	int (*hw_init)(struct hl_device *hdev);
	void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
	int (*suspend)(struct hl_device *hdev);
	int (*resume)(struct hl_device *hdev);
	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
			u64 kaddress, phys_addr_t paddress, u32 size);
	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
			struct hl_bd *bd);
	void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flag);
	void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle);
	void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len);
	int (*test_queues)(struct hl_device *hdev);
	void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle);
	void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr);
	void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
				size_t size, dma_addr_t *dma_handle);
	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
				size_t size, void *vaddr);
	void (*hl_dma_unmap_sg)(struct hl_device *hdev,
				struct scatterlist *sgl, int nents,
				enum dma_data_direction dir);
	int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
	int (*asic_dma_map_sg)(struct hl_device *hdev,
				struct scatterlist *sgl, int nents,
				enum dma_data_direction dir);
	u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
					struct sg_table *sgt);
	void (*add_end_of_cb_packets)(struct hl_device *hdev,
					u64 kernel_address, u32 len,
					u64 cq_addr, u32 cq_val, u32 msix_num);
	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
	int (*context_switch)(struct hl_device *hdev, u32 asid);
	void (*restore_phase_topology)(struct hl_device *hdev);
	int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
	int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
	int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
	int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
	void (*add_device_attr)(struct hl_device *hdev,
				struct attribute_group *dev_attr_grp);
	void (*handle_eqe)(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry);
	void (*set_pll_profile)(struct hl_device *hdev,
			enum hl_pll_frequency freq);
	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
				u32 *size);
	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
					u32 flags);
	void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
			u32 asid, u64 va, u64 size);
	int (*send_heartbeat)(struct hl_device *hdev);
	int (*debug_coresight)(struct hl_device *hdev, void *data);
	bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
				struct seq_file *s);
	int (*soft_reset_late_init)(struct hl_device *hdev);
	void (*hw_queues_lock)(struct hl_device *hdev);
	void (*hw_queues_unlock)(struct hl_device *hdev);
	u32 (*get_pci_id)(struct hl_device *hdev);
	int (*get_eeprom_data)(struct hl_device *hdev, void *data,
				size_t max_size);
	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, long *result);
	enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
	int (*pci_bars_map)(struct hl_device *hdev);
	u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
	int (*init_iatu)(struct hl_device *hdev);
	u32 (*rreg)(struct hl_device *hdev, u32 reg);
	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
	void (*halt_coresight)(struct hl_device *hdev);
	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
};
624
625
626
627
628
629
630#define HL_KERNEL_ASID_ID 0
631
632
633
634
635
636
637
638
/**
 * struct hl_va_range - virtual-address range for allocations.
 * @lock: protects the list of free VA blocks.
 * @list: list of free virtual-address blocks (struct hl_vm_va_block).
 * @start_addr: start of the range.
 * @end_addr: end of the range.
 */
struct hl_va_range {
	struct mutex lock;
	struct list_head list;
	u64 start_addr;
	u64 end_addr;
};
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
/**
 * struct hl_ctx - user/kernel context.
 * @mem_hash: hash of memory-mapping handles (protected by @mem_hash_lock).
 * @mmu_phys_hash: hash of device-resident MMU hops (struct pgt_info).
 * @mmu_shadow_hash: hash of host shadow copies of MMU hops.
 * @hpriv: pointer to the owning file's private data (user contexts).
 * @hdev: habanalabs device.
 * @refcount: reference counter of the context.
 * @cs_pending: array of in-flight CS fences, indexed by sequence modulo
 *              HL_MAX_PENDING_CS.
 * @host_va_range: VA range for host (regular-page) mappings.
 * @host_huge_va_range: VA range for host huge-page mappings.
 * @dram_va_range: VA range for DRAM mappings.
 * @mem_hash_lock: protects @mem_hash.
 * @mmu_lock: protects the MMU page tables of this context.
 * @debugfs_list: node in the debugfs list of contexts.
 * @cs_sequence: sequence number for the next command submission
 *               (protected by @cs_lock).
 * @dram_default_hops: shadow hop addresses for the DRAM default page
 *                     mapping (used when dram_default_page_mapping is set).
 * @cs_lock: protects @cs_sequence.
 * @dram_phys_mem: amount of DRAM physical memory allocated by the context.
 * @thread_ctx_switch_token: token to serialize the one-time context-switch
 *                           flow between concurrent submitting threads.
 * @thread_ctx_switch_wait_token: token other threads wait on while the
 *                                context switch is in progress.
 * @asid: the context's address-space id.
 * @handle: the context's handle in the context manager's idr.
 */
struct hl_ctx {
	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
	DECLARE_HASHTABLE(mmu_phys_hash, MMU_HASH_TABLE_BITS);
	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;
	struct kref refcount;
	struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
	struct hl_va_range *host_va_range;
	struct hl_va_range *host_huge_va_range;
	struct hl_va_range *dram_va_range;
	struct mutex mem_hash_lock;
	struct mutex mmu_lock;
	struct list_head debugfs_list;
	u64 cs_sequence;
	u64 *dram_default_hops;
	spinlock_t cs_lock;
	atomic64_t dram_phys_mem;
	atomic_t thread_ctx_switch_token;
	u32 thread_ctx_switch_wait_token;
	u32 asid;
	u32 handle;
};
705
706
707
708
709
710
/**
 * struct hl_ctx_mgr - context manager (per open file).
 * @ctx_lock: protects the idr of context handles.
 * @ctx_handles: idr that maps context handles to struct hl_ctx objects.
 */
struct hl_ctx_mgr {
	struct mutex ctx_lock;
	struct idr ctx_handles;
};
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
/**
 * struct hl_userptr - pinned host memory descriptor.
 * @vm_type: must be VM_TYPE_USERPTR; placed first so the object can be
 *           distinguished from struct hl_vm_phys_pg_pack via a common
 *           pointer.
 * @job_node: node in a job's list of userptrs.
 * @vec: frame vector holding the pinned pages.
 * @sgt: scatter-gather table describing the pinned pages.
 * @dir: DMA direction the memory was (or will be) mapped with.
 * @debugfs_list: node in the debugfs list of userptrs.
 * @addr: user virtual address of the memory.
 * @size: size of the memory, in bytes.
 * @dma_mapped: true if the pages are currently DMA-mapped.
 */
struct hl_userptr {
	enum vm_type_t vm_type;
	struct list_head job_node;
	struct frame_vector *vec;
	struct sg_table *sgt;
	enum dma_data_direction dir;
	struct list_head debugfs_list;
	u64 addr;
	u32 size;
	u8 dma_mapped;
};
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
/**
 * struct hl_cs - command submission.
 * @jobs_in_queue_cnt: per-queue count of jobs belonging to this CS.
 * @ctx: the context this CS was issued on.
 * @job_list: list of the CS's jobs (protected by @job_lock).
 * @job_lock: protects @job_list; needed because jobs can complete from IRQ
 *            context while the list is manipulated.
 * @refcount: reference counter of the CS.
 * @fence: fence signaled when the CS completes.
 * @work_tdr: delayed work for timeout detection (TDR).
 * @mirror_node: node in the device's mirror list of submissions.
 * @debugfs_list: node in the debugfs list of command submissions.
 * @sequence: the CS's sequence number.
 * @submitted: true if the CS was submitted to H/W.
 * @completed: true if the CS completed.
 * @timedout: true if the CS timed out.
 * @tdr_active: true if the TDR delayed work is currently armed.
 * @aborted: true if the CS was aborted (e.g. during reset).
 */
struct hl_cs {
	u16 jobs_in_queue_cnt[HL_MAX_QUEUES];
	struct hl_ctx *ctx;
	struct list_head job_list;
	spinlock_t job_lock;
	struct kref refcount;
	struct dma_fence *fence;
	struct delayed_work work_tdr;
	struct list_head mirror_node;
	struct list_head debugfs_list;
	u64 sequence;
	u8 submitted;
	u8 completed;
	u8 timedout;
	u8 tdr_active;
	u8 aborted;
};
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
/**
 * struct hl_cs_job - single job inside a command submission.
 * @cs_node: node in the CS's job list.
 * @cs: the command submission this job belongs to.
 * @user_cb: the command buffer handed in by the user.
 * @patched_cb: the driver-patched command buffer actually submitted
 *              (when parsing/patching is required).
 * @finish_work: work item queued when the job finishes.
 * @userptr_list: list of host memory pinned for this job.
 * @debugfs_list: node in the debugfs list of jobs.
 * @queue_type: type of the queue the job is submitted to.
 * @id: the job's id inside its CS.
 * @hw_queue_id: id of the target H/W queue.
 * @user_cb_size: size of @user_cb, in bytes.
 * @job_cb_size: size of the CB that is actually submitted.
 * @is_kernel_allocated_cb: true if the CB was allocated by the kernel
 *                          (driver) rather than pointing at user memory.
 */
struct hl_cs_job {
	struct list_head cs_node;
	struct hl_cs *cs;
	struct hl_cb *user_cb;
	struct hl_cb *patched_cb;
	struct work_struct finish_work;
	struct list_head userptr_list;
	struct list_head debugfs_list;
	enum hl_queue_type queue_type;
	u32 id;
	u32 hw_queue_id;
	u32 user_cb_size;
	u32 job_cb_size;
	u8 is_kernel_allocated_cb;
};
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
/**
 * struct hl_cs_parser - command submission parser state, passed to the
 *                       ASIC's cs_parser() op.
 * @user_cb: the command buffer submitted by the user.
 * @patched_cb: output - CB patched by the parser, if patching was needed.
 * @job_userptr_list: list to collect host memory pinned during parsing.
 * @cs_sequence: sequence number of the CS being parsed.
 * @queue_type: type of the target queue.
 * @ctx_id: id of the submitting context.
 * @hw_queue_id: id of the target H/W queue.
 * @user_cb_size: size of @user_cb, in bytes.
 * @patched_cb_size: output - size of the patched CB.
 * @job_id: id of the job being parsed.
 * @is_kernel_allocated_cb: true if @user_cb was allocated by the driver.
 */
struct hl_cs_parser {
	struct hl_cb *user_cb;
	struct hl_cb *patched_cb;
	struct list_head *job_userptr_list;
	u64 cs_sequence;
	enum hl_queue_type queue_type;
	u32 ctx_id;
	u32 hw_queue_id;
	u32 user_cb_size;
	u32 patched_cb_size;
	u8 job_id;
	u8 is_kernel_allocated_cb;
};
850
851
852
853
854
855
856
857
858
859
860
861
862
863
/**
 * struct hl_vm_hash_node - mapping from a device virtual address to its
 *                          backing memory object.
 * @node: hash-table node (keyed by @vaddr in the context's mem_hash).
 * @vaddr: device virtual address of the mapping.
 * @ptr: the backing object - a struct hl_userptr or
 *       struct hl_vm_phys_pg_pack, distinguished by its leading vm_type.
 */
struct hl_vm_hash_node {
	struct hlist_node node;
	u64 vaddr;
	void *ptr;
};
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
/**
 * struct hl_vm_phys_pg_pack - pack of device physical pages.
 * @vm_type: must be VM_TYPE_PHYS_PACK; placed first so the object can be
 *           distinguished from struct hl_userptr via a common pointer.
 * @pages: array of the pages' physical addresses.
 * @npages: number of pages in @pages.
 * @total_size: total size of the pack, in bytes.
 * @mapping_cnt: number of virtual mappings currently using this pack.
 * @asid: ASID of the owning context.
 * @page_size: size of each page, in bytes.
 * @flags: allocation flags as supplied by the user.
 * @handle: handle of the pack in the VM idr.
 * @offset: offset of the first relevant byte inside the first page.
 * @contiguous: true if the pages are physically contiguous.
 * @created_from_userptr: true if the pack was built from pinned host
 *                        memory rather than allocated DRAM.
 */
struct hl_vm_phys_pg_pack {
	enum vm_type_t vm_type;
	u64 *pages;
	u64 npages;
	u64 total_size;
	atomic_t mapping_cnt;
	u32 asid;
	u32 page_size;
	u32 flags;
	u32 handle;
	u32 offset;
	u8 contiguous;
	u8 created_from_userptr;
};
899
900
901
902
903
904
905
906
/**
 * struct hl_vm_va_block - a free block in a virtual-address range.
 * @node: node in the VA range's free list.
 * @start: start virtual address of the block.
 * @end: end virtual address of the block.
 * @size: size of the block, in bytes.
 */
struct hl_vm_va_block {
	struct list_head node;
	u64 start;
	u64 end;
	u64 size;
};
913
914
915
916
917
918
919
920
921
922
/**
 * struct hl_vm - device virtual-memory manager.
 * @dram_pg_pool: gen_pool for DRAM physical-page allocations.
 * @dram_pg_pool_refcount: reference counter of @dram_pg_pool users.
 * @idr_lock: protects @phys_pg_pack_handles.
 * @phys_pg_pack_handles: idr mapping handles to physical page packs.
 * @init_done: true once hl_vm_init() completed successfully.
 */
struct hl_vm {
	struct gen_pool *dram_pg_pool;
	struct kref dram_pg_pool_refcount;
	spinlock_t idr_lock;
	struct idr phys_pg_pack_handles;
	u8 init_done;
};
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
/**
 * struct hl_debug_params - parameters for a debug (coresight) operation,
 *                          passed to the ASIC's debug_coresight() op.
 * @input: opcode-specific input buffer.
 * @output: opcode-specific output buffer.
 * @output_size: size of @output, in bytes.
 * @reg_idx: index of the debug register/component to operate on.
 * @op: the debug operation opcode.
 * @enable: true to enable the debug component, false to disable it.
 */
struct hl_debug_params {
	void *input;
	void *output;
	u32 output_size;
	u32 reg_idx;
	u32 op;
	bool enable;
};
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
/**
 * struct hl_fpriv - per-open-file private data.
 * @hdev: habanalabs device.
 * @filp: the open file.
 * @taskpid: pid of the opening process (held for the file's lifetime).
 * @ctx: the context created for this file.
 * @ctx_mgr: manager of the file's contexts.
 * @cb_mgr: manager of the file's command buffers.
 * @debugfs_list: node in the debugfs list of open files.
 * @dev_node: node in the device's list of open files.
 * @refcount: reference counter of this object.
 * @restore_phase_mutex: serializes the context-restore phase between
 *                       submitting threads.
 * @is_control: true if the file was opened via the control device node
 *              (cdev_ctrl) rather than the compute node.
 */
struct hl_fpriv {
	struct hl_device *hdev;
	struct file *filp;
	struct pid *taskpid;
	struct hl_ctx *ctx;
	struct hl_ctx_mgr ctx_mgr;
	struct hl_cb_mgr cb_mgr;
	struct list_head debugfs_list;
	struct list_head dev_node;
	struct kref refcount;
	struct mutex restore_phase_mutex;
	u8 is_control;
};
985
986
987
988
989
990
991
992
993
994
995
996
/**
 * struct hl_info_list - debugfs file description.
 * @name: file name.
 * @show: seq_file show callback for reads.
 * @write: optional write callback for the file.
 */
struct hl_info_list {
	const char *name;
	int (*show)(struct seq_file *s, void *data);
	ssize_t (*write)(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos);
};
1003
1004
1005
1006
1007
1008
1009
/**
 * struct hl_debugfs_entry - a single debugfs file instance.
 * @dent: the created debugfs dentry.
 * @info_ent: the file description (name + callbacks).
 * @dev_entry: the device's debugfs root entry this file belongs to.
 */
struct hl_debugfs_entry {
	struct dentry *dent;
	const struct hl_info_list *info_ent;
	struct hl_dbg_device_entry *dev_entry;
};
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
/**
 * struct hl_dbg_device_entry - per-device debugfs state.
 * @root: the device's debugfs root directory.
 * @hdev: habanalabs device.
 * @entry_arr: array of the device's debugfs file entries.
 * @file_list: list of open files registered with debugfs.
 * @file_mutex: protects @file_list.
 * @cb_list: list of command buffers shown in debugfs.
 * @cb_spinlock: protects @cb_list.
 * @cs_list: list of command submissions shown in debugfs.
 * @cs_spinlock: protects @cs_list.
 * @cs_job_list: list of CS jobs shown in debugfs.
 * @cs_job_spinlock: protects @cs_job_list.
 * @userptr_list: list of pinned host memory objects shown in debugfs.
 * @userptr_spinlock: protects @userptr_list.
 * @ctx_mem_hash_list: list of contexts whose memory hash is exposed.
 * @ctx_mem_hash_spinlock: protects @ctx_mem_hash_list.
 * @addr: address saved for the data32/data64 read/write files.
 * @mmu_addr: virtual address saved for the MMU translation file.
 * @mmu_asid: ASID saved for the MMU translation file.
 * @i2c_bus: I2C bus saved for the i2c debugfs files.
 * @i2c_addr: I2C address saved for the i2c debugfs files.
 * @i2c_reg: I2C register saved for the i2c debugfs files.
 */
struct hl_dbg_device_entry {
	struct dentry *root;
	struct hl_device *hdev;
	struct hl_debugfs_entry *entry_arr;
	struct list_head file_list;
	struct mutex file_mutex;
	struct list_head cb_list;
	spinlock_t cb_spinlock;
	struct list_head cs_list;
	spinlock_t cs_spinlock;
	struct list_head cs_job_list;
	spinlock_t cs_job_spinlock;
	struct list_head userptr_list;
	spinlock_t userptr_spinlock;
	struct list_head ctx_mem_hash_list;
	spinlock_t ctx_mem_hash_spinlock;
	u64 addr;
	u64 mmu_addr;
	u32 mmu_asid;
	u8 i2c_bus;
	u8 i2c_addr;
	u8 i2c_reg;
};
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072#define HL_MAX_MINORS 256
1073
1074
1075
1076
1077
1078u32 hl_rreg(struct hl_device *hdev, u32 reg);
1079void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
1080
1081#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
1082#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
1083#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
1084 hdev->asic_funcs->rreg(hdev, (reg)))
1085
1086#define WREG32_P(reg, val, mask) \
1087 do { \
1088 u32 tmp_ = RREG32(reg); \
1089 tmp_ &= (mask); \
1090 tmp_ |= ((val) & ~(mask)); \
1091 WREG32(reg, tmp_); \
1092 } while (0)
1093#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
1094#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
1095
1096#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
1097#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
1098#define WREG32_FIELD(reg, offset, field, val) \
1099 WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
1100 ~REG_FIELD_MASK(reg, field)) | \
1101 (val) << REG_FIELD_SHIFT(reg, field))
1102
1103
1104
1105
/*
 * hl_poll_timeout - poll a device register until a condition is met or a
 * timeout expires.
 *
 * Reads @addr via RREG32 into @val in a loop until @cond is true. Returns
 * 0 on success, -ETIMEDOUT otherwise. On timeout the register is read one
 * last time so @val holds a value sampled after the deadline. When running
 * on a simulator (no PCI device, !hdev->pdev) the timeout is extended
 * (x10, capped at HL_SIM_MAX_TIMEOUT_US). May sleep if @sleep_us != 0.
 * NOTE: @cond and @sleep_us are evaluated multiple times - avoid
 * side effects in the arguments.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = RREG32(addr); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = RREG32(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
/*
 * hl_poll_timeout_memory - poll a host (coherent) memory location until a
 * condition is met or a timeout expires.
 *
 * Reads the u32 at @addr into @val in a loop until @cond is true. When
 * @mem_written_by_device is set, the value is converted from little-endian
 * (the device writes LE). The mb() before each read orders it against
 * other accesses - needed because the memory is written by the device
 * outside the CPU's view. Timeout handling and the simulator extension
 * match hl_poll_timeout(). Returns 0 on success, -ETIMEDOUT otherwise.
 * NOTE: macro arguments are evaluated multiple times.
 */
#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
				mem_written_by_device) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		/* Verify we read updates done by other cores or by device */ \
		mb(); \
		(val) = *((u32 *) (uintptr_t) (addr)); \
		if (mem_written_by_device) \
			(val) = le32_to_cpu(*(__le32 *) &(val)); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = *((u32 *) (uintptr_t) (addr)); \
			if (mem_written_by_device) \
				(val) = le32_to_cpu(*(__le32 *) &(val)); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
1172
/*
 * hl_poll_timeout_device_memory - poll a device (MMIO-mapped) memory
 * location until a condition is met or a timeout expires.
 *
 * Like hl_poll_timeout(), but reads through readl() on an __iomem @addr
 * instead of RREG32 on a register offset. Returns 0 on success,
 * -ETIMEDOUT otherwise; the simulator timeout extension also applies.
 * NOTE: macro arguments are evaluated multiple times.
 */
#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
					timeout_us) \
({ \
	ktime_t __timeout; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
		__timeout = ktime_add_us(ktime_get(),\
				min((u64)(timeout_us * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = readl(addr); \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = readl(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
1197
1198struct hwmon_chip_info;
1199
1200
1201
1202
1203
1204
/**
 * struct hl_device_reset_work - work item carrying a device hard-reset
 *                               request to a worker thread.
 * @reset_work: the work object.
 * @hdev: the device to reset.
 */
struct hl_device_reset_work {
	struct work_struct reset_work;
	struct hl_device *hdev;
};
1209
1210
1211
1212
1213
1214
/**
 * struct hl_device_idle_busy_ts - one idle/busy interval record used for
 *                                 utilization accounting.
 * @idle_to_busy_ts: timestamp of the idle -> busy transition.
 * @busy_to_idle_ts: timestamp of the busy -> idle transition.
 */
struct hl_device_idle_busy_ts {
	ktime_t idle_to_busy_ts;
	ktime_t busy_to_idle_ts;
};
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
/**
 * struct hl_device - main habanalabs device structure.
 * @pdev: the PCI device (NULL when running on a simulator - see the
 *        hl_poll_timeout* macros).
 * @pcie_bar: iomapped PCI BARs.
 * @rmmio: configuration-space mapping used for register access.
 * @cdev: char device for the compute node.
 * @cdev_ctrl: char device for the control node.
 * @dev: device structure of the compute node.
 * @dev_ctrl: device structure of the control node.
 * @work_freq: delayed work for automatic PLL frequency management.
 * @work_heartbeat: delayed work for the device-CPU heartbeat check.
 * @asic_name: ASIC name for display purposes.
 * @asic_type: ASIC type (Goya/Gaudi).
 * @completion_queue: array of completion queues.
 * @cq_wq: workqueue for completion-queue handling.
 * @eq_wq: workqueue for event-queue handling.
 * @kernel_ctx: the kernel's context (HL_KERNEL_ASID_ID).
 * @kernel_queues: array of the device's H/W queues.
 * @hw_queues_mirror_list: mirror list of in-flight command submissions.
 * @hw_queues_mirror_lock: protects @hw_queues_mirror_list.
 * @kernel_cb_mgr: command-buffer manager for driver-allocated CBs.
 * @event_queue: the event queue (events from the device CPU).
 * @dma_pool: DMA pool for small coherent allocations.
 * @cpu_accessible_dma_mem: base of the CPU-accessible DMA region.
 * @cpu_accessible_dma_address: DMA address of that region.
 * @cpu_accessible_dma_pool: gen_pool carving up that region.
 * @asid_bitmap: bitmap of in-use ASIDs.
 * @asid_mutex: protects ASID allocation.
 * @send_cpu_message_lock: serializes messages to the device CPU.
 * @debug_lock: serializes debug-mode enable/disable.
 * @asic_prop: fixed per-ASIC properties.
 * @asic_funcs: ASIC-specific operations.
 * @asic_specific: ASIC-private data.
 * @mmu_pgt_pool: gen_pool of device memory for MMU page tables.
 * @vm: virtual-memory manager.
 * @mmu_cache_lock: protects MMU cache invalidation.
 * @mmu_shadow_hop0: host shadow of all hop-0 page tables.
 * @hwmon_dev: hwmon device.
 * @pm_mng_profile: current power-management profile (auto/manual).
 * @hl_chip_info: hwmon chip/channel info.
 * @hl_debugfs: the device's debugfs state.
 * @cb_pool: pool of pre-allocated command buffers.
 * @cb_pool_lock: protects @cb_pool.
 * @fpriv_list: list of open files (struct hl_fpriv) on this device.
 * @fpriv_list_lock: protects @fpriv_list.
 * @compute_ctx: the single active compute context (NULL if none).
 * @idle_busy_ts_arr: circular buffer of idle/busy interval records.
 * @dram_used_mem: amount of DRAM currently allocated.
 * @timeout_jiffies: CS timeout, in jiffies.
 * @max_power: maximum power the device may consume.
 * @in_reset: non-zero while a reset is in progress.
 * @curr_pll_profile: current PLL frequency profile.
 * @cs_active_cnt: number of command submissions currently executing.
 * @major: char-device major number.
 * @high_pll: high PLL frequency value.
 * @soft_reset_cnt: number of soft resets since load.
 * @hard_reset_cnt: number of hard resets since load.
 * @idle_busy_ts_idx: current write index into @idle_busy_ts_arr.
 * @id: device minor id (compute node).
 * @id_control: device minor id (control node).
 * @disabled: true when the device is disabled (no new submissions).
 * @late_init_done: true once ASIC late init completed.
 * @hwmon_initialized: true once hwmon was registered.
 * @hard_reset_pending: true while a hard reset is pending/in progress.
 * @heartbeat: true if the CPU heartbeat mechanism is enabled.
 * @reset_on_lockup: true to hard-reset the device on a CS lockup.
 * @dram_supports_virtual_memory: true if DRAM is behind the MMU.
 * @dram_default_page_mapping: true if a default DRAM page mapping is used.
 * @pmmu_huge_range: true if the host MMU has a separate huge-page range.
 * @init_done: true once device init completed.
 * @device_cpu_disabled: true when communication with the device CPU was
 *                       disabled (e.g. after a CPU timeout).
 * @dma_mask: number of address bits in the DMA mask.
 * @in_debug: true while a process has the device in debug mode.
 * @cdev_sysfs_created: true once the char devices and sysfs were created.
 */
struct hl_device {
	struct pci_dev *pdev;
	void __iomem *pcie_bar[6];
	void __iomem *rmmio;
	struct cdev cdev;
	struct cdev cdev_ctrl;
	struct device *dev;
	struct device *dev_ctrl;
	struct delayed_work work_freq;
	struct delayed_work work_heartbeat;
	char asic_name[16];
	enum hl_asic_type asic_type;
	struct hl_cq *completion_queue;
	struct workqueue_struct *cq_wq;
	struct workqueue_struct *eq_wq;
	struct hl_ctx *kernel_ctx;
	struct hl_hw_queue *kernel_queues;
	struct list_head hw_queues_mirror_list;
	spinlock_t hw_queues_mirror_lock;
	struct hl_cb_mgr kernel_cb_mgr;
	struct hl_eq event_queue;
	struct dma_pool *dma_pool;
	void *cpu_accessible_dma_mem;
	dma_addr_t cpu_accessible_dma_address;
	struct gen_pool *cpu_accessible_dma_pool;
	unsigned long *asid_bitmap;
	struct mutex asid_mutex;
	struct mutex send_cpu_message_lock;
	struct mutex debug_lock;
	struct asic_fixed_properties asic_prop;
	const struct hl_asic_funcs *asic_funcs;
	void *asic_specific;
	struct gen_pool *mmu_pgt_pool;
	struct hl_vm vm;
	struct mutex mmu_cache_lock;
	void *mmu_shadow_hop0;
	struct device *hwmon_dev;
	enum hl_pm_mng_profile pm_mng_profile;
	struct hwmon_chip_info *hl_chip_info;

	struct hl_dbg_device_entry hl_debugfs;

	struct list_head cb_pool;
	spinlock_t cb_pool_lock;

	struct list_head fpriv_list;
	struct mutex fpriv_list_lock;

	struct hl_ctx *compute_ctx;

	struct hl_device_idle_busy_ts *idle_busy_ts_arr;

	atomic64_t dram_used_mem;
	u64 timeout_jiffies;
	u64 max_power;
	atomic_t in_reset;
	enum hl_pll_frequency curr_pll_profile;
	int cs_active_cnt;
	u32 major;
	u32 high_pll;
	u32 soft_reset_cnt;
	u32 hard_reset_cnt;
	u32 idle_busy_ts_idx;
	u16 id;
	u16 id_control;
	u8 disabled;
	u8 late_init_done;
	u8 hwmon_initialized;
	u8 hard_reset_pending;
	u8 heartbeat;
	u8 reset_on_lockup;
	u8 dram_supports_virtual_memory;
	u8 dram_default_page_mapping;
	u8 pmmu_huge_range;
	u8 init_done;
	u8 device_cpu_disabled;
	u8 dma_mask;
	u8 in_debug;
	u8 cdev_sysfs_created;

	/* Parameters for bring-up */
	u8 mmu_enable;
	u8 cpu_enable;
	u8 reset_pcilink;
	u8 cpu_queues_enable;
	u8 fw_loading;
	u8 pldm;
};
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
1407
1408
1409
1410
1411
1412
/**
 * struct hl_ioctl_desc - ioctl dispatch-table entry.
 * @cmd: the ioctl command number.
 * @func: handler for the command.
 */
struct hl_ioctl_desc {
	unsigned int cmd;
	hl_ioctl_t *func;
};
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432static inline bool hl_mem_area_inside_range(u64 address, u32 size,
1433 u64 range_start_address, u64 range_end_address)
1434{
1435 u64 end_address = address + size;
1436
1437 if ((address >= range_start_address) &&
1438 (end_address <= range_end_address) &&
1439 (end_address > address))
1440 return true;
1441
1442 return false;
1443}
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
1456 u64 range_start_address, u64 range_end_address)
1457{
1458 u64 end_address = address + size;
1459
1460 if ((address >= range_start_address) &&
1461 (address < range_end_address))
1462 return true;
1463
1464 if ((end_address >= range_start_address) &&
1465 (end_address < range_end_address))
1466 return true;
1467
1468 if ((address < range_start_address) &&
1469 (end_address >= range_end_address))
1470 return true;
1471
1472 return false;
1473}
1474
1475int hl_device_open(struct inode *inode, struct file *filp);
1476int hl_device_open_ctrl(struct inode *inode, struct file *filp);
1477bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
1478enum hl_device_status hl_device_status(struct hl_device *hdev);
1479int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
1480int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
1481 enum hl_asic_type asic_type, int minor);
1482void destroy_hdev(struct hl_device *hdev);
1483int hl_hw_queues_create(struct hl_device *hdev);
1484void hl_hw_queues_destroy(struct hl_device *hdev);
1485int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
1486 u32 cb_size, u64 cb_ptr);
1487int hl_hw_queue_schedule_cs(struct hl_cs *cs);
1488u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
1489void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
1490void hl_int_hw_queue_update_ci(struct hl_cs *cs);
1491void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
1492
1493#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
1494#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
1495
1496int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
1497void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
1498int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
1499void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
1500void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
1501void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
1502irqreturn_t hl_irq_handler_cq(int irq, void *arg);
1503irqreturn_t hl_irq_handler_eq(int irq, void *arg);
1504u32 hl_cq_inc_ptr(u32 ptr);
1505
1506int hl_asid_init(struct hl_device *hdev);
1507void hl_asid_fini(struct hl_device *hdev);
1508unsigned long hl_asid_alloc(struct hl_device *hdev);
1509void hl_asid_free(struct hl_device *hdev, unsigned long asid);
1510
1511int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
1512void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
1513int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
1514void hl_ctx_do_release(struct kref *ref);
1515void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
1516int hl_ctx_put(struct hl_ctx *ctx);
1517struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
1518void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
1519void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
1520
1521int hl_device_init(struct hl_device *hdev, struct class *hclass);
1522void hl_device_fini(struct hl_device *hdev);
1523int hl_device_suspend(struct hl_device *hdev);
1524int hl_device_resume(struct hl_device *hdev);
1525int hl_device_reset(struct hl_device *hdev, bool hard_reset,
1526 bool from_hard_reset_thread);
1527void hl_hpriv_get(struct hl_fpriv *hpriv);
1528void hl_hpriv_put(struct hl_fpriv *hpriv);
1529int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
1530uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
1531
1532int hl_build_hwmon_channel_info(struct hl_device *hdev,
1533 struct armcp_sensor *sensors_arr);
1534
1535int hl_sysfs_init(struct hl_device *hdev);
1536void hl_sysfs_fini(struct hl_device *hdev);
1537
1538int hl_hwmon_init(struct hl_device *hdev);
1539void hl_hwmon_fini(struct hl_device *hdev);
1540
1541int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
1542 u64 *handle, int ctx_id);
1543int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
1544int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
1545struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
1546 u32 handle);
1547void hl_cb_put(struct hl_cb *cb);
1548void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
1549void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
1550struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
1551int hl_cb_pool_init(struct hl_device *hdev);
1552int hl_cb_pool_fini(struct hl_device *hdev);
1553
1554void hl_cs_rollback_all(struct hl_device *hdev);
1555struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1556 enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
1557
1558void goya_set_asic_funcs(struct hl_device *hdev);
1559
1560int hl_vm_ctx_init(struct hl_ctx *ctx);
1561void hl_vm_ctx_fini(struct hl_ctx *ctx);
1562
1563int hl_vm_init(struct hl_device *hdev);
1564void hl_vm_fini(struct hl_device *hdev);
1565
1566int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
1567 struct hl_userptr *userptr);
1568void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
1569void hl_userptr_delete_list(struct hl_device *hdev,
1570 struct list_head *userptr_list);
1571bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
1572 struct list_head *userptr_list,
1573 struct hl_userptr **userptr);
1574
1575int hl_mmu_init(struct hl_device *hdev);
1576void hl_mmu_fini(struct hl_device *hdev);
1577int hl_mmu_ctx_init(struct hl_ctx *ctx);
1578void hl_mmu_ctx_fini(struct hl_ctx *ctx);
1579int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
1580 u32 page_size, bool flush_pte);
1581int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
1582 bool flush_pte);
1583void hl_mmu_swap_out(struct hl_ctx *ctx);
1584void hl_mmu_swap_in(struct hl_ctx *ctx);
1585
1586int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
1587 void __iomem *dst);
1588int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
1589int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
1590 u16 len, u32 timeout, long *result);
1591int hl_fw_test_cpu_queue(struct hl_device *hdev);
1592void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
1593 dma_addr_t *dma_handle);
1594void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
1595 void *vaddr);
1596int hl_fw_send_heartbeat(struct hl_device *hdev);
1597int hl_fw_armcp_info_get(struct hl_device *hdev);
1598int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
1599
1600int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
1601 bool is_wc[3]);
1602int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
1603int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
1604 u64 addr);
1605int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
1606 u64 dram_base_address, u64 host_phys_base_address,
1607 u64 host_phys_size);
1608int hl_pci_init(struct hl_device *hdev, u8 dma_mask);
1609void hl_pci_fini(struct hl_device *hdev);
1610int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
1611
1612long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
1613void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
1614int hl_get_temperature(struct hl_device *hdev,
1615 int sensor_index, u32 attr, long *value);
1616int hl_set_temperature(struct hl_device *hdev,
1617 int sensor_index, u32 attr, long value);
1618int hl_get_voltage(struct hl_device *hdev,
1619 int sensor_index, u32 attr, long *value);
1620int hl_get_current(struct hl_device *hdev,
1621 int sensor_index, u32 attr, long *value);
1622int hl_get_fan_speed(struct hl_device *hdev,
1623 int sensor_index, u32 attr, long *value);
1624int hl_get_pwm_info(struct hl_device *hdev,
1625 int sensor_index, u32 attr, long *value);
1626void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
1627 long value);
1628u64 hl_get_max_power(struct hl_device *hdev);
1629void hl_set_max_power(struct hl_device *hdev, u64 value);
1630
1631#ifdef CONFIG_DEBUG_FS
1632
1633void hl_debugfs_init(void);
1634void hl_debugfs_fini(void);
1635void hl_debugfs_add_device(struct hl_device *hdev);
1636void hl_debugfs_remove_device(struct hl_device *hdev);
1637void hl_debugfs_add_file(struct hl_fpriv *hpriv);
1638void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
1639void hl_debugfs_add_cb(struct hl_cb *cb);
1640void hl_debugfs_remove_cb(struct hl_cb *cb);
1641void hl_debugfs_add_cs(struct hl_cs *cs);
1642void hl_debugfs_remove_cs(struct hl_cs *cs);
1643void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
1644void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
1645void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
1646void hl_debugfs_remove_userptr(struct hl_device *hdev,
1647 struct hl_userptr *userptr);
1648void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
1649void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
1650
1651#else
1652
/*
 * No-op stubs used when CONFIG_DEBUG_FS is disabled, so callers need not
 * be conditionally compiled.
 *
 * Note: the __init annotation was removed from hl_debugfs_init() - the
 * CONFIG_DEBUG_FS=y prototype above is not __init, and a section
 * annotation on a static inline stub only invites section-mismatch
 * problems if a caller outside init code is ever added.
 */
static inline void hl_debugfs_init(void)
{
}

static inline void hl_debugfs_fini(void)
{
}

static inline void hl_debugfs_add_device(struct hl_device *hdev)
{
}

static inline void hl_debugfs_remove_device(struct hl_device *hdev)
{
}

static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
}

static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
{
}

static inline void hl_debugfs_add_cb(struct hl_cb *cb)
{
}

static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
{
}

static inline void hl_debugfs_add_cs(struct hl_cs *cs)
{
}

static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
{
}

static inline void hl_debugfs_add_job(struct hl_device *hdev,
					struct hl_cs_job *job)
{
}

static inline void hl_debugfs_remove_job(struct hl_device *hdev,
					struct hl_cs_job *job)
{
}

static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
					struct hl_userptr *userptr)
{
}

static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
					struct hl_userptr *userptr)
{
}

static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
						struct hl_ctx *ctx)
{
}

static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
						struct hl_ctx *ctx)
{
}
1722
1723#endif
1724
1725
1726long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
1727long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
1728int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
1729int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
1730int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
1731int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
1732
1733#endif
1734