#include "habanalabs.h"

#include <linux/slab.h>

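/*
 * hl_hw_queue_add_ptr - add a value to a queue pointer (PI/CI) with wrap-around
 *
 * @ptr: current pointer value
 * @val: amount to add
 *
 * The result is kept in the range [0, 2 * HL_QUEUE_LENGTH) so that a full
 * queue can be distinguished from an empty one (see queue_free_slots()).
 */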
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}

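/*
 * hl_hw_queue_update_ci - update the consumer index of a CS's queues
 *
 * @cs: the command submission whose jobs were consumed
 *
 * Advances the CI of every queue that holds jobs of this CS, but only for
 * queues that do not get their CI updated through a completion: internal
 * queues, or any queue when the CS does not require a completion.
 */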
void hl_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	if (hdev->disabled)
		return;

	q = &hdev->kernel_queues[0];

	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
		return;

	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

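/*
 * hl_hw_queue_submit_bd - write a buffer descriptor and ring the doorbell
 *
 * @hdev: habanalabs device structure
 * @q: queue to submit to
 * @ctl: BD control word
 * @len: BD length
 * @ptr: BD pointer (bus address of the CB)
 *
 * Writes the next BD at the queue's PI, advances the PI and notifies the
 * device through the doorbell. The caller is expected to hold the
 * hw_queues lock and to have verified there is room in the queue (see the
 * *_sanity_checks helpers in this file).
 */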
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
		u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

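/*
 * ext_queue_sanity_checks - check there is room for a CS on an external queue
 *
 * @hdev: habanalabs device structure
 * @q: external queue to check
 * @num_of_entries: how many entries the caller wants to submit
 * @reserve_cq_entry: true to also reserve entries on the matching CQ
 *
 * Returns 0 if there is room on the queue and (optionally) the completion
 * queue, -EAGAIN otherwise. When @reserve_cq_entry is true, the reserved CQ
 * entries must be returned by the caller on failure paths (see
 * unroll_cq_resv in hl_hw_queue_schedule_cs()).
 */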
static int ext_queue_sanity_checks(struct hl_device *hdev,
				struct hl_hw_queue *q, int num_of_entries,
				bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue by
		 * decrementing the free-slots counter. atomic_add_negative()
		 * returns true if the result went negative, i.e. the CQ does
		 * not have room, in which case the decrement is undone.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}

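/*
 * int_queue_sanity_checks - check there is room for a CS on an internal queue
 *
 * @hdev: habanalabs device structure
 * @q: internal queue to check
 * @num_of_entries: how many entries the caller wants to submit
 *
 * Returns 0 if there is room, -ENOMEM if the request can never fit in the
 * queue, or -EAGAIN if the queue is currently too full.
 */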
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

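/*
 * hw_queue_sanity_checks - check there is room for a CS on a H/W queue
 *
 * @hdev: habanalabs device structure
 * @q: H/W queue to check
 * @num_of_entries: how many entries the caller wants to submit
 *
 * Returns 0 if there is room, -EAGAIN otherwise. Unlike external queues,
 * no completion-queue entries are reserved here.
 */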
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

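/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB without a completion
 *
 * @hdev: habanalabs device structure
 * @hw_queue_id: queue to send the CB on
 * @cb_size: size of the CB
 * @cb_ptr: bus address of the CB
 *
 * Submits one CB that is not expected to generate a completion entry, hence
 * no CQ entry is reserved. The hw_queues lock is taken internally.
 */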
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * No completion entry is generated for this CB, so only the queue
	 * itself is checked for space and no CQ entry is reserved. H/W
	 * queues are not checked here at all.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

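/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * Prepares the completion-queue packet that the device will post when the
 * job is done, records the job in the shadow queue and submits the BD. Must
 * be called with the hw_queues lock held.
 */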
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Put the queue's PI, which is also this job's index in the shadow
	 * queue, in the BD control word.
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	/* Skip the completion-queue handling if the CS needs no completion */
	if (!cs_needs_completion(job->cs))
		goto submit_bd;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * Room on the completion queue was already reserved in
	 * ext_queue_sanity_checks(), so the CQ entry can be prepared here
	 * without further checks.
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

submit_bd:
	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

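/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * Builds the BD on the stack, writes it into the internal queue through the
 * ASIC's pqe_write callback and rings the doorbell. Must be called with the
 * hw_queues lock held.
 */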
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);

	if (job->is_kernel_allocated_cb)
		/* user_cb points to a CB object, so use its bus address */
		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
	else
		/* user_cb itself already carries the address for the BD */
		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

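/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * Builds the BD control word from the CS sequence and the queue's PI and
 * submits the BD. Must be called with the hw_queues lock held.
 */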
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * The completion-offset field in the BD control word is derived from
	 * the CS sequence (modulo the maximum number of pending CS), and the
	 * completion-data field from the queue's PI.
	 */
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * Use the patched CB if one exists, otherwise the kernel-allocated
	 * CB's bus address, and as a last resort treat user_cb itself as the
	 * address to put in the BD.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

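/*
 * init_signal_cs - initialize a SIGNAL command submission
 *
 * Picks the current sync object (SOB) of the queue's sync stream, records it
 * and the signal value in the CS completion object, generates the signal CB
 * and handles SOB wraparound.
 */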
static int init_signal_cs(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *hw_sob;
	u32 q_idx;
	int rc = 0;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	cs_cmpl->hw_sob = hw_sob;
	cs_cmpl->sob_val = prop->next_sob_val;

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
		cs_cmpl->cs_seq);

	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
				cs_cmpl->hw_sob->sob_id, 0, true);

	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
								false);

	return rc;
}

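/*
 * hl_hw_queue_encaps_sig_set_sob_info - set SOB info for an encapsulated
 *                                       signals wait
 *
 * Takes the SOB from the encapsulated signals handle and derives the value
 * to wait on from the handle's pre-submission SOB value and the job's wait
 * offset.
 */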
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
			struct hl_cs *cs, struct hl_cs_job *job,
			struct hl_cs_compl *cs_cmpl)
{
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	u32 offset = 0;

	cs_cmpl->hw_sob = handle->hw_sob;

	/*
	 * A non-zero encaps_sig_wait_offset is treated as a 1-based offset
	 * into the encapsulated signals, so convert it to a 0-based offset
	 * on top of the handle's pre-submission SOB value.
	 */
	if (job->encaps_sig_wait_offset)
		offset = job->encaps_sig_wait_offset - 1;

	cs_cmpl->sob_val = handle->pre_sob_val + offset;
}

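/*
 * init_wait_cs - initialize a WAIT command submission
 *
 * Resolves which SOB and value to wait on (either from the signal CS this
 * wait depends on, or from an encapsulated signals handle), takes a
 * reference on the SOB and generates the wait CB.
 */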
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_cs_compl *signal_cs_cmpl;
	u32 q_idx;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	signal_cs_cmpl = container_of(cs->signal_fence,
					struct hl_cs_compl,
					base_fence);

	if (cs->encaps_signals) {
		/*
		 * Wait on an encapsulated signals handle: the SOB and the
		 * value to wait on are taken from the handle.
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);

		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
				cs->encaps_sig_hdl->q_idx,
				cs->encaps_sig_hdl->cs_seq,
				cs_cmpl->sob_val,
				job->encaps_sig_wait_offset);
	} else {
		/* Regular wait: use the SOB and value recorded by the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/*
	 * The signal CS might already have completed, in which case its SOB
	 * may be reset and reused before this wait ever executes. Synchronize
	 * against the completion flow under the signal completion lock: if
	 * the signal fence is already done, fail the submission; otherwise
	 * take a reference on the SOB so it is kept for this wait.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}

	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	dev_dbg(hdev->dev,
		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->base_mon_id, q_idx, cs->sequence);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->base_mon_id;
	wait_prop.q_idx = q_idx;
	wait_prop.size = 0;

	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

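/*
 * init_signal_wait_cs - initialize a SIGNAL or WAIT command submission
 *
 * @cs: pointer to the signal/wait CS
 *
 * Dispatches to init_signal_cs() or init_wait_cs() based on the CS type,
 * operating on the first job in the CS's job list.
 */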
static int init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);
	int rc = 0;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);

	if (cs->type & CS_TYPE_SIGNAL)
		rc = init_signal_cs(hdev, job, cs_cmpl);
	else if (cs->type & CS_TYPE_WAIT)
		rc = init_wait_cs(hdev, cs, job, cs_cmpl);

	return rc;
}

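/*
 * encaps_sig_first_staged_cs_handler - handle the first staged CS of an
 *                                      encapsulated signals submission
 *
 * Looks up the encapsulated signals handle of the CS, records the CS
 * sequence in the handle and points the CS completion object at the
 * handle's SOB and final signal value.
 */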
static int encaps_sig_first_staged_cs_handler
			(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence,
					struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
	struct hl_encaps_signals_mgr *mgr;
	int rc = 0;

	mgr = &hdev->compute_ctx->sig_mgr;

	spin_lock(&mgr->lock);
	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
	if (encaps_sig_hdl) {
		/* Keep the CS sequence in the handle */
		encaps_sig_hdl->cs_seq = cs->sequence;

		/* Mark the CS completion as carrying encapsulated signals */
		cs_cmpl->encaps_signals = true;
		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;

		/*
		 * The completion uses the handle's SOB and its final value,
		 * i.e. the pre-submission value plus the number of
		 * encapsulated signals.
		 */
		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
						encaps_sig_hdl->count;

		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
				cs->sequence, encaps_sig_hdl->id,
				encaps_sig_hdl->count,
				encaps_sig_hdl->q_idx,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);

	} else {
		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
				cs->encaps_sig_hdl_id);
		rc = -EINVAL;
	}

	spin_unlock(&mgr->lock);

	return rc;
}

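/*
 * hl_hw_queue_schedule_cs - schedule a command submission on the H/W queues
 *
 * @cs: pointer to the CS to schedule
 *
 * Performs per-queue sanity checks (reserving CQ entries for external
 * queues), handles signal/wait and staged-submission bookkeeping, arms the
 * TDR timeout if needed and finally submits every job of the CS to its
 * queue. Returns 0 on success; on failure any reserved CQ entries are
 * released.
 */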
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	enum hl_device_status status;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	int rc = 0, i, cq_cnt;
	bool first_entry;
	u32 max_queues;

	cntr = &hdev->aggregated_cs_counters;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (!hl_device_operational(hdev, &status)) {
		atomic64_inc(&cntr->device_in_reset_drop_cnt);
		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
		dev_err(hdev->dev,
			"device is %s, CS rejected!\n", hdev->status[status]);
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i],
						cs_needs_completion(cs) ?
								true : false);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				atomic64_inc(
					&ctx->cs_counters.queue_full_drop_cnt);
				atomic64_inc(&cntr->queue_full_drop_cnt);
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
		rc = init_signal_wait_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	}

	if (cs->encaps_signals && cs->staged_first) {
		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
		if (rc)
			goto unroll_cq_resv;
	}

	spin_lock(&hdev->cs_mirror_lock);

	/* Verify the staged CS exists and add this CS to the staged list */
	if (cs->staged_cs && !cs->staged_first) {
		struct hl_cs *staged_cs;

		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (!staged_cs) {
			dev_err(hdev->dev,
				"Cannot find staged submission sequence %llu",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		if (is_staged_cs_last_exists(hdev, staged_cs)) {
			dev_err(hdev->dev,
				"Staged submission sequence %llu already submitted",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);

		/* Update the stream map of the first staged CS */
		if (hdev->supports_wait_for_multi_cs)
			staged_cs->fence->stream_master_qid_map |=
					cs->fence->stream_master_qid_map;
	}

	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	first_entry = list_first_entry(&hdev->cs_mirror_list,
					struct hl_cs, mirror_node) == cs;
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
				first_entry && cs_needs_timeout(cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unlock_cs_mirror:
	spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

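/*
 * hl_hw_queue_inc_ci_kernel - increment the CI of a kernel-owned queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue to increment its CI
 */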
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

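/*
 * ext_and_cpu_queue_init - common init for external and CPU queues
 *
 * Allocates the queue memory (from the CPU-accessible pool for CPU queues,
 * coherent DMA memory otherwise), allocates the shadow queue used to track
 * in-flight jobs and resets the PI/CI.
 */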
static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

	if (is_cpu_queue)
		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address);
	else
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
					sizeof(*q->shadow_queue),
					GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);

	return rc;
}
static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}

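/*
 * sync_stream_queue_init - initialize the sync-stream properties of a queue
 *
 * Reserves monitors for collective master/slave queues, and for queues that
 * support sync streams reserves a range of SOBs and monitors and initializes
 * the queue's hardware SOB objects.
 */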
static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *sync_stream_prop;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, reserved_mon_idx, queue_idx;

	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	/*
	 * collective_mon_idx is used as a running index to reserve monitors
	 * for collective master/slave queues: a master queue gets two
	 * reserved monitors, a slave queue gets one.
	 */
	if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_MASTER) {
		reserved_mon_idx = hdev->collective_mon_idx;

		/* reserve the first monitor for the collective master queue */
		sync_stream_prop->collective_mstr_mon_id[0] =
			prop->collective_first_mon + reserved_mon_idx;

		/* reserve the second monitor for the collective master queue */
		sync_stream_prop->collective_mstr_mon_id[1] =
			prop->collective_first_mon + reserved_mon_idx + 1;

		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
	} else if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_SLAVE) {
		reserved_mon_idx = hdev->collective_mon_idx++;

		/* reserve a monitor for the collective slave queue */
		sync_stream_prop->collective_slave_mon_id =
			prop->collective_first_mon + reserved_mon_idx;
	}

	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
		return;

	queue_idx = hdev->sync_stream_queue_idx++;

	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
			(queue_idx * HL_RSVD_SOBS);
	sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
			(queue_idx * HL_RSVD_MONS);
	sync_stream_prop->next_sob_val = 1;
	sync_stream_prop->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &sync_stream_prop->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
		hw_sob->sob_addr =
			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[q_idx].sync_stream_prop;

	/*
	 * The SOB refcount can be bigger than 1 if, for example, a CS got
	 * stuck, so force it back to 1 before the stream is reused.
	 */
	kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
	prop->curr_sob_offset = 0;
	prop->next_sob_val = 1;
}

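/*
 * queue_init - main initialization function for a H/W queue object
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @hw_queue_id: the id of the H/W queue
 *
 * Allocates the queue memory according to its type and initializes the
 * sync-stream properties. Queues of type QUEUE_TYPE_NA are marked invalid.
 */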
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}

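/*
 * queue_fini - release the resources of a single H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 *
 * Frees the shadow queue and the queue memory of valid queues.
 */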
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;

	/*
	 * Internal queues do not own their memory (their base address is
	 * obtained from the ASIC in int_queue_init()), so there is nothing
	 * to free for them.
	 */
	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);
}

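/*
 * hl_hw_queues_create - create all the H/W queue objects of a device
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates the kernel_queues array according to the ASIC properties and
 * initializes every queue; on failure, queues that were already initialized
 * are torn down.
 */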
int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		q->collective_mode = asic->hw_queues_props[i].collective_mode;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

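/*
 * hl_hw_queues_destroy - release all the H/W queue objects of a device
 *
 * @hdev: pointer to hl_device structure
 */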
void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

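/*
 * hl_hw_queue_reset - reset the PI/CI of all valid queues
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if this is a hard reset; CPU queues are reset only then
 *
 * Also resets the sync-stream properties of queues that support sync streams.
 */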
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}