#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");

static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number of SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before an interrupt");

#define SDMA_WAIT_BATCH_SIZE 20

#define SDMA_ERR_HALT_TIMEOUT 10

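/*
 * SD() shortens references to the long SEND_DMA_* CSR field names.
 * ALL_SDMA_ENG_HALT_ERRS collects every engine error status bit that
 * halts the engine; see sdma_engine_error().
 */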
#define SD(name) SEND_DMA_##name
#define ALL_SDMA_ENG_HALT_ERRS \
	(SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))

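/* sdma_sendctrl operations */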
#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
#define SDMA_SENDCTRL_OP_HALT      BIT(2)
#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)

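/* handle long defines */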
#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT

static const char * const sdma_state_names[] = {
	[sdma_state_s00_hw_down] = "s00_HwDown",
	[sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
	[sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
	[sdma_state_s20_idle] = "s20_Idle",
	[sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
	[sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
	[sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
	[sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
	[sdma_state_s80_hw_freeze] = "s80_HwFreeze",
	[sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
	[sdma_state_s99_running] = "s99_Running",
};

#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = {
	[sdma_event_e00_go_hw_down] = "e00_GoHwDown",
	[sdma_event_e10_go_hw_start] = "e10_GoHwStart",
	[sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
	[sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
	[sdma_event_e30_go_running] = "e30_GoRunning",
	[sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
	[sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
	[sdma_event_e60_hw_halted] = "e60_HwHalted",
	[sdma_event_e70_go_idle] = "e70_GoIdle",
	[sdma_event_e80_hw_freeze] = "e80_HwFreeze",
	[sdma_event_e81_hw_frozen] = "e81_HwFrozen",
	[sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
	[sdma_event_e85_link_down] = "e85_LinkDown",
	[sdma_event_e90_sw_halted] = "e90_SwHalted",
};
#endif

static const struct sdma_set_state_action sdma_action_table[] = {
	[sdma_state_s00_hw_down] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s10_hw_start_up_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s15_hw_start_up_clean_wait] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s20_idle] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s30_sw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s40_hw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s50_hw_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s60_idle_halt_wait] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s80_hw_freeze] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s82_freeze_sw_clean] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s99_running] = {
		.op_enable = 1,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
		.go_s99_running_totrue = 1,
	},
};

#define SDMA_TAIL_UPDATE_THRESH 0x1F

static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
static void sdma_hw_clean_up_task(unsigned long);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
static void sdma_sw_clean_up_task(unsigned long);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void __sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);

static const char *sdma_state_name(enum sdma_states state)
{
	return sdma_state_names[state];
}

static void sdma_get(struct sdma_state *ss)
{
	kref_get(&ss->kref);
}

static void sdma_complete(struct kref *kref)
{
	struct sdma_state *ss =
		container_of(kref, struct sdma_state, kref);

	complete(&ss->comp);
}

static void sdma_put(struct sdma_state *ss)
{
	kref_put(&ss->kref, sdma_complete);
}

static void sdma_finalput(struct sdma_state *ss)
{
	sdma_put(ss);
	wait_for_completion(&ss->comp);
}

static inline void write_sde_csr(
	struct sdma_engine *sde,
	u32 offset0,
	u64 value)
{
	write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
}

static inline u64 read_sde_csr(
	struct sdma_engine *sde,
	u32 offset0)
{
	return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
}

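/*
 * Poll the egress packet occupancy for this engine until it drains to
 * zero.  If no forward progress is seen for ~500us, report the stall
 * and bounce the link to recover.
 */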
static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
					int pause)
{
	u64 off = 8 * sde->this_idx;
	struct hfi1_devdata *dd = sde->dd;
	int lcnt = 0;
	u64 reg_prev;
	u64 reg = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);

		reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
		reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
		if (reg == 0)
			break;

		if (reg != reg_prev)
			lcnt = 0;
		if (lcnt++ > 500) {
			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sde->this_idx, (u32)reg);
			queue_work(dd->pport->link_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		udelay(1);
	}
}

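/**
 * sdma_wait() - wait for packet egress to complete for all SDMA engines
 * @dd: hfi1_devdata
 */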
void sdma_wait(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->num_sdma; i++) {
		struct sdma_engine *sde = &dd->per_sdma[i];

		sdma_wait_for_packet_egress(sde, 0);
	}
}

static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
{
	u64 reg;

	if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
		return;
	reg = cnt;
	reg &= SD(DESC_CNT_CNT_MASK);
	reg <<= SD(DESC_CNT_CNT_SHIFT);
	write_sde_csr(sde, SD(DESC_CNT), reg);
}

static inline void complete_tx(struct sdma_engine *sde,
			       struct sdma_txreq *tx,
			       int res)
{
	struct iowait *wait = tx->wait;
	callback_t complete = tx->complete;

#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	trace_hfi1_sdma_out_sn(sde, tx->sn);
	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
		dd_dev_err(sde->dd, "expected %llu got %llu\n",
			   sde->head_sn, tx->sn);
	sde->head_sn++;
#endif
	__sdma_txclean(sde->dd, tx);
	if (complete)
		(*complete)(tx, res);
	if (wait && iowait_sdma_dec(wait))
		iowait_drain_wakeup(wait);
}

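/*
 * Complete all outstanding and queued requests with an aborted status:
 * flush the descriptor queue, then move the flushlist aside under its
 * lock and complete each txreq off the local list.
 */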
static void sdma_flush(struct sdma_engine *sde)
{
	struct sdma_txreq *txp, *txp_next;
	LIST_HEAD(flushlist);
	unsigned long flags;

	sdma_flush_descq(sde);
	spin_lock_irqsave(&sde->flushlist_lock, flags);

	list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
		list_del_init(&txp->list);
		list_add_tail(&txp->list, &flushlist);
	}
	spin_unlock_irqrestore(&sde->flushlist_lock, flags);

	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
}

static void sdma_field_flush(struct work_struct *work)
{
	unsigned long flags;
	struct sdma_engine *sde =
		container_of(work, struct sdma_engine, flush_worker);

	write_seqlock_irqsave(&sde->head_lock, flags);
	if (!__sdma_running(sde))
		sdma_flush(sde);
	write_sequnlock_irqrestore(&sde->head_lock, flags);
}

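/*
 * Poll for the engine-halted status bit, giving up after
 * SDMA_ERR_HALT_TIMEOUT ms, then deliver e15_hw_halt_done to the
 * state machine.
 */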
static void sdma_err_halt_wait(struct work_struct *work)
{
	struct sdma_engine *sde = container_of(work, struct sdma_engine,
					       err_halt_worker);
	u64 statuscsr;
	unsigned long timeout;

	timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
	while (1) {
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
		if (statuscsr)
			break;
		if (time_after(jiffies, timeout)) {
			dd_dev_err(sde->dd,
				   "SDMA engine %d - timeout waiting for engine to halt\n",
				   sde->this_idx);
			break;
		}
		usleep_range(80, 120);
	}

	sdma_process_event(sde, sdma_event_e15_hw_halt_done);
}

static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
		unsigned index;
		struct hfi1_devdata *dd = sde->dd;

		for (index = 0; index < dd->num_sdma; index++) {
			struct sdma_engine *curr_sdma = &dd->per_sdma[index];

			if (curr_sdma != sde)
				curr_sdma->progress_check_head =
							curr_sdma->descq_head;
		}
		dd_dev_err(sde->dd,
			   "SDMA engine %d - check scheduled\n",
			   sde->this_idx);
		mod_timer(&sde->err_progress_check_timer, jiffies + 10);
	}
}

static void sdma_err_progress_check(struct timer_list *t)
{
	unsigned index;
	struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);

	dd_dev_err(sde->dd, "SDE progress check event\n");
	for (index = 0; index < sde->dd->num_sdma; index++) {
		struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
		unsigned long flags;

		if (curr_sde == sde)
			continue;

		spin_lock_irqsave(&curr_sde->tail_lock, flags);
		write_seqlock(&curr_sde->head_lock);

		if (curr_sde->state.current_state != sdma_state_s99_running) {
			write_sequnlock(&curr_sde->head_lock);
			spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
			continue;
		}

		if ((curr_sde->descq_head != curr_sde->descq_tail) &&
		    (curr_sde->descq_head ==
		     curr_sde->progress_check_head))
			__sdma_process_event(curr_sde,
					     sdma_event_e90_sw_halted);
		write_sequnlock(&curr_sde->head_lock);
		spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
	}
	schedule_work(&sde->err_halt_worker);
}

static void sdma_hw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	u64 statuscsr;

	while (1) {
#ifdef CONFIG_SDMA_VERBOSITY
		dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
			   sde->this_idx, slashstrip(__FILE__), __LINE__,
			   __func__);
#endif
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
		if (statuscsr)
			break;
		udelay(10);
	}

	sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
}

static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
	return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}

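/*
 * Walk the descriptor queue from head to tail, completing the
 * associated txreqs with an aborted status.
 */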
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	while (head != tail) {
		head = ++sde->descq_head & sde->sdma_mask;
		if (txp && txp->next_descq_idx == head) {
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
			trace_hfi1_sdma_progress(sde, head, tail, txp);
			txp = get_txhead(sde);
		}
		progress++;
	}
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

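/*
 * Software clean-up tasklet: under both the tail_lock and head_lock,
 * reclaim whatever the hardware completed, abort the rest, and reset
 * the ring to an empty state before delivering e40_sw_cleaned.
 */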
static void sdma_sw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	sdma_make_progress(sde, 0);

	sdma_flush(sde);

	sde->descq_tail = 0;
	sde->descq_head = 0;
	sde->desc_avail = sdma_descq_freecnt(sde);
	*sde->head_dma = 0;

	__sdma_process_event(sde, sdma_event_e40_sw_cleaned);

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sw_tear_down(struct sdma_engine *sde)
{
	struct sdma_state *ss = &sde->state;

	sdma_put(ss);

	atomic_set(&sde->dd->sdma_unfreeze_count, -1);
	wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
}

static void sdma_start_hw_clean_up(struct sdma_engine *sde)
{
	tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
}

static void sdma_set_state(struct sdma_engine *sde,
			   enum sdma_states next_state)
{
	struct sdma_state *ss = &sde->state;
	const struct sdma_set_state_action *action = sdma_action_table;
	unsigned op = 0;

	trace_hfi1_sdma_state(
		sde,
		sdma_state_names[ss->current_state],
		sdma_state_names[next_state]);

	ss->previous_state = ss->current_state;
	ss->previous_op = ss->current_op;
	ss->current_state = next_state;

	if (ss->previous_state != sdma_state_s99_running &&
	    next_state == sdma_state_s99_running)
		sdma_flush(sde);

	if (action[next_state].op_enable)
		op |= SDMA_SENDCTRL_OP_ENABLE;

	if (action[next_state].op_intenable)
		op |= SDMA_SENDCTRL_OP_INTENABLE;

	if (action[next_state].op_halt)
		op |= SDMA_SENDCTRL_OP_HALT;

	if (action[next_state].op_cleanup)
		op |= SDMA_SENDCTRL_OP_CLEANUP;

	if (action[next_state].go_s99_running_tofalse)
		ss->go_s99_running = 0;

	if (action[next_state].go_s99_running_totrue)
		ss->go_s99_running = 1;

	ss->current_op = op;
	sdma_sendctrl(sde, ss->current_op);
}

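/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.
 *
 * The count must be a power of 2 within [64, 32768]; anything else
 * falls back to SDMA_DESCQ_CNT.
 */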
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;
	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}

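/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found.  The mapping fields are protected by RCU.
 */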
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	struct sdma_vl_map *m;
	u8 vl;

	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
		return -EINVAL;

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return -EINVAL;
	}
	vl = m->engine_to_vl[sde->this_idx];
	rcu_read_unlock();

	return vl;
}

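/**
 * sdma_select_engine_vl() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an engine based on the selector and a vl.  The
 * mapping fields are protected by RCU.
 */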
struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl)
{
	struct sdma_vl_map *m;
	struct sdma_map_elem *e;
	struct sdma_engine *rval;

	if (vl >= num_vls) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return &dd->per_sdma[0];
	}
	e = m->map[vl & m->mask];
	rval = e->sde[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? &dd->per_sdma[0] : rval;
	trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
	return rval;
}

struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return sdma_select_engine_vl(dd, selector, vl);
}

struct sdma_rht_map_elem {
	u32 mask;
	u8 ctr;
	struct sdma_engine *sde[0];
};

struct sdma_rht_node {
	unsigned long cpu_id;
	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
	struct rhash_head node;
};

#define NR_CPUS_HINT 192

static const struct rhashtable_params sdma_rht_params = {
	.nelem_hint = NR_CPUS_HINT,
	.head_offset = offsetof(struct sdma_rht_node, node),
	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
	.max_size = NR_CPUS,
	.min_size = 8,
	.automatic_shrinking = true,
};

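/**
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * A user-defined sdma engine affinity setting is honored when
 * applicable; otherwise the system default sdma engine mapping is used.
 */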
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	struct sdma_rht_node *rht_node;
	struct sdma_engine *sde = NULL;
	const struct cpumask *current_mask = &current->cpus_allowed;
	unsigned long cpu_id;

	if (cpumask_weight(current_mask) != 1)
		goto out;

	cpu_id = smp_processor_id();
	rcu_read_lock();
	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
					  sdma_rht_params);

	if (rht_node && rht_node->map[vl]) {
		struct sdma_rht_map_elem *map = rht_node->map[vl];

		sde = map->sde[selector & map->mask];
	}
	rcu_read_unlock();

	if (sde)
		return sde;

out:
	return sdma_select_engine_vl(dd, selector, vl);
}

static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	int i;

	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
		map->sde[map->ctr + i] = map->sde[i];
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
				 struct sdma_engine *sde)
{
	unsigned int i, pow;

	for (i = 0; i < map->ctr; i++) {
		if (map->sde[i] == sde) {
			memmove(&map->sde[i], &map->sde[i + 1],
				(map->ctr - i - 1) * sizeof(map->sde[0]));
			map->ctr--;
			pow = roundup_pow_of_two(map->ctr ? : 1);
			map->mask = pow - 1;
			sdma_populate_sde_map(map);
			break;
		}
	}
}

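/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */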
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count)
{
	struct hfi1_devdata *dd = sde->dd;
	cpumask_var_t mask, new_mask;
	unsigned long cpu;
	int ret, vl, sz;

	vl = sdma_engine_get_vl(sde);
	if (unlikely(vl < 0))
		return -EINVAL;

	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
	if (!ret) {
		free_cpumask_var(mask);
		return -ENOMEM;
	}
	ret = cpulist_parse(buf, mask);
	if (ret)
		goto out_free;

	if (!cpumask_subset(mask, cpu_online_mask)) {
		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
		ret = -EINVAL;
		goto out_free;
	}

	sz = sizeof(struct sdma_rht_map_elem) +
			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));

	mutex_lock(&process_to_sde_mutex);

	for_each_cpu(cpu, mask) {
		struct sdma_rht_node *rht_node;

		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
			cpumask_set_cpu(cpu, new_mask);
			continue;
		}

		if (vl >= ARRAY_SIZE(rht_node->map)) {
			ret = -EINVAL;
			goto out;
		}

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (!rht_node) {
			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
			if (!rht_node) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
			if (!rht_node->map[vl]) {
				kfree(rht_node);
				ret = -ENOMEM;
				goto out;
			}
			rht_node->cpu_id = cpu;
			rht_node->map[vl]->mask = 0;
			rht_node->map[vl]->ctr = 1;
			rht_node->map[vl]->sde[0] = sde;

			ret = rhashtable_insert_fast(dd->sdma_rht,
						     &rht_node->node,
						     sdma_rht_params);
			if (ret) {
				kfree(rht_node->map[vl]);
				kfree(rht_node);
				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
					   cpu);
				goto out;
			}
		} else {
			int ctr, pow;

			if (!rht_node->map[vl])
				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);

			if (!rht_node->map[vl]) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl]->ctr++;
			ctr = rht_node->map[vl]->ctr;
			rht_node->map[vl]->sde[ctr - 1] = sde;
			pow = roundup_pow_of_two(ctr);
			rht_node->map[vl]->mask = pow - 1;

			sdma_populate_sde_map(rht_node->map[vl]);
		}
		cpumask_set_cpu(cpu, new_mask);
	}

	for_each_cpu(cpu, cpu_online_mask) {
		struct sdma_rht_node *rht_node;

		if (cpumask_test_cpu(cpu, mask))
			continue;

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (rht_node) {
			bool empty = true;
			int i;

			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
				if (rht_node->map[i])
					sdma_cleanup_sde_map(rht_node->map[i],
							     sde);

			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
				if (!rht_node->map[i])
					continue;

				if (rht_node->map[i]->ctr) {
					empty = false;
					break;
				}
			}

			if (empty) {
				ret = rhashtable_remove_fast(dd->sdma_rht,
							     &rht_node->node,
							     sdma_rht_params);
				WARN_ON(ret);

				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
					kfree(rht_node->map[i]);

				kfree(rht_node);
			}
		}
	}

	cpumask_copy(&sde->cpu_mask, new_mask);
out:
	mutex_unlock(&process_to_sde_mutex);
out_free:
	free_cpumask_var(mask);
	free_cpumask_var(new_mask);
	return ret ? : strnlen(buf, PAGE_SIZE);
}

ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	mutex_lock(&process_to_sde_mutex);
	if (cpumask_empty(&sde->cpu_mask))
		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
	else
		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
	mutex_unlock(&process_to_sde_mutex);
	return strnlen(buf, PAGE_SIZE);
}

static void sdma_rht_free(void *ptr, void *arg)
{
	struct sdma_rht_node *rht_node = ptr;
	int i;

	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
		kfree(rht_node->map[i]);

	kfree(rht_node);
}

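/**
 * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * This routine dumps the process to sde mappings per cpu
 */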
void sdma_seqfile_dump_cpu_list(struct seq_file *s,
				struct hfi1_devdata *dd,
				unsigned long cpuid)
{
	struct sdma_rht_node *rht_node;
	int i, j;

	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
					  sdma_rht_params);
	if (!rht_node)
		return;

	seq_printf(s, "cpu%3lu: ", cpuid);
	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
		if (!rht_node->map[i] || !rht_node->map[i]->ctr)
			continue;

		seq_printf(s, " vl%d: [", i);

		for (j = 0; j < rht_node->map[i]->ctr; j++) {
			if (!rht_node->map[i]->sde[j])
				continue;

			if (j > 0)
				seq_puts(s, ",");

			seq_printf(s, " sdma%2d",
				   rht_node->map[i]->sde[j]->this_idx);
		}
		seq_puts(s, " ]");
	}

	seq_puts(s, "\n");
}

static void sdma_map_free(struct sdma_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

static void sdma_map_rcu_callback(struct rcu_head *list)
{
	struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);

	sdma_map_free(m);
}

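/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to specify a non-uniform vl/engine loading.  NULL
 * implies auto computing the loading and giving each VL a uniform
 * distribution of engines per VL.  Any extra engines are distributed
 * from the last VL on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 */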
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	int i, j;
	int extra, sde_per_vl;
	int engine = 0;
	u8 lvl_engines[OPA_MAX_VLS];
	struct sdma_vl_map *oldmap, *newmap;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return 0;

	if (!vl_engines) {
		sde_per_vl = dd->num_sdma / num_vls;
		extra = dd->num_sdma % num_vls;
		vl_engines = lvl_engines;

		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
	}

	newmap = kzalloc(
		sizeof(struct sdma_vl_map) +
			roundup_pow_of_two(num_vls) *
			sizeof(struct sdma_map_elem *),
		GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;

	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
		newmap->engine_to_vl[i] = -1;
	for (i = 0; i < newmap->vls; i++) {
		int first_engine = engine;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_engines[i]);

			newmap->map[i] = kzalloc(
				sizeof(struct sdma_map_elem) +
					sz * sizeof(struct sdma_engine *),
				GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;

			for (j = 0; j < sz; j++) {
				newmap->map[i]->sde[j] =
					&dd->per_sdma[engine];
				if (++engine >= first_engine + vl_engines[i])
					engine = first_engine;
			}

			for (j = 0; j < vl_engines[i]; j++)
				newmap->engine_to_vl[first_engine + j] = i;
		} else {
			newmap->map[i] = newmap->map[i % num_vls];
		}
		engine = first_engine + vl_engines[i];
	}

	spin_lock_irq(&dd->sde_map_lock);
	oldmap = rcu_dereference_protected(dd->sdma_map,
					   lockdep_is_held(&dd->sde_map_lock));

	rcu_assign_pointer(dd->sdma_map, newmap);

	spin_unlock_irq(&dd->sde_map_lock);

	if (oldmap)
		call_rcu(&oldmap->list, sdma_map_rcu_callback);
	return 0;
bail:
	sdma_map_free(newmap);
	return -ENOMEM;
}

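/*
 * Clean up allocated memory.
 *
 * This routine is run when the device is being shut down; it frees the
 * pad and head DMA buffers, the per-engine descriptor queues and tx
 * rings, the vl map, and the rhashtable.
 */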
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
	size_t i;
	struct sdma_engine *sde;

	if (dd->sdma_pad_dma) {
		dma_free_coherent(&dd->pcidev->dev, 4,
				  (void *)dd->sdma_pad_dma,
				  dd->sdma_pad_phys);
		dd->sdma_pad_dma = NULL;
		dd->sdma_pad_phys = 0;
	}
	if (dd->sdma_heads_dma) {
		dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
				  (void *)dd->sdma_heads_dma,
				  dd->sdma_heads_phys);
		dd->sdma_heads_dma = NULL;
		dd->sdma_heads_phys = 0;
	}
	for (i = 0; dd->per_sdma && i < num_engines; ++i) {
		sde = &dd->per_sdma[i];

		sde->head_dma = NULL;
		sde->head_phys = 0;

		if (sde->descq) {
			dma_free_coherent(
				&dd->pcidev->dev,
				sde->descq_cnt * sizeof(u64[2]),
				sde->descq,
				sde->descq_phys
			);
			sde->descq = NULL;
			sde->descq_phys = 0;
		}
		kvfree(sde->tx_ring);
		sde->tx_ring = NULL;
	}
	spin_lock_irq(&dd->sde_map_lock);
	sdma_map_free(rcu_access_pointer(dd->sdma_map));
	RCU_INIT_POINTER(dd->sdma_map, NULL);
	spin_unlock_irq(&dd->sde_map_lock);
	synchronize_rcu();
	kfree(dd->per_sdma);
	dd->per_sdma = NULL;

	if (dd->sdma_rht) {
		rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
		kfree(dd->sdma_rht);
		dd->sdma_rht = NULL;
	}
}

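/**
 * sdma_init() - called when device probed
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 *
 * Initializes each sde and its csrs.  Interrupts are not required to be
 * enabled.
 *
 * Returns: 0 on success, -errno on failure
 */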
int sdma_init(struct hfi1_devdata *dd, u8 port)
{
	unsigned this_idx;
	struct sdma_engine *sde;
	struct rhashtable *tmp_sdma_rht;
	u16 descq_cnt;
	void *curr_head;
	struct hfi1_pportdata *ppd = dd->pport + port;
	u32 per_sdma_credits;
	uint idle_cnt = sdma_idle_cnt;
	size_t num_engines = dd->chip_sdma_engines;
	int ret = -ENOMEM;

	if (!HFI1_CAP_IS_KSET(SDMA)) {
		HFI1_CAP_CLEAR(SDMA_AHG);
		return 0;
	}
	if (mod_num_sdma &&
	    mod_num_sdma <= dd->chip_sdma_engines &&
	    mod_num_sdma >= num_vls)
		num_engines = mod_num_sdma;

	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		    dd->chip_sdma_mem_size);

	per_sdma_credits =
		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);

	init_waitqueue_head(&dd->sdma_unfreeze_wq);
	atomic_set(&dd->sdma_unfreeze_count, 0);

	descq_cnt = sdma_get_descq_cnt();
	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
		    num_engines, descq_cnt);

	dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
				    GFP_KERNEL, dd->node);
	if (!dd->per_sdma)
		return ret;

	idle_cnt = ns_to_cclock(dd, idle_cnt);
	if (idle_cnt)
		dd->default_desc1 =
			SDMA_DESC1_HEAD_TO_HOST_FLAG;
	else
		dd->default_desc1 =
			SDMA_DESC1_INT_REQ_FLAG;

	if (!sdma_desct_intr)
		sdma_desct_intr = SDMA_DESC_INTR;

	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		sde->dd = dd;
		sde->ppd = ppd;
		sde->this_idx = this_idx;
		sde->descq_cnt = descq_cnt;
		sde->desc_avail = sdma_descq_freecnt(sde);
		sde->sdma_shift = ilog2(descq_cnt);
		sde->sdma_mask = (1 << sde->sdma_shift) - 1;

		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
					   this_idx);
		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
						this_idx);
		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
					    this_idx);

		sde->imask = sde->int_mask | sde->progress_mask |
			     sde->idle_mask;

		spin_lock_init(&sde->tail_lock);
		seqlock_init(&sde->head_lock);
		spin_lock_init(&sde->senddmactrl_lock);
		spin_lock_init(&sde->flushlist_lock);

		sde->ahg_bits = 0xfffffffe00000000ULL;

		sdma_set_state(sde, sdma_state_s00_hw_down);

		kref_init(&sde->state.kref);
		init_completion(&sde->state.comp);

		INIT_LIST_HEAD(&sde->flushlist);
		INIT_LIST_HEAD(&sde->dmawait);

		sde->tail_csr =
			get_kctxt_csr_addr(dd, this_idx, SD(TAIL));

		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
			     (unsigned long)sde);
		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
			     (unsigned long)sde);
		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
		INIT_WORK(&sde->flush_worker, sdma_field_flush);

		sde->progress_check_head = 0;

		timer_setup(&sde->err_progress_check_timer,
			    sdma_err_progress_check, 0);

		sde->descq = dma_zalloc_coherent(
			&dd->pcidev->dev,
			descq_cnt * sizeof(u64[2]),
			&sde->descq_phys,
			GFP_KERNEL
		);
		if (!sde->descq)
			goto bail;
		sde->tx_ring =
			kvzalloc_node(sizeof(struct sdma_txreq *) * descq_cnt,
				      GFP_KERNEL, dd->node);
		if (!sde->tx_ring)
			goto bail;
	}

	dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;

	dd->sdma_heads_dma = dma_zalloc_coherent(
		&dd->pcidev->dev,
		dd->sdma_heads_size,
		&dd->sdma_heads_phys,
		GFP_KERNEL
	);
	if (!dd->sdma_heads_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
		goto bail;
	}

	dd->sdma_pad_dma = dma_zalloc_coherent(
		&dd->pcidev->dev,
		sizeof(u32),
		&dd->sdma_pad_phys,
		GFP_KERNEL
	);
	if (!dd->sdma_pad_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
		goto bail;
	}

	curr_head = (void *)dd->sdma_heads_dma;
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		unsigned long phys_offset;

		sde = &dd->per_sdma[this_idx];

		sde->head_dma = curr_head;
		curr_head += L1_CACHE_BYTES;
		phys_offset = (unsigned long)sde->head_dma -
			      (unsigned long)dd->sdma_heads_dma;
		sde->head_phys = dd->sdma_heads_phys + phys_offset;
		init_sdma_regs(sde, per_sdma_credits, idle_cnt);
	}
	dd->flags |= HFI1_HAS_SEND_DMA;
	dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
	dd->num_sdma = num_engines;
	ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
	if (ret < 0)
		goto bail;

	tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
	if (!tmp_sdma_rht) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
	if (ret < 0) {
		kfree(tmp_sdma_rht);
		goto bail;
	}
	dd->sdma_rht = tmp_sdma_rht;

	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
	return 0;

bail:
	sdma_clean(dd, num_engines);
	return ret;
}

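/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */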
void sdma_all_running(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e30_go_running);
	}
}

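/**
 * sdma_all_idle() - called when the link goes down
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the idle state.
 */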
void sdma_all_idle(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e70_go_idle);
	}
}

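/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine is for kicking off the state processing for all required
 * sdma engines.  Interrupts need to be working at this point.
 */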
void sdma_start(struct hfi1_devdata *dd)
{
	unsigned i;
	struct sdma_engine *sde;

	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e10_go_hw_start);
	}
}

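/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */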
void sdma_exit(struct hfi1_devdata *dd)
{
	unsigned this_idx;
	struct sdma_engine *sde;

	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
	     ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		if (!list_empty(&sde->dmawait))
			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
				   sde->this_idx);
		sdma_process_event(sde, sdma_event_e00_go_hw_down);

		del_timer_sync(&sde->err_progress_check_timer);

		sdma_finalput(&sde->state);
	}
}

static inline void sdma_unmap_desc(
	struct hfi1_devdata *dd,
	struct sdma_desc *descp)
{
	switch (sdma_mapping_type(descp)) {
	case SDMA_MAP_SINGLE:
		dma_unmap_single(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	case SDMA_MAP_PAGE:
		dma_unmap_page(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	}
}

static inline u8 ahg_mode(struct sdma_txreq *tx)
{
	return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
		>> SDMA_DESC1_HEADER_MODE_SHIFT;
}

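/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: device
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 */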
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	u16 i;

	if (tx->num_desc) {
		u8 skip = 0, mode = ahg_mode(tx);

		sdma_unmap_desc(dd, &tx->descp[0]);

		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
		for (i = 1 + skip; i < tx->num_desc; i++)
			sdma_unmap_desc(dd, &tx->descp[i]);
		tx->num_desc = 0;
	}
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;

	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}

static inline u16 sdma_gethead(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	int use_dmahead;
	u16 hwhead;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

retry:
	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
		      (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
	hwhead = use_dmahead ?
		(u16)le64_to_cpu(*sde->head_dma) :
		(u16)read_sde_csr(sde, SD(HEAD));

	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
		u16 cnt;
		u16 swtail;
		u16 swhead;
		int sane;

		swhead = sde->descq_head & sde->sdma_mask;
		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		cnt = sde->descq_cnt;

		if (swhead < swtail)
			sane = (hwhead >= swhead) & (hwhead <= swtail);
		else if (swhead > swtail)
			sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
			       (hwhead <= swtail);
		else
			sane = (hwhead == swhead);

		if (unlikely(!sane)) {
			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
				   sde->this_idx,
				   use_dmahead ? "dma" : "kreg",
				   hwhead, swhead, swtail, cnt);
			if (use_dmahead) {
				use_dmahead = 0;
				goto retry;
			}
			hwhead = swhead;
		}
	}
	return hwhead;
}

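/*
 * Wake queued iowait entries as descriptors free up, removing up to
 * SDMA_WAIT_BATCH_SIZE waiters whose first txreq now fits; the
 * most-starved waiter is woken first.
 */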
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	struct iowait *wait, *nw;
	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
	uint i, n = 0, seq, max_idx = 0;
	struct sdma_txreq *stx;
	struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
	u8 max_starved_cnt = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
		   slashstrip(__FILE__), __LINE__, __func__);
	dd_dev_err(sde->dd, "avail: %u\n", avail);
#endif

	do {
		seq = read_seqbegin(&dev->iowait_lock);
		if (!list_empty(&sde->dmawait)) {
			write_seqlock(&dev->iowait_lock);
			list_for_each_entry_safe(
				wait,
				nw,
				&sde->dmawait,
				list) {
				u16 num_desc = 0;

				if (!wait->wakeup)
					continue;
				if (n == ARRAY_SIZE(waits))
					break;
				if (!list_empty(&wait->tx_head)) {
					stx = list_first_entry(
						&wait->tx_head,
						struct sdma_txreq,
						list);
					num_desc = stx->num_desc;
				}
				if (num_desc > avail)
					break;
				avail -= num_desc;
				iowait_starve_find_max(wait, &max_starved_cnt,
						       n, &max_idx);
				list_del_init(&wait->list);
				waits[n++] = wait;
			}
			write_sequnlock(&dev->iowait_lock);
			break;
		}
	} while (read_seqretry(&dev->iowait_lock, seq));

	if (n)
		waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);

	for (i = 0; i < n; i++)
		if (i != max_idx)
			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

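/* head_lock must be held */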
static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
	struct sdma_txreq *txp = NULL;
	int progress = 0;
	u16 hwhead, swhead;
	int idle_check_done = 0;

	hwhead = sdma_gethead(sde);

retry:
	txp = get_txhead(sde);
	swhead = sde->descq_head & sde->sdma_mask;
	trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
	while (swhead != hwhead) {
		swhead = ++sde->descq_head & sde->sdma_mask;

		if (txp && txp->next_descq_idx == swhead) {
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
			txp = get_txhead(sde);
		}
		trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
		progress++;
	}

	if ((status & sde->idle_mask) && !idle_check_done) {
		u16 swtail;

		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		if (swtail != hwhead) {
			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
			idle_check_done = 1;
			goto retry;
		}
	}

	sde->last_status = status;
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

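/**
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupt sources: idle, progress,
 * and the generic source interrupt.
 */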
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	trace_hfi1_sdma_engine_interrupt(sde, status);
	write_seqlock(&sde->head_lock);
	sdma_set_desc_cnt(sde, sdma_desct_intr);
	if (status & sde->idle_mask)
		sde->idle_int_cnt++;
	else if (status & sde->progress_mask)
		sde->progress_int_cnt++;
	else if (status & sde->int_mask)
		sde->sdma_int_cnt++;
	sdma_make_progress(sde, status);
	write_sequnlock(&sde->head_lock);
}

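/**
 * sdma_engine_error() - error handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 */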
void sdma_engine_error(struct sdma_engine *sde, u64 status)
{
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
		   sde->this_idx,
		   (unsigned long long)status,
		   sdma_state_names[sde->state.current_state]);
#endif
	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);
	if (status & ALL_SDMA_ENG_HALT_ERRS)
		__sdma_process_event(sde, sdma_event_e60_hw_halted);
	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
		dd_dev_err(sde->dd,
			   "SDMA (%u) engine error: 0x%llx state %s\n",
			   sde->this_idx,
			   (unsigned long long)status,
			   sdma_state_names[sde->state.current_state]);
		dump_sdma_state(sde);
	}
	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
{
	u64 set_senddmactrl = 0;
	u64 clr_senddmactrl = 0;
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
		   sde->this_idx,
		   (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
#endif

	if (op & SDMA_SENDCTRL_OP_ENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_INTENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_HALT)
		set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);

	spin_lock_irqsave(&sde->senddmactrl_lock, flags);

	sde->p_senddmactrl |= set_senddmactrl;
	sde->p_senddmactrl &= ~clr_senddmactrl;

	if (op & SDMA_SENDCTRL_OP_CLEANUP)
		write_sde_csr(sde, SD(CTRL),
			      sde->p_senddmactrl |
			      SD(CTRL_SDMA_CLEANUP_SMASK));
	else
		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);

	spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);

#ifdef CONFIG_SDMA_VERBOSITY
	sdma_dumpstate(sde);
#endif
}

static void sdma_setlengen(struct sdma_engine *sde)
{
#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	write_sde_csr(sde, SD(LEN_GEN),
		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
	write_sde_csr(sde, SD(LEN_GEN),
		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}

static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	smp_wmb();
	writeq(tail, sde->tail_csr);
}

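/*
 * Reset the engine's length/generation, tail, and cached head, and
 * clear the header-request-FIFO error before (re)starting the engine.
 */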
static void sdma_hw_start_up(struct sdma_engine *sde)
{
	u64 reg;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	sdma_setlengen(sde);
	sdma_update_tail(sde, 0);
	*sde->head_dma = 0;

	reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
	      SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
}

static void set_sdma_integrity(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;

	write_sde_csr(sde, SD(CHECK_ENABLE),
		      hfi1_pkt_base_sdma_integrity(dd));
}

static void init_sdma_regs(
	struct sdma_engine *sde,
	u32 credits,
	uint idle_cnt)
{
	u8 opval, opmask;
#ifdef CONFIG_SDMA_VERBOSITY
	struct hfi1_devdata *dd = sde->dd;

	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
	sdma_setlengen(sde);
	sdma_update_tail(sde, 0);
	write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
	write_sde_csr(sde, SD(DESC_CNT), 0);
	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
	write_sde_csr(sde, SD(MEMORY),
		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
		      ((u64)(credits * sde->this_idx) <<
		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
	set_sdma_integrity(sde);
	opmask = OPCODE_CHECK_MASK_DISABLED;
	opval = OPCODE_CHECK_VAL_DISABLED;
	write_sde_csr(sde, SD(CHECK_OPCODE),
		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
}

#ifdef CONFIG_SDMA_VERBOSITY

#define sdma_dumpstate_helper0(reg) do { \
		csr = read_csr(sde->dd, reg); \
		dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
	} while (0)

#define sdma_dumpstate_helper(reg) do { \
		csr = read_sde_csr(sde, reg); \
		dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
			   #reg, sde->this_idx, csr); \
	} while (0)

#define sdma_dumpstate_helper2(reg) do { \
		csr = read_csr(sde->dd, reg + (8 * i)); \
		dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
			   #reg, i, csr); \
	} while (0)

void sdma_dumpstate(struct sdma_engine *sde)
{
	u64 csr;
	unsigned i;

	sdma_dumpstate_helper(SD(CTRL));
	sdma_dumpstate_helper(SD(STATUS));
	sdma_dumpstate_helper0(SD(ERR_STATUS));
	sdma_dumpstate_helper0(SD(ERR_MASK));
	sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
	sdma_dumpstate_helper(SD(ENG_ERR_MASK));

	for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
		sdma_dumpstate_helper2(CCE_INT_STATUS);
		sdma_dumpstate_helper2(CCE_INT_MASK);
		sdma_dumpstate_helper2(CCE_INT_BLOCKED);
	}

	sdma_dumpstate_helper(SD(TAIL));
	sdma_dumpstate_helper(SD(HEAD));
	sdma_dumpstate_helper(SD(PRIORITY_THLD));
	sdma_dumpstate_helper(SD(IDLE_CNT));
	sdma_dumpstate_helper(SD(RELOAD_CNT));
	sdma_dumpstate_helper(SD(DESC_CNT));
	sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
	sdma_dumpstate_helper(SD(MEMORY));
	sdma_dumpstate_helper0(SD(ENGINES));
	sdma_dumpstate_helper0(SD(MEM_SIZE));

	sdma_dumpstate_helper(SD(BASE_ADDR));
	sdma_dumpstate_helper(SD(LEN_GEN));
	sdma_dumpstate_helper(SD(HEAD_ADDR));
	sdma_dumpstate_helper(SD(CHECK_ENABLE));
	sdma_dumpstate_helper(SD(CHECK_VL));
	sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
	sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
	sdma_dumpstate_helper(SD(CHECK_SLID));
	sdma_dumpstate_helper(SD(CHECK_OPCODE));
}
#endif

static void dump_sdma_state(struct sdma_engine *sde)
{
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;
	u16 head, tail, cnt;

	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	cnt = sdma_descq_freecnt(sde);

	dd_dev_err(sde->dd,
		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
		   sde->this_idx, head, tail, cnt,
		   !list_empty(&sde->flushlist));

	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		dd_dev_err(sde->dd,
			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		dd_dev_err(sde->dd,
			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
			   desc[0], desc[1]);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			dd_dev_err(sde->dd,
				   "\taidx: %u amode: %u alen: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_DWS_SMASK) >>
					SDMA_DESC1_HEADER_DWS_SHIFT));
		head++;
		head &= sde->sdma_mask;
	}
}

#define SDE_FMT \
	"SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"

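/**
 * sdma_seqfile_dump_sde() - debugfs dump of sde
 * @s: seq file
 * @sde: send dma engine to dump
 *
 * This routine dumps the sde to the indicated seq file.
 */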
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
{
	u16 head, tail;
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;

	head = sde->descq_head & sde->sdma_mask;
	tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
	seq_printf(s, SDE_FMT, sde->this_idx,
		   sde->cpu,
		   sdma_state_name(sde->state.current_state),
		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
		   (unsigned long long)le64_to_cpu(*sde->head_dma),
		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
		   (unsigned long long)sde->last_status,
		   (unsigned long long)sde->ahg_bits,
		   sde->tx_tail,
		   sde->tx_head,
		   sde->descq_tail,
		   sde->descq_head,
		   !list_empty(&sde->flushlist),
		   sde->descq_full_count,
		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));

	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		seq_printf(s,
			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT));
		head = (head + 1) & sde->sdma_mask;
	}
}

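/*
 * insert the ring generation -- the 2 bits of descq_tail >> sdma_shift
 * -- into the generation field of qw1
 */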
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;

	qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
	qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
		<< SDMA_DESC1_GENERATION_SHIFT;
	return qw1;
}

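/*
 * This routine submits the indicated tx.
 *
 * Space has already been guaranteed and the tail side of the ring is
 * locked by the caller.  The hardware descriptors are copied into the
 * ring, the generation is stamped (AHG descriptors keep their own qw1),
 * and the tx is recorded at the tail of the tx_ring.
 */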
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	int i;
	u16 tail;
	struct sdma_desc *descp = tx->descp;
	u8 skip = 0, mode = ahg_mode(tx);

	tail = sde->descq_tail & sde->sdma_mask;
	sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
	sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
	trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
				   tail, &sde->descq[tail]);
	tail = ++sde->descq_tail & sde->sdma_mask;
	descp++;
	if (mode > SDMA_AHG_APPLY_UPDATE1)
		skip = mode >> 1;
	for (i = 1; i < tx->num_desc; i++, descp++) {
		u64 qw1;

		sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
		if (skip) {
			qw1 = descp->qw[1];
			skip--;
		} else {
			qw1 = add_gen(sde, descp->qw[1]);
		}
		sde->descq[tail].qw[1] = cpu_to_le64(qw1);
		trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
					   tail, &sde->descq[tail]);
		tail = ++sde->descq_tail & sde->sdma_mask;
	}
	tx->next_descq_idx = tail;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
	WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
#endif
	sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
	sde->desc_avail -= tx->num_desc;
	return tail;
}

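/*
 * Check for progress when the ring appears full: refresh desc_avail
 * and, if space is still short, let the caller's sleep callback queue
 * the request (or return -EBUSY when there is no callback).
 */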
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	int ret;

	sde->desc_avail = sdma_descq_freecnt(sde);
	if (tx->num_desc <= sde->desc_avail)
		return -EAGAIN;

	if (wait && wait->sleep) {
		unsigned seq;

		seq = raw_seqcount_begin(
			(const seqcount_t *)&sde->head_lock.seqcount);
		ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
		if (ret == -EAGAIN)
			sde->desc_avail = sdma_descq_freecnt(sde);
	} else {
		ret = -EBUSY;
	}
	return ret;
}

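/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring.  If an iowait structure is
 * non-NULL the packet will be queued to the list in wait.
 *
 * Return:
 * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in
 * ring (wait == NULL), -EIOCBQUEUED - tx queued to iowait,
 * -ECOMM - bad sdma state
 */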
int sdma_send_txreq(struct sdma_engine *sde,
		    struct iowait *wait,
		    struct sdma_txreq *tx,
		    bool pkts_sent)
{
	int ret = 0;
	u16 tail;
	unsigned long flags;

	/* user should have supplied entire packet */
	if (unlikely(tx->tlen))
		return -EINVAL;
	tx->wait = wait;
	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	if (unlikely(!__sdma_running(sde)))
		goto unlock_noconn;
	if (unlikely(tx->num_desc > sde->desc_avail))
		goto nodesc;
	tail = submit_tx(sde, tx);
	if (wait)
		iowait_sdma_inc(wait);
	sdma_update_tail(sde, tail);
unlock:
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	return ret;
unlock_noconn:
	if (wait)
		iowait_sdma_inc(wait);
	tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
	spin_lock(&sde->flushlist_lock);
	list_add_tail(&tx->list, &sde->flushlist);
	spin_unlock(&sde->flushlist_lock);
	if (wait) {
		wait->tx_count++;
		wait->count += tx->num_desc;
	}
	schedule_work(&sde->flush_worker);
	ret = -ECOMM;
	goto unlock;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, pkts_sent);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto unlock;
}
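
/**
 * sdma_send_txlist() - submit a list of tx reqs to the ring
 * @sde: sdma engine to use
 * @wait: wait structure to use when full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: set on return to the number of sdma_txreqs removed from
 *             tx_list, counting both the requests submitted to the
 *             ring and the requests moved to the flush list
 *
 * The call submits the list into the ring, updating the hardware tail
 * periodically while walking the list (gated by
 * SDMA_TAIL_UPDATE_THRESH) and once more on exit.  On error, the head
 * of tx_list is the work that has yet to be submitted.
 *
 * Return:
 * 0 - success
 * -EINVAL - an sdma_txreq in the list was incomplete
 * -EBUSY - no space in the ring and no wait structure was supplied
 * -ECOMM - engine not in the running state; remaining requests were
 *          queued for flushing
 */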
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
		     struct list_head *tx_list, u32 *count_out)
{
	struct sdma_txreq *tx, *tx_next;
	int ret = 0;
	unsigned long flags;
	u16 tail = INVALID_TAIL;
	u32 submit_count = 0, flush_count = 0, total_count;

	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = wait;
		if (unlikely(!__sdma_running(sde)))
			goto unlock_noconn;
		if (unlikely(tx->num_desc > sde->desc_avail))
			goto nodesc;
		if (unlikely(tx->tlen)) {
			ret = -EINVAL;
			goto update_tail;
		}
		list_del_init(&tx->list);
		tail = submit_tx(sde, tx);
		submit_count++;
		if (tail != INVALID_TAIL &&
		    (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
			sdma_update_tail(sde, tail);
			tail = INVALID_TAIL;
		}
	}
update_tail:
	total_count = submit_count + flush_count;
	if (wait) {
		iowait_sdma_add(wait, total_count);
		iowait_starve_clear(submit_count > 0, wait);
	}
	if (tail != INVALID_TAIL)
		sdma_update_tail(sde, tail);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	*count_out = total_count;
	return ret;
unlock_noconn:
	spin_lock(&sde->flushlist_lock);
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = wait;
		list_del_init(&tx->list);
		tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
		tx->sn = sde->tail_sn++;
		trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
		list_add_tail(&tx->list, &sde->flushlist);
		flush_count++;
		if (wait) {
			wait->tx_count++;
			wait->count += tx->num_desc;
		}
	}
	spin_unlock(&sde->flushlist_lock);
	schedule_work(&sde->flush_worker);
	ret = -ECOMM;
	goto update_tail;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto update_tail;
}

static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
{
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	__sdma_process_event(sde, event);

	if (sde->state.current_state == sdma_state_s99_running)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void __sdma_process_event(struct sdma_engine *sde,
				 enum sdma_events event)
{
	struct sdma_state *ss = &sde->state;
	int need_progress = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
		   sdma_state_names[ss->current_state],
		   sdma_event_names[event]);
#endif

	switch (ss->current_state) {
	case sdma_state_s00_hw_down:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			break;
		case sdma_event_e30_go_running:
			/*
			 * If down, but running is requested (usually the
			 * result of a link coming up), the engine must be
			 * started, so fall through to the start-up event.
			 */
			ss->go_s99_running = 1;
			/* fall through -- and start dma engine */
		case sdma_event_e10_go_hw_start:
			/* This reference means the state machine is started */
			sdma_get(&sde->state);
			sdma_set_state(sde,
				       sdma_state_s10_hw_start_up_halt_wait);
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s10_hw_start_up_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde,
				       sdma_state_s15_hw_start_up_clean_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s15_hw_start_up_clean_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s20_idle:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			sdma_set_state(sde, sdma_state_s99_running);
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e85_link_down:
			/* fall through */
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s30_sw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s40_hw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s50_hw_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s60_idle_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s80_hw_freeze:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s82_freeze_sw_clean:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			/* notify caller this engine is done cleaning */
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s99_running:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			need_progress = 1;
			sdma_err_progress_check_schedule(sde);
			/* fall through */
		case sdma_event_e90_sw_halted:
			/*
			 * SW initiated halt does not perform engines
			 * progress check
			 */
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			/* fall through */
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		}
		break;
	}

	ss->last_event = event;
	if (need_progress)
		sdma_make_progress(sde, 0);
}
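
/*
 * _extend_sdma_tx_descs() - helper to extend txreq
 *
 * Called once the initial nominal allocation of descriptors in the
 * sdma_txreq is exhausted.  The allocation is bumped straight up to
 * MAX_DESC, with the last descriptor reserved for the coalesce buffer
 * so that packets with more iovecs than descriptors can still be sent.
 */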
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int i;

	/* Handle last descriptor */
	if (unlikely(tx->num_desc == (MAX_DESC - 1))) {
		/* if tlen is 0, it is for padding, release last descriptor */
		if (!tx->tlen) {
			tx->desc_limit = MAX_DESC;
		} else if (!tx->coalesce_buf) {
			/* allocate coalesce buffer with space for padding */
			tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
						   GFP_ATOMIC);
			if (!tx->coalesce_buf)
				goto enomem;
			tx->coalesce_idx = 0;
		}
		return 0;
	}

	if (unlikely(tx->num_desc == MAX_DESC))
		goto enomem;

	tx->descp = kmalloc_array(
			MAX_DESC,
			sizeof(struct sdma_desc),
			GFP_ATOMIC);
	if (!tx->descp)
		goto enomem;

	/* reserve last descriptor for coalescing */
	tx->desc_limit = MAX_DESC - 1;
	/* copy over the descriptors already built */
	for (i = 0; i < tx->num_desc; i++)
		tx->descp[i] = tx->descs[i];
	return 0;
enomem:
	__sdma_txclean(dd, tx);
	return -ENOMEM;
}
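
/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * Called once the initial nominal allocation of descriptors in the
 * sdma_txreq is exhausted.  _extend_sdma_tx_descs() either extends the
 * descriptor array or allocates a coalesce buffer.  When a coalesce
 * buffer exists, each fragment is copied into it, and a single mapped
 * descriptor is added once the whole packet has been gathered.
 *
 * Return:
 * <0 - error
 * 0 - coalescing, don't populate the descriptor
 * 1 - continue with populating the descriptor
 */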
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len)
{
	int pad_len, rval;
	dma_addr_t addr;

	rval = _extend_sdma_tx_descs(dd, tx);
	if (rval) {
		__sdma_txclean(dd, tx);
		return rval;
	}

	/* If coalesce buffer is allocated, copy data into it */
	if (tx->coalesce_buf) {
		if (type == SDMA_MAP_NONE) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		if (type == SDMA_MAP_PAGE) {
			kvaddr = kmap(page);
			kvaddr += offset;
		} else if (WARN_ON(!kvaddr)) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
		tx->coalesce_idx += len;
		if (type == SDMA_MAP_PAGE)
			kunmap(page);

		/* If there is more data, return */
		if (tx->tlen - tx->coalesce_idx)
			return 0;

		/* Whole packet is received; add any padding */
		pad_len = tx->packet_len & (sizeof(u32) - 1);
		if (pad_len) {
			pad_len = sizeof(u32) - pad_len;
			memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
			/* padding is taken care of for coalescing case */
			tx->packet_len += pad_len;
			tx->tlen += pad_len;
		}

		/* dma map the coalesce buffer */
		addr = dma_map_single(&dd->pcidev->dev,
				      tx->coalesce_buf,
				      tx->tlen,
				      DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
			__sdma_txclean(dd, tx);
			return -ENOSPC;
		}

		/* Add descriptor for coalesce buffer */
		tx->desc_limit = MAX_DESC;
		return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
					 addr, tx->tlen);
	}

	return 1;
}
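
/* Update the SLID check CSR on all engines when the LMC or LID changes */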
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
{
	struct sdma_engine *sde;
	int i;
	u64 sreg;

	sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
		SD(CHECK_SLID_MASK_SHIFT)) |
	       (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
		SD(CHECK_SLID_VALUE_SHIFT));

	for (i = 0; i < dd->num_sdma; i++) {
		hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
			  i, (u32)sreg);
		sde = &dd->per_sdma[i];
		write_sde_csr(sde, SD(CHECK_SLID), sreg);
	}
}
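
/*
 * The tx is not dword sized: add a descriptor that pads the packet out
 * to a dword multiple, then close the tx.
 */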
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int rval = 0;

	tx->num_desc++;
	if (unlikely(tx->num_desc == tx->desc_limit)) {
		rval = _extend_sdma_tx_descs(dd, tx);
		if (rval) {
			__sdma_txclean(dd, tx);
			return rval;
		}
	}
	/* finish the padding descriptor just added */
	make_tx_sdma_desc(
		tx,
		SDMA_MAP_NONE,
		dd->sdma_pad_phys,
		sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
	_sdma_close_tx(dd, tx);
	return rval;
}
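
/*
 * Add AHG (automatic header generation) updates to the sdma_txreq.
 *
 * The logic consumes up to three descriptors at the beginning of the
 * sdma_txreq: the first carries the AHG index, mode, and first update
 * word, and up to two more carry the remaining 32-bit update words.
 */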
void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen)
{
	u32 i, shift = 0, desc = 0;
	u8 mode;

	WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
	/* compute mode */
	if (num_ahg == 1)
		mode = SDMA_AHG_APPLY_UPDATE1;
	else if (num_ahg <= 5)
		mode = SDMA_AHG_APPLY_UPDATE2;
	else
		mode = SDMA_AHG_APPLY_UPDATE3;
	tx->num_desc++;
	/* zero the consumed descriptors */
	switch (mode) {
	case SDMA_AHG_APPLY_UPDATE3:
		tx->num_desc++;
		tx->descs[2].qw[0] = 0;
		tx->descs[2].qw[1] = 0;
		/* fall through */
	case SDMA_AHG_APPLY_UPDATE2:
		tx->num_desc++;
		tx->descs[1].qw[0] = 0;
		tx->descs[1].qw[1] = 0;
		break;
	}
	ahg_hlen >>= 2;
	tx->descs[0].qw[1] |=
		(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
			<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
		(((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
			<< SDMA_DESC1_HEADER_DWS_SHIFT) |
		(((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
			<< SDMA_DESC1_HEADER_MODE_SHIFT) |
		(((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
			<< SDMA_DESC1_HEADER_UPDATE1_SHIFT);
	for (i = 0; i < (num_ahg - 1); i++) {
		if (!shift && !(i & 2))
			desc++;
		tx->descs[desc].qw[!!(i & 2)] |=
			(((u64)ahg[i + 1])
				<< shift);
		shift = (shift + 32) & 63;
	}
}
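
/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return: 0-31 when successful, -EINVAL for an invalid engine,
 * -ENOSPC if no entry is available
 */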
int sdma_ahg_alloc(struct sdma_engine *sde)
{
	int nr;
	int oldbit;

	if (!sde) {
		trace_hfi1_ahg_allocate(sde, -EINVAL);
		return -EINVAL;
	}
	while (1) {
		nr = ffz(READ_ONCE(sde->ahg_bits));
		if (nr > 31) {
			trace_hfi1_ahg_allocate(sde, -ENOSPC);
			return -ENOSPC;
		}
		oldbit = test_and_set_bit(nr, &sde->ahg_bits);
		if (!oldbit)
			break;
		cpu_relax();
	}
	trace_hfi1_ahg_allocate(sde, nr);
	return nr;
}
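
/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine to return AHG entry
 * @ahg_index: index to free
 *
 * This routine frees the indicated AHG entry.
 */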
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
{
	if (!sde)
		return;
	trace_hfi1_ahg_deallocate(sde, ahg_index);
	if (ahg_index < 0 || ahg_index > 31)
		return;
	clear_bit(ahg_index, &sde->ahg_bits);
}
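
/*
 * SPC freeze handling for SDMA engines.  Called when the driver knows
 * the SPC is going into a freeze but before the freeze is fully
 * settled.  Generally an error interrupt.
 *
 * This event will pull the engine out of running so no more entries
 * can be added to the engine's queue.
 */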
void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
{
	int i;
	enum sdma_events event = link_down ? sdma_event_e85_link_down :
					     sdma_event_e80_hw_freeze;

	/* set up the wait but do not wait here */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines that the SPC is going into a freeze */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], event);

	/* sdma_freeze() will wait for all engines to have stopped */
}
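
/*
 * SPC freeze handling for SDMA engines.  Called after the SPC is fully
 * frozen.  Waits for all engines to have stopped, then walks them
 * through the software clean so the engine CSRs can safely be cleared.
 */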
void sdma_freeze(struct hfi1_devdata *dd)
{
	int i;
	int ret;

	/*
	 * Make sure all engines have moved out of the running state before
	 * continuing.
	 */
	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <=
				       0);
	/* interrupted or count is negative, then unloading - just exit */
	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
		return;

	/* set up the count for the next wait */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines that the SPC is frozen, they can start cleaning */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);

	/*
	 * Wait for everyone to finish the software clean before exiting.  The
	 * software clean reads engine CSRs, so it must complete before the
	 * next step, which will clear the engine CSRs.
	 */
	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
				atomic_read(&dd->sdma_unfreeze_count) <= 0);
	/* no need to check the result - done no matter what happened */
}
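
/*
 * SPC unfreeze handling for SDMA engines.  Called after the SPC is
 * unfrozen so the engines can be restarted.
 */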
void sdma_unfreeze(struct hfi1_devdata *dd)
{
	int i;

	/* tell all engines to start the freeze clean up */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i],
				   sdma_event_e82_hw_unfreeze);
}
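
/**
 * _sdma_engine_progress_schedule() - schedule progress on engine
 * @sde: sdma engine to schedule progress
 *
 * Forces the engine's progress interrupt by writing the engine's bit
 * into the CCE interrupt force CSR.
 */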
void _sdma_engine_progress_schedule(
	struct sdma_engine *sde)
{
	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
	/* assume we have selected a good cpu */
	write_csr(sde->dd,
		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
		  sde->progress_mask);
}
