#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"


/* must be a power of 2 between 64 and 32768 (see sdma_get_descq_cnt()) */
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");

static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number of SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before interrupt");

/* max number of iowait entries woken per sdma_desc_avail() pass */
#define SDMA_WAIT_BATCH_SIZE 20

/* time, in ms, to wait for an errored engine to report halted status */
#define SDMA_ERR_HALT_TIMEOUT 10


#define SD(name) SEND_DMA_##name
91#define ALL_SDMA_ENG_HALT_ERRS \
92 (SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
93 | SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
94 | SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
95 | SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
96 | SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
97 | SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
98 | SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
99 | SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
100 | SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
101 | SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
102 | SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
103 | SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
104 | SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
105 | SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
106 | SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
107 | SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
108 | SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
109 | SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
110
111
112#define SDMA_SENDCTRL_OP_ENABLE BIT(0)
113#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
114#define SDMA_SENDCTRL_OP_HALT BIT(2)
115#define SDMA_SENDCTRL_OP_CLEANUP BIT(3)
116
117
118#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
119SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
120#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
121SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT
122
123static const char * const sdma_state_names[] = {
124 [sdma_state_s00_hw_down] = "s00_HwDown",
125 [sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
126 [sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
127 [sdma_state_s20_idle] = "s20_Idle",
128 [sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
129 [sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
130 [sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
131 [sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
132 [sdma_state_s80_hw_freeze] = "s80_HwFreeze",
133 [sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
134 [sdma_state_s99_running] = "s99_Running",
135};
136
137#ifdef CONFIG_SDMA_VERBOSITY
138static const char * const sdma_event_names[] = {
139 [sdma_event_e00_go_hw_down] = "e00_GoHwDown",
140 [sdma_event_e10_go_hw_start] = "e10_GoHwStart",
141 [sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
142 [sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
143 [sdma_event_e30_go_running] = "e30_GoRunning",
144 [sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
145 [sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
146 [sdma_event_e60_hw_halted] = "e60_HwHalted",
147 [sdma_event_e70_go_idle] = "e70_GoIdle",
148 [sdma_event_e80_hw_freeze] = "e80_HwFreeze",
149 [sdma_event_e81_hw_frozen] = "e81_HwFrozen",
150 [sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
151 [sdma_event_e85_link_down] = "e85_LinkDown",
152 [sdma_event_e90_sw_halted] = "e90_SwHalted",
153};
154#endif
155
156static const struct sdma_set_state_action sdma_action_table[] = {
157 [sdma_state_s00_hw_down] = {
158 .go_s99_running_tofalse = 1,
159 .op_enable = 0,
160 .op_intenable = 0,
161 .op_halt = 0,
162 .op_cleanup = 0,
163 },
164 [sdma_state_s10_hw_start_up_halt_wait] = {
165 .op_enable = 0,
166 .op_intenable = 0,
167 .op_halt = 1,
168 .op_cleanup = 0,
169 },
170 [sdma_state_s15_hw_start_up_clean_wait] = {
171 .op_enable = 0,
172 .op_intenable = 1,
173 .op_halt = 0,
174 .op_cleanup = 1,
175 },
176 [sdma_state_s20_idle] = {
177 .op_enable = 0,
178 .op_intenable = 1,
179 .op_halt = 0,
180 .op_cleanup = 0,
181 },
182 [sdma_state_s30_sw_clean_up_wait] = {
183 .op_enable = 0,
184 .op_intenable = 0,
185 .op_halt = 0,
186 .op_cleanup = 0,
187 },
188 [sdma_state_s40_hw_clean_up_wait] = {
189 .op_enable = 0,
190 .op_intenable = 0,
191 .op_halt = 0,
192 .op_cleanup = 1,
193 },
194 [sdma_state_s50_hw_halt_wait] = {
195 .op_enable = 0,
196 .op_intenable = 0,
197 .op_halt = 0,
198 .op_cleanup = 0,
199 },
200 [sdma_state_s60_idle_halt_wait] = {
201 .go_s99_running_tofalse = 1,
202 .op_enable = 0,
203 .op_intenable = 0,
204 .op_halt = 1,
205 .op_cleanup = 0,
206 },
207 [sdma_state_s80_hw_freeze] = {
208 .op_enable = 0,
209 .op_intenable = 0,
210 .op_halt = 0,
211 .op_cleanup = 0,
212 },
213 [sdma_state_s82_freeze_sw_clean] = {
214 .op_enable = 0,
215 .op_intenable = 0,
216 .op_halt = 0,
217 .op_cleanup = 0,
218 },
219 [sdma_state_s99_running] = {
220 .op_enable = 1,
221 .op_intenable = 1,
222 .op_halt = 0,
223 .op_cleanup = 0,
224 .go_s99_running_totrue = 1,
225 },
226};
227
228#define SDMA_TAIL_UPDATE_THRESH 0x1F
229
230
231static void sdma_complete(struct kref *);
232static void sdma_finalput(struct sdma_state *);
233static void sdma_get(struct sdma_state *);
234static void sdma_hw_clean_up_task(unsigned long);
235static void sdma_put(struct sdma_state *);
236static void sdma_set_state(struct sdma_engine *, enum sdma_states);
237static void sdma_start_hw_clean_up(struct sdma_engine *);
238static void sdma_sw_clean_up_task(unsigned long);
239static void sdma_sendctrl(struct sdma_engine *, unsigned);
240static void init_sdma_regs(struct sdma_engine *, u32, uint);
241static void sdma_process_event(
242 struct sdma_engine *sde,
243 enum sdma_events event);
244static void __sdma_process_event(
245 struct sdma_engine *sde,
246 enum sdma_events event);
247static void dump_sdma_state(struct sdma_engine *sde);
248static void sdma_make_progress(struct sdma_engine *sde, u64 status);
249static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
250static void sdma_flush_descq(struct sdma_engine *sde);
251
252
253
254
255
256static const char *sdma_state_name(enum sdma_states state)
257{
258 return sdma_state_names[state];
259}
260
261static void sdma_get(struct sdma_state *ss)
262{
263 kref_get(&ss->kref);
264}
265
266static void sdma_complete(struct kref *kref)
267{
268 struct sdma_state *ss =
269 container_of(kref, struct sdma_state, kref);
270
271 complete(&ss->comp);
272}
273
274static void sdma_put(struct sdma_state *ss)
275{
276 kref_put(&ss->kref, sdma_complete);
277}
278
279static void sdma_finalput(struct sdma_state *ss)
280{
281 sdma_put(ss);
282 wait_for_completion(&ss->comp);
283}
284
285static inline void write_sde_csr(
286 struct sdma_engine *sde,
287 u32 offset0,
288 u64 value)
289{
290 write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
291}
292
293static inline u64 read_sde_csr(
294 struct sdma_engine *sde,
295 u32 offset0)
296{
297 return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
298}
299
300
301
302
303
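/*
 * Spin until the egress packet occupancy for this engine drains to zero.
 * The occupancy is re-read from the SEND_EGRESS_SEND_DMA_STATUS CSR each
 * iteration; if it stops changing for roughly 500 polls (~500 us) the
 * port's link_bounce_work is queued and the wait is abandoned.
 */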
304static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
305 int pause)
306{
307 u64 off = 8 * sde->this_idx;
308 struct hfi1_devdata *dd = sde->dd;
309 int lcnt = 0;
310 u64 reg_prev;
311 u64 reg = 0;
312
313 while (1) {
314 reg_prev = reg;
315 reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);
316
317 reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
318 reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
319 if (reg == 0)
320 break;
321
322 if (reg != reg_prev)
323 lcnt = 0;
324 if (lcnt++ > 500) {
325
326 dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
327 __func__, sde->this_idx, (u32)reg);
328 queue_work(dd->pport->link_wq,
329 &dd->pport->link_bounce_work);
330 break;
331 }
332 udelay(1);
333 }
334}
335
336
337
338
339
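/*
 * sdma_wait() - wait for packet egress to finish on all SDMA engines
 * @dd: the hfi1_devdata
 */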
340void sdma_wait(struct hfi1_devdata *dd)
341{
342 int i;
343
344 for (i = 0; i < dd->num_sdma; i++) {
345 struct sdma_engine *sde = &dd->per_sdma[i];
346
347 sdma_wait_for_packet_egress(sde, 0);
348 }
349}
350
351static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
352{
353 u64 reg;
354
355 if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
356 return;
357 reg = cnt;
358 reg &= SD(DESC_CNT_CNT_MASK);
359 reg <<= SD(DESC_CNT_CNT_SHIFT);
360 write_sde_csr(sde, SD(DESC_CNT), reg);
361}
362
363static inline void complete_tx(struct sdma_engine *sde,
364 struct sdma_txreq *tx,
365 int res)
366{
367
368 struct iowait *wait = tx->wait;
369 callback_t complete = tx->complete;
370
371#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
372 trace_hfi1_sdma_out_sn(sde, tx->sn);
373 if (WARN_ON_ONCE(sde->head_sn != tx->sn))
374 dd_dev_err(sde->dd, "expected %llu got %llu\n",
375 sde->head_sn, tx->sn);
376 sde->head_sn++;
377#endif
378 __sdma_txclean(sde->dd, tx);
379 if (complete)
380 (*complete)(tx, res);
381 if (wait && iowait_sdma_dec(wait))
382 iowait_drain_wakeup(wait);
383}
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
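/*
 * Flush all outstanding work on an engine that is not making forward
 * progress: retire anything still in the descriptor queue, then complete
 * every request parked on the flushlist with SDMA_TXREQ_S_ABORTED.
 */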
403static void sdma_flush(struct sdma_engine *sde)
404{
405 struct sdma_txreq *txp, *txp_next;
406 LIST_HEAD(flushlist);
407 unsigned long flags;
408
409
410 sdma_flush_descq(sde);
411 spin_lock_irqsave(&sde->flushlist_lock, flags);
412
413 list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
414 list_del_init(&txp->list);
415 list_add_tail(&txp->list, &flushlist);
416 }
417 spin_unlock_irqrestore(&sde->flushlist_lock, flags);
418
419 list_for_each_entry_safe(txp, txp_next, &flushlist, list)
420 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
421}
422
423
424
425
426
427
428
429
430
431
432
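/* flush_worker body: flush the engine if it is (still) not running */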
433static void sdma_field_flush(struct work_struct *work)
434{
435 unsigned long flags;
436 struct sdma_engine *sde =
437 container_of(work, struct sdma_engine, flush_worker);
438
439 write_seqlock_irqsave(&sde->head_lock, flags);
440 if (!__sdma_running(sde))
441 sdma_flush(sde);
442 write_sequnlock_irqrestore(&sde->head_lock, flags);
443}
444
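/*
 * err_halt_worker body: poll the engine STATUS CSR for the halted bit for
 * up to SDMA_ERR_HALT_TIMEOUT ms, then feed e15_hw_halt_done into the
 * state machine whether or not the halt was actually observed.
 */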
445static void sdma_err_halt_wait(struct work_struct *work)
446{
447 struct sdma_engine *sde = container_of(work, struct sdma_engine,
448 err_halt_worker);
449 u64 statuscsr;
450 unsigned long timeout;
451
452 timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
453 while (1) {
454 statuscsr = read_sde_csr(sde, SD(STATUS));
455 statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
456 if (statuscsr)
457 break;
458 if (time_after(jiffies, timeout)) {
459 dd_dev_err(sde->dd,
460 "SDMA engine %d - timeout waiting for engine to halt\n",
461 sde->this_idx);
462
463
464
465
466 break;
467 }
468 usleep_range(80, 120);
469 }
470
471 sdma_process_event(sde, sdma_event_e15_hw_halt_done);
472}
473
474static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
475{
476 if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
477 unsigned index;
478 struct hfi1_devdata *dd = sde->dd;
479
480 for (index = 0; index < dd->num_sdma; index++) {
481 struct sdma_engine *curr_sdma = &dd->per_sdma[index];
482
483 if (curr_sdma != sde)
484 curr_sdma->progress_check_head =
485 curr_sdma->descq_head;
486 }
487 dd_dev_err(sde->dd,
488 "SDMA engine %d - check scheduled\n",
489 sde->this_idx);
490 mod_timer(&sde->err_progress_check_timer, jiffies + 10);
491 }
492}
493
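/*
 * Timer callback armed by sdma_err_progress_check_schedule(): for every
 * other running engine, if its descriptor queue is non-empty and the head
 * has not moved since the check was scheduled, software-halt it with
 * e90_sw_halted.  Finally kick this engine's err_halt_worker.
 */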
494static void sdma_err_progress_check(struct timer_list *t)
495{
496 unsigned index;
497 struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);
498
499 dd_dev_err(sde->dd, "SDE progress check event\n");
500 for (index = 0; index < sde->dd->num_sdma; index++) {
501 struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
502 unsigned long flags;
503
504
505 if (curr_sde == sde)
506 continue;
507
508
509
510
511
512 spin_lock_irqsave(&curr_sde->tail_lock, flags);
513 write_seqlock(&curr_sde->head_lock);
514
515
516 if (curr_sde->state.current_state != sdma_state_s99_running) {
517 write_sequnlock(&curr_sde->head_lock);
518 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
519 continue;
520 }
521
522 if ((curr_sde->descq_head != curr_sde->descq_tail) &&
523 (curr_sde->descq_head ==
524 curr_sde->progress_check_head))
525 __sdma_process_event(curr_sde,
526 sdma_event_e90_sw_halted);
527 write_sequnlock(&curr_sde->head_lock);
528 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
529 }
530 schedule_work(&sde->err_halt_worker);
531}
532
533static void sdma_hw_clean_up_task(unsigned long opaque)
534{
535 struct sdma_engine *sde = (struct sdma_engine *)opaque;
536 u64 statuscsr;
537
538 while (1) {
539#ifdef CONFIG_SDMA_VERBOSITY
540 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
541 sde->this_idx, slashstrip(__FILE__), __LINE__,
542 __func__);
543#endif
544 statuscsr = read_sde_csr(sde, SD(STATUS));
545 statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
546 if (statuscsr)
547 break;
548 udelay(10);
549 }
550
551 sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
552}
553
554static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
555{
556 return sde->tx_ring[sde->tx_head & sde->sdma_mask];
557}
558
559
560
561
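/*
 * Walk the descriptor queue from head to tail, completing each txreq whose
 * descriptors are passed over with SDMA_TXREQ_S_ABORTED, then report the
 * freed descriptors via sdma_desc_avail().
 */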
562static void sdma_flush_descq(struct sdma_engine *sde)
563{
564 u16 head, tail;
565 int progress = 0;
566 struct sdma_txreq *txp = get_txhead(sde);
567
568
569
570
571
572
573 head = sde->descq_head & sde->sdma_mask;
574 tail = sde->descq_tail & sde->sdma_mask;
575 while (head != tail) {
576
577 head = ++sde->descq_head & sde->sdma_mask;
578
579 if (txp && txp->next_descq_idx == head) {
580
581 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
582 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
583 trace_hfi1_sdma_progress(sde, head, tail, txp);
584 txp = get_txhead(sde);
585 }
586 progress++;
587 }
588 if (progress)
589 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
590}
591
592static void sdma_sw_clean_up_task(unsigned long opaque)
593{
594 struct sdma_engine *sde = (struct sdma_engine *)opaque;
595 unsigned long flags;
596
597 spin_lock_irqsave(&sde->tail_lock, flags);
598 write_seqlock(&sde->head_lock);
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
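	/*
	 * The engine is halted at this point, so first reap whatever the
	 * hardware already completed, then abort everything left in the
	 * ring and on the flushlist, and finally reset the ring state
	 * (head, tail, head_dma) to a clean, empty queue.
	 */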
619 sdma_make_progress(sde, 0);
620
621 sdma_flush(sde);
622
623
624
625
626
627
628 sde->descq_tail = 0;
629 sde->descq_head = 0;
630 sde->desc_avail = sdma_descq_freecnt(sde);
631 *sde->head_dma = 0;
632
633 __sdma_process_event(sde, sdma_event_e40_sw_cleaned);
634
635 write_sequnlock(&sde->head_lock);
636 spin_unlock_irqrestore(&sde->tail_lock, flags);
637}
638
639static void sdma_sw_tear_down(struct sdma_engine *sde)
640{
641 struct sdma_state *ss = &sde->state;
642
643
644 sdma_put(ss);
645
646
647 atomic_set(&sde->dd->sdma_unfreeze_count, -1);
648 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
649}
650
651static void sdma_start_hw_clean_up(struct sdma_engine *sde)
652{
653 tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
654}
655
656static void sdma_set_state(struct sdma_engine *sde,
657 enum sdma_states next_state)
658{
659 struct sdma_state *ss = &sde->state;
660 const struct sdma_set_state_action *action = sdma_action_table;
661 unsigned op = 0;
662
663 trace_hfi1_sdma_state(
664 sde,
665 sdma_state_names[ss->current_state],
666 sdma_state_names[next_state]);
667
668
669 ss->previous_state = ss->current_state;
670 ss->previous_op = ss->current_op;
671 ss->current_state = next_state;
672
673 if (ss->previous_state != sdma_state_s99_running &&
674 next_state == sdma_state_s99_running)
675 sdma_flush(sde);
676
677 if (action[next_state].op_enable)
678 op |= SDMA_SENDCTRL_OP_ENABLE;
679
680 if (action[next_state].op_intenable)
681 op |= SDMA_SENDCTRL_OP_INTENABLE;
682
683 if (action[next_state].op_halt)
684 op |= SDMA_SENDCTRL_OP_HALT;
685
686 if (action[next_state].op_cleanup)
687 op |= SDMA_SENDCTRL_OP_CLEANUP;
688
689 if (action[next_state].go_s99_running_tofalse)
690 ss->go_s99_running = 0;
691
692 if (action[next_state].go_s99_running_totrue)
693 ss->go_s99_running = 1;
694
695 ss->current_op = op;
696 sdma_sendctrl(sde, ss->current_op);
697}
698
699
700
701
702
703
704
705
706
707
708
709
710
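/*
 * sdma_get_descq_cnt() - return the validated descriptor queue size
 *
 * Returns the sdma_descq_cnt module parameter if it is a power of two in
 * the range [64, 32768]; otherwise falls back to SDMA_DESCQ_CNT.
 */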
711u16 sdma_get_descq_cnt(void)
712{
713 u16 count = sdma_descq_cnt;
714
715 if (!count)
716 return SDMA_DESCQ_CNT;
717
718
719
720 if (!is_power_of_2(count))
721 return SDMA_DESCQ_CNT;
722 if (count < 64 || count > 32768)
723 return SDMA_DESCQ_CNT;
724 return count;
725}
726
727
728
729
730
731
732
733
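/*
 * sdma_engine_get_vl() - return the VL an SDMA engine is mapped to
 * @sde: the engine
 *
 * Returns the VL from the current RCU-protected sdma_map, or -EINVAL if
 * the engine index is out of range or no map is installed.
 */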
734int sdma_engine_get_vl(struct sdma_engine *sde)
735{
736 struct hfi1_devdata *dd = sde->dd;
737 struct sdma_vl_map *m;
738 u8 vl;
739
740 if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
741 return -EINVAL;
742
743 rcu_read_lock();
744 m = rcu_dereference(dd->sdma_map);
745 if (unlikely(!m)) {
746 rcu_read_unlock();
747 return -EINVAL;
748 }
749 vl = m->engine_to_vl[sde->this_idx];
750 rcu_read_unlock();
751
752 return vl;
753}
754
755
756
757
758
759
760
761
762
763
764
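/*
 * sdma_select_engine_vl() - select an SDMA engine for a VL
 * @dd: devdata
 * @selector: opaque value used to spread traffic across the VL's engines
 * @vl: the vl
 *
 * Looks up the per-VL engine group in the RCU-protected map and indexes it
 * with the selector; falls back to engine 0 if the VL is out of range or
 * no map exists.
 */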
765struct sdma_engine *sdma_select_engine_vl(
766 struct hfi1_devdata *dd,
767 u32 selector,
768 u8 vl)
769{
770 struct sdma_vl_map *m;
771 struct sdma_map_elem *e;
772 struct sdma_engine *rval;
773
774
775
776
777
778 if (vl >= num_vls) {
779 rval = NULL;
780 goto done;
781 }
782
783 rcu_read_lock();
784 m = rcu_dereference(dd->sdma_map);
785 if (unlikely(!m)) {
786 rcu_read_unlock();
787 return &dd->per_sdma[0];
788 }
789 e = m->map[vl & m->mask];
790 rval = e->sde[selector & e->mask];
791 rcu_read_unlock();
792
793done:
794 rval = !rval ? &dd->per_sdma[0] : rval;
795 trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
796 return rval;
797}
798
799
800
801
802
803
804
805
806
807
808struct sdma_engine *sdma_select_engine_sc(
809 struct hfi1_devdata *dd,
810 u32 selector,
811 u8 sc5)
812{
813 u8 vl = sc_to_vlt(dd, sc5);
814
815 return sdma_select_engine_vl(dd, selector, vl);
816}
817
818struct sdma_rht_map_elem {
819 u32 mask;
820 u8 ctr;
821 struct sdma_engine *sde[0];
822};
823
824struct sdma_rht_node {
825 unsigned long cpu_id;
826 struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
827 struct rhash_head node;
828};
829
830#define NR_CPUS_HINT 192
831
832static const struct rhashtable_params sdma_rht_params = {
833 .nelem_hint = NR_CPUS_HINT,
834 .head_offset = offsetof(struct sdma_rht_node, node),
835 .key_offset = offsetof(struct sdma_rht_node, cpu_id),
836 .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
837 .max_size = NR_CPUS,
838 .min_size = 8,
839 .automatic_shrinking = true,
840};
841
842
843
844
845
846
847
848
849
850
851
852
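/*
 * sdma_select_user_engine() - select an engine for a user SDMA request
 *
 * If the caller is pinned to a single CPU and a cpu-to-engine mapping has
 * been configured for that CPU and VL (see sdma_set_cpu_to_sde_map()), use
 * it; otherwise fall back to sdma_select_engine_vl().
 */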
853struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
854 u32 selector, u8 vl)
855{
856 struct sdma_rht_node *rht_node;
857 struct sdma_engine *sde = NULL;
	const struct cpumask *current_mask = &current->cpus_allowed;
859 unsigned long cpu_id;
860
861
862
863
864
865 if (cpumask_weight(current_mask) != 1)
866 goto out;
867
868 cpu_id = smp_processor_id();
869 rcu_read_lock();
870 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
871 sdma_rht_params);
872
873 if (rht_node && rht_node->map[vl]) {
874 struct sdma_rht_map_elem *map = rht_node->map[vl];
875
876 sde = map->sde[selector & map->mask];
877 }
878 rcu_read_unlock();
879
880 if (sde)
881 return sde;
882
883out:
884 return sdma_select_engine_vl(dd, selector, vl);
885}
886
887static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
888{
889 int i;
890
891 for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
892 map->sde[map->ctr + i] = map->sde[i];
893}
894
895static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
896 struct sdma_engine *sde)
897{
898 unsigned int i, pow;
899
900
901 for (i = 0; i < map->ctr; i++) {
902 if (map->sde[i] == sde) {
903 memmove(&map->sde[i], &map->sde[i + 1],
904 (map->ctr - i - 1) * sizeof(map->sde[0]));
905 map->ctr--;
906 pow = roundup_pow_of_two(map->ctr ? : 1);
907 map->mask = pow - 1;
908 sdma_populate_sde_map(map);
909 break;
910 }
911 }
912}
913
914
915
916
917static DEFINE_MUTEX(process_to_sde_mutex);
918
919ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
920 size_t count)
921{
922 struct hfi1_devdata *dd = sde->dd;
923 cpumask_var_t mask, new_mask;
924 unsigned long cpu;
925 int ret, vl, sz;
926 struct sdma_rht_node *rht_node;
927
928 vl = sdma_engine_get_vl(sde);
929 if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
930 return -EINVAL;
931
932 ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
933 if (!ret)
934 return -ENOMEM;
935
936 ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
937 if (!ret) {
938 free_cpumask_var(mask);
939 return -ENOMEM;
940 }
941 ret = cpulist_parse(buf, mask);
942 if (ret)
943 goto out_free;
944
945 if (!cpumask_subset(mask, cpu_online_mask)) {
946 dd_dev_warn(sde->dd, "Invalid CPU mask\n");
947 ret = -EINVAL;
948 goto out_free;
949 }
950
951 sz = sizeof(struct sdma_rht_map_elem) +
952 (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
953
954 mutex_lock(&process_to_sde_mutex);
955
956 for_each_cpu(cpu, mask) {
957
958 if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
959 cpumask_set_cpu(cpu, new_mask);
960 continue;
961 }
962
963 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
964 sdma_rht_params);
965 if (!rht_node) {
966 rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
967 if (!rht_node) {
968 ret = -ENOMEM;
969 goto out;
970 }
971
972 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
973 if (!rht_node->map[vl]) {
974 kfree(rht_node);
975 ret = -ENOMEM;
976 goto out;
977 }
978 rht_node->cpu_id = cpu;
979 rht_node->map[vl]->mask = 0;
980 rht_node->map[vl]->ctr = 1;
981 rht_node->map[vl]->sde[0] = sde;
982
983 ret = rhashtable_insert_fast(dd->sdma_rht,
984 &rht_node->node,
985 sdma_rht_params);
986 if (ret) {
987 kfree(rht_node->map[vl]);
988 kfree(rht_node);
989 dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
990 cpu);
991 goto out;
992 }
993
994 } else {
995 int ctr, pow;
996
997
998 if (!rht_node->map[vl])
999 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
1000
1001 if (!rht_node->map[vl]) {
1002 ret = -ENOMEM;
1003 goto out;
1004 }
1005
1006 rht_node->map[vl]->ctr++;
1007 ctr = rht_node->map[vl]->ctr;
1008 rht_node->map[vl]->sde[ctr - 1] = sde;
1009 pow = roundup_pow_of_two(ctr);
1010 rht_node->map[vl]->mask = pow - 1;
1011
1012
1013 sdma_populate_sde_map(rht_node->map[vl]);
1014 }
1015 cpumask_set_cpu(cpu, new_mask);
1016 }
1017
1018
1019 for_each_cpu(cpu, cpu_online_mask) {
1020 struct sdma_rht_node *rht_node;
1021
1022
1023 if (cpumask_test_cpu(cpu, mask))
1024 continue;
1025
1026 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
1027 sdma_rht_params);
1028 if (rht_node) {
1029 bool empty = true;
1030 int i;
1031
1032
1033 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1034 if (rht_node->map[i])
1035 sdma_cleanup_sde_map(rht_node->map[i],
1036 sde);
1037
1038
1039 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1040 if (!rht_node->map[i])
1041 continue;
1042
1043 if (rht_node->map[i]->ctr) {
1044 empty = false;
1045 break;
1046 }
1047 }
1048
1049 if (empty) {
1050 ret = rhashtable_remove_fast(dd->sdma_rht,
1051 &rht_node->node,
1052 sdma_rht_params);
1053 WARN_ON(ret);
1054
1055 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1056 kfree(rht_node->map[i]);
1057
1058 kfree(rht_node);
1059 }
1060 }
1061 }
1062
1063 cpumask_copy(&sde->cpu_mask, new_mask);
1064out:
1065 mutex_unlock(&process_to_sde_mutex);
1066out_free:
1067 free_cpumask_var(mask);
1068 free_cpumask_var(new_mask);
1069 return ret ? : strnlen(buf, PAGE_SIZE);
1070}
1071
1072ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
1073{
1074 mutex_lock(&process_to_sde_mutex);
1075 if (cpumask_empty(&sde->cpu_mask))
1076 snprintf(buf, PAGE_SIZE, "%s\n", "empty");
1077 else
1078 cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
1079 mutex_unlock(&process_to_sde_mutex);
1080 return strnlen(buf, PAGE_SIZE);
1081}
1082
1083static void sdma_rht_free(void *ptr, void *arg)
1084{
1085 struct sdma_rht_node *rht_node = ptr;
1086 int i;
1087
1088 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1089 kfree(rht_node->map[i]);
1090
1091 kfree(rht_node);
1092}
1093
1094
1095
1096
1097
1098
1099
1100
1101
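/*
 * sdma_seqfile_dump_cpu_list() - debugfs helper that prints, for one CPU,
 * the user-configured engine list of each VL.
 */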
1102void sdma_seqfile_dump_cpu_list(struct seq_file *s,
1103 struct hfi1_devdata *dd,
1104 unsigned long cpuid)
1105{
1106 struct sdma_rht_node *rht_node;
1107 int i, j;
1108
1109 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
1110 sdma_rht_params);
1111 if (!rht_node)
1112 return;
1113
1114 seq_printf(s, "cpu%3lu: ", cpuid);
1115 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1116 if (!rht_node->map[i] || !rht_node->map[i]->ctr)
1117 continue;
1118
1119 seq_printf(s, " vl%d: [", i);
1120
1121 for (j = 0; j < rht_node->map[i]->ctr; j++) {
1122 if (!rht_node->map[i]->sde[j])
1123 continue;
1124
1125 if (j > 0)
1126 seq_puts(s, ",");
1127
1128 seq_printf(s, " sdma%2d",
1129 rht_node->map[i]->sde[j]->this_idx);
1130 }
1131 seq_puts(s, " ]");
1132 }
1133
1134 seq_puts(s, "\n");
1135}
1136
1137
1138
1139
1140static void sdma_map_free(struct sdma_vl_map *m)
1141{
1142 int i;
1143
1144 for (i = 0; m && i < m->actual_vls; i++)
1145 kfree(m->map[i]);
1146 kfree(m);
1147}
1148
1149
1150
1151
1152static void sdma_map_rcu_callback(struct rcu_head *list)
1153{
1154 struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);
1155
1156 sdma_map_free(m);
1157}
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
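/*
 * sdma_map_init() - build and install the VL-to-engine map
 * @dd: hfi1_devdata
 * @port: port index
 * @num_vls: number of VLs to map
 * @vl_engines: per-VL engine counts, or NULL to spread engines evenly
 *
 * Each VL gets a power-of-two sized array of engine pointers so a selector
 * can be masked into it cheaply.  The new map is published with
 * rcu_assign_pointer() and the old one is freed after a grace period.
 */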
1186int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
1187{
1188 int i, j;
1189 int extra, sde_per_vl;
1190 int engine = 0;
1191 u8 lvl_engines[OPA_MAX_VLS];
1192 struct sdma_vl_map *oldmap, *newmap;
1193
1194 if (!(dd->flags & HFI1_HAS_SEND_DMA))
1195 return 0;
1196
1197 if (!vl_engines) {
1198
1199 sde_per_vl = dd->num_sdma / num_vls;
1200
1201 extra = dd->num_sdma % num_vls;
1202 vl_engines = lvl_engines;
1203
1204 for (i = num_vls - 1; i >= 0; i--, extra--)
1205 vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
1206 }
1207
1208 newmap = kzalloc(
1209 sizeof(struct sdma_vl_map) +
1210 roundup_pow_of_two(num_vls) *
1211 sizeof(struct sdma_map_elem *),
1212 GFP_KERNEL);
1213 if (!newmap)
1214 goto bail;
1215 newmap->actual_vls = num_vls;
1216 newmap->vls = roundup_pow_of_two(num_vls);
1217 newmap->mask = (1 << ilog2(newmap->vls)) - 1;
1218
1219 for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
1220 newmap->engine_to_vl[i] = -1;
1221 for (i = 0; i < newmap->vls; i++) {
1222
1223 int first_engine = engine;
1224
1225 if (i < newmap->actual_vls) {
1226 int sz = roundup_pow_of_two(vl_engines[i]);
1227
1228
1229 newmap->map[i] = kzalloc(
1230 sizeof(struct sdma_map_elem) +
1231 sz * sizeof(struct sdma_engine *),
1232 GFP_KERNEL);
1233 if (!newmap->map[i])
1234 goto bail;
1235 newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
1236
1237 for (j = 0; j < sz; j++) {
1238 newmap->map[i]->sde[j] =
1239 &dd->per_sdma[engine];
1240 if (++engine >= first_engine + vl_engines[i])
1241
1242 engine = first_engine;
1243 }
1244
1245 for (j = 0; j < vl_engines[i]; j++)
1246 newmap->engine_to_vl[first_engine + j] = i;
1247 } else {
1248
1249 newmap->map[i] = newmap->map[i % num_vls];
1250 }
1251 engine = first_engine + vl_engines[i];
1252 }
1253
1254 spin_lock_irq(&dd->sde_map_lock);
1255 oldmap = rcu_dereference_protected(dd->sdma_map,
1256 lockdep_is_held(&dd->sde_map_lock));
1257
1258
1259 rcu_assign_pointer(dd->sdma_map, newmap);
1260
1261 spin_unlock_irq(&dd->sde_map_lock);
1262
1263 if (oldmap)
1264 call_rcu(&oldmap->list, sdma_map_rcu_callback);
1265 return 0;
1266bail:
1267
1268 sdma_map_free(newmap);
1269 return -ENOMEM;
1270}
1271
1272
1273
1274
1275
1276
1277
1278
1279
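/*
 * sdma_clean() - free all per-engine and shared SDMA resources
 *
 * Releases the pad and head DMA buffers, each engine's descriptor queue and
 * tx ring, the VL engine map, the per_sdma array and the cpu-to-engine
 * rhashtable.  Safe to call on a partially initialized device, which is why
 * sdma_init() uses it on its error paths.
 */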
1280void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
1281{
1282 size_t i;
1283 struct sdma_engine *sde;
1284
1285 if (dd->sdma_pad_dma) {
1286 dma_free_coherent(&dd->pcidev->dev, 4,
1287 (void *)dd->sdma_pad_dma,
1288 dd->sdma_pad_phys);
1289 dd->sdma_pad_dma = NULL;
1290 dd->sdma_pad_phys = 0;
1291 }
1292 if (dd->sdma_heads_dma) {
1293 dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
1294 (void *)dd->sdma_heads_dma,
1295 dd->sdma_heads_phys);
1296 dd->sdma_heads_dma = NULL;
1297 dd->sdma_heads_phys = 0;
1298 }
1299 for (i = 0; dd->per_sdma && i < num_engines; ++i) {
1300 sde = &dd->per_sdma[i];
1301
1302 sde->head_dma = NULL;
1303 sde->head_phys = 0;
1304
1305 if (sde->descq) {
1306 dma_free_coherent(
1307 &dd->pcidev->dev,
1308 sde->descq_cnt * sizeof(u64[2]),
1309 sde->descq,
1310 sde->descq_phys
1311 );
1312 sde->descq = NULL;
1313 sde->descq_phys = 0;
1314 }
1315 kvfree(sde->tx_ring);
1316 sde->tx_ring = NULL;
1317 }
1318 spin_lock_irq(&dd->sde_map_lock);
1319 sdma_map_free(rcu_access_pointer(dd->sdma_map));
1320 RCU_INIT_POINTER(dd->sdma_map, NULL);
1321 spin_unlock_irq(&dd->sde_map_lock);
1322 synchronize_rcu();
1323 kfree(dd->per_sdma);
1324 dd->per_sdma = NULL;
1325
1326 if (dd->sdma_rht) {
1327 rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
1328 kfree(dd->sdma_rht);
1329 dd->sdma_rht = NULL;
1330 }
1331}
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
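/*
 * sdma_init() - initialize the SDMA engines for a device
 * @dd: hfi1_devdata
 * @port: port index
 *
 * Sizes the engine set (module parameter, chip limit, at least one engine
 * per VL), allocates the per-engine state, descriptor queues, tx rings and
 * the shared head/pad DMA memory, programs the per-engine CSRs, and builds
 * the VL map plus the cpu-to-engine rhashtable.  Returns 0 on success or a
 * negative errno, cleaning up after itself on failure.
 */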
1344int sdma_init(struct hfi1_devdata *dd, u8 port)
1345{
1346 unsigned this_idx;
1347 struct sdma_engine *sde;
1348 struct rhashtable *tmp_sdma_rht;
1349 u16 descq_cnt;
1350 void *curr_head;
1351 struct hfi1_pportdata *ppd = dd->pport + port;
1352 u32 per_sdma_credits;
1353 uint idle_cnt = sdma_idle_cnt;
1354 size_t num_engines = chip_sdma_engines(dd);
1355 int ret = -ENOMEM;
1356
1357 if (!HFI1_CAP_IS_KSET(SDMA)) {
1358 HFI1_CAP_CLEAR(SDMA_AHG);
1359 return 0;
1360 }
1361 if (mod_num_sdma &&
1362
1363 mod_num_sdma <= chip_sdma_engines(dd) &&
1364
1365 mod_num_sdma >= num_vls)
1366 num_engines = mod_num_sdma;
1367
1368 dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
1369 dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
1370 dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
1371 chip_sdma_mem_size(dd));
1372
1373 per_sdma_credits =
1374 chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
1375
1376
1377 init_waitqueue_head(&dd->sdma_unfreeze_wq);
1378 atomic_set(&dd->sdma_unfreeze_count, 0);
1379
1380 descq_cnt = sdma_get_descq_cnt();
1381 dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
1382 num_engines, descq_cnt);
1383
1384
1385 dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
1386 GFP_KERNEL, dd->node);
1387 if (!dd->per_sdma)
1388 return ret;
1389
1390 idle_cnt = ns_to_cclock(dd, idle_cnt);
1391 if (idle_cnt)
1392 dd->default_desc1 =
1393 SDMA_DESC1_HEAD_TO_HOST_FLAG;
1394 else
1395 dd->default_desc1 =
1396 SDMA_DESC1_INT_REQ_FLAG;
1397
1398 if (!sdma_desct_intr)
1399 sdma_desct_intr = SDMA_DESC_INTR;
1400
1401
1402 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1403 sde = &dd->per_sdma[this_idx];
1404 sde->dd = dd;
1405 sde->ppd = ppd;
1406 sde->this_idx = this_idx;
1407 sde->descq_cnt = descq_cnt;
1408 sde->desc_avail = sdma_descq_freecnt(sde);
1409 sde->sdma_shift = ilog2(descq_cnt);
1410 sde->sdma_mask = (1 << sde->sdma_shift) - 1;
1411
1412
1413 sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
1414 this_idx);
1415 sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
1416 this_idx);
1417 sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
1418 this_idx);
1419
1420 sde->imask = sde->int_mask | sde->progress_mask |
1421 sde->idle_mask;
1422
1423 spin_lock_init(&sde->tail_lock);
1424 seqlock_init(&sde->head_lock);
1425 spin_lock_init(&sde->senddmactrl_lock);
1426 spin_lock_init(&sde->flushlist_lock);
1427
1428 sde->ahg_bits = 0xfffffffe00000000ULL;
1429
1430 sdma_set_state(sde, sdma_state_s00_hw_down);
1431
1432
1433 kref_init(&sde->state.kref);
1434 init_completion(&sde->state.comp);
1435
1436 INIT_LIST_HEAD(&sde->flushlist);
1437 INIT_LIST_HEAD(&sde->dmawait);
1438
1439 sde->tail_csr =
1440 get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
1441
1442 tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
1443 (unsigned long)sde);
1444
1445 tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
1446 (unsigned long)sde);
1447 INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
1448 INIT_WORK(&sde->flush_worker, sdma_field_flush);
1449
1450 sde->progress_check_head = 0;
1451
1452 timer_setup(&sde->err_progress_check_timer,
1453 sdma_err_progress_check, 0);
1454
1455 sde->descq = dma_zalloc_coherent(
1456 &dd->pcidev->dev,
1457 descq_cnt * sizeof(u64[2]),
1458 &sde->descq_phys,
1459 GFP_KERNEL
1460 );
1461 if (!sde->descq)
1462 goto bail;
1463 sde->tx_ring =
1464 kvzalloc_node(array_size(descq_cnt,
1465 sizeof(struct sdma_txreq *)),
1466 GFP_KERNEL, dd->node);
1467 if (!sde->tx_ring)
1468 goto bail;
1469 }
1470
1471 dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
1472
1473 dd->sdma_heads_dma = dma_zalloc_coherent(
1474 &dd->pcidev->dev,
1475 dd->sdma_heads_size,
1476 &dd->sdma_heads_phys,
1477 GFP_KERNEL
1478 );
1479 if (!dd->sdma_heads_dma) {
1480 dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
1481 goto bail;
1482 }
1483
1484
1485 dd->sdma_pad_dma = dma_zalloc_coherent(
1486 &dd->pcidev->dev,
1487 sizeof(u32),
1488 &dd->sdma_pad_phys,
1489 GFP_KERNEL
1490 );
1491 if (!dd->sdma_pad_dma) {
1492 dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
1493 goto bail;
1494 }
1495
1496
1497 curr_head = (void *)dd->sdma_heads_dma;
1498 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1499 unsigned long phys_offset;
1500
1501 sde = &dd->per_sdma[this_idx];
1502
1503 sde->head_dma = curr_head;
1504 curr_head += L1_CACHE_BYTES;
1505 phys_offset = (unsigned long)sde->head_dma -
1506 (unsigned long)dd->sdma_heads_dma;
1507 sde->head_phys = dd->sdma_heads_phys + phys_offset;
1508 init_sdma_regs(sde, per_sdma_credits, idle_cnt);
1509 }
1510 dd->flags |= HFI1_HAS_SEND_DMA;
1511 dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
1512 dd->num_sdma = num_engines;
1513 ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
1514 if (ret < 0)
1515 goto bail;
1516
1517 tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
1518 if (!tmp_sdma_rht) {
1519 ret = -ENOMEM;
1520 goto bail;
1521 }
1522
1523 ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
1524 if (ret < 0)
1525 goto bail;
1526 dd->sdma_rht = tmp_sdma_rht;
1527
1528 dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
1529 return 0;
1530
1531bail:
1532 sdma_clean(dd, num_engines);
1533 return ret;
1534}
1535
1536
1537
1538
1539
1540
1541
1542void sdma_all_running(struct hfi1_devdata *dd)
1543{
1544 struct sdma_engine *sde;
1545 unsigned int i;
1546
1547
1548 for (i = 0; i < dd->num_sdma; ++i) {
1549 sde = &dd->per_sdma[i];
1550 sdma_process_event(sde, sdma_event_e30_go_running);
1551 }
1552}
1553
1554
1555
1556
1557
1558
1559
1560void sdma_all_idle(struct hfi1_devdata *dd)
1561{
1562 struct sdma_engine *sde;
1563 unsigned int i;
1564
1565
1566 for (i = 0; i < dd->num_sdma; ++i) {
1567 sde = &dd->per_sdma[i];
1568 sdma_process_event(sde, sdma_event_e70_go_idle);
1569 }
1570}
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580void sdma_start(struct hfi1_devdata *dd)
1581{
1582 unsigned i;
1583 struct sdma_engine *sde;
1584
1585
1586 for (i = 0; i < dd->num_sdma; ++i) {
1587 sde = &dd->per_sdma[i];
1588 sdma_process_event(sde, sdma_event_e10_go_hw_start);
1589 }
1590}
1591
1592
1593
1594
1595
1596void sdma_exit(struct hfi1_devdata *dd)
1597{
1598 unsigned this_idx;
1599 struct sdma_engine *sde;
1600
1601 for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
1602 ++this_idx) {
1603 sde = &dd->per_sdma[this_idx];
1604 if (!list_empty(&sde->dmawait))
1605 dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
1606 sde->this_idx);
1607 sdma_process_event(sde, sdma_event_e00_go_hw_down);
1608
1609 del_timer_sync(&sde->err_progress_check_timer);
1610
1611
1612
1613
1614
1615
1616 sdma_finalput(&sde->state);
1617 }
1618}
1619
1620
1621
1622
1623static inline void sdma_unmap_desc(
1624 struct hfi1_devdata *dd,
1625 struct sdma_desc *descp)
1626{
1627 switch (sdma_mapping_type(descp)) {
1628 case SDMA_MAP_SINGLE:
1629 dma_unmap_single(
1630 &dd->pcidev->dev,
1631 sdma_mapping_addr(descp),
1632 sdma_mapping_len(descp),
1633 DMA_TO_DEVICE);
1634 break;
1635 case SDMA_MAP_PAGE:
1636 dma_unmap_page(
1637 &dd->pcidev->dev,
1638 sdma_mapping_addr(descp),
1639 sdma_mapping_len(descp),
1640 DMA_TO_DEVICE);
1641 break;
1642 }
1643}
1644
1645
1646
1647
1648
1649static inline u8 ahg_mode(struct sdma_txreq *tx)
1650{
1651 return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
1652 >> SDMA_DESC1_HEADER_MODE_SHIFT;
1653}
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
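/*
 * __sdma_txclean() - release all resources held by a tx request
 *
 * Unmaps the mapped descriptors (descriptors used for AHG header updates
 * are skipped based on the AHG mode), frees any coalesce buffer, and frees
 * an oversized descriptor array if one was allocated.
 */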
1666void __sdma_txclean(
1667 struct hfi1_devdata *dd,
1668 struct sdma_txreq *tx)
1669{
1670 u16 i;
1671
1672 if (tx->num_desc) {
1673 u8 skip = 0, mode = ahg_mode(tx);
1674
1675
1676 sdma_unmap_desc(dd, &tx->descp[0]);
1677
1678 if (mode > SDMA_AHG_APPLY_UPDATE1)
1679 skip = mode >> 1;
1680 for (i = 1 + skip; i < tx->num_desc; i++)
1681 sdma_unmap_desc(dd, &tx->descp[i]);
1682 tx->num_desc = 0;
1683 }
1684 kfree(tx->coalesce_buf);
1685 tx->coalesce_buf = NULL;
1686
1687 if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
1688 tx->desc_limit = ARRAY_SIZE(tx->descs);
1689 kfree(tx->descp);
1690 }
1691}
1692
1693static inline u16 sdma_gethead(struct sdma_engine *sde)
1694{
1695 struct hfi1_devdata *dd = sde->dd;
1696 int use_dmahead;
1697 u16 hwhead;
1698
1699#ifdef CONFIG_SDMA_VERBOSITY
1700 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1701 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1702#endif
1703
1704retry:
1705 use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
1706 (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
1707 hwhead = use_dmahead ?
1708 (u16)le64_to_cpu(*sde->head_dma) :
1709 (u16)read_sde_csr(sde, SD(HEAD));
1710
1711 if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
1712 u16 cnt;
1713 u16 swtail;
1714 u16 swhead;
1715 int sane;
1716
1717 swhead = sde->descq_head & sde->sdma_mask;
1718
1719 swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
1720 cnt = sde->descq_cnt;
1721
1722 if (swhead < swtail)
1723
1724 sane = (hwhead >= swhead) & (hwhead <= swtail);
1725 else if (swhead > swtail)
1726
1727 sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
1728 (hwhead <= swtail);
1729 else
1730
1731 sane = (hwhead == swhead);
1732
1733 if (unlikely(!sane)) {
1734 dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
1735 sde->this_idx,
1736 use_dmahead ? "dma" : "kreg",
1737 hwhead, swhead, swtail, cnt);
1738 if (use_dmahead) {
1739
1740 use_dmahead = 0;
1741 goto retry;
1742 }
1743
1744 hwhead = swhead;
1745 }
1746 }
1747 return hwhead;
1748}
1749
1750
1751
1752
1753
1754
1755
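/*
 * Wake requesters blocked on this engine now that descriptors have freed
 * up.  Up to SDMA_WAIT_BATCH_SIZE waiters whose first queued tx fits in
 * the available space are pulled off dmawait under the iowait seqlock; the
 * most-starved waiter is woken first.
 */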
1756static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
1757{
1758 struct iowait *wait, *nw;
1759 struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
1760 uint i, n = 0, seq, max_idx = 0;
1761 struct sdma_txreq *stx;
1762 struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
1763 u8 max_starved_cnt = 0;
1764
1765#ifdef CONFIG_SDMA_VERBOSITY
1766 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
1767 slashstrip(__FILE__), __LINE__, __func__);
1768 dd_dev_err(sde->dd, "avail: %u\n", avail);
1769#endif
1770
1771 do {
1772 seq = read_seqbegin(&dev->iowait_lock);
1773 if (!list_empty(&sde->dmawait)) {
1774
1775 write_seqlock(&dev->iowait_lock);
1776
1777 list_for_each_entry_safe(
1778 wait,
1779 nw,
1780 &sde->dmawait,
1781 list) {
1782 u16 num_desc = 0;
1783
1784 if (!wait->wakeup)
1785 continue;
1786 if (n == ARRAY_SIZE(waits))
1787 break;
1788 if (!list_empty(&wait->tx_head)) {
1789 stx = list_first_entry(
1790 &wait->tx_head,
1791 struct sdma_txreq,
1792 list);
1793 num_desc = stx->num_desc;
1794 }
1795 if (num_desc > avail)
1796 break;
1797 avail -= num_desc;
1798
1799 iowait_starve_find_max(wait, &max_starved_cnt,
1800 n, &max_idx);
1801 list_del_init(&wait->list);
1802 waits[n++] = wait;
1803 }
1804 write_sequnlock(&dev->iowait_lock);
1805 break;
1806 }
1807 } while (read_seqretry(&dev->iowait_lock, seq));
1808
1809
1810 if (n)
1811 waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
1812
1813 for (i = 0; i < n; i++)
1814 if (i != max_idx)
1815 waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
1816}
1817
1818
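/*
 * Retire completed work: advance the software head up to the hardware
 * head, completing each txreq whose last descriptor has been consumed.
 * On an idle interrupt the head CSR is re-read once to pick up descriptors
 * that finished just before the engine went idle, and any freed space is
 * reported through sdma_desc_avail().
 */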
1819static void sdma_make_progress(struct sdma_engine *sde, u64 status)
1820{
1821 struct sdma_txreq *txp = NULL;
1822 int progress = 0;
1823 u16 hwhead, swhead;
1824 int idle_check_done = 0;
1825
1826 hwhead = sdma_gethead(sde);
1827
1828
1829
1830
1831
1832
1833
1834retry:
1835 txp = get_txhead(sde);
1836 swhead = sde->descq_head & sde->sdma_mask;
1837 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1838 while (swhead != hwhead) {
1839
1840 swhead = ++sde->descq_head & sde->sdma_mask;
1841
1842
1843 if (txp && txp->next_descq_idx == swhead) {
1844
1845 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
1846 complete_tx(sde, txp, SDMA_TXREQ_S_OK);
1847
1848 txp = get_txhead(sde);
1849 }
1850 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1851 progress++;
1852 }
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863 if ((status & sde->idle_mask) && !idle_check_done) {
1864 u16 swtail;
1865
1866 swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
1867 if (swtail != hwhead) {
1868 hwhead = (u16)read_sde_csr(sde, SD(HEAD));
1869 idle_check_done = 1;
1870 goto retry;
1871 }
1872 }
1873
1874 sde->last_status = status;
1875 if (progress)
1876 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
1877}
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
1889{
1890 trace_hfi1_sdma_engine_interrupt(sde, status);
1891 write_seqlock(&sde->head_lock);
1892 sdma_set_desc_cnt(sde, sdma_desct_intr);
1893 if (status & sde->idle_mask)
1894 sde->idle_int_cnt++;
1895 else if (status & sde->progress_mask)
1896 sde->progress_int_cnt++;
1897 else if (status & sde->int_mask)
1898 sde->sdma_int_cnt++;
1899 sdma_make_progress(sde, status);
1900 write_sequnlock(&sde->head_lock);
1901}
1902
1903
1904
1905
1906
1907
1908void sdma_engine_error(struct sdma_engine *sde, u64 status)
1909{
1910 unsigned long flags;
1911
1912#ifdef CONFIG_SDMA_VERBOSITY
1913 dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
1914 sde->this_idx,
1915 (unsigned long long)status,
1916 sdma_state_names[sde->state.current_state]);
1917#endif
1918 spin_lock_irqsave(&sde->tail_lock, flags);
1919 write_seqlock(&sde->head_lock);
1920 if (status & ALL_SDMA_ENG_HALT_ERRS)
1921 __sdma_process_event(sde, sdma_event_e60_hw_halted);
1922 if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
1923 dd_dev_err(sde->dd,
1924 "SDMA (%u) engine error: 0x%llx state %s\n",
1925 sde->this_idx,
1926 (unsigned long long)status,
1927 sdma_state_names[sde->state.current_state]);
1928 dump_sdma_state(sde);
1929 }
1930 write_sequnlock(&sde->head_lock);
1931 spin_unlock_irqrestore(&sde->tail_lock, flags);
1932}
1933
1934static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
1935{
1936 u64 set_senddmactrl = 0;
1937 u64 clr_senddmactrl = 0;
1938 unsigned long flags;
1939
1940#ifdef CONFIG_SDMA_VERBOSITY
1941 dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
1942 sde->this_idx,
1943 (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
1944 (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
1945 (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
1946 (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
1947#endif
1948
1949 if (op & SDMA_SENDCTRL_OP_ENABLE)
1950 set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1951 else
1952 clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1953
1954 if (op & SDMA_SENDCTRL_OP_INTENABLE)
1955 set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1956 else
1957 clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1958
1959 if (op & SDMA_SENDCTRL_OP_HALT)
1960 set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1961 else
1962 clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1963
1964 spin_lock_irqsave(&sde->senddmactrl_lock, flags);
1965
1966 sde->p_senddmactrl |= set_senddmactrl;
1967 sde->p_senddmactrl &= ~clr_senddmactrl;
1968
1969 if (op & SDMA_SENDCTRL_OP_CLEANUP)
1970 write_sde_csr(sde, SD(CTRL),
1971 sde->p_senddmactrl |
1972 SD(CTRL_SDMA_CLEANUP_SMASK));
1973 else
1974 write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
1975
1976 spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);
1977
1978#ifdef CONFIG_SDMA_VERBOSITY
1979 sdma_dumpstate(sde);
1980#endif
1981}
1982
1983static void sdma_setlengen(struct sdma_engine *sde)
1984{
1985#ifdef CONFIG_SDMA_VERBOSITY
1986 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1987 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1988#endif
1989
1990
1991
1992
1993
1994
1995 write_sde_csr(sde, SD(LEN_GEN),
1996 (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
1997 write_sde_csr(sde, SD(LEN_GEN),
1998 ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
1999 (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
2000}
2001
2002static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
2003{
2004
2005 smp_wmb();
2006 writeq(tail, sde->tail_csr);
2007}
2008
2009
2010
2011
2012
2013static void sdma_hw_start_up(struct sdma_engine *sde)
2014{
2015 u64 reg;
2016
2017#ifdef CONFIG_SDMA_VERBOSITY
2018 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
2019 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
2020#endif
2021
2022 sdma_setlengen(sde);
2023 sdma_update_tail(sde, 0);
2024 *sde->head_dma = 0;
2025
2026 reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
2027 SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
2028 write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
2029}
2030
2031
2032
2033
2034
2035
2036static void set_sdma_integrity(struct sdma_engine *sde)
2037{
2038 struct hfi1_devdata *dd = sde->dd;
2039
2040 write_sde_csr(sde, SD(CHECK_ENABLE),
2041 hfi1_pkt_base_sdma_integrity(dd));
2042}
2043
2044static void init_sdma_regs(
2045 struct sdma_engine *sde,
2046 u32 credits,
2047 uint idle_cnt)
2048{
2049 u8 opval, opmask;
2050#ifdef CONFIG_SDMA_VERBOSITY
2051 struct hfi1_devdata *dd = sde->dd;
2052
2053 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
2054 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
2055#endif
2056
2057 write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
2058 sdma_setlengen(sde);
2059 sdma_update_tail(sde, 0);
2060 write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
2061 write_sde_csr(sde, SD(DESC_CNT), 0);
2062 write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
2063 write_sde_csr(sde, SD(MEMORY),
2064 ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
2065 ((u64)(credits * sde->this_idx) <<
2066 SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
2067 write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
2068 set_sdma_integrity(sde);
2069 opmask = OPCODE_CHECK_MASK_DISABLED;
2070 opval = OPCODE_CHECK_VAL_DISABLED;
2071 write_sde_csr(sde, SD(CHECK_OPCODE),
2072 (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
2073 (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
2074}
2075
2076#ifdef CONFIG_SDMA_VERBOSITY
2077
2078#define sdma_dumpstate_helper0(reg) do { \
2079 csr = read_csr(sde->dd, reg); \
2080 dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
2081 } while (0)
2082
2083#define sdma_dumpstate_helper(reg) do { \
2084 csr = read_sde_csr(sde, reg); \
2085 dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
2086 #reg, sde->this_idx, csr); \
2087 } while (0)
2088
2089#define sdma_dumpstate_helper2(reg) do { \
2090 csr = read_csr(sde->dd, reg + (8 * i)); \
2091 dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
2092 #reg, i, csr); \
2093 } while (0)
2094
2095void sdma_dumpstate(struct sdma_engine *sde)
2096{
2097 u64 csr;
2098 unsigned i;
2099
2100 sdma_dumpstate_helper(SD(CTRL));
2101 sdma_dumpstate_helper(SD(STATUS));
2102 sdma_dumpstate_helper0(SD(ERR_STATUS));
2103 sdma_dumpstate_helper0(SD(ERR_MASK));
2104 sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
2105 sdma_dumpstate_helper(SD(ENG_ERR_MASK));
2106
2107 for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
2108 sdma_dumpstate_helper2(CCE_INT_STATUS);
2109 sdma_dumpstate_helper2(CCE_INT_MASK);
2110 sdma_dumpstate_helper2(CCE_INT_BLOCKED);
2111 }
2112
2113 sdma_dumpstate_helper(SD(TAIL));
2114 sdma_dumpstate_helper(SD(HEAD));
2115 sdma_dumpstate_helper(SD(PRIORITY_THLD));
2116 sdma_dumpstate_helper(SD(IDLE_CNT));
2117 sdma_dumpstate_helper(SD(RELOAD_CNT));
2118 sdma_dumpstate_helper(SD(DESC_CNT));
2119 sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
2120 sdma_dumpstate_helper(SD(MEMORY));
2121 sdma_dumpstate_helper0(SD(ENGINES));
2122 sdma_dumpstate_helper0(SD(MEM_SIZE));
2123
2124 sdma_dumpstate_helper(SD(BASE_ADDR));
2125 sdma_dumpstate_helper(SD(LEN_GEN));
2126 sdma_dumpstate_helper(SD(HEAD_ADDR));
2127 sdma_dumpstate_helper(SD(CHECK_ENABLE));
2128 sdma_dumpstate_helper(SD(CHECK_VL));
2129 sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
2130 sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
2131 sdma_dumpstate_helper(SD(CHECK_SLID));
2132 sdma_dumpstate_helper(SD(CHECK_OPCODE));
2133}
2134#endif
2135
2136static void dump_sdma_state(struct sdma_engine *sde)
2137{
2138 struct hw_sdma_desc *descqp;
2139 u64 desc[2];
2140 u64 addr;
2141 u8 gen;
2142 u16 len;
2143 u16 head, tail, cnt;
2144
2145 head = sde->descq_head & sde->sdma_mask;
2146 tail = sde->descq_tail & sde->sdma_mask;
2147 cnt = sdma_descq_freecnt(sde);
2148
2149 dd_dev_err(sde->dd,
2150 "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
2151 sde->this_idx, head, tail, cnt,
2152 !list_empty(&sde->flushlist));
2153
2154
2155 while (head != tail) {
2156 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2157
2158 descqp = &sde->descq[head];
2159 desc[0] = le64_to_cpu(descqp->qw[0]);
2160 desc[1] = le64_to_cpu(descqp->qw[1]);
2161 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2162 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2163 'H' : '-';
2164 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2165 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2166 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2167 & SDMA_DESC0_PHY_ADDR_MASK;
2168 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2169 & SDMA_DESC1_GENERATION_MASK;
2170 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2171 & SDMA_DESC0_BYTE_COUNT_MASK;
2172 dd_dev_err(sde->dd,
2173 "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2174 head, flags, addr, gen, len);
2175 dd_dev_err(sde->dd,
2176 "\tdesc0:0x%016llx desc1 0x%016llx\n",
2177 desc[0], desc[1]);
2178 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2179 dd_dev_err(sde->dd,
2180 "\taidx: %u amode: %u alen: %u\n",
2181 (u8)((desc[1] &
2182 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2183 SDMA_DESC1_HEADER_INDEX_SHIFT),
2184 (u8)((desc[1] &
2185 SDMA_DESC1_HEADER_MODE_SMASK) >>
2186 SDMA_DESC1_HEADER_MODE_SHIFT),
2187 (u8)((desc[1] &
2188 SDMA_DESC1_HEADER_DWS_SMASK) >>
2189 SDMA_DESC1_HEADER_DWS_SHIFT));
2190 head++;
2191 head &= sde->sdma_mask;
2192 }
2193}
2194
2195#define SDE_FMT \
2196 "SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"
2197
2198
2199
2200
2201
2202
2203
2204void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
2205{
2206 u16 head, tail;
2207 struct hw_sdma_desc *descqp;
2208 u64 desc[2];
2209 u64 addr;
2210 u8 gen;
2211 u16 len;
2212
2213 head = sde->descq_head & sde->sdma_mask;
2214 tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
2215 seq_printf(s, SDE_FMT, sde->this_idx,
2216 sde->cpu,
2217 sdma_state_name(sde->state.current_state),
2218 (unsigned long long)read_sde_csr(sde, SD(CTRL)),
2219 (unsigned long long)read_sde_csr(sde, SD(STATUS)),
2220 (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
2221 (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
2222 (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
2223 (unsigned long long)le64_to_cpu(*sde->head_dma),
2224 (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
2225 (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
2226 (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
2227 (unsigned long long)sde->last_status,
2228 (unsigned long long)sde->ahg_bits,
2229 sde->tx_tail,
2230 sde->tx_head,
2231 sde->descq_tail,
2232 sde->descq_head,
2233 !list_empty(&sde->flushlist),
2234 sde->descq_full_count,
2235 (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
2236
2237
2238 while (head != tail) {
2239 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2240
2241 descqp = &sde->descq[head];
2242 desc[0] = le64_to_cpu(descqp->qw[0]);
2243 desc[1] = le64_to_cpu(descqp->qw[1]);
2244 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2245 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2246 'H' : '-';
2247 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2248 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2249 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2250 & SDMA_DESC0_PHY_ADDR_MASK;
2251 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2252 & SDMA_DESC1_GENERATION_MASK;
2253 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2254 & SDMA_DESC0_BYTE_COUNT_MASK;
2255 seq_printf(s,
2256 "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2257 head, flags, addr, gen, len);
2258 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2259 seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
2260 (u8)((desc[1] &
2261 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2262 SDMA_DESC1_HEADER_INDEX_SHIFT),
2263 (u8)((desc[1] &
2264 SDMA_DESC1_HEADER_MODE_SMASK) >>
2265 SDMA_DESC1_HEADER_MODE_SHIFT));
2266 head = (head + 1) & sde->sdma_mask;
2267 }
2268}
2269
2270
2271
2272
2273
2274static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
2275{
2276 u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;
2277
2278 qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
2279 qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
2280 << SDMA_DESC1_GENERATION_SHIFT;
2281 return qw1;
2282}
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
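/*
 * Copy a tx request's descriptors into the hardware descriptor queue,
 * stamping the generation bits on each one (descriptors covered by an AHG
 * header update keep their qw1 unmodified), record the request in tx_ring,
 * and return the new tail index.  Caller holds sde->tail_lock.
 */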
2300static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
2301{
2302 int i;
2303 u16 tail;
2304 struct sdma_desc *descp = tx->descp;
2305 u8 skip = 0, mode = ahg_mode(tx);
2306
2307 tail = sde->descq_tail & sde->sdma_mask;
2308 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2309 sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
2310 trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
2311 tail, &sde->descq[tail]);
2312 tail = ++sde->descq_tail & sde->sdma_mask;
2313 descp++;
2314 if (mode > SDMA_AHG_APPLY_UPDATE1)
2315 skip = mode >> 1;
2316 for (i = 1; i < tx->num_desc; i++, descp++) {
2317 u64 qw1;
2318
2319 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2320 if (skip) {
2321
2322 qw1 = descp->qw[1];
2323 skip--;
2324 } else {
2325
2326 qw1 = add_gen(sde, descp->qw[1]);
2327 }
2328 sde->descq[tail].qw[1] = cpu_to_le64(qw1);
2329 trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
2330 tail, &sde->descq[tail]);
2331 tail = ++sde->descq_tail & sde->sdma_mask;
2332 }
2333 tx->next_descq_idx = tail;
2334#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2335 tx->sn = sde->tail_sn++;
2336 trace_hfi1_sdma_in_sn(sde, tx->sn);
2337 WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
2338#endif
2339 sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
2340 sde->desc_avail -= tx->num_desc;
2341 return tail;
2342}
2343
2344
2345
2346
2347static int sdma_check_progress(
2348 struct sdma_engine *sde,
2349 struct iowait *wait,
2350 struct sdma_txreq *tx,
2351 bool pkts_sent)
2352{
2353 int ret;
2354
2355 sde->desc_avail = sdma_descq_freecnt(sde);
2356 if (tx->num_desc <= sde->desc_avail)
2357 return -EAGAIN;
2358
2359 if (wait && wait->sleep) {
2360 unsigned seq;
2361
2362 seq = raw_seqcount_begin(
2363 (const seqcount_t *)&sde->head_lock.seqcount);
2364 ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
2365 if (ret == -EAGAIN)
2366 sde->desc_avail = sdma_descq_freecnt(sde);
2367 } else {
2368 ret = -EBUSY;
2369 }
2370 return ret;
2371}
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
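/*
 * sdma_send_txreq() - submit a single tx request to the ring
 * @sde: sdma engine to use
 * @wait: optional iowait to be woken when descriptors free up
 * @tx: the (fully built) sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * Return: 0 if queued; -EINVAL if the request is incomplete (tlen != 0);
 * -ECOMM if the engine is not running (the request is then completed as
 * aborted via the flush worker); -EBUSY, or the sleep callback's return
 * value, if the ring is full.
 *
 * Illustrative call pattern (a sketch only; the txreq build helpers and
 * their exact signatures live in sdma.h, and the iowait argument is
 * whatever wait structure the caller owns):
 *
 *	sdma_txinit(&tx, 0, payload_len, my_complete_cb);
 *	sdma_txadd_kvaddr(dd, &tx, buf, payload_len);
 *	ret = sdma_send_txreq(sde, &my_iowait, &tx, pkts_sent);
 */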
2388int sdma_send_txreq(struct sdma_engine *sde,
2389 struct iowait *wait,
2390 struct sdma_txreq *tx,
2391 bool pkts_sent)
2392{
2393 int ret = 0;
2394 u16 tail;
2395 unsigned long flags;
2396
2397
2398 if (unlikely(tx->tlen))
2399 return -EINVAL;
2400 tx->wait = wait;
2401 spin_lock_irqsave(&sde->tail_lock, flags);
2402retry:
2403 if (unlikely(!__sdma_running(sde)))
2404 goto unlock_noconn;
2405 if (unlikely(tx->num_desc > sde->desc_avail))
2406 goto nodesc;
2407 tail = submit_tx(sde, tx);
2408 if (wait)
2409 iowait_sdma_inc(wait);
2410 sdma_update_tail(sde, tail);
2411unlock:
2412 spin_unlock_irqrestore(&sde->tail_lock, flags);
2413 return ret;
2414unlock_noconn:
2415 if (wait)
2416 iowait_sdma_inc(wait);
2417 tx->next_descq_idx = 0;
2418#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2419 tx->sn = sde->tail_sn++;
2420 trace_hfi1_sdma_in_sn(sde, tx->sn);
2421#endif
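        /* engine is not running: queue the tx for the flush worker */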
2422 spin_lock(&sde->flushlist_lock);
2423 list_add_tail(&tx->list, &sde->flushlist);
2424 spin_unlock(&sde->flushlist_lock);
2425 if (wait) {
2426 wait->tx_count++;
2427 wait->count += tx->num_desc;
2428 }
2429 schedule_work(&sde->flush_worker);
2430 ret = -ECOMM;
2431 goto unlock;
2432nodesc:
2433 ret = sdma_check_progress(sde, wait, tx, pkts_sent);
2434 if (ret == -EAGAIN) {
2435 ret = 0;
2436 goto retry;
2437 }
2438 sde->descq_full_count++;
2439 goto unlock;
2440}
2441
/**
 * sdma_send_txlist() - submit a list of tx requests to the ring
 * @sde: sdma engine to use
 * @wait: iowait structure to use when the ring is full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: on return, the total number of sdma_txreqs removed from
 *             tx_list (those submitted to the ring plus those moved to
 *             the flushlist)
 *
 * Submits the list under the tail lock, updating the hardware tail
 * periodically (gated by SDMA_TAIL_UPDATE_THRESH) and once more on exit,
 * which is cheaper than a doorbell per request.  If the engine is not
 * running, the remaining requests are moved to the engine's flushlist.
 *
 * Return:
 * 0 - success, -EINVAL - an sdma_txreq was incomplete (tlen != 0),
 * -ECOMM - engine not running, -EBUSY - no space and no iowait sleep
 * callback was provided, otherwise the value returned by the iowait
 * sleep callback.
 */
2470int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
2471 struct list_head *tx_list, u32 *count_out)
2472{
2473 struct sdma_txreq *tx, *tx_next;
2474 int ret = 0;
2475 unsigned long flags;
2476 u16 tail = INVALID_TAIL;
2477 u32 submit_count = 0, flush_count = 0, total_count;
2478
2479 spin_lock_irqsave(&sde->tail_lock, flags);
2480retry:
2481 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2482 tx->wait = wait;
2483 if (unlikely(!__sdma_running(sde)))
2484 goto unlock_noconn;
2485 if (unlikely(tx->num_desc > sde->desc_avail))
2486 goto nodesc;
2487 if (unlikely(tx->tlen)) {
2488 ret = -EINVAL;
2489 goto update_tail;
2490 }
2491 list_del_init(&tx->list);
2492 tail = submit_tx(sde, tx);
2493 submit_count++;
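                /* ring the hardware tail periodically rather than per request */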
2494 if (tail != INVALID_TAIL &&
2495 (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
2496 sdma_update_tail(sde, tail);
2497 tail = INVALID_TAIL;
2498 }
2499 }
2500update_tail:
2501 total_count = submit_count + flush_count;
2502 if (wait) {
2503 iowait_sdma_add(wait, total_count);
2504 iowait_starve_clear(submit_count > 0, wait);
2505 }
2506 if (tail != INVALID_TAIL)
2507 sdma_update_tail(sde, tail);
2508 spin_unlock_irqrestore(&sde->tail_lock, flags);
2509 *count_out = total_count;
2510 return ret;
2511unlock_noconn:
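        /* engine is not running: move the remaining requests to the flushlist */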
2512 spin_lock(&sde->flushlist_lock);
2513 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2514 tx->wait = wait;
2515 list_del_init(&tx->list);
2516 tx->next_descq_idx = 0;
2517#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2518 tx->sn = sde->tail_sn++;
2519 trace_hfi1_sdma_in_sn(sde, tx->sn);
2520#endif
2521 list_add_tail(&tx->list, &sde->flushlist);
2522 flush_count++;
2523 if (wait) {
2524 wait->tx_count++;
2525 wait->count += tx->num_desc;
2526 }
2527 }
2528 spin_unlock(&sde->flushlist_lock);
2529 schedule_work(&sde->flush_worker);
2530 ret = -ECOMM;
2531 goto update_tail;
2532nodesc:
2533 ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
2534 if (ret == -EAGAIN) {
2535 ret = 0;
2536 goto retry;
2537 }
2538 sde->descq_full_count++;
2539 goto update_tail;
2540}
2541
2542static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
2543{
2544 unsigned long flags;
2545
2546 spin_lock_irqsave(&sde->tail_lock, flags);
2547 write_seqlock(&sde->head_lock);
2548
2549 __sdma_process_event(sde, event);
2550
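        /* if the engine is now running, recompute space and service waiters */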
2551 if (sde->state.current_state == sdma_state_s99_running)
2552 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
2553
2554 write_sequnlock(&sde->head_lock);
2555 spin_unlock_irqrestore(&sde->tail_lock, flags);
2556}
2557
2558static void __sdma_process_event(struct sdma_engine *sde,
2559 enum sdma_events event)
2560{
2561 struct sdma_state *ss = &sde->state;
2562 int need_progress = 0;
2563
2564
2565#ifdef CONFIG_SDMA_VERBOSITY
2566 dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
2567 sdma_state_names[ss->current_state],
2568 sdma_event_names[event]);
2569#endif
2570
2571 switch (ss->current_state) {
2572 case sdma_state_s00_hw_down:
2573 switch (event) {
2574 case sdma_event_e00_go_hw_down:
2575 break;
2576 case sdma_event_e30_go_running:
                        /*
                         * Down, but running requested (the usual case):
                         * note the request and fall through to start the
                         * hardware bring-up.
                         */
2584 ss->go_s99_running = 1;
                        /* fall through */
2586 case sdma_event_e10_go_hw_start:
                        /* take a state reference: the state machine is now started */
2588 sdma_get(&sde->state);
2589 sdma_set_state(sde,
2590 sdma_state_s10_hw_start_up_halt_wait);
2591 break;
2592 case sdma_event_e15_hw_halt_done:
2593 break;
2594 case sdma_event_e25_hw_clean_up_done:
2595 break;
2596 case sdma_event_e40_sw_cleaned:
2597 sdma_sw_tear_down(sde);
2598 break;
2599 case sdma_event_e50_hw_cleaned:
2600 break;
2601 case sdma_event_e60_hw_halted:
2602 break;
2603 case sdma_event_e70_go_idle:
2604 break;
2605 case sdma_event_e80_hw_freeze:
2606 break;
2607 case sdma_event_e81_hw_frozen:
2608 break;
2609 case sdma_event_e82_hw_unfreeze:
2610 break;
2611 case sdma_event_e85_link_down:
2612 break;
2613 case sdma_event_e90_sw_halted:
2614 break;
2615 }
2616 break;
2617
2618 case sdma_state_s10_hw_start_up_halt_wait:
2619 switch (event) {
2620 case sdma_event_e00_go_hw_down:
2621 sdma_set_state(sde, sdma_state_s00_hw_down);
2622 sdma_sw_tear_down(sde);
2623 break;
2624 case sdma_event_e10_go_hw_start:
2625 break;
2626 case sdma_event_e15_hw_halt_done:
2627 sdma_set_state(sde,
2628 sdma_state_s15_hw_start_up_clean_wait);
2629 sdma_start_hw_clean_up(sde);
2630 break;
2631 case sdma_event_e25_hw_clean_up_done:
2632 break;
2633 case sdma_event_e30_go_running:
2634 ss->go_s99_running = 1;
2635 break;
2636 case sdma_event_e40_sw_cleaned:
2637 break;
2638 case sdma_event_e50_hw_cleaned:
2639 break;
2640 case sdma_event_e60_hw_halted:
2641 schedule_work(&sde->err_halt_worker);
2642 break;
2643 case sdma_event_e70_go_idle:
2644 ss->go_s99_running = 0;
2645 break;
2646 case sdma_event_e80_hw_freeze:
2647 break;
2648 case sdma_event_e81_hw_frozen:
2649 break;
2650 case sdma_event_e82_hw_unfreeze:
2651 break;
2652 case sdma_event_e85_link_down:
2653 break;
2654 case sdma_event_e90_sw_halted:
2655 break;
2656 }
2657 break;
2658
2659 case sdma_state_s15_hw_start_up_clean_wait:
2660 switch (event) {
2661 case sdma_event_e00_go_hw_down:
2662 sdma_set_state(sde, sdma_state_s00_hw_down);
2663 sdma_sw_tear_down(sde);
2664 break;
2665 case sdma_event_e10_go_hw_start:
2666 break;
2667 case sdma_event_e15_hw_halt_done:
2668 break;
2669 case sdma_event_e25_hw_clean_up_done:
2670 sdma_hw_start_up(sde);
2671 sdma_set_state(sde, ss->go_s99_running ?
2672 sdma_state_s99_running :
2673 sdma_state_s20_idle);
2674 break;
2675 case sdma_event_e30_go_running:
2676 ss->go_s99_running = 1;
2677 break;
2678 case sdma_event_e40_sw_cleaned:
2679 break;
2680 case sdma_event_e50_hw_cleaned:
2681 break;
2682 case sdma_event_e60_hw_halted:
2683 break;
2684 case sdma_event_e70_go_idle:
2685 ss->go_s99_running = 0;
2686 break;
2687 case sdma_event_e80_hw_freeze:
2688 break;
2689 case sdma_event_e81_hw_frozen:
2690 break;
2691 case sdma_event_e82_hw_unfreeze:
2692 break;
2693 case sdma_event_e85_link_down:
2694 break;
2695 case sdma_event_e90_sw_halted:
2696 break;
2697 }
2698 break;
2699
2700 case sdma_state_s20_idle:
2701 switch (event) {
2702 case sdma_event_e00_go_hw_down:
2703 sdma_set_state(sde, sdma_state_s00_hw_down);
2704 sdma_sw_tear_down(sde);
2705 break;
2706 case sdma_event_e10_go_hw_start:
2707 break;
2708 case sdma_event_e15_hw_halt_done:
2709 break;
2710 case sdma_event_e25_hw_clean_up_done:
2711 break;
2712 case sdma_event_e30_go_running:
2713 sdma_set_state(sde, sdma_state_s99_running);
2714 ss->go_s99_running = 1;
2715 break;
2716 case sdma_event_e40_sw_cleaned:
2717 break;
2718 case sdma_event_e50_hw_cleaned:
2719 break;
2720 case sdma_event_e60_hw_halted:
2721 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2722 schedule_work(&sde->err_halt_worker);
2723 break;
2724 case sdma_event_e70_go_idle:
2725 break;
2726 case sdma_event_e85_link_down:
                /* fall through */
2728 case sdma_event_e80_hw_freeze:
2729 sdma_set_state(sde, sdma_state_s80_hw_freeze);
2730 atomic_dec(&sde->dd->sdma_unfreeze_count);
2731 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2732 break;
2733 case sdma_event_e81_hw_frozen:
2734 break;
2735 case sdma_event_e82_hw_unfreeze:
2736 break;
2737 case sdma_event_e90_sw_halted:
2738 break;
2739 }
2740 break;
2741
2742 case sdma_state_s30_sw_clean_up_wait:
2743 switch (event) {
2744 case sdma_event_e00_go_hw_down:
2745 sdma_set_state(sde, sdma_state_s00_hw_down);
2746 break;
2747 case sdma_event_e10_go_hw_start:
2748 break;
2749 case sdma_event_e15_hw_halt_done:
2750 break;
2751 case sdma_event_e25_hw_clean_up_done:
2752 break;
2753 case sdma_event_e30_go_running:
2754 ss->go_s99_running = 1;
2755 break;
2756 case sdma_event_e40_sw_cleaned:
2757 sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
2758 sdma_start_hw_clean_up(sde);
2759 break;
2760 case sdma_event_e50_hw_cleaned:
2761 break;
2762 case sdma_event_e60_hw_halted:
2763 break;
2764 case sdma_event_e70_go_idle:
2765 ss->go_s99_running = 0;
2766 break;
2767 case sdma_event_e80_hw_freeze:
2768 break;
2769 case sdma_event_e81_hw_frozen:
2770 break;
2771 case sdma_event_e82_hw_unfreeze:
2772 break;
2773 case sdma_event_e85_link_down:
2774 ss->go_s99_running = 0;
2775 break;
2776 case sdma_event_e90_sw_halted:
2777 break;
2778 }
2779 break;
2780
2781 case sdma_state_s40_hw_clean_up_wait:
2782 switch (event) {
2783 case sdma_event_e00_go_hw_down:
2784 sdma_set_state(sde, sdma_state_s00_hw_down);
2785 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2786 break;
2787 case sdma_event_e10_go_hw_start:
2788 break;
2789 case sdma_event_e15_hw_halt_done:
2790 break;
2791 case sdma_event_e25_hw_clean_up_done:
2792 sdma_hw_start_up(sde);
2793 sdma_set_state(sde, ss->go_s99_running ?
2794 sdma_state_s99_running :
2795 sdma_state_s20_idle);
2796 break;
2797 case sdma_event_e30_go_running:
2798 ss->go_s99_running = 1;
2799 break;
2800 case sdma_event_e40_sw_cleaned:
2801 break;
2802 case sdma_event_e50_hw_cleaned:
2803 break;
2804 case sdma_event_e60_hw_halted:
2805 break;
2806 case sdma_event_e70_go_idle:
2807 ss->go_s99_running = 0;
2808 break;
2809 case sdma_event_e80_hw_freeze:
2810 break;
2811 case sdma_event_e81_hw_frozen:
2812 break;
2813 case sdma_event_e82_hw_unfreeze:
2814 break;
2815 case sdma_event_e85_link_down:
2816 ss->go_s99_running = 0;
2817 break;
2818 case sdma_event_e90_sw_halted:
2819 break;
2820 }
2821 break;
2822
2823 case sdma_state_s50_hw_halt_wait:
2824 switch (event) {
2825 case sdma_event_e00_go_hw_down:
2826 sdma_set_state(sde, sdma_state_s00_hw_down);
2827 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2828 break;
2829 case sdma_event_e10_go_hw_start:
2830 break;
2831 case sdma_event_e15_hw_halt_done:
2832 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2833 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2834 break;
2835 case sdma_event_e25_hw_clean_up_done:
2836 break;
2837 case sdma_event_e30_go_running:
2838 ss->go_s99_running = 1;
2839 break;
2840 case sdma_event_e40_sw_cleaned:
2841 break;
2842 case sdma_event_e50_hw_cleaned:
2843 break;
2844 case sdma_event_e60_hw_halted:
2845 schedule_work(&sde->err_halt_worker);
2846 break;
2847 case sdma_event_e70_go_idle:
2848 ss->go_s99_running = 0;
2849 break;
2850 case sdma_event_e80_hw_freeze:
2851 break;
2852 case sdma_event_e81_hw_frozen:
2853 break;
2854 case sdma_event_e82_hw_unfreeze:
2855 break;
2856 case sdma_event_e85_link_down:
2857 ss->go_s99_running = 0;
2858 break;
2859 case sdma_event_e90_sw_halted:
2860 break;
2861 }
2862 break;
2863
2864 case sdma_state_s60_idle_halt_wait:
2865 switch (event) {
2866 case sdma_event_e00_go_hw_down:
2867 sdma_set_state(sde, sdma_state_s00_hw_down);
2868 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2869 break;
2870 case sdma_event_e10_go_hw_start:
2871 break;
2872 case sdma_event_e15_hw_halt_done:
2873 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2874 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2875 break;
2876 case sdma_event_e25_hw_clean_up_done:
2877 break;
2878 case sdma_event_e30_go_running:
2879 ss->go_s99_running = 1;
2880 break;
2881 case sdma_event_e40_sw_cleaned:
2882 break;
2883 case sdma_event_e50_hw_cleaned:
2884 break;
2885 case sdma_event_e60_hw_halted:
2886 schedule_work(&sde->err_halt_worker);
2887 break;
2888 case sdma_event_e70_go_idle:
2889 ss->go_s99_running = 0;
2890 break;
2891 case sdma_event_e80_hw_freeze:
2892 break;
2893 case sdma_event_e81_hw_frozen:
2894 break;
2895 case sdma_event_e82_hw_unfreeze:
2896 break;
2897 case sdma_event_e85_link_down:
2898 break;
2899 case sdma_event_e90_sw_halted:
2900 break;
2901 }
2902 break;
2903
2904 case sdma_state_s80_hw_freeze:
2905 switch (event) {
2906 case sdma_event_e00_go_hw_down:
2907 sdma_set_state(sde, sdma_state_s00_hw_down);
2908 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2909 break;
2910 case sdma_event_e10_go_hw_start:
2911 break;
2912 case sdma_event_e15_hw_halt_done:
2913 break;
2914 case sdma_event_e25_hw_clean_up_done:
2915 break;
2916 case sdma_event_e30_go_running:
2917 ss->go_s99_running = 1;
2918 break;
2919 case sdma_event_e40_sw_cleaned:
2920 break;
2921 case sdma_event_e50_hw_cleaned:
2922 break;
2923 case sdma_event_e60_hw_halted:
2924 break;
2925 case sdma_event_e70_go_idle:
2926 ss->go_s99_running = 0;
2927 break;
2928 case sdma_event_e80_hw_freeze:
2929 break;
2930 case sdma_event_e81_hw_frozen:
2931 sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
2932 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2933 break;
2934 case sdma_event_e82_hw_unfreeze:
2935 break;
2936 case sdma_event_e85_link_down:
2937 break;
2938 case sdma_event_e90_sw_halted:
2939 break;
2940 }
2941 break;
2942
2943 case sdma_state_s82_freeze_sw_clean:
2944 switch (event) {
2945 case sdma_event_e00_go_hw_down:
2946 sdma_set_state(sde, sdma_state_s00_hw_down);
2947 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2948 break;
2949 case sdma_event_e10_go_hw_start:
2950 break;
2951 case sdma_event_e15_hw_halt_done:
2952 break;
2953 case sdma_event_e25_hw_clean_up_done:
2954 break;
2955 case sdma_event_e30_go_running:
2956 ss->go_s99_running = 1;
2957 break;
2958 case sdma_event_e40_sw_cleaned:
                        /* notify the freeze path that this engine's clean is done */
2960 atomic_dec(&sde->dd->sdma_unfreeze_count);
2961 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2962 break;
2963 case sdma_event_e50_hw_cleaned:
2964 break;
2965 case sdma_event_e60_hw_halted:
2966 break;
2967 case sdma_event_e70_go_idle:
2968 ss->go_s99_running = 0;
2969 break;
2970 case sdma_event_e80_hw_freeze:
2971 break;
2972 case sdma_event_e81_hw_frozen:
2973 break;
2974 case sdma_event_e82_hw_unfreeze:
2975 sdma_hw_start_up(sde);
2976 sdma_set_state(sde, ss->go_s99_running ?
2977 sdma_state_s99_running :
2978 sdma_state_s20_idle);
2979 break;
2980 case sdma_event_e85_link_down:
2981 break;
2982 case sdma_event_e90_sw_halted:
2983 break;
2984 }
2985 break;
2986
2987 case sdma_state_s99_running:
2988 switch (event) {
2989 case sdma_event_e00_go_hw_down:
2990 sdma_set_state(sde, sdma_state_s00_hw_down);
2991 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2992 break;
2993 case sdma_event_e10_go_hw_start:
2994 break;
2995 case sdma_event_e15_hw_halt_done:
2996 break;
2997 case sdma_event_e25_hw_clean_up_done:
2998 break;
2999 case sdma_event_e30_go_running:
3000 break;
3001 case sdma_event_e40_sw_cleaned:
3002 break;
3003 case sdma_event_e50_hw_cleaned:
3004 break;
3005 case sdma_event_e60_hw_halted:
3006 need_progress = 1;
3007 sdma_err_progress_check_schedule(sde);
                        /* fall through */
3009 case sdma_event_e90_sw_halted:
                        /*
                         * A software-initiated halt does not schedule the
                         * engine progress check.
                         */
3014 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
3015 schedule_work(&sde->err_halt_worker);
3016 break;
3017 case sdma_event_e70_go_idle:
3018 sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
3019 break;
3020 case sdma_event_e85_link_down:
3021 ss->go_s99_running = 0;
                        /* fall through */
3023 case sdma_event_e80_hw_freeze:
3024 sdma_set_state(sde, sdma_state_s80_hw_freeze);
3025 atomic_dec(&sde->dd->sdma_unfreeze_count);
3026 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
3027 break;
3028 case sdma_event_e81_hw_frozen:
3029 break;
3030 case sdma_event_e82_hw_unfreeze:
3031 break;
3032 }
3033 break;
3034 }
3035
3036 ss->last_event = event;
3037 if (need_progress)
3038 sdma_make_progress(sde, 0);
3039}
3040
/*
 * _extend_sdma_tx_descs() - helper to extend a txreq
 *
 * Called when the nominal descriptor allocation in the sdma_txreq has
 * been exhausted.  The allocation is bumped to MAX_DESC entries, with
 * the last entry reserved for a coalesce buffer so that requests with
 * more than MAX_DESC elements can still be sent.
 */
3054static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3055{
3056 int i;
3057
        /* handle reaching the reserved last descriptor */
3059 if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
                /* tlen == 0 means only padding remains: release the last descriptor */
3061 if (!tx->tlen) {
3062 tx->desc_limit = MAX_DESC;
3063 } else if (!tx->coalesce_buf) {
                /* allocate a coalesce buffer with room for dword padding */
3065 tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
3066 GFP_ATOMIC);
3067 if (!tx->coalesce_buf)
3068 goto enomem;
3069 tx->coalesce_idx = 0;
3070 }
3071 return 0;
3072 }
3073
3074 if (unlikely(tx->num_desc == MAX_DESC))
3075 goto enomem;
3076
3077 tx->descp = kmalloc_array(
3078 MAX_DESC,
3079 sizeof(struct sdma_desc),
3080 GFP_ATOMIC);
3081 if (!tx->descp)
3082 goto enomem;
3083
        /* reserve the last descriptor for a possible coalesce buffer */
3085 tx->desc_limit = MAX_DESC - 1;
        /* copy the descriptors already built into the new array */
3087 for (i = 0; i < tx->num_desc; i++)
3088 tx->descp[i] = tx->descs[i];
3089 return 0;
3090enomem:
3091 __sdma_txclean(dd, tx);
3092 return -ENOMEM;
3093}
3094
3095
/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * Called when the nominal descriptor allocation in the sdma_txreq has
 * been exhausted.  _extend_sdma_tx_descs() either grows the descriptor
 * array or allocates a coalesce buffer.  When a coalesce buffer exists,
 * each caller-supplied chunk is copied into it and, once the whole
 * packet has been gathered, it is padded to a dword boundary, DMA
 * mapped, and described by a single descriptor.
 *
 * Return:
 * <0 - error
 *  0 - coalescing, do not add a descriptor for this chunk
 *  1 - continue and add a descriptor for this chunk
 */
3111int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
3112 int type, void *kvaddr, struct page *page,
3113 unsigned long offset, u16 len)
3114{
3115 int pad_len, rval;
3116 dma_addr_t addr;
3117
3118 rval = _extend_sdma_tx_descs(dd, tx);
3119 if (rval) {
3120 __sdma_txclean(dd, tx);
3121 return rval;
3122 }
3123
        /* a coalesce buffer exists: copy this chunk into it instead */
3125 if (tx->coalesce_buf) {
3126 if (type == SDMA_MAP_NONE) {
3127 __sdma_txclean(dd, tx);
3128 return -EINVAL;
3129 }
3130
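                /* page-based chunks must be mapped before they can be copied */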
3131 if (type == SDMA_MAP_PAGE) {
3132 kvaddr = kmap(page);
3133 kvaddr += offset;
3134 } else if (WARN_ON(!kvaddr)) {
3135 __sdma_txclean(dd, tx);
3136 return -EINVAL;
3137 }
3138
3139 memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
3140 tx->coalesce_idx += len;
3141 if (type == SDMA_MAP_PAGE)
3142 kunmap(page);
3143
                /* more data is still expected: wait for the rest of the packet */
3145 if (tx->tlen - tx->coalesce_idx)
3146 return 0;
3147
                /* the whole packet has been gathered: add any dword padding */
3149 pad_len = tx->packet_len & (sizeof(u32) - 1);
3150 if (pad_len) {
3151 pad_len = sizeof(u32) - pad_len;
3152 memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
                        /* account for the padding in the packet and total lengths */
3154 tx->packet_len += pad_len;
3155 tx->tlen += pad_len;
3156 }
3157
                /* dma map the completed coalesce buffer */
3159 addr = dma_map_single(&dd->pcidev->dev,
3160 tx->coalesce_buf,
3161 tx->tlen,
3162 DMA_TO_DEVICE);
3163
3164 if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
3165 __sdma_txclean(dd, tx);
3166 return -ENOSPC;
3167 }
3168
                /* add a single descriptor for the coalesce buffer */
3170 tx->desc_limit = MAX_DESC;
3171 return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
3172 addr, tx->tlen);
3173 }
3174
3175 return 1;
3176}
3177
/* update each engine's SLID check when the LID or LMC changes */
3179void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
3180{
3181 struct sdma_engine *sde;
3182 int i;
3183 u64 sreg;
3184
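        /* build the SLID check register: the mask and the masked LID value */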
3185 sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
3186 SD(CHECK_SLID_MASK_SHIFT)) |
3187 (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
3188 SD(CHECK_SLID_VALUE_SHIFT));
3189
3190 for (i = 0; i < dd->num_sdma; i++) {
3191 hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
3192 i, (u32)sreg);
3193 sde = &dd->per_sdma[i];
3194 write_sde_csr(sde, SD(CHECK_SLID), sreg);
3195 }
3196}
3197
/* the tx is not dword sized: add a descriptor for the pad buffer */
3199int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3200{
3201 int rval = 0;
3202
3203 tx->num_desc++;
3204 if ((unlikely(tx->num_desc == tx->desc_limit))) {
3205 rval = _extend_sdma_tx_descs(dd, tx);
3206 if (rval) {
3207 __sdma_txclean(dd, tx);
3208 return rval;
3209 }
3210 }
3211
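        /* point a descriptor at the shared pad buffer for the remaining bytes */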
3212 make_tx_sdma_desc(
3213 tx,
3214 SDMA_MAP_NONE,
3215 dd->sdma_pad_phys,
3216 sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
3217 _sdma_close_tx(dd, tx);
3218 return rval;
3219}
3220
/*
 * _sdma_txreq_ahgadd - add AHG header-update information to a txreq
 *
 * Encodes the AHG index, mode, header length and update words into the
 * first descriptors of the sdma_txreq; depending on the mode this
 * consumes up to three descriptors.
 */
3228void _sdma_txreq_ahgadd(
3229 struct sdma_txreq *tx,
3230 u8 num_ahg,
3231 u8 ahg_entry,
3232 u32 *ahg,
3233 u8 ahg_hlen)
3234{
3235 u32 i, shift = 0, desc = 0;
3236 u8 mode;
3237
3238 WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
3239
3240 if (num_ahg == 1)
3241 mode = SDMA_AHG_APPLY_UPDATE1;
3242 else if (num_ahg <= 5)
3243 mode = SDMA_AHG_APPLY_UPDATE2;
3244 else
3245 mode = SDMA_AHG_APPLY_UPDATE3;
3246 tx->num_desc++;
3247
3248 switch (mode) {
3249 case SDMA_AHG_APPLY_UPDATE3:
3250 tx->num_desc++;
3251 tx->descs[2].qw[0] = 0;
3252 tx->descs[2].qw[1] = 0;
                /* fall through */
3254 case SDMA_AHG_APPLY_UPDATE2:
3255 tx->num_desc++;
3256 tx->descs[1].qw[0] = 0;
3257 tx->descs[1].qw[1] = 0;
3258 break;
3259 }
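        /*
         * convert the header length to dwords; pack index, mode, dword
         * count and the first update word into qw1 of the first descriptor
         */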
3260 ahg_hlen >>= 2;
3261 tx->descs[0].qw[1] |=
3262 (((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
3263 << SDMA_DESC1_HEADER_INDEX_SHIFT) |
3264 (((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
3265 << SDMA_DESC1_HEADER_DWS_SHIFT) |
3266 (((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
3267 << SDMA_DESC1_HEADER_MODE_SHIFT) |
3268 (((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
3269 << SDMA_DESC1_HEADER_UPDATE1_SHIFT);
3270 for (i = 0; i < (num_ahg - 1); i++) {
3271 if (!shift && !(i & 2))
3272 desc++;
3273 tx->descs[desc].qw[!!(i & 2)] |=
3274 (((u64)ahg[i + 1])
3275 << shift);
3276 shift = (shift + 32) & 63;
3277 }
3278}
3279
/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return:
 * 0-31 when successful, -EINVAL when @sde is NULL, -ENOSPC when no
 * entry is available.
 */
3288int sdma_ahg_alloc(struct sdma_engine *sde)
3289{
3290 int nr;
3291 int oldbit;
3292
3293 if (!sde) {
3294 trace_hfi1_ahg_allocate(sde, -EINVAL);
3295 return -EINVAL;
3296 }
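        /* claim the first clear bit; retry if another CPU takes it first */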
3297 while (1) {
3298 nr = ffz(READ_ONCE(sde->ahg_bits));
3299 if (nr > 31) {
3300 trace_hfi1_ahg_allocate(sde, -ENOSPC);
3301 return -ENOSPC;
3302 }
3303 oldbit = test_and_set_bit(nr, &sde->ahg_bits);
3304 if (!oldbit)
3305 break;
3306 cpu_relax();
3307 }
3308 trace_hfi1_ahg_allocate(sde, nr);
3309 return nr;
3310}
3311
/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine the entry belongs to
 * @ahg_index: index to free
 *
 * Clears the corresponding bit in the engine's AHG bitmap.
 */
3319void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
3320{
3321 if (!sde)
3322 return;
3323 trace_hfi1_ahg_deallocate(sde, ahg_index);
3324 if (ahg_index < 0 || ahg_index > 31)
3325 return;
3326 clear_bit(ahg_index, &sde->ahg_bits);
3327}
3328
/*
 * SDMA freeze notification.  Called when the driver learns that a
 * hardware freeze (or link down) is coming but before it has fully
 * settled, generally from an error interrupt.
 *
 * Sends e80_hw_freeze or e85_link_down to every engine, pulling it out
 * of the running state so no more requests can be submitted.
 */
3337void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
3338{
3339 int i;
3340 enum sdma_events event = link_down ? sdma_event_e85_link_down :
3341 sdma_event_e80_hw_freeze;
3342
        /* set up the stop-wait count, but do not wait here */
3344 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3345
        /* tell every engine to stop running */
3347 for (i = 0; i < dd->num_sdma; i++)
3348 sdma_process_event(&dd->per_sdma[i], event);
3349
        /* sdma_freeze() waits for the engines to actually stop */
3351}
3352
/*
 * SDMA freeze handling.  Called once the hardware freeze has settled:
 * waits for every engine to stop, tells each that the hardware is frozen
 * so it can run its software clean, then waits for the cleans to finish.
 */
3357void sdma_freeze(struct hfi1_devdata *dd)
3358{
3359 int i;
3360 int ret;
3361
        /*
         * Wait for every engine to have moved out of the running state
         * before continuing.
         */
3366 ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
3367 atomic_read(&dd->sdma_unfreeze_count) <=
3368 0);
        /* interrupted, or the count went negative: give up */
3370 if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
3371 return;
3372
        /* re-arm the count for the clean-up wait below */
3374 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3375
        /* tell every engine the hardware is frozen; each starts its clean */
3377 for (i = 0; i < dd->num_sdma; i++)
3378 sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);
3379
        /*
         * Wait for every engine to finish its software clean before
         * returning.
         */
3385 (void)wait_event_interruptible(dd->sdma_unfreeze_wq,
3386 atomic_read(&dd->sdma_unfreeze_count) <= 0);
3387
3388}
3389
/*
 * SDMA unfreeze handling.  Called after the hardware is unfrozen: sends
 * e82_hw_unfreeze to every engine so it restarts its hardware and
 * returns to the running or idle state.
 */
3398void sdma_unfreeze(struct hfi1_devdata *dd)
3399{
3400 int i;
3401
        /* tell every engine to start its unfreeze clean-up */
3403 for (i = 0; i < dd->num_sdma; i++)
3404 sdma_process_event(&dd->per_sdma[i],
3405 sdma_event_e82_hw_unfreeze);
3406}
3407
/**
 * _sdma_engine_progress_schedule() - schedule progress on an engine
 * @sde: sdma_engine to schedule progress on
 */
3413void _sdma_engine_progress_schedule(
3414 struct sdma_engine *sde)
3415{
3416 trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
3417
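        /* force the engine's interrupt to trigger progress processing */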
3418 write_csr(sde->dd,
3419 CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
3420 sde->progress_mask);
3421}
3422