#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

/* must be a power of 2 >= 64 <= 32768 */
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");

static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number of SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before interrupt");

/* max number of waiters woken per descriptor-avail pass */
#define SDMA_WAIT_BATCH_SIZE 20

/* timeout for an engine to report itself halted after an error, in ms */
#define SDMA_ERR_HALT_TIMEOUT 10
/* expand short CSR field names to the full SEND_DMA_* register names */
#define SD(name) SEND_DMA_##name
/* all engine-error status bits that require halting the engine */
#define ALL_SDMA_ENG_HALT_ERRS \
	(SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))

/* sdma_sendctrl operations */
#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
#define SDMA_SENDCTRL_OP_HALT      BIT(2)
#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)

/* handle long sequence names for register defines */
#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT
static const char * const sdma_state_names[] = {
	[sdma_state_s00_hw_down] = "s00_HwDown",
	[sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
	[sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
	[sdma_state_s20_idle] = "s20_Idle",
	[sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
	[sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
	[sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
	[sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
	[sdma_state_s80_hw_freeze] = "s80_HwFreeze",
	[sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
	[sdma_state_s99_running] = "s99_Running",
};

#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = {
	[sdma_event_e00_go_hw_down] = "e00_GoHwDown",
	[sdma_event_e10_go_hw_start] = "e10_GoHwStart",
	[sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
	[sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
	[sdma_event_e30_go_running] = "e30_GoRunning",
	[sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
	[sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
	[sdma_event_e60_hw_halted] = "e60_HwHalted",
	[sdma_event_e70_go_idle] = "e70_GoIdle",
	[sdma_event_e80_hw_freeze] = "e80_HwFreeze",
	[sdma_event_e81_hw_frozen] = "e81_HwFrozen",
	[sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
	[sdma_event_e85_link_down] = "e85_LinkDown",
	[sdma_event_e90_sw_halted] = "e90_SwHalted",
};
#endif
/* CSR actions applied on entry to each state */
static const struct sdma_set_state_action sdma_action_table[] = {
	[sdma_state_s00_hw_down] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s10_hw_start_up_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s15_hw_start_up_clean_wait] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s20_idle] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s30_sw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s40_hw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s50_hw_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s60_idle_halt_wait] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s80_hw_freeze] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s82_freeze_sw_clean] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s99_running] = {
		.op_enable = 1,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
		.go_s99_running_totrue = 1,
	},
};

/* hardware tail update threshold in sdma_send_txlist(): the tail CSR is
 * written whenever (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0, i.e.
 * once every 32 submitted requests
 */
#define SDMA_TAIL_UPDATE_THRESH 0x1F

static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
static void sdma_hw_clean_up_task(unsigned long);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
static void sdma_sw_clean_up_task(unsigned long);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void __sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);

static const char *sdma_state_name(enum sdma_states state)
{
	return sdma_state_names[state];
}

static void sdma_get(struct sdma_state *ss)
{
	kref_get(&ss->kref);
}

static void sdma_complete(struct kref *kref)
{
	struct sdma_state *ss =
		container_of(kref, struct sdma_state, kref);

	complete(&ss->comp);
}

static void sdma_put(struct sdma_state *ss)
{
	kref_put(&ss->kref, sdma_complete);
}

static void sdma_finalput(struct sdma_state *ss)
{
	sdma_put(ss);
	wait_for_completion(&ss->comp);
}

static inline void write_sde_csr(
	struct sdma_engine *sde,
	u32 offset0,
	u64 value)
{
	write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
}

static inline u64 read_sde_csr(
	struct sdma_engine *sde,
	u32 offset0)
{
	return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
}

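/*
 * Poll the engine's egress packet-occupancy count until it drains to
 * zero.  If the count stops changing for roughly 500 iterations (1 us
 * apart), assume the egress pipeline is stuck and schedule a link
 * bounce to recover.
 */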
static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
					int pause)
{
	u64 off = 8 * sde->this_idx;
	struct hfi1_devdata *dd = sde->dd;
	int lcnt = 0;
	u64 reg_prev;
	u64 reg = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);

		reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
		reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
		if (reg == 0)
			break;
		/* restart the countdown whenever the occupancy changes */
		if (reg != reg_prev)
			lcnt = 0;
		if (lcnt++ > 500) {
			/* timed out - bounce the link */
			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sde->this_idx, (u32)reg);
			queue_work(dd->pport->link_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		udelay(1);
	}
}

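/**
 * sdma_wait() - wait for packet egress to complete for all SDMA engines,
 *		 and pause for credit return
 * @dd: the hfi1 device data
 */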
void sdma_wait(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->num_sdma; i++) {
		struct sdma_engine *sde = &dd->per_sdma[i];

		sdma_wait_for_packet_egress(sde, 0);
	}
}

static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
{
	u64 reg;

	if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
		return;
	reg = cnt;
	reg &= SD(DESC_CNT_CNT_MASK);
	reg <<= SD(DESC_CNT_CNT_SHIFT);
	write_sde_csr(sde, SD(DESC_CNT), reg);
}

static inline void complete_tx(struct sdma_engine *sde,
			       struct sdma_txreq *tx,
			       int res)
{
	/* protect against complete modifying */
	struct iowait *wait = tx->wait;
	callback_t complete = tx->complete;

#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	trace_hfi1_sdma_out_sn(sde, tx->sn);
	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
		dd_dev_err(sde->dd, "expected %llu got %llu\n",
			   sde->head_sn, tx->sn);
	sde->head_sn++;
#endif
	__sdma_txclean(sde->dd, tx);
	if (complete)
		(*complete)(tx, res);
	if (wait && iowait_sdma_dec(wait))
		iowait_drain_wakeup(wait);
}

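/*
 * Complete all outstanding txreqs for this engine with an
 * SDMA_TXREQ_S_ABORTED status.
 *
 * Aborted requests can live in two places: still in the descriptor
 * ring, or parked on the flushlist because the engine was not running
 * when they were submitted.  Flush the ring first, then splice the
 * flushlist onto a private list (under flushlist_lock) and complete
 * each entry outside the lock.
 */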
static void sdma_flush(struct sdma_engine *sde)
{
	struct sdma_txreq *txp, *txp_next;
	LIST_HEAD(flushlist);
	unsigned long flags;

	/* flush from head to tail */
	sdma_flush_descq(sde);
	spin_lock_irqsave(&sde->flushlist_lock, flags);
	/* copy flush list */
	list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
		list_del_init(&txp->list);
		list_add_tail(&txp->list, &flushlist);
	}
	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
	/* flush from flush list */
	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
}

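/*
 * Worker body for sde->flush_worker: if the engine is no longer
 * running, abort everything it still holds.  Runs under the head
 * seqlock so it cannot race with descriptor retirement.
 */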
static void sdma_field_flush(struct work_struct *work)
{
	unsigned long flags;
	struct sdma_engine *sde =
		container_of(work, struct sdma_engine, flush_worker);

	write_seqlock_irqsave(&sde->head_lock, flags);
	if (!__sdma_running(sde))
		sdma_flush(sde);
	write_sequnlock_irqrestore(&sde->head_lock, flags);
}

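/*
 * Worker body for sde->err_halt_worker: after an engine error, poll
 * the status CSR until the hardware reports the engine halted (or
 * SDMA_ERR_HALT_TIMEOUT ms pass), then feed e15_hw_halt_done into the
 * state machine.
 */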
static void sdma_err_halt_wait(struct work_struct *work)
{
	struct sdma_engine *sde = container_of(work, struct sdma_engine,
					       err_halt_worker);
	u64 statuscsr;
	unsigned long timeout;

	timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
	while (1) {
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
		if (statuscsr)
			break;
		if (time_after(jiffies, timeout)) {
			dd_dev_err(sde->dd,
				   "SDMA engine %d - timeout waiting for engine to halt\n",
				   sde->this_idx);
			/*
			 * Continue anyway.  This could happen if there
			 * was an uncorrectable error in the wrong spot.
			 */
			break;
		}
		usleep_range(80, 120);
	}

	sdma_process_event(sde, sdma_event_e15_hw_halt_done);
}

static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
		unsigned index;
		struct hfi1_devdata *dd = sde->dd;

		for (index = 0; index < dd->num_sdma; index++) {
			struct sdma_engine *curr_sdma = &dd->per_sdma[index];

			if (curr_sdma != sde)
				curr_sdma->progress_check_head =
							curr_sdma->descq_head;
		}
		dd_dev_err(sde->dd,
			   "SDMA engine %d - check scheduled\n",
			   sde->this_idx);
		mod_timer(&sde->err_progress_check_timer, jiffies + 10);
	}
}

static void sdma_err_progress_check(struct timer_list *t)
{
	unsigned index;
	struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);

	dd_dev_err(sde->dd, "SDE progress check event\n");
	for (index = 0; index < sde->dd->num_sdma; index++) {
		struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
		unsigned long flags;

		/* check progress on each engine except the current one */
		if (curr_sde == sde)
			continue;
		/*
		 * We must lock interrupts when acquiring sde->lock,
		 * to avoid a deadlock if interrupt triggers and spins on
		 * the same lock on same CPU
		 */
		spin_lock_irqsave(&curr_sde->tail_lock, flags);
		write_seqlock(&curr_sde->head_lock);

		/* skip non-running queues */
		if (curr_sde->state.current_state != sdma_state_s99_running) {
			write_sequnlock(&curr_sde->head_lock);
			spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
			continue;
		}

		if ((curr_sde->descq_head != curr_sde->descq_tail) &&
		    (curr_sde->descq_head ==
		     curr_sde->progress_check_head))
			__sdma_process_event(curr_sde,
					     sdma_event_e90_sw_halted);
		write_sequnlock(&curr_sde->head_lock);
		spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
	}
	schedule_work(&sde->err_halt_worker);
}

static void sdma_hw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	u64 statuscsr;

	while (1) {
#ifdef CONFIG_SDMA_VERBOSITY
		dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
			   sde->this_idx, slashstrip(__FILE__), __LINE__,
			   __func__);
#endif
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
		if (statuscsr)
			break;
		udelay(10);
	}

	sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
}

static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
	smp_read_barrier_depends(); /* see sdma_update_tail() */
	return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}

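/*
 * Retire everything between the software head and tail as aborted,
 * completing any txreq whose final descriptor is reached.  Called only
 * from sdma_flush() while the engine is not running.
 */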
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	/*
	 * Walk the descriptor ring; every descriptor retired here
	 * belongs to a request that will never complete normally.
	 */
	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	while (head != tail) {
		/* advance head, wrap if needed */
		head = ++sde->descq_head & sde->sdma_mask;
		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == head) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
			trace_hfi1_sdma_progress(sde, head, tail, txp);
			txp = get_txhead(sde);
		}
		progress++;
	}
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

static void sdma_sw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

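	/*
	 * At this point, the following should always be true:
	 * - We are halted, so no more descriptors are getting retired.
	 * - We are not running, so no one is submitting new work.
	 * - Only we can send the e40_sw_cleaned, so we can't start
	 *   running again until we say so.  So, the active list and
	 *   descq are ours to play with.
	 */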
	sdma_make_progress(sde, 0);

	sdma_flush(sde);

	/*
	 * Reset our notion of head and tail.
	 * Note that the HW registers have been reset via an earlier
	 * clean up.
	 */
	sde->descq_tail = 0;
	sde->descq_head = 0;
	sde->desc_avail = sdma_descq_freecnt(sde);
	*sde->head_dma = 0;

	__sdma_process_event(sde, sdma_event_e40_sw_cleaned);

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sw_tear_down(struct sdma_engine *sde)
{
	struct sdma_state *ss = &sde->state;

	/* Releasing this reference means the state machine has stopped. */
	sdma_put(ss);

	/* state machine paused, also wake any pending unfreeze waiter */
	atomic_set(&sde->dd->sdma_unfreeze_count, -1);
	wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
}

static void sdma_start_hw_clean_up(struct sdma_engine *sde)
{
	tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
}

static void sdma_set_state(struct sdma_engine *sde,
			   enum sdma_states next_state)
{
	struct sdma_state *ss = &sde->state;
	const struct sdma_set_state_action *action = sdma_action_table;
	unsigned op = 0;

	trace_hfi1_sdma_state(
		sde,
		sdma_state_names[ss->current_state],
		sdma_state_names[next_state]);

	/* debugging bookkeeping */
	ss->previous_state = ss->current_state;
	ss->previous_op = ss->current_op;
	ss->current_state = next_state;

	if (ss->previous_state != sdma_state_s99_running &&
	    next_state == sdma_state_s99_running)
		sdma_flush(sde);

	if (action[next_state].op_enable)
		op |= SDMA_SENDCTRL_OP_ENABLE;

	if (action[next_state].op_intenable)
		op |= SDMA_SENDCTRL_OP_INTENABLE;

	if (action[next_state].op_halt)
		op |= SDMA_SENDCTRL_OP_HALT;

	if (action[next_state].op_cleanup)
		op |= SDMA_SENDCTRL_OP_CLEANUP;

	if (action[next_state].go_s99_running_tofalse)
		ss->go_s99_running = 0;

	if (action[next_state].go_s99_running_totrue)
		ss->go_s99_running = 1;

	ss->current_op = op;
	sdma_sendctrl(sde, ss->current_op);
}

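/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.
 *
 * This is currently only used in the verbs initialization to build the tx
 * ring.  A zero or out-of-range module parameter falls back to the default.
 */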
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;
	/* count must be a power of 2 between 64 and 32768 */
	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}

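/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found.  The mapping fields are protected by RCU.
 */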
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	struct sdma_vl_map *m;
	u8 vl;

	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
		return -EINVAL;

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return -EINVAL;
	}
	vl = m->engine_to_vl[sde->this_idx];
	rcu_read_unlock();

	return vl;
}

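/**
 * sdma_select_engine_vl() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an engine based on the selector and a vl.  The
 * mapping fields are protected by RCU.
 */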
struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl)
{
	struct sdma_vl_map *m;
	struct sdma_map_elem *e;
	struct sdma_engine *rval;

	/*
	 * An out-of-range vl should only happen if the SC->VL mapping
	 * changed after the initial checks on the QP/AH; default to
	 * engine 0 below.
	 */
	if (vl >= num_vls) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return &dd->per_sdma[0];
	}
	e = m->map[vl & m->mask];
	rval = e->sde[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? &dd->per_sdma[0] : rval;
	trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
	return rval;
}

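/**
 * sdma_select_engine_sc() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns an engine based on the selector and an sc.
 */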
struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return sdma_select_engine_vl(dd, selector, vl);
}

struct sdma_rht_map_elem {
	u32 mask;
	u8 ctr;
	struct sdma_engine *sde[0];
};

struct sdma_rht_node {
	unsigned long cpu_id;
	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
	struct rhash_head node;
};

#define NR_CPUS_HINT 192

static const struct rhashtable_params sdma_rht_params = {
	.nelem_hint = NR_CPUS_HINT,
	.head_offset = offsetof(struct sdma_rht_node, node),
	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
	.max_size = NR_CPUS,
	.min_size = 8,
	.automatic_shrinking = true,
};

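/*
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * A user-defined sdma engine affinity setting is honored when applicable;
 * otherwise the system default sdma engine mapping is used.
 */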
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	struct sdma_rht_node *rht_node;
	struct sdma_engine *sde = NULL;
	const struct cpumask *current_mask = &current->cpus_allowed;
	unsigned long cpu_id;

	/*
	 * To ensure that always the same sdma engine(s) will be
	 * selected make sure the process is pinned to this CPU only.
	 */
	if (cpumask_weight(current_mask) != 1)
		goto out;

	cpu_id = smp_processor_id();
	rcu_read_lock();
	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
					  sdma_rht_params);

	if (rht_node && rht_node->map[vl]) {
		struct sdma_rht_map_elem *map = rht_node->map[vl];

		sde = map->sde[selector & map->mask];
	}
	rcu_read_unlock();

	if (sde)
		return sde;

out:
	return sdma_select_engine_vl(dd, selector, vl);
}

static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	int i;

	/* pad the map out to a power of two by replicating entries */
	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
		map->sde[map->ctr + i] = map->sde[i];
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
				 struct sdma_engine *sde)
{
	unsigned int i, pow;

	/* only the first ctr entries can hold a real match */
	for (i = 0; i < map->ctr; i++) {
		if (map->sde[i] == sde) {
			memmove(&map->sde[i], &map->sde[i + 1],
				(map->ctr - i - 1) * sizeof(map->sde[0]));
			map->ctr--;
			pow = roundup_pow_of_two(map->ctr ? : 1);
			map->mask = pow - 1;
			sdma_populate_sde_map(map);
			break;
		}
	}
}

/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count)
{
	struct hfi1_devdata *dd = sde->dd;
	cpumask_var_t mask, new_mask;
	unsigned long cpu;
	int ret, vl, sz;

	vl = sdma_engine_get_vl(sde);
	if (unlikely(vl < 0))
		return -EINVAL;

	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
	if (!ret) {
		free_cpumask_var(mask);
		return -ENOMEM;
	}
	ret = cpulist_parse(buf, mask);
	if (ret)
		goto out_free;

	if (!cpumask_subset(mask, cpu_online_mask)) {
		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
		ret = -EINVAL;
		goto out_free;
	}

	sz = sizeof(struct sdma_rht_map_elem) +
			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));

	mutex_lock(&process_to_sde_mutex);

	for_each_cpu(cpu, mask) {
		struct sdma_rht_node *rht_node;

		/* Check if we have this already mapped */
		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
			cpumask_set_cpu(cpu, new_mask);
			continue;
		}

		if (vl >= ARRAY_SIZE(rht_node->map)) {
			ret = -EINVAL;
			goto out;
		}

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (!rht_node) {
			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
			if (!rht_node) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
			if (!rht_node->map[vl]) {
				kfree(rht_node);
				ret = -ENOMEM;
				goto out;
			}
			rht_node->cpu_id = cpu;
			rht_node->map[vl]->mask = 0;
			rht_node->map[vl]->ctr = 1;
			rht_node->map[vl]->sde[0] = sde;

			ret = rhashtable_insert_fast(dd->sdma_rht,
						     &rht_node->node,
						     sdma_rht_params);
			if (ret) {
				kfree(rht_node->map[vl]);
				kfree(rht_node);
				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
					   cpu);
				goto out;
			}

		} else {
			int ctr, pow;

			/* Add this engine to an existing cpu node */
			if (!rht_node->map[vl])
				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);

			if (!rht_node->map[vl]) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl]->ctr++;
			ctr = rht_node->map[vl]->ctr;
			rht_node->map[vl]->sde[ctr - 1] = sde;
			pow = roundup_pow_of_two(ctr);
			rht_node->map[vl]->mask = pow - 1;

			/* Populate the sde map table */
			sdma_populate_sde_map(rht_node->map[vl]);
		}
		cpumask_set_cpu(cpu, new_mask);
	}

	/* Clean up old mappings */
	for_each_cpu(cpu, cpu_online_mask) {
		struct sdma_rht_node *rht_node;

		/* Don't clean up sdes that are set in the new mask */
		if (cpumask_test_cpu(cpu, mask))
			continue;

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (rht_node) {
			bool empty = true;
			int i;

			/* Remove this engine from all vl maps */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
				if (rht_node->map[i])
					sdma_cleanup_sde_map(rht_node->map[i],
							     sde);

			/* Free empty hash table entries */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
				if (!rht_node->map[i])
					continue;

				if (rht_node->map[i]->ctr) {
					empty = false;
					break;
				}
			}

			if (empty) {
				ret = rhashtable_remove_fast(dd->sdma_rht,
							     &rht_node->node,
							     sdma_rht_params);
				WARN_ON(ret);

				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
					kfree(rht_node->map[i]);

				kfree(rht_node);
			}
		}
	}

	cpumask_copy(&sde->cpu_mask, new_mask);
out:
	mutex_unlock(&process_to_sde_mutex);
out_free:
	free_cpumask_var(mask);
	free_cpumask_var(new_mask);
	return ret ? : strnlen(buf, PAGE_SIZE);
}

ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	mutex_lock(&process_to_sde_mutex);
	if (cpumask_empty(&sde->cpu_mask))
		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
	else
		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
	mutex_unlock(&process_to_sde_mutex);
	return strnlen(buf, PAGE_SIZE);
}

static void sdma_rht_free(void *ptr, void *arg)
{
	struct sdma_rht_node *rht_node = ptr;
	int i;

	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
		kfree(rht_node->map[i]);

	kfree(rht_node);
}

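/**
 * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * This routine dumps the process to sde mappings per cpu
 */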
void sdma_seqfile_dump_cpu_list(struct seq_file *s,
				struct hfi1_devdata *dd,
				unsigned long cpuid)
{
	struct sdma_rht_node *rht_node;
	int i, j;

	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
					  sdma_rht_params);
	if (!rht_node)
		return;

	seq_printf(s, "cpu%3lu: ", cpuid);
	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
		if (!rht_node->map[i] || !rht_node->map[i]->ctr)
			continue;

		seq_printf(s, " vl%d: [", i);

		for (j = 0; j < rht_node->map[i]->ctr; j++) {
			if (!rht_node->map[i]->sde[j])
				continue;

			if (j > 0)
				seq_puts(s, ",");

			seq_printf(s, " sdma%2d",
				   rht_node->map[i]->sde[j]->this_idx);
		}
		seq_puts(s, " ]");
	}

	seq_puts(s, "\n");
}

/*
 * Free the indicated map struct
 */
static void sdma_map_free(struct sdma_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

/*
 * Handle RCU callback
 */
static void sdma_map_rcu_callback(struct rcu_head *list)
{
	struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);

	sdma_map_free(m);
}

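/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to specify a non-uniform vl/engine loading.  NULL
 * implies auto computing the loading and giving each VL a uniform
 * distribution of engines per VL.
 *
 * The auto algorithm computes the sde_per_vl and the number of extra
 * engines.  Any extra engines are distributed one each to the highest
 * numbered VLs, working down.
 *
 * RCU locking is used to control access to the mapping fields.
 */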
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	int i, j;
	int extra, sde_per_vl;
	int engine = 0;
	u8 lvl_engines[OPA_MAX_VLS];
	struct sdma_vl_map *oldmap, *newmap;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return 0;

	if (!vl_engines) {
		/* truncate divide */
		sde_per_vl = dd->num_sdma / num_vls;
		/* extras */
		extra = dd->num_sdma % num_vls;
		vl_engines = lvl_engines;
		/* add extras from the highest vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(
		sizeof(struct sdma_vl_map) +
			roundup_pow_of_two(num_vls) *
			sizeof(struct sdma_map_elem *),
		GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	/* initialize back-map */
	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
		newmap->engine_to_vl[i] = -1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_engine = engine;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_engines[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(
				sizeof(struct sdma_map_elem) +
					sz * sizeof(struct sdma_engine *),
				GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/* assign engines */
			for (j = 0; j < sz; j++) {
				newmap->map[i]->sde[j] =
					&dd->per_sdma[engine];
				if (++engine >= first_engine + vl_engines[i])
					/* wrap back to first engine */
					engine = first_engine;
			}
			/* assign back-map */
			for (j = 0; j < vl_engines[i]; j++)
				newmap->engine_to_vl[first_engine + j] = i;
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		engine = first_engine + vl_engines[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->sde_map_lock);
	oldmap = rcu_dereference_protected(dd->sdma_map,
					   lockdep_is_held(&dd->sde_map_lock));

	/* publish newmap */
	rcu_assign_pointer(dd->sdma_map, newmap);

	spin_unlock_irq(&dd->sde_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, sdma_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	sdma_map_free(newmap);
	return -ENOMEM;
}

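/*
 * Clean up allocated memory.
 *
 * This routine can be called regardless of whether sdma_init()
 * succeeded.
 */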
static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
	size_t i;
	struct sdma_engine *sde;

	if (dd->sdma_pad_dma) {
		dma_free_coherent(&dd->pcidev->dev, 4,
				  (void *)dd->sdma_pad_dma,
				  dd->sdma_pad_phys);
		dd->sdma_pad_dma = NULL;
		dd->sdma_pad_phys = 0;
	}
	if (dd->sdma_heads_dma) {
		dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
				  (void *)dd->sdma_heads_dma,
				  dd->sdma_heads_phys);
		dd->sdma_heads_dma = NULL;
		dd->sdma_heads_phys = 0;
	}
	for (i = 0; dd->per_sdma && i < num_engines; ++i) {
		sde = &dd->per_sdma[i];

		sde->head_dma = NULL;
		sde->head_phys = 0;

		if (sde->descq) {
			dma_free_coherent(
				&dd->pcidev->dev,
				sde->descq_cnt * sizeof(u64[2]),
				sde->descq,
				sde->descq_phys
			);
			sde->descq = NULL;
			sde->descq_phys = 0;
		}
		kvfree(sde->tx_ring);
		sde->tx_ring = NULL;
	}
	spin_lock_irq(&dd->sde_map_lock);
	sdma_map_free(rcu_access_pointer(dd->sdma_map));
	RCU_INIT_POINTER(dd->sdma_map, NULL);
	spin_unlock_irq(&dd->sde_map_lock);
	synchronize_rcu();
	kfree(dd->per_sdma);
	dd->per_sdma = NULL;

	if (dd->sdma_rht) {
		rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
		kfree(dd->sdma_rht);
		dd->sdma_rht = NULL;
	}
}

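/**
 * sdma_init() - called when device probed
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 *
 * Initializes each sde and its csrs.
 * Interrupts are not required to be enabled.
 *
 * Returns:
 * 0 - success, -errno on failure
 */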
int sdma_init(struct hfi1_devdata *dd, u8 port)
{
	unsigned this_idx;
	struct sdma_engine *sde;
	struct rhashtable *tmp_sdma_rht;
	u16 descq_cnt;
	void *curr_head;
	struct hfi1_pportdata *ppd = dd->pport + port;
	u32 per_sdma_credits;
	uint idle_cnt = sdma_idle_cnt;
	size_t num_engines = dd->chip_sdma_engines;
	int ret = -ENOMEM;

	if (!HFI1_CAP_IS_KSET(SDMA)) {
		HFI1_CAP_CLEAR(SDMA_AHG);
		return 0;
	}
	if (mod_num_sdma &&
	    /* can't exceed chip support */
	    mod_num_sdma <= dd->chip_sdma_engines &&
	    /* count must be >= vls */
	    mod_num_sdma >= num_vls)
		num_engines = mod_num_sdma;

	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		    dd->chip_sdma_mem_size);

	per_sdma_credits =
		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);

	/* set up freeze waitqueue */
	init_waitqueue_head(&dd->sdma_unfreeze_wq);
	atomic_set(&dd->sdma_unfreeze_count, 0);

	descq_cnt = sdma_get_descq_cnt();
	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
		    num_engines, descq_cnt);

	/* alloc memory for array of send engines */
	dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
	if (!dd->per_sdma)
		return ret;

	idle_cnt = ns_to_cclock(dd, idle_cnt);
	if (idle_cnt)
		dd->default_desc1 =
			SDMA_DESC1_HEAD_TO_HOST_FLAG;
	else
		dd->default_desc1 =
			SDMA_DESC1_INT_REQ_FLAG;

	if (!sdma_desct_intr)
		sdma_desct_intr = SDMA_DESC_INTR;

	/* Allocate memory for SendDMA descriptor FIFOs */
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		sde->dd = dd;
		sde->ppd = ppd;
		sde->this_idx = this_idx;
		sde->descq_cnt = descq_cnt;
		sde->desc_avail = sdma_descq_freecnt(sde);
		sde->sdma_shift = ilog2(descq_cnt);
		sde->sdma_mask = (1 << sde->sdma_shift) - 1;

		/* Create a mask specifically for each interrupt source */
		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
					   this_idx);
		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
						this_idx);
		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
					    this_idx);
		/* Create a combined mask to cover all 3 interrupt sources */
		sde->imask = sde->int_mask | sde->progress_mask |
			     sde->idle_mask;

		spin_lock_init(&sde->tail_lock);
		seqlock_init(&sde->head_lock);
		spin_lock_init(&sde->senddmactrl_lock);
		spin_lock_init(&sde->flushlist_lock);

		/* insure there is always a zero bit */
		sde->ahg_bits = 0xfffffffe00000000ULL;

		sdma_set_state(sde, sdma_state_s00_hw_down);

		/* set up reference counting */
		kref_init(&sde->state.kref);
		init_completion(&sde->state.comp);

		INIT_LIST_HEAD(&sde->flushlist);
		INIT_LIST_HEAD(&sde->dmawait);

		sde->tail_csr =
			get_kctxt_csr_addr(dd, this_idx, SD(TAIL));

		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
			     (unsigned long)sde);

		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
			     (unsigned long)sde);
		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
		INIT_WORK(&sde->flush_worker, sdma_field_flush);

		sde->progress_check_head = 0;

		timer_setup(&sde->err_progress_check_timer,
			    sdma_err_progress_check, 0);

		sde->descq = dma_zalloc_coherent(
			&dd->pcidev->dev,
			descq_cnt * sizeof(u64[2]),
			&sde->descq_phys,
			GFP_KERNEL
		);
		if (!sde->descq)
			goto bail;
		sde->tx_ring =
			kvzalloc_node(sizeof(struct sdma_txreq *) * descq_cnt,
				      GFP_KERNEL, dd->node);
		if (!sde->tx_ring)
			goto bail;
	}

	dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
	/* Allocate memory for DMA of head registers to memory */
	dd->sdma_heads_dma = dma_zalloc_coherent(
		&dd->pcidev->dev,
		dd->sdma_heads_size,
		&dd->sdma_heads_phys,
		GFP_KERNEL
	);
	if (!dd->sdma_heads_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
		goto bail;
	}

	/* Allocate memory for pad */
	dd->sdma_pad_dma = dma_zalloc_coherent(
		&dd->pcidev->dev,
		sizeof(u32),
		&dd->sdma_pad_phys,
		GFP_KERNEL
	);
	if (!dd->sdma_pad_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
		goto bail;
	}

	/* assign each engine to a different cacheline and init registers */
	curr_head = (void *)dd->sdma_heads_dma;
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		unsigned long phys_offset;

		sde = &dd->per_sdma[this_idx];

		sde->head_dma = curr_head;
		curr_head += L1_CACHE_BYTES;
		phys_offset = (unsigned long)sde->head_dma -
			      (unsigned long)dd->sdma_heads_dma;
		sde->head_phys = dd->sdma_heads_phys + phys_offset;
		init_sdma_regs(sde, per_sdma_credits, idle_cnt);
	}
	dd->flags |= HFI1_HAS_SEND_DMA;
	dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
	dd->num_sdma = num_engines;
	ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
	if (ret < 0)
		goto bail;

	tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
	if (!tmp_sdma_rht) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
	if (ret < 0) {
		/* sdma_clean() only frees dd->sdma_rht, so free here */
		kfree(tmp_sdma_rht);
		goto bail;
	}
	dd->sdma_rht = tmp_sdma_rht;

	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
	return 0;

bail:
	sdma_clean(dd, num_engines);
	return ret;
}


/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */
void sdma_all_running(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* move all engines to running */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e30_go_running);
	}
}

/**
 * sdma_all_idle() - called when the link goes down
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the idle state.
 */
void sdma_all_idle(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* idle all engines */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e70_go_idle);
	}
}

/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine is for kicking off the state processing for all required
 * sdma engines.  Interrupts need to be working at this point.
 */
void sdma_start(struct hfi1_devdata *dd)
{
	unsigned i;
	struct sdma_engine *sde;

	/* kick off the engines state processing */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e10_go_hw_start);
	}
}

/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */
void sdma_exit(struct hfi1_devdata *dd)
{
	unsigned this_idx;
	struct sdma_engine *sde;

	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
			++this_idx) {
		sde = &dd->per_sdma[this_idx];
		if (!list_empty(&sde->dmawait))
			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
				   sde->this_idx);
		sdma_process_event(sde, sdma_event_e00_go_hw_down);

		del_timer_sync(&sde->err_progress_check_timer);

		/*
		 * This waits for the state machine to exit so it is not
		 * necessary to kill the sdma_sw_clean_up_task to make sure
		 * it is not running.
		 */
		sdma_finalput(&sde->state);
	}
	sdma_clean(dd, dd->num_sdma);
}


/*
 * unmap the indicated descriptor
 */
static inline void sdma_unmap_desc(
	struct hfi1_devdata *dd,
	struct sdma_desc *descp)
{
	switch (sdma_mapping_type(descp)) {
	case SDMA_MAP_SINGLE:
		dma_unmap_single(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	case SDMA_MAP_PAGE:
		dma_unmap_page(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	}
}

/*
 * return the mode as indicated by the first
 * descriptor in the tx.
 */
static inline u8 ahg_mode(struct sdma_txreq *tx)
{
	return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
		>> SDMA_DESC1_HEADER_MODE_SHIFT;
}

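/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: hfi1_devdata for unmapping
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 */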
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	u16 i;

	if (tx->num_desc) {
		u8 skip = 0, mode = ahg_mode(tx);

		/* unmap first */
		sdma_unmap_desc(dd, &tx->descp[0]);
		/* determine number of AHG descriptors to skip */
		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
		for (i = 1 + skip; i < tx->num_desc; i++)
			sdma_unmap_desc(dd, &tx->descp[i]);
		tx->num_desc = 0;
	}
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;
	/* kmalloc'ed descp */
	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}

static inline u16 sdma_gethead(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	int use_dmahead;
	u16 hwhead;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

retry:
	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
		      (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
	hwhead = use_dmahead ?
		(u16)le64_to_cpu(*sde->head_dma) :
		(u16)read_sde_csr(sde, SD(HEAD));

	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
		u16 cnt;
		u16 swtail;
		u16 swhead;
		int sane;

		swhead = sde->descq_head & sde->sdma_mask;
		/* this code is really bad for cache line trading */
		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		cnt = sde->descq_cnt;

		if (swhead < swtail)
			/* not wrapped */
			sane = (hwhead >= swhead) & (hwhead <= swtail);
		else if (swhead > swtail)
			/* wrapped around */
			sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
				(hwhead <= swtail);
		else
			/* empty */
			sane = (hwhead == swhead);

		if (unlikely(!sane)) {
			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
				   sde->this_idx,
				   use_dmahead ? "dma" : "kreg",
				   hwhead, swhead, swtail, cnt);
			if (use_dmahead) {
				/* try one more time, using csr */
				use_dmahead = 0;
				goto retry;
			}
			/* proceed as if no progress */
			hwhead = swhead;
		}
	}
	return hwhead;
}

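/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with head_lock held.
 */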
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	struct iowait *wait, *nw;
	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
	uint i, n = 0, seq, max_idx = 0;
	struct sdma_txreq *stx;
	struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
	u8 max_starved_cnt = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
		   slashstrip(__FILE__), __LINE__, __func__);
	dd_dev_err(sde->dd, "avail: %u\n", avail);
#endif

	do {
		seq = read_seqbegin(&dev->iowait_lock);
		if (!list_empty(&sde->dmawait)) {
			/* at least one item */
			write_seqlock(&dev->iowait_lock);
			/* Harvest waiters wanting DMA descriptors */
			list_for_each_entry_safe(
					wait,
					nw,
					&sde->dmawait,
					list) {
				u16 num_desc = 0;

				if (!wait->wakeup)
					continue;
				if (n == ARRAY_SIZE(waits))
					break;
				if (!list_empty(&wait->tx_head)) {
					stx = list_first_entry(
						&wait->tx_head,
						struct sdma_txreq,
						list);
					num_desc = stx->num_desc;
				}
				if (num_desc > avail)
					break;
				avail -= num_desc;
				/* Find the most starved wait member */
				iowait_starve_find_max(wait, &max_starved_cnt,
						       n, &max_idx);
				list_del_init(&wait->list);
				waits[n++] = wait;
			}
			write_sequnlock(&dev->iowait_lock);
			break;
		}
	} while (read_seqretry(&dev->iowait_lock, seq));

	/* Schedule the most starved one first */
	if (n)
		waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);

	for (i = 0; i < n; i++)
		if (i != max_idx)
			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

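/* head_lock must be held */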
static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
	struct sdma_txreq *txp = NULL;
	int progress = 0;
	u16 hwhead, swhead;
	int idle_check_done = 0;

	hwhead = sdma_gethead(sde);

	/* The reason for some of the complexity of this code is that
	 * not all descriptors have corresponding txps.  So, we have to
	 * be able to skip over descs until we wander into the range of
	 * the last txp on the list.
	 */
retry:
	txp = get_txhead(sde);
	swhead = sde->descq_head & sde->sdma_mask;
	trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
	while (swhead != hwhead) {
		/* advance head, wrap if needed */
		swhead = ++sde->descq_head & sde->sdma_mask;

		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == swhead) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
			/* see if there is another txp */
			txp = get_txhead(sde);
		}
		trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
		progress++;
	}

	/*
	 * The SDMA idle interrupt is not guaranteed to be ordered with
	 * respect to updates to the dma_head location in host memory, so
	 * the head value read above may lag.  If this was an idle
	 * interrupt and there is still a pending tail, re-read the head
	 * from the CSR and make one more pass to pick up any stragglers.
	 */
	if ((status & sde->idle_mask) && !idle_check_done) {
		u16 swtail;

		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		if (swtail != hwhead) {
			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
			idle_check_done = 1;
			goto retry;
		}
	}

	sde->last_status = status;
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

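/**
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupt sources.
 */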
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	trace_hfi1_sdma_engine_interrupt(sde, status);
	write_seqlock(&sde->head_lock);
	sdma_set_desc_cnt(sde, sdma_desct_intr);
	if (status & sde->idle_mask)
		sde->idle_int_cnt++;
	else if (status & sde->progress_mask)
		sde->progress_int_cnt++;
	else if (status & sde->int_mask)
		sde->sdma_int_cnt++;
	sdma_make_progress(sde, status);
	write_sequnlock(&sde->head_lock);
}

/**
 * sdma_engine_error() - error handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 */
void sdma_engine_error(struct sdma_engine *sde, u64 status)
{
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
		   sde->this_idx,
		   (unsigned long long)status,
		   sdma_state_names[sde->state.current_state]);
#endif
	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);
	if (status & ALL_SDMA_ENG_HALT_ERRS)
		__sdma_process_event(sde, sdma_event_e60_hw_halted);
	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
		dd_dev_err(sde->dd,
			   "SDMA (%u) engine error: 0x%llx state %s\n",
			   sde->this_idx,
			   (unsigned long long)status,
			   sdma_state_names[sde->state.current_state]);
		dump_sdma_state(sde);
	}
	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}


static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
{
	u64 set_senddmactrl = 0;
	u64 clr_senddmactrl = 0;
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
		   sde->this_idx,
		   (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
#endif

	if (op & SDMA_SENDCTRL_OP_ENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_INTENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_HALT)
		set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);

	spin_lock_irqsave(&sde->senddmactrl_lock, flags);

	sde->p_senddmactrl |= set_senddmactrl;
	sde->p_senddmactrl &= ~clr_senddmactrl;

	if (op & SDMA_SENDCTRL_OP_CLEANUP)
		write_sde_csr(sde, SD(CTRL),
			      sde->p_senddmactrl |
			      SD(CTRL_SDMA_CLEANUP_SMASK));
	else
		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);

	spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);

#ifdef CONFIG_SDMA_VERBOSITY
	sdma_dumpstate(sde);
#endif
}

static void sdma_setlengen(struct sdma_engine *sde)
{
#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	/*
	 * Set SendDmaLenGen and clear-then-set the MSB of the generation
	 * count to enable generation checking and load the internal
	 * generation counter.
	 */
	write_sde_csr(sde, SD(LEN_GEN),
		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
	write_sde_csr(sde, SD(LEN_GEN),
		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}

static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	/* Commit writes to memory and advance the tail on the chip */
	smp_wmb(); /* see get_txhead() */
	writeq(tail, sde->tail_csr);
}

/*
 * Prepare the engine for (re)start: reload length/generation, zero the
 * tail CSR and the shadow head, and clear the header-request FIFO
 * uncorrectable-error bit, which may still be set from before the
 * restart.
 */
static void sdma_hw_start_up(struct sdma_engine *sde)
{
	u64 reg;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	*sde->head_dma = 0;

	reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
	      SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
}

/*
 * set_sdma_integrity
 *
 * Set the SEND_DMA_CHECK_ENABLE register for send DMA engine 'sde'.
 */
static void set_sdma_integrity(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;

	write_sde_csr(sde, SD(CHECK_ENABLE),
		      hfi1_pkt_base_sdma_integrity(dd));
}

static void init_sdma_regs(
	struct sdma_engine *sde,
	u32 credits,
	uint idle_cnt)
{
	u8 opval, opmask;
#ifdef CONFIG_SDMA_VERBOSITY
	struct hfi1_devdata *dd = sde->dd;

	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
	write_sde_csr(sde, SD(DESC_CNT), 0);
	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
	write_sde_csr(sde, SD(MEMORY),
		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
		      ((u64)(credits * sde->this_idx) <<
		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
	set_sdma_integrity(sde);
	opmask = OPCODE_CHECK_MASK_DISABLED;
	opval = OPCODE_CHECK_VAL_DISABLED;
	write_sde_csr(sde, SD(CHECK_OPCODE),
		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
}

#ifdef CONFIG_SDMA_VERBOSITY

#define sdma_dumpstate_helper0(reg) do { \
		csr = read_csr(sde->dd, reg); \
		dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
	} while (0)

#define sdma_dumpstate_helper(reg) do { \
		csr = read_sde_csr(sde, reg); \
		dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
			   #reg, sde->this_idx, csr); \
	} while (0)

#define sdma_dumpstate_helper2(reg) do { \
		csr = read_csr(sde->dd, reg + (8 * i)); \
		dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
			   #reg, i, csr); \
	} while (0)

void sdma_dumpstate(struct sdma_engine *sde)
{
	u64 csr;
	unsigned i;

	sdma_dumpstate_helper(SD(CTRL));
	sdma_dumpstate_helper(SD(STATUS));
	sdma_dumpstate_helper0(SD(ERR_STATUS));
	sdma_dumpstate_helper0(SD(ERR_MASK));
	sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
	sdma_dumpstate_helper(SD(ENG_ERR_MASK));

	for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
		sdma_dumpstate_helper2(CCE_INT_STATUS);
		sdma_dumpstate_helper2(CCE_INT_MASK);
		sdma_dumpstate_helper2(CCE_INT_BLOCKED);
	}

	sdma_dumpstate_helper(SD(TAIL));
	sdma_dumpstate_helper(SD(HEAD));
	sdma_dumpstate_helper(SD(PRIORITY_THLD));
	sdma_dumpstate_helper(SD(IDLE_CNT));
	sdma_dumpstate_helper(SD(RELOAD_CNT));
	sdma_dumpstate_helper(SD(DESC_CNT));
	sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
	sdma_dumpstate_helper(SD(MEMORY));
	sdma_dumpstate_helper0(SD(ENGINES));
	sdma_dumpstate_helper0(SD(MEM_SIZE));

	sdma_dumpstate_helper(SD(BASE_ADDR));
	sdma_dumpstate_helper(SD(LEN_GEN));
	sdma_dumpstate_helper(SD(HEAD_ADDR));
	sdma_dumpstate_helper(SD(CHECK_ENABLE));
	sdma_dumpstate_helper(SD(CHECK_VL));
	sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
	sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
	sdma_dumpstate_helper(SD(CHECK_SLID));
	sdma_dumpstate_helper(SD(CHECK_OPCODE));
}
#endif

static void dump_sdma_state(struct sdma_engine *sde)
{
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;
	u16 head, tail, cnt;

	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	cnt = sdma_descq_freecnt(sde);

	dd_dev_err(sde->dd,
		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
		   sde->this_idx, head, tail, cnt,
		   !list_empty(&sde->flushlist));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		dd_dev_err(sde->dd,
			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		dd_dev_err(sde->dd,
			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
			   desc[0], desc[1]);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			dd_dev_err(sde->dd,
				   "\taidx: %u amode: %u alen: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_DWS_SMASK) >>
					SDMA_DESC1_HEADER_DWS_SHIFT));
		head++;
		head &= sde->sdma_mask;
	}
}

#define SDE_FMT \
	"SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"
/**
 * sdma_seqfile_dump_sde() - debugfs dump of sde
 * @s: seq file
 * @sde: send dma engine to dump
 *
 * This routine dumps the sde to the indicated seq file.
 */
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
{
	u16 head, tail;
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;

	head = sde->descq_head & sde->sdma_mask;
	tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
	seq_printf(s, SDE_FMT, sde->this_idx,
		   sde->cpu,
		   sdma_state_name(sde->state.current_state),
		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
		   (unsigned long long)le64_to_cpu(*sde->head_dma),
		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
		   (unsigned long long)sde->last_status,
		   (unsigned long long)sde->ahg_bits,
		   sde->tx_tail,
		   sde->tx_head,
		   sde->descq_tail,
		   sde->descq_head,
		   !list_empty(&sde->flushlist),
		   sde->descq_full_count,
		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		seq_printf(s,
			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT));
		head = (head + 1) & sde->sdma_mask;
	}
}

/*
 * add the generation number into
 * the qw1 and return
 */
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;

	qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
	qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
			<< SDMA_DESC1_GENERATION_SHIFT;
	return qw1;
}

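/*
 * This routine submits the indicated tx
 *
 * Space has already been guaranteed and
 * tail side of ring is locked.
 *
 * The hardware tail update is done
 * in the caller and that is facilitated
 * by returning the new tail.
 *
 * There is special case logic for ahg
 * to not add the generation number for
 * up to 2 descriptors that follow the
 * first descriptor.
 */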
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	int i;
	u16 tail;
	struct sdma_desc *descp = tx->descp;
	u8 skip = 0, mode = ahg_mode(tx);

	tail = sde->descq_tail & sde->sdma_mask;
	sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
	sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
	trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
				   tail, &sde->descq[tail]);
	tail = ++sde->descq_tail & sde->sdma_mask;
	descp++;
	if (mode > SDMA_AHG_APPLY_UPDATE1)
		skip = mode >> 1;
	for (i = 1; i < tx->num_desc; i++, descp++) {
		u64 qw1;

		sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
		if (skip) {
			/* edits don't have generation */
			qw1 = descp->qw[1];
			skip--;
		} else {
			/* replace generation with real one for non-edits */
			qw1 = add_gen(sde, descp->qw[1]);
		}
		sde->descq[tail].qw[1] = cpu_to_le64(qw1);
		trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
					   tail, &sde->descq[tail]);
		tail = ++sde->descq_tail & sde->sdma_mask;
	}
	tx->next_descq_idx = tail;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
	WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
#endif
	sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
	sde->desc_avail -= tx->num_desc;
	return tail;
}

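/*
 * Check for progress
 */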
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	int ret;

	sde->desc_avail = sdma_descq_freecnt(sde);
	if (tx->num_desc <= sde->desc_avail)
		return -EAGAIN;
	/* pulse the head_lock */
	if (wait && wait->sleep) {
		unsigned seq;

		seq = raw_seqcount_begin(
			(const seqcount_t *)&sde->head_lock.seqcount);
		ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
		if (ret == -EAGAIN)
			sde->desc_avail = sdma_descq_freecnt(sde);
	} else {
		ret = -EBUSY;
	}
	return ret;
}

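/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring.  If a iowait structure is non-NULL
 * the packet will be queued to the list in wait.
 *
 * Return:
 * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in
 * ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */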
int sdma_send_txreq(struct sdma_engine *sde,
		    struct iowait *wait,
		    struct sdma_txreq *tx,
		    bool pkts_sent)
{
	int ret = 0;
	u16 tail;
	unsigned long flags;

	/* user should have supplied entire packet */
	if (unlikely(tx->tlen))
		return -EINVAL;
	tx->wait = wait;
	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	if (unlikely(!__sdma_running(sde)))
		goto unlock_noconn;
	if (unlikely(tx->num_desc > sde->desc_avail))
		goto nodesc;
	tail = submit_tx(sde, tx);
	if (wait)
		iowait_sdma_inc(wait);
	sdma_update_tail(sde, tail);
unlock:
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	return ret;
unlock_noconn:
	if (wait)
		iowait_sdma_inc(wait);
	tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
	spin_lock(&sde->flushlist_lock);
	list_add_tail(&tx->list, &sde->flushlist);
	spin_unlock(&sde->flushlist_lock);
	if (wait) {
		wait->tx_count++;
		wait->count += tx->num_desc;
	}
	schedule_work(&sde->flush_worker);
	ret = -ECOMM;
	goto unlock;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, pkts_sent);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto unlock;
}

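/**
 * sdma_send_txlist() - submit a list of tx reqs to ring
 * @sde: sdma engine to use
 * @wait: wait structure to use when full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: pointer to a u32 filled in with the count of submitted
 *             and flushed tx reqs
 *
 * The call submits the list into the ring.  If the engine is not in the
 * running state, the remaining requests are flushed and -ECOMM is
 * returned.  In all cases, tx_list is updated so its head is the list of
 * descriptors that have yet to be transmitted.
 *
 * The intent of this call is to provide a more efficient way of
 * submitting multiple packets to SDMA while taking the tail side lock
 * only once.  The hardware tail is updated at most every
 * SDMA_TAIL_UPDATE_THRESH submissions, then once more at the end.
 *
 * Return:
 * 0 - Success,
 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL),
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM - bad sdma state
 */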
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
		     struct list_head *tx_list, u32 *count_out)
{
	struct sdma_txreq *tx, *tx_next;
	int ret = 0;
	unsigned long flags;
	u16 tail = INVALID_TAIL;
	u32 submit_count = 0, flush_count = 0, total_count;

	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = wait;
		if (unlikely(!__sdma_running(sde)))
			goto unlock_noconn;
		if (unlikely(tx->num_desc > sde->desc_avail))
			goto nodesc;
		if (unlikely(tx->tlen)) {
			ret = -EINVAL;
			goto update_tail;
		}
		list_del_init(&tx->list);
		tail = submit_tx(sde, tx);
		submit_count++;
		if (tail != INVALID_TAIL &&
		    (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
			sdma_update_tail(sde, tail);
			tail = INVALID_TAIL;
		}
	}
update_tail:
	total_count = submit_count + flush_count;
	if (wait) {
		iowait_sdma_add(wait, total_count);
		iowait_starve_clear(submit_count > 0, wait);
	}
	if (tail != INVALID_TAIL)
		sdma_update_tail(sde, tail);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	*count_out = total_count;
	return ret;
unlock_noconn:
	spin_lock(&sde->flushlist_lock);
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = wait;
		list_del_init(&tx->list);
		tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
		tx->sn = sde->tail_sn++;
		trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
		list_add_tail(&tx->list, &sde->flushlist);
		flush_count++;
		if (wait) {
			wait->tx_count++;
			wait->count += tx->num_desc;
		}
	}
	spin_unlock(&sde->flushlist_lock);
	schedule_work(&sde->flush_worker);
	ret = -ECOMM;
	goto update_tail;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto update_tail;
}

static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
{
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	__sdma_process_event(sde, event);

	if (sde->state.current_state == sdma_state_s99_running)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void __sdma_process_event(struct sdma_engine *sde,
				 enum sdma_events event)
{
	struct sdma_state *ss = &sde->state;
	int need_progress = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
		   sdma_state_names[ss->current_state],
		   sdma_event_names[event]);
#endif

	switch (ss->current_state) {
	case sdma_state_s00_hw_down:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			break;
		case sdma_event_e30_go_running:
			/*
			 * If down, but running requested (usually result
			 * of link up), then we need to start up.
			 * This can happen when hw down is requested while
			 * bringing the link up with traffic active on
			 * an SDMA engine.
			 */
			ss->go_s99_running = 1;
			/* fall through -- and start dma engine */
		case sdma_event_e10_go_hw_start:
			/* This reference means the state machine is started */
			sdma_get(&sde->state);
			sdma_set_state(sde,
				       sdma_state_s10_hw_start_up_halt_wait);
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s10_hw_start_up_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde,
				       sdma_state_s15_hw_start_up_clean_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s15_hw_start_up_clean_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s20_idle:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			sdma_set_state(sde, sdma_state_s99_running);
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e85_link_down:
			/* fall through */
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s30_sw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s40_hw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s50_hw_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s60_idle_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s80_hw_freeze:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s82_freeze_sw_clean:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			/* notify caller this engine is done cleaning */
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s99_running:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			need_progress = 1;
			sdma_err_progress_check_schedule(sde);
			/* fall through */
		case sdma_event_e90_sw_halted:
			/*
			 * SW initiated halt does not perform engines
			 * progress check
			 */
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			/* fall through */
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		}
		break;
	}

	ss->last_event = event;
	if (need_progress)
		sdma_make_progress(sde, 0);
}

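/*
 * _extend_sdma_tx_descs() - helper to extend txreq
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * The code will bump the allocation up to the max of MAX_DESC
 * descriptors.  The last descriptor is reserved for the coalesce
 * buffer in order to support cases where an input packet has more
 * than MAX_DESC iovecs.
 */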
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int i;

	/* Handle last descriptor */
	if (unlikely(tx->num_desc == (MAX_DESC - 1))) {
		/* if tlen is 0, it is for padding, release last descriptor */
		if (!tx->tlen) {
			tx->desc_limit = MAX_DESC;
		} else if (!tx->coalesce_buf) {
			/* allocate coalesce buffer with space for padding */
			tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
						   GFP_ATOMIC);
			if (!tx->coalesce_buf)
				goto enomem;
			tx->coalesce_idx = 0;
		}
		return 0;
	}

	if (unlikely(tx->num_desc == MAX_DESC))
		goto enomem;

	tx->descp = kmalloc_array(
			MAX_DESC,
			sizeof(struct sdma_desc),
			GFP_ATOMIC);
	if (!tx->descp)
		goto enomem;

	/* reserve last descriptor for coalescing */
	tx->desc_limit = MAX_DESC - 1;
	/* copy ones already built */
	for (i = 0; i < tx->num_desc; i++)
		tx->descp[i] = tx->descs[i];
	return 0;
enomem:
	__sdma_txclean(dd, tx);
	return -ENOMEM;
}

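/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * This function calls _extend_sdma_tx_descs to extend or allocate
 * the coalesce buffer.  If a coalesce buffer is allocated, it copies
 * the input packet data into that buffer and, once tlen has reached
 * the full packet length, pads and maps the buffer and adds a single
 * descriptor for it.
 *
 * Return:
 * <0 - error
 * 0 - coalescing, don't populate descriptor
 * 1 - continue with populating descriptor
 */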
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len)
{
	int pad_len, rval;
	dma_addr_t addr;

	rval = _extend_sdma_tx_descs(dd, tx);
	if (rval) {
		__sdma_txclean(dd, tx);
		return rval;
	}

	/* If coalesce buffer is allocated, copy data into the buffer */
	if (tx->coalesce_buf) {
		if (type == SDMA_MAP_NONE) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		if (type == SDMA_MAP_PAGE) {
			kvaddr = kmap(page);
			kvaddr += offset;
		} else if (WARN_ON(!kvaddr)) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
		tx->coalesce_idx += len;
		if (type == SDMA_MAP_PAGE)
			kunmap(page);

		/* If there is more data, return */
		if (tx->tlen - tx->coalesce_idx)
			return 0;

		/* Whole packet is received; add any padding */
		pad_len = tx->packet_len & (sizeof(u32) - 1);
		if (pad_len) {
			pad_len = sizeof(u32) - pad_len;
			memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
			/* padding is taken care of for coalescing case */
			tx->packet_len += pad_len;
			tx->tlen += pad_len;
		}

		/* dma map the coalesce buffer */
		addr = dma_map_single(&dd->pcidev->dev,
				      tx->coalesce_buf,
				      tx->tlen,
				      DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
			__sdma_txclean(dd, tx);
			return -ENOSPC;
		}

		/* Add descriptor for coalesce buffer */
		tx->desc_limit = MAX_DESC;
		return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
					 addr, tx->tlen);
	}

	return 1;
}

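/* Update sdes when the lmc changes */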
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
{
	struct sdma_engine *sde;
	int i;
	u64 sreg;

	sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
		SD(CHECK_SLID_MASK_SHIFT)) |
	       (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
		SD(CHECK_SLID_VALUE_SHIFT));

	for (i = 0; i < dd->num_sdma; i++) {
		hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
			  i, (u32)sreg);
		sde = &dd->per_sdma[i];
		write_sde_csr(sde, SD(CHECK_SLID), sreg);
	}
}

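/* tx not dword sized - pad */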
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int rval = 0;

	tx->num_desc++;
	if (unlikely(tx->num_desc == tx->desc_limit)) {
		rval = _extend_sdma_tx_descs(dd, tx);
		if (rval) {
			__sdma_txclean(dd, tx);
			return rval;
		}
	}
	/* finish the one that is being added */
	make_tx_sdma_desc(
		tx,
		SDMA_MAP_NONE,
		dd->sdma_pad_phys,
		sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
	_sdma_close_tx(dd, tx);
	return rval;
}

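/*
 * Add ahg to the sdma_txreq
 *
 * The logic will consume up to 3
 * descriptors at the beginning of
 * sdma_txreq.
 */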
void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen)
{
	u32 i, shift = 0, desc = 0;
	u8 mode;

	WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
	/* compute mode */
	if (num_ahg == 1)
		mode = SDMA_AHG_APPLY_UPDATE1;
	else if (num_ahg <= 5)
		mode = SDMA_AHG_APPLY_UPDATE2;
	else
		mode = SDMA_AHG_APPLY_UPDATE3;
	tx->num_desc++;
	/* zero out the descriptors the AHG update words will occupy */
	switch (mode) {
	case SDMA_AHG_APPLY_UPDATE3:
		tx->num_desc++;
		tx->descs[2].qw[0] = 0;
		tx->descs[2].qw[1] = 0;
		/* fall through */
	case SDMA_AHG_APPLY_UPDATE2:
		tx->num_desc++;
		tx->descs[1].qw[0] = 0;
		tx->descs[1].qw[1] = 0;
		break;
	}
	ahg_hlen >>= 2;
	tx->descs[0].qw[1] |=
		(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
			<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
		(((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
			<< SDMA_DESC1_HEADER_DWS_SHIFT) |
		(((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
			<< SDMA_DESC1_HEADER_MODE_SHIFT) |
		(((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
			<< SDMA_DESC1_HEADER_UPDATE1_SHIFT);
	for (i = 0; i < (num_ahg - 1); i++) {
		if (!shift && !(i & 2))
			desc++;
		tx->descs[desc].qw[!!(i & 2)] |=
			(((u64)ahg[i + 1])
				<< shift);
		shift = (shift + 32) & 63;
	}
}

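/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Lock-free allocation of a bit from the engine's 32-entry AHG
 * bitmask, retrying with cpu_relax() on contention.
 *
 * Return:
 * 0-31 when successful, -EINVAL if @sde is NULL, -ENOSPC if no entry
 * is available.
 *
 * Illustrative usage (sketch only):
 *
 *	int ahg_index = sdma_ahg_alloc(sde);
 *
 *	if (ahg_index >= 0) {
 *		// build headers using the AHG entry ...
 *		sdma_ahg_free(sde, ahg_index);
 *	}
 */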
int sdma_ahg_alloc(struct sdma_engine *sde)
{
	int nr;
	int oldbit;

	if (!sde) {
		trace_hfi1_ahg_allocate(sde, -EINVAL);
		return -EINVAL;
	}
	while (1) {
		nr = ffz(READ_ONCE(sde->ahg_bits));
		if (nr > 31) {
			trace_hfi1_ahg_allocate(sde, -ENOSPC);
			return -ENOSPC;
		}
		oldbit = test_and_set_bit(nr, &sde->ahg_bits);
		if (!oldbit)
			break;
		cpu_relax();
	}
	trace_hfi1_ahg_allocate(sde, nr);
	return nr;
}

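/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine to return AHG entry to
 * @ahg_index: index to free
 *
 * This routine frees the indicated AHG entry.
 */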
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
{
	if (!sde)
		return;
	trace_hfi1_ahg_deallocate(sde, ahg_index);
	if (ahg_index < 0 || ahg_index > 31)
		return;
	clear_bit(ahg_index, &sde->ahg_bits);
}

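/*
 * SPC freeze handling for SDMA engines.  Called when the driver knows
 * the SPC is going into a freeze but before the freeze is fully
 * settled.  Generally an error interrupt.
 *
 * This event will pull the engine out of running so no more entries
 * can be added to the engine's queue.
 */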
void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
{
	int i;
	enum sdma_events event = link_down ? sdma_event_e85_link_down :
					     sdma_event_e80_hw_freeze;

	/* set up the wait but do not wait here */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines to stop running and wait */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], event);

	/* sdma_freeze() will wait for all engines to have stopped */
}

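/*
 * SPC freeze handling for the SDMA engines.  Called after the SPC is
 * fully frozen.  Waits for all engines to have stopped, then starts
 * and waits for the software clean of each engine.
 */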
void sdma_freeze(struct hfi1_devdata *dd)
{
	int i;
	int ret;

	/*
	 * Make sure all engines have moved out of the running state before
	 * continuing.
	 */
	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <=
				       0);
	/* interrupted or count is negative, then unloading - just exit */
	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
		return;

	/* set up the count for the next wait */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines that the SPC is frozen, they can start cleaning */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);

	/*
	 * Wait for everyone to finish software clean before exiting.  The
	 * software clean will read engine CSRs, so it must be completed
	 * before the next step, which will clear the engine CSRs.
	 */
	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <= 0);
	/* no need to check results - done no matter what */
}

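/**
 * sdma_unfreeze - initiate an unfreeze on all SDMA engines
 * @dd: hfi1 device data
 *
 * This routine is called after the SPC has been unfrozen.  Each engine
 * is restarted and returned to either the idle or running state,
 * depending on whether a run was requested while it was frozen.
 */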
void sdma_unfreeze(struct hfi1_devdata *dd)
{
	int i;

	/* tell all engines to start freeze clean up */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i],
				   sdma_event_e82_hw_unfreeze);
}

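/**
 * _sdma_engine_progress_schedule() - schedule progress on engine
 * @sde: sdma engine to schedule progress on
 *
 * Forces the engine's interrupt source so the progress handler runs
 * for this engine.
 */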
void _sdma_engine_progress_schedule(
	struct sdma_engine *sde)
{
	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);

	write_csr(sde->dd,
		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
		  sde->progress_mask);
}