#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");

static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before interrupt");

/* max number of iowaits woken per sdma_desc_avail() pass */
#define SDMA_WAIT_BATCH_SIZE 20

/* time, in ms, to wait for a halted engine to report halted status */
#define SDMA_ERR_HALT_TIMEOUT 10

89
90#define SD(name) SEND_DMA_##name
91#define ALL_SDMA_ENG_HALT_ERRS \
92 (SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
93 | SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
94 | SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
95 | SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
96 | SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
97 | SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
98 | SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
99 | SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
100 | SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
101 | SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
102 | SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
103 | SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
104 | SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
105 | SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
106 | SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
107 | SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
108 | SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
109 | SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
110
111
112#define SDMA_SENDCTRL_OP_ENABLE BIT(0)
113#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
114#define SDMA_SENDCTRL_OP_HALT BIT(2)
115#define SDMA_SENDCTRL_OP_CLEANUP BIT(3)
116
117
118#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
119SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
120#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
121SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT
122
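/* printable names for enum sdma_states; used in traces and error messages */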
123static const char * const sdma_state_names[] = {
124 [sdma_state_s00_hw_down] = "s00_HwDown",
125 [sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
126 [sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
127 [sdma_state_s20_idle] = "s20_Idle",
128 [sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
129 [sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
130 [sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
131 [sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
132 [sdma_state_s80_hw_freeze] = "s80_HwFreeze",
133 [sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
134 [sdma_state_s99_running] = "s99_Running",
135};
136
137#ifdef CONFIG_SDMA_VERBOSITY
138static const char * const sdma_event_names[] = {
139 [sdma_event_e00_go_hw_down] = "e00_GoHwDown",
140 [sdma_event_e10_go_hw_start] = "e10_GoHwStart",
141 [sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
142 [sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
143 [sdma_event_e30_go_running] = "e30_GoRunning",
144 [sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
145 [sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
146 [sdma_event_e60_hw_halted] = "e60_HwHalted",
147 [sdma_event_e70_go_idle] = "e70_GoIdle",
148 [sdma_event_e80_hw_freeze] = "e80_HwFreeze",
149 [sdma_event_e81_hw_frozen] = "e81_HwFrozen",
150 [sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
151 [sdma_event_e85_link_down] = "e85_LinkDown",
152 [sdma_event_e90_sw_halted] = "e90_SwHalted",
153};
154#endif
155
156static const struct sdma_set_state_action sdma_action_table[] = {
157 [sdma_state_s00_hw_down] = {
158 .go_s99_running_tofalse = 1,
159 .op_enable = 0,
160 .op_intenable = 0,
161 .op_halt = 0,
162 .op_cleanup = 0,
163 },
164 [sdma_state_s10_hw_start_up_halt_wait] = {
165 .op_enable = 0,
166 .op_intenable = 0,
167 .op_halt = 1,
168 .op_cleanup = 0,
169 },
170 [sdma_state_s15_hw_start_up_clean_wait] = {
171 .op_enable = 0,
172 .op_intenable = 1,
173 .op_halt = 0,
174 .op_cleanup = 1,
175 },
176 [sdma_state_s20_idle] = {
177 .op_enable = 0,
178 .op_intenable = 1,
179 .op_halt = 0,
180 .op_cleanup = 0,
181 },
182 [sdma_state_s30_sw_clean_up_wait] = {
183 .op_enable = 0,
184 .op_intenable = 0,
185 .op_halt = 0,
186 .op_cleanup = 0,
187 },
188 [sdma_state_s40_hw_clean_up_wait] = {
189 .op_enable = 0,
190 .op_intenable = 0,
191 .op_halt = 0,
192 .op_cleanup = 1,
193 },
194 [sdma_state_s50_hw_halt_wait] = {
195 .op_enable = 0,
196 .op_intenable = 0,
197 .op_halt = 0,
198 .op_cleanup = 0,
199 },
200 [sdma_state_s60_idle_halt_wait] = {
201 .go_s99_running_tofalse = 1,
202 .op_enable = 0,
203 .op_intenable = 0,
204 .op_halt = 1,
205 .op_cleanup = 0,
206 },
207 [sdma_state_s80_hw_freeze] = {
208 .op_enable = 0,
209 .op_intenable = 0,
210 .op_halt = 0,
211 .op_cleanup = 0,
212 },
213 [sdma_state_s82_freeze_sw_clean] = {
214 .op_enable = 0,
215 .op_intenable = 0,
216 .op_halt = 0,
217 .op_cleanup = 0,
218 },
219 [sdma_state_s99_running] = {
220 .op_enable = 1,
221 .op_intenable = 1,
222 .op_halt = 0,
223 .op_cleanup = 0,
224 .go_s99_running_totrue = 1,
225 },
226};
227
228#define SDMA_TAIL_UPDATE_THRESH 0x1F
229
230
231static void sdma_complete(struct kref *);
232static void sdma_finalput(struct sdma_state *);
233static void sdma_get(struct sdma_state *);
234static void sdma_hw_clean_up_task(unsigned long);
235static void sdma_put(struct sdma_state *);
236static void sdma_set_state(struct sdma_engine *, enum sdma_states);
237static void sdma_start_hw_clean_up(struct sdma_engine *);
238static void sdma_sw_clean_up_task(unsigned long);
239static void sdma_sendctrl(struct sdma_engine *, unsigned);
240static void init_sdma_regs(struct sdma_engine *, u32, uint);
241static void sdma_process_event(
242 struct sdma_engine *sde,
243 enum sdma_events event);
244static void __sdma_process_event(
245 struct sdma_engine *sde,
246 enum sdma_events event);
247static void dump_sdma_state(struct sdma_engine *sde);
248static void sdma_make_progress(struct sdma_engine *sde, u64 status);
249static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
250static void sdma_flush_descq(struct sdma_engine *sde);
251
252
253
254
255
256static const char *sdma_state_name(enum sdma_states state)
257{
258 return sdma_state_names[state];
259}
260
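/*
 * State-structure reference counting: sdma_get()/sdma_put() take and drop
 * references on sde->state.kref.  When the last reference goes away,
 * sdma_complete() signals state.comp, which sdma_finalput() waits on so
 * teardown does not proceed while the state is still in use.
 */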
261static void sdma_get(struct sdma_state *ss)
262{
263 kref_get(&ss->kref);
264}
265
266static void sdma_complete(struct kref *kref)
267{
268 struct sdma_state *ss =
269 container_of(kref, struct sdma_state, kref);
270
271 complete(&ss->comp);
272}
273
274static void sdma_put(struct sdma_state *ss)
275{
276 kref_put(&ss->kref, sdma_complete);
277}
278
279static void sdma_finalput(struct sdma_state *ss)
280{
281 sdma_put(ss);
282 wait_for_completion(&ss->comp);
283}
284
285static inline void write_sde_csr(
286 struct sdma_engine *sde,
287 u32 offset0,
288 u64 value)
289{
290 write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
291}
292
293static inline u64 read_sde_csr(
294 struct sdma_engine *sde,
295 u32 offset0)
296{
297 return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
298}
299
/*
 * Poll the egress packet occupancy for this engine until it reaches zero.
 * If the count stops changing for too long, log an error and schedule a
 * link bounce to recover.
 */
304static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
305 int pause)
306{
307 u64 off = 8 * sde->this_idx;
308 struct hfi1_devdata *dd = sde->dd;
309 int lcnt = 0;
310 u64 reg_prev;
311 u64 reg = 0;
312
313 while (1) {
314 reg_prev = reg;
315 reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);
316
317 reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
318 reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
319 if (reg == 0)
320 break;
321
322 if (reg != reg_prev)
323 lcnt = 0;
324 if (lcnt++ > 500) {
325
326 dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
327 __func__, sde->this_idx, (u32)reg);
328 queue_work(dd->pport->hfi1_wq,
329 &dd->pport->link_bounce_work);
330 break;
331 }
332 udelay(1);
333 }
334}
335
/**
 * sdma_wait() - wait for packet egress to complete on all SDMA engines
 * @dd: hfi1_devdata
 */
340void sdma_wait(struct hfi1_devdata *dd)
341{
342 int i;
343
344 for (i = 0; i < dd->num_sdma; i++) {
345 struct sdma_engine *sde = &dd->per_sdma[i];
346
347 sdma_wait_for_packet_egress(sde, 0);
348 }
349}
350
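/*
 * Program the per-engine descriptor-count threshold (the desct_intr
 * module parameter); per that parameter's description, the engine
 * interrupts after roughly this many descriptors have been processed.
 * Only applicable when the device supports the SDMA head-update/timeout
 * mechanism (HFI1_HAS_SDMA_TIMEOUT).
 */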
351static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
352{
353 u64 reg;
354
355 if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
356 return;
357 reg = cnt;
358 reg &= SD(DESC_CNT_CNT_MASK);
359 reg <<= SD(DESC_CNT_CNT_SHIFT);
360 write_sde_csr(sde, SD(DESC_CNT), reg);
361}
362
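/*
 * complete_tx() - retire one tx request: release its DMA mappings via
 * __sdma_txclean(), invoke the ULP completion callback with the result
 * code, and wake the iowait if this was the last outstanding request it
 * was draining.
 */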
363static inline void complete_tx(struct sdma_engine *sde,
364 struct sdma_txreq *tx,
365 int res)
366{
367
368 struct iowait *wait = tx->wait;
369 callback_t complete = tx->complete;
370
371#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
372 trace_hfi1_sdma_out_sn(sde, tx->sn);
373 if (WARN_ON_ONCE(sde->head_sn != tx->sn))
374 dd_dev_err(sde->dd, "expected %llu got %llu\n",
375 sde->head_sn, tx->sn);
376 sde->head_sn++;
377#endif
378 __sdma_txclean(sde->dd, tx);
379 if (complete)
380 (*complete)(tx, res);
381 if (wait && iowait_sdma_dec(wait))
382 iowait_drain_wakeup(wait);
383}
384

/*
 * Complete all outstanding tx requests with SDMA_TXREQ_S_ABORTED.
 *
 * Requests can be in two places: still in the descriptor ring, or on the
 * engine's flush list.  The ring is flushed first, then the flush list is
 * spliced onto a local list and completed outside flushlist_lock.
 *
 * Called from the flush worker and from the state machine just before the
 * engine re-enters the running state; the caller holds head_lock.
 */
403static void sdma_flush(struct sdma_engine *sde)
404{
405 struct sdma_txreq *txp, *txp_next;
406 LIST_HEAD(flushlist);
407 unsigned long flags;
408
409
410 sdma_flush_descq(sde);
411 spin_lock_irqsave(&sde->flushlist_lock, flags);
412
413 list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
414 list_del_init(&txp->list);
415 list_add_tail(&txp->list, &flushlist);
416 }
417 spin_unlock_irqrestore(&sde->flushlist_lock, flags);
418
419 list_for_each_entry_safe(txp, txp_next, &flushlist, list)
420 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
421}
422

/*
 * Worker to flush tx requests that were queued while the engine was not
 * running.  Only flushes if the engine is still not in the running state
 * when the work executes.
 */
433static void sdma_field_flush(struct work_struct *work)
434{
435 unsigned long flags;
436 struct sdma_engine *sde =
437 container_of(work, struct sdma_engine, flush_worker);
438
439 write_seqlock_irqsave(&sde->head_lock, flags);
440 if (!__sdma_running(sde))
441 sdma_flush(sde);
442 write_sequnlock_irqrestore(&sde->head_lock, flags);
443}
444
445static void sdma_err_halt_wait(struct work_struct *work)
446{
447 struct sdma_engine *sde = container_of(work, struct sdma_engine,
448 err_halt_worker);
449 u64 statuscsr;
450 unsigned long timeout;
451
452 timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
453 while (1) {
454 statuscsr = read_sde_csr(sde, SD(STATUS));
455 statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
456 if (statuscsr)
457 break;
458 if (time_after(jiffies, timeout)) {
459 dd_dev_err(sde->dd,
460 "SDMA engine %d - timeout waiting for engine to halt\n",
461 sde->this_idx);
			/*
			 * Give up waiting; proceed with the halt-done
			 * event anyway so the recovery sequence can
			 * continue.
			 */
466 break;
467 }
468 usleep_range(80, 120);
469 }
470
471 sdma_process_event(sde, sdma_event_e15_hw_halt_done);
472}
473
474static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
475{
476 if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
477 unsigned index;
478 struct hfi1_devdata *dd = sde->dd;
479
480 for (index = 0; index < dd->num_sdma; index++) {
481 struct sdma_engine *curr_sdma = &dd->per_sdma[index];
482
483 if (curr_sdma != sde)
484 curr_sdma->progress_check_head =
485 curr_sdma->descq_head;
486 }
487 dd_dev_err(sde->dd,
488 "SDMA engine %d - check scheduled\n",
489 sde->this_idx);
490 mod_timer(&sde->err_progress_check_timer, jiffies + 10);
491 }
492}
493
494static void sdma_err_progress_check(unsigned long data)
495{
496 unsigned index;
497 struct sdma_engine *sde = (struct sdma_engine *)data;
498
499 dd_dev_err(sde->dd, "SDE progress check event\n");
500 for (index = 0; index < sde->dd->num_sdma; index++) {
501 struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
502 unsigned long flags;

		/* check progress on every engine except the one that halted */
		if (curr_sde == sde)
			continue;
		/*
		 * Take both locks with interrupts off so the engine state
		 * and ring indices can be examined consistently with
		 * respect to the submission and interrupt paths.
		 */
		spin_lock_irqsave(&curr_sde->tail_lock, flags);
		write_seqlock(&curr_sde->head_lock);

		/* skip engines that are not running */
516 if (curr_sde->state.current_state != sdma_state_s99_running) {
517 write_sequnlock(&curr_sde->head_lock);
518 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
519 continue;
520 }
521
522 if ((curr_sde->descq_head != curr_sde->descq_tail) &&
523 (curr_sde->descq_head ==
524 curr_sde->progress_check_head))
525 __sdma_process_event(curr_sde,
526 sdma_event_e90_sw_halted);
527 write_sequnlock(&curr_sde->head_lock);
528 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
529 }
530 schedule_work(&sde->err_halt_worker);
531}
532
533static void sdma_hw_clean_up_task(unsigned long opaque)
534{
535 struct sdma_engine *sde = (struct sdma_engine *)opaque;
536 u64 statuscsr;
537
538 while (1) {
539#ifdef CONFIG_SDMA_VERBOSITY
540 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
541 sde->this_idx, slashstrip(__FILE__), __LINE__,
542 __func__);
543#endif
544 statuscsr = read_sde_csr(sde, SD(STATUS));
545 statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
546 if (statuscsr)
547 break;
548 udelay(10);
549 }
550
551 sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
552}
553
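/*
 * Return the oldest outstanding tx request, if any.  tx_head indexes
 * tx_ring, which has one slot per descriptor and wraps with
 * sde->sdma_mask (descq_cnt - 1), so a simple AND performs the modulo.
 */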
554static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
555{
556 smp_read_barrier_depends();
557 return sde->tx_ring[sde->tx_head & sde->sdma_mask];
558}
559
/*
 * Flush the descriptor ring: walk from the software head to the tail and
 * complete every outstanding tx request with SDMA_TXREQ_S_ABORTED.
 */
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	/*
	 * Not every descriptor has a corresponding tx request, so advance
	 * the head one descriptor at a time and only run the completion
	 * when the head crosses a request's last descriptor index.
	 */
574 head = sde->descq_head & sde->sdma_mask;
575 tail = sde->descq_tail & sde->sdma_mask;
576 while (head != tail) {
577
578 head = ++sde->descq_head & sde->sdma_mask;
579
580 if (txp && txp->next_descq_idx == head) {
581
582 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
583 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
584 trace_hfi1_sdma_progress(sde, head, tail, txp);
585 txp = get_txhead(sde);
586 }
587 progress++;
588 }
589 if (progress)
590 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
591}
592
593static void sdma_sw_clean_up_task(unsigned long opaque)
594{
595 struct sdma_engine *sde = (struct sdma_engine *)opaque;
596 unsigned long flags;
597
598 spin_lock_irqsave(&sde->tail_lock, flags);
599 write_seqlock(&sde->head_lock);

	/*
	 * At this point the engine is halted and not accepting new work:
	 * no more descriptors will retire and nothing can be submitted,
	 * so the descriptor ring and tx ring are ours to clean up.
	 *
	 * First retire everything the hardware already completed, then
	 * abort whatever remains in the ring and on the flush list.
	 */
	sdma_make_progress(sde, 0);

	sdma_flush(sde);

	/*
	 * Reset the software view of the ring; the hardware side is
	 * reprogrammed by sdma_hw_start_up() before the engine runs again.
	 */
629 sde->descq_tail = 0;
630 sde->descq_head = 0;
631 sde->desc_avail = sdma_descq_freecnt(sde);
632 *sde->head_dma = 0;
633
634 __sdma_process_event(sde, sdma_event_e40_sw_cleaned);
635
636 write_sequnlock(&sde->head_lock);
637 spin_unlock_irqrestore(&sde->tail_lock, flags);
638}
639
640static void sdma_sw_tear_down(struct sdma_engine *sde)
641{
642 struct sdma_state *ss = &sde->state;
643
	/*
	 * Drop the state machine's base reference; once the last holder
	 * calls sdma_put(), sdma_complete() signals state.comp for
	 * sdma_finalput().
	 */
	sdma_put(ss);

	/* wake anyone waiting on the unfreeze sequence */
	atomic_set(&sde->dd->sdma_unfreeze_count, -1);
	wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
650}
651
652static void sdma_start_hw_clean_up(struct sdma_engine *sde)
653{
654 tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
655}
656
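/*
 * Move the engine to next_state and apply that state's control
 * operations from sdma_action_table (enable, interrupt enable, halt,
 * cleanup) to the SendDmaCtrl CSR via sdma_sendctrl().  Entering
 * s99_running from any other state first flushes requests that were
 * queued while the engine was down.
 */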
657static void sdma_set_state(struct sdma_engine *sde,
658 enum sdma_states next_state)
659{
660 struct sdma_state *ss = &sde->state;
661 const struct sdma_set_state_action *action = sdma_action_table;
662 unsigned op = 0;
663
664 trace_hfi1_sdma_state(
665 sde,
666 sdma_state_names[ss->current_state],
667 sdma_state_names[next_state]);
668
669
670 ss->previous_state = ss->current_state;
671 ss->previous_op = ss->current_op;
672 ss->current_state = next_state;
673
674 if (ss->previous_state != sdma_state_s99_running &&
675 next_state == sdma_state_s99_running)
676 sdma_flush(sde);
677
678 if (action[next_state].op_enable)
679 op |= SDMA_SENDCTRL_OP_ENABLE;
680
681 if (action[next_state].op_intenable)
682 op |= SDMA_SENDCTRL_OP_INTENABLE;
683
684 if (action[next_state].op_halt)
685 op |= SDMA_SENDCTRL_OP_HALT;
686
687 if (action[next_state].op_cleanup)
688 op |= SDMA_SENDCTRL_OP_CLEANUP;
689
690 if (action[next_state].go_s99_running_tofalse)
691 ss->go_s99_running = 0;
692
693 if (action[next_state].go_s99_running_totrue)
694 ss->go_s99_running = 1;
695
696 ss->current_op = op;
697 sdma_sendctrl(sde, ss->current_op);
698}
699

/**
 * sdma_get_descq_cnt() - return a validated descriptor queue count
 *
 * The sdma_descq_cnt module parameter is accepted only if it is a power
 * of two in the range [64, 32768]; otherwise the default SDMA_DESCQ_CNT
 * is used.  A power-of-two count lets head/tail wrap with a simple mask.
 */
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;
	/*
	 * count must be a power of 2 in [64, 32768] for the masking and
	 * generation logic to work
	 */
	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}
727
/**
 * sdma_engine_get_vl() - return the VL mapped to a given SDMA engine
 * @sde: sdma engine
 *
 * Returns the VL for the engine, or -EINVAL if no mapping exists.
 * The engine-to-VL map is protected by RCU.
 */
735int sdma_engine_get_vl(struct sdma_engine *sde)
736{
737 struct hfi1_devdata *dd = sde->dd;
738 struct sdma_vl_map *m;
739 u8 vl;
740
741 if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
742 return -EINVAL;
743
744 rcu_read_lock();
745 m = rcu_dereference(dd->sdma_map);
746 if (unlikely(!m)) {
747 rcu_read_unlock();
748 return -EINVAL;
749 }
750 vl = m->engine_to_vl[sde->this_idx];
751 rcu_read_unlock();
752
753 return vl;
754}
755

/**
 * sdma_select_engine_vl() - select an SDMA engine for a VL
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * Returns an engine based on the selector and VL, falling back to
 * engine 0 when no mapping is available.  The map is RCU protected.
 */
766struct sdma_engine *sdma_select_engine_vl(
767 struct hfi1_devdata *dd,
768 u32 selector,
769 u8 vl)
770{
771 struct sdma_vl_map *m;
772 struct sdma_map_elem *e;
773 struct sdma_engine *rval;
774
775
776
777
778
779 if (vl >= num_vls) {
780 rval = NULL;
781 goto done;
782 }
783
784 rcu_read_lock();
785 m = rcu_dereference(dd->sdma_map);
786 if (unlikely(!m)) {
787 rcu_read_unlock();
788 return &dd->per_sdma[0];
789 }
790 e = m->map[vl & m->mask];
791 rval = e->sde[selector & e->mask];
792 rcu_read_unlock();
793
794done:
795 rval = !rval ? &dd->per_sdma[0] : rval;
796 trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
797 return rval;
798}
799

/**
 * sdma_select_engine_sc() - select an SDMA engine for an SC
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * Converts the SC to a VL and then selects an engine for that VL.
 */
809struct sdma_engine *sdma_select_engine_sc(
810 struct hfi1_devdata *dd,
811 u32 selector,
812 u8 sc5)
813{
814 u8 vl = sc_to_vlt(dd, sc5);
815
816 return sdma_select_engine_vl(dd, selector, vl);
817}
818
819struct sdma_rht_map_elem {
820 u32 mask;
821 u8 ctr;
822 struct sdma_engine *sde[0];
823};
824
825struct sdma_rht_node {
826 unsigned long cpu_id;
827 struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
828 struct rhash_head node;
829};
830
831#define NR_CPUS_HINT 192
832
833static const struct rhashtable_params sdma_rht_params = {
834 .nelem_hint = NR_CPUS_HINT,
835 .head_offset = offsetof(struct sdma_rht_node, node),
836 .key_offset = offsetof(struct sdma_rht_node, cpu_id),
837 .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
838 .max_size = NR_CPUS,
839 .min_size = 8,
840 .automatic_shrinking = true,
841};
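
/*
 * The rhashtable configured above maps a CPU id to per-VL lists of
 * engines chosen through sysfs (sdma_set_cpu_to_sde_map).  Lookups occur
 * on the submission fast path in sdma_select_user_engine(), so the table
 * is sized generously and read under RCU.
 */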
842
/*
 * sdma_select_user_engine() - select an SDMA engine for a user request
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * Honors a user-configured cpu-to-engine affinity when the submitting
 * process is pinned to a single CPU and a mapping exists for that CPU
 * and VL; otherwise falls back to the default VL-based selection.
 */
854struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
855 u32 selector, u8 vl)
856{
857 struct sdma_rht_node *rht_node;
858 struct sdma_engine *sde = NULL;
859 const struct cpumask *current_mask = tsk_cpus_allowed(current);
860 unsigned long cpu_id;
861
	/*
	 * Only use the per-cpu affinity table when the process is pinned
	 * to a single CPU, so the same engine(s) are always selected for
	 * this process; otherwise fall back to the VL map.
	 */
866 if (cpumask_weight(current_mask) != 1)
867 goto out;
868
869 cpu_id = smp_processor_id();
870 rcu_read_lock();
871 rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
872 sdma_rht_params);
873
874 if (rht_node && rht_node->map[vl]) {
875 struct sdma_rht_map_elem *map = rht_node->map[vl];
876
877 sde = map->sde[selector & map->mask];
878 }
879 rcu_read_unlock();
880
881 if (sde)
882 return sde;
883
884out:
885 return sdma_select_engine_vl(dd, selector, vl);
886}
887
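/*
 * Pad the per-VL engine list out to the next power of two by repeating
 * the configured engines, so that "selector & map->mask" always lands on
 * a valid entry without needing a modulo.
 */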
888static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
889{
890 int i;
891
892 for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
893 map->sde[map->ctr + i] = map->sde[i];
894}
895
896static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
897 struct sdma_engine *sde)
898{
899 unsigned int i, pow;
900
901
902 for (i = 0; i < map->ctr; i++) {
903 if (map->sde[i] == sde) {
904 memmove(&map->sde[i], &map->sde[i + 1],
905 (map->ctr - i - 1) * sizeof(map->sde[0]));
906 map->ctr--;
907 pow = roundup_pow_of_two(map->ctr ? : 1);
908 map->mask = pow - 1;
909 sdma_populate_sde_map(map);
910 break;
911 }
912 }
913}
914
/* serializes updates to the cpu-to-sde affinity table */
918static DEFINE_MUTEX(process_to_sde_mutex);
919
920ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
921 size_t count)
922{
923 struct hfi1_devdata *dd = sde->dd;
924 cpumask_var_t mask, new_mask;
925 unsigned long cpu;
926 int ret, vl, sz;
927
928 vl = sdma_engine_get_vl(sde);
929 if (unlikely(vl < 0))
930 return -EINVAL;
931
932 ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
933 if (!ret)
934 return -ENOMEM;
935
936 ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
937 if (!ret) {
938 free_cpumask_var(mask);
939 return -ENOMEM;
940 }
941 ret = cpulist_parse(buf, mask);
942 if (ret)
943 goto out_free;
944
945 if (!cpumask_subset(mask, cpu_online_mask)) {
946 dd_dev_warn(sde->dd, "Invalid CPU mask\n");
947 ret = -EINVAL;
948 goto out_free;
949 }
950
951 sz = sizeof(struct sdma_rht_map_elem) +
952 (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
953
954 mutex_lock(&process_to_sde_mutex);
955
956 for_each_cpu(cpu, mask) {
957 struct sdma_rht_node *rht_node;
958
959
960 if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
961 cpumask_set_cpu(cpu, new_mask);
962 continue;
963 }
964
965 rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
966 sdma_rht_params);
967 if (!rht_node) {
968 rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
969 if (!rht_node) {
970 ret = -ENOMEM;
971 goto out;
972 }
973
974 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
975 if (!rht_node->map[vl]) {
976 kfree(rht_node);
977 ret = -ENOMEM;
978 goto out;
979 }
980 rht_node->cpu_id = cpu;
981 rht_node->map[vl]->mask = 0;
982 rht_node->map[vl]->ctr = 1;
983 rht_node->map[vl]->sde[0] = sde;
984
985 ret = rhashtable_insert_fast(&dd->sdma_rht,
986 &rht_node->node,
987 sdma_rht_params);
988 if (ret) {
989 kfree(rht_node->map[vl]);
990 kfree(rht_node);
991 dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
992 cpu);
993 goto out;
994 }
995
996 } else {
997 int ctr, pow;
998
999
1000 if (!rht_node->map[vl])
1001 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
1002
1003 if (!rht_node->map[vl]) {
1004 ret = -ENOMEM;
1005 goto out;
1006 }
1007
1008 rht_node->map[vl]->ctr++;
1009 ctr = rht_node->map[vl]->ctr;
1010 rht_node->map[vl]->sde[ctr - 1] = sde;
1011 pow = roundup_pow_of_two(ctr);
1012 rht_node->map[vl]->mask = pow - 1;
1013
1014
1015 sdma_populate_sde_map(rht_node->map[vl]);
1016 }
1017 cpumask_set_cpu(cpu, new_mask);
1018 }
1019
1020
1021 for_each_cpu(cpu, cpu_online_mask) {
1022 struct sdma_rht_node *rht_node;
1023
1024
1025 if (cpumask_test_cpu(cpu, mask))
1026 continue;
1027
1028 rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
1029 sdma_rht_params);
1030 if (rht_node) {
1031 bool empty = true;
1032 int i;
1033
1034
1035 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1036 if (rht_node->map[i])
1037 sdma_cleanup_sde_map(rht_node->map[i],
1038 sde);
1039
1040
1041 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1042 if (!rht_node->map[i])
1043 continue;
1044
1045 if (rht_node->map[i]->ctr) {
1046 empty = false;
1047 break;
1048 }
1049 }
1050
1051 if (empty) {
1052 ret = rhashtable_remove_fast(&dd->sdma_rht,
1053 &rht_node->node,
1054 sdma_rht_params);
1055 WARN_ON(ret);
1056
1057 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1058 kfree(rht_node->map[i]);
1059
1060 kfree(rht_node);
1061 }
1062 }
1063 }
1064
1065 cpumask_copy(&sde->cpu_mask, new_mask);
1066out:
1067 mutex_unlock(&process_to_sde_mutex);
1068out_free:
1069 free_cpumask_var(mask);
1070 free_cpumask_var(new_mask);
1071 return ret ? : strnlen(buf, PAGE_SIZE);
1072}
1073
1074ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
1075{
1076 mutex_lock(&process_to_sde_mutex);
1077 if (cpumask_empty(&sde->cpu_mask))
1078 snprintf(buf, PAGE_SIZE, "%s\n", "empty");
1079 else
1080 cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
1081 mutex_unlock(&process_to_sde_mutex);
1082 return strnlen(buf, PAGE_SIZE);
1083}
1084
1085static void sdma_rht_free(void *ptr, void *arg)
1086{
1087 struct sdma_rht_node *rht_node = ptr;
1088 int i;
1089
1090 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1091 kfree(rht_node->map[i]);
1092
1093 kfree(rht_node);
1094}
1095

/*
 * sdma_seqfile_dump_cpu_list() - debugfs dump of the engines selected
 * for a given CPU
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * Prints, for each VL, the list of engines the CPU is mapped to.
 */
1104void sdma_seqfile_dump_cpu_list(struct seq_file *s,
1105 struct hfi1_devdata *dd,
1106 unsigned long cpuid)
1107{
1108 struct sdma_rht_node *rht_node;
1109 int i, j;
1110
1111 rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
1112 sdma_rht_params);
1113 if (!rht_node)
1114 return;
1115
1116 seq_printf(s, "cpu%3lu: ", cpuid);
1117 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1118 if (!rht_node->map[i] || !rht_node->map[i]->ctr)
1119 continue;
1120
1121 seq_printf(s, " vl%d: [", i);
1122
1123 for (j = 0; j < rht_node->map[i]->ctr; j++) {
1124 if (!rht_node->map[i]->sde[j])
1125 continue;
1126
1127 if (j > 0)
1128 seq_puts(s, ",");
1129
1130 seq_printf(s, " sdma%2d",
1131 rht_node->map[i]->sde[j]->this_idx);
1132 }
1133 seq_puts(s, " ]");
1134 }
1135
1136 seq_puts(s, "\n");
1137}
1138
1139
1140
1141
1142static void sdma_map_free(struct sdma_vl_map *m)
1143{
1144 int i;
1145
1146 for (i = 0; m && i < m->actual_vls; i++)
1147 kfree(m->map[i]);
1148 kfree(m);
1149}
1150
1151
1152
1153
1154static void sdma_map_rcu_callback(struct rcu_head *list)
1155{
1156 struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);
1157
1158 sdma_map_free(m);
1159}
1160

/**
 * sdma_map_init - rebuild the VL-to-engine map
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of VLs
 * @vl_engines: per-VL engine counts, or NULL to spread engines evenly
 *
 * Builds a new sdma_vl_map sized to the next power of two of num_vls so
 * selection can use "selector & mask", publishes it with
 * rcu_assign_pointer(), and frees the old map via call_rcu() so readers
 * in the engine-selection fast path never block.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
1188int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
1189{
1190 int i, j;
1191 int extra, sde_per_vl;
1192 int engine = 0;
1193 u8 lvl_engines[OPA_MAX_VLS];
1194 struct sdma_vl_map *oldmap, *newmap;
1195
1196 if (!(dd->flags & HFI1_HAS_SEND_DMA))
1197 return 0;
1198
1199 if (!vl_engines) {
1200
1201 sde_per_vl = dd->num_sdma / num_vls;
1202
1203 extra = dd->num_sdma % num_vls;
1204 vl_engines = lvl_engines;
1205
1206 for (i = num_vls - 1; i >= 0; i--, extra--)
1207 vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
1208 }
1209
1210 newmap = kzalloc(
1211 sizeof(struct sdma_vl_map) +
1212 roundup_pow_of_two(num_vls) *
1213 sizeof(struct sdma_map_elem *),
1214 GFP_KERNEL);
1215 if (!newmap)
1216 goto bail;
1217 newmap->actual_vls = num_vls;
1218 newmap->vls = roundup_pow_of_two(num_vls);
1219 newmap->mask = (1 << ilog2(newmap->vls)) - 1;
1220
1221 for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
1222 newmap->engine_to_vl[i] = -1;
1223 for (i = 0; i < newmap->vls; i++) {
1224
1225 int first_engine = engine;
1226
1227 if (i < newmap->actual_vls) {
1228 int sz = roundup_pow_of_two(vl_engines[i]);
1229
1230
1231 newmap->map[i] = kzalloc(
1232 sizeof(struct sdma_map_elem) +
1233 sz * sizeof(struct sdma_engine *),
1234 GFP_KERNEL);
1235 if (!newmap->map[i])
1236 goto bail;
1237 newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
1238
1239 for (j = 0; j < sz; j++) {
1240 newmap->map[i]->sde[j] =
1241 &dd->per_sdma[engine];
1242 if (++engine >= first_engine + vl_engines[i])
1243
1244 engine = first_engine;
1245 }
1246
1247 for (j = 0; j < vl_engines[i]; j++)
1248 newmap->engine_to_vl[first_engine + j] = i;
1249 } else {
1250
1251 newmap->map[i] = newmap->map[i % num_vls];
1252 }
1253 engine = first_engine + vl_engines[i];
1254 }
1255
1256 spin_lock_irq(&dd->sde_map_lock);
1257 oldmap = rcu_dereference_protected(dd->sdma_map,
1258 lockdep_is_held(&dd->sde_map_lock));
1259
1260
1261 rcu_assign_pointer(dd->sdma_map, newmap);
1262
1263 spin_unlock_irq(&dd->sde_map_lock);
1264
1265 if (oldmap)
1266 call_rcu(&oldmap->list, sdma_map_rcu_callback);
1267 return 0;
1268bail:
1269
1270 sdma_map_free(newmap);
1271 return -ENOMEM;
1272}
1273

/*
 * Free the per-engine descriptor rings and tx rings, the shared head and
 * pad DMA buffers, and the VL map.  Safe to call on a partially
 * initialized device; called from sdma_exit() and from init error paths.
 */
1280static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
1281{
1282 size_t i;
1283 struct sdma_engine *sde;
1284
1285 if (dd->sdma_pad_dma) {
1286 dma_free_coherent(&dd->pcidev->dev, 4,
1287 (void *)dd->sdma_pad_dma,
1288 dd->sdma_pad_phys);
1289 dd->sdma_pad_dma = NULL;
1290 dd->sdma_pad_phys = 0;
1291 }
1292 if (dd->sdma_heads_dma) {
1293 dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
1294 (void *)dd->sdma_heads_dma,
1295 dd->sdma_heads_phys);
1296 dd->sdma_heads_dma = NULL;
1297 dd->sdma_heads_phys = 0;
1298 }
1299 for (i = 0; dd->per_sdma && i < num_engines; ++i) {
1300 sde = &dd->per_sdma[i];
1301
1302 sde->head_dma = NULL;
1303 sde->head_phys = 0;
1304
1305 if (sde->descq) {
1306 dma_free_coherent(
1307 &dd->pcidev->dev,
1308 sde->descq_cnt * sizeof(u64[2]),
1309 sde->descq,
1310 sde->descq_phys
1311 );
1312 sde->descq = NULL;
1313 sde->descq_phys = 0;
1314 }
1315 kvfree(sde->tx_ring);
1316 sde->tx_ring = NULL;
1317 }
1318 spin_lock_irq(&dd->sde_map_lock);
1319 sdma_map_free(rcu_access_pointer(dd->sdma_map));
1320 RCU_INIT_POINTER(dd->sdma_map, NULL);
1321 spin_unlock_irq(&dd->sde_map_lock);
1322 synchronize_rcu();
1323 kfree(dd->per_sdma);
1324 dd->per_sdma = NULL;
1325}
1326

/**
 * sdma_init() - initialize the SDMA engines at device probe time
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 *
 * Allocates the per-engine structures, descriptor rings, tx rings and
 * head/pad DMA buffers, programs the per-engine CSRs, and builds the
 * initial VL-to-engine map.
 *
 * Returns 0 on success, -ENOMEM on failure.
 */
1340int sdma_init(struct hfi1_devdata *dd, u8 port)
1341{
1342 unsigned this_idx;
1343 struct sdma_engine *sde;
1344 u16 descq_cnt;
1345 void *curr_head;
1346 struct hfi1_pportdata *ppd = dd->pport + port;
1347 u32 per_sdma_credits;
1348 uint idle_cnt = sdma_idle_cnt;
1349 size_t num_engines = dd->chip_sdma_engines;
1350
1351 if (!HFI1_CAP_IS_KSET(SDMA)) {
1352 HFI1_CAP_CLEAR(SDMA_AHG);
1353 return 0;
1354 }
1355 if (mod_num_sdma &&
1356
1357 mod_num_sdma <= dd->chip_sdma_engines &&
1358
1359 mod_num_sdma >= num_vls)
1360 num_engines = mod_num_sdma;
1361
1362 dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
1363 dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
1364 dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
1365 dd->chip_sdma_mem_size);
1366
1367 per_sdma_credits =
1368 dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
1369
1370
1371 init_waitqueue_head(&dd->sdma_unfreeze_wq);
1372 atomic_set(&dd->sdma_unfreeze_count, 0);
1373
1374 descq_cnt = sdma_get_descq_cnt();
1375 dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
1376 num_engines, descq_cnt);
1377
1378
1379 dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
1380 if (!dd->per_sdma)
1381 return -ENOMEM;
1382
1383 idle_cnt = ns_to_cclock(dd, idle_cnt);
1384 if (!sdma_desct_intr)
1385 sdma_desct_intr = SDMA_DESC_INTR;
1386
1387
1388 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1389 sde = &dd->per_sdma[this_idx];
1390 sde->dd = dd;
1391 sde->ppd = ppd;
1392 sde->this_idx = this_idx;
1393 sde->descq_cnt = descq_cnt;
1394 sde->desc_avail = sdma_descq_freecnt(sde);
1395 sde->sdma_shift = ilog2(descq_cnt);
1396 sde->sdma_mask = (1 << sde->sdma_shift) - 1;
1397
1398
1399 sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
1400 this_idx);
1401 sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
1402 this_idx);
1403 sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
1404 this_idx);
1405
1406 sde->imask = sde->int_mask | sde->progress_mask |
1407 sde->idle_mask;
1408
1409 spin_lock_init(&sde->tail_lock);
1410 seqlock_init(&sde->head_lock);
1411 spin_lock_init(&sde->senddmactrl_lock);
1412 spin_lock_init(&sde->flushlist_lock);
1413
1414 sde->ahg_bits = 0xfffffffe00000000ULL;
1415
1416 sdma_set_state(sde, sdma_state_s00_hw_down);
1417
1418
1419 kref_init(&sde->state.kref);
1420 init_completion(&sde->state.comp);
1421
1422 INIT_LIST_HEAD(&sde->flushlist);
1423 INIT_LIST_HEAD(&sde->dmawait);
1424
1425 sde->tail_csr =
1426 get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
1427
1428 if (idle_cnt)
1429 dd->default_desc1 =
1430 SDMA_DESC1_HEAD_TO_HOST_FLAG;
1431 else
1432 dd->default_desc1 =
1433 SDMA_DESC1_INT_REQ_FLAG;
1434
1435 tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
1436 (unsigned long)sde);
1437
1438 tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
1439 (unsigned long)sde);
1440 INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
1441 INIT_WORK(&sde->flush_worker, sdma_field_flush);
1442
1443 sde->progress_check_head = 0;
1444
1445 setup_timer(&sde->err_progress_check_timer,
1446 sdma_err_progress_check, (unsigned long)sde);
1447
1448 sde->descq = dma_zalloc_coherent(
1449 &dd->pcidev->dev,
1450 descq_cnt * sizeof(u64[2]),
1451 &sde->descq_phys,
1452 GFP_KERNEL
1453 );
1454 if (!sde->descq)
1455 goto bail;
1456 sde->tx_ring =
1457 kcalloc(descq_cnt, sizeof(struct sdma_txreq *),
1458 GFP_KERNEL);
1459 if (!sde->tx_ring)
1460 sde->tx_ring =
1461 vzalloc(
1462 sizeof(struct sdma_txreq *) *
1463 descq_cnt);
1464 if (!sde->tx_ring)
1465 goto bail;
1466 }
1467
1468 dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
1469
1470 dd->sdma_heads_dma = dma_zalloc_coherent(
1471 &dd->pcidev->dev,
1472 dd->sdma_heads_size,
1473 &dd->sdma_heads_phys,
1474 GFP_KERNEL
1475 );
1476 if (!dd->sdma_heads_dma) {
1477 dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
1478 goto bail;
1479 }
1480
1481
1482 dd->sdma_pad_dma = dma_zalloc_coherent(
1483 &dd->pcidev->dev,
1484 sizeof(u32),
1485 &dd->sdma_pad_phys,
1486 GFP_KERNEL
1487 );
1488 if (!dd->sdma_pad_dma) {
1489 dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
1490 goto bail;
1491 }
1492
1493
1494 curr_head = (void *)dd->sdma_heads_dma;
1495 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1496 unsigned long phys_offset;
1497
1498 sde = &dd->per_sdma[this_idx];
1499
1500 sde->head_dma = curr_head;
1501 curr_head += L1_CACHE_BYTES;
1502 phys_offset = (unsigned long)sde->head_dma -
1503 (unsigned long)dd->sdma_heads_dma;
1504 sde->head_phys = dd->sdma_heads_phys + phys_offset;
1505 init_sdma_regs(sde, per_sdma_credits, idle_cnt);
1506 }
1507 dd->flags |= HFI1_HAS_SEND_DMA;
1508 dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
1509 dd->num_sdma = num_engines;
1510 if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
1511 goto bail;
1512
1513 if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
1514 goto bail;
1515
1516 dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
1517 return 0;
1518
1519bail:
1520 sdma_clean(dd, num_engines);
1521 return -ENOMEM;
1522}
1523

/**
 * sdma_all_running() - post the go-running event to every engine
 * @dd: hfi1_devdata
 */
1530void sdma_all_running(struct hfi1_devdata *dd)
1531{
1532 struct sdma_engine *sde;
1533 unsigned int i;
1534
1535
1536 for (i = 0; i < dd->num_sdma; ++i) {
1537 sde = &dd->per_sdma[i];
1538 sdma_process_event(sde, sdma_event_e30_go_running);
1539 }
1540}
1541

/**
 * sdma_all_idle() - post the go-idle event to every engine
 * @dd: hfi1_devdata
 */
1548void sdma_all_idle(struct hfi1_devdata *dd)
1549{
1550 struct sdma_engine *sde;
1551 unsigned int i;
1552
1553
1554 for (i = 0; i < dd->num_sdma; ++i) {
1555 sde = &dd->per_sdma[i];
1556 sdma_process_event(sde, sdma_event_e70_go_idle);
1557 }
1558}
1559

/**
 * sdma_start() - kick off hardware start-up on every engine
 * @dd: hfi1_devdata
 *
 * Posts the go-hw-start event, which walks each engine through its
 * start-up halt and clean-up sequence.
 */
1568void sdma_start(struct hfi1_devdata *dd)
1569{
1570 unsigned i;
1571 struct sdma_engine *sde;
1572
1573
1574 for (i = 0; i < dd->num_sdma; ++i) {
1575 sde = &dd->per_sdma[i];
1576 sdma_process_event(sde, sdma_event_e10_go_hw_start);
1577 }
1578}
1579

/**
 * sdma_exit() - shut down all engines at device teardown
 * @dd: hfi1_devdata
 */
1584void sdma_exit(struct hfi1_devdata *dd)
1585{
1586 unsigned this_idx;
1587 struct sdma_engine *sde;
1588
1589 for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
1590 ++this_idx) {
1591 sde = &dd->per_sdma[this_idx];
1592 if (!list_empty(&sde->dmawait))
1593 dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
1594 sde->this_idx);
1595 sdma_process_event(sde, sdma_event_e00_go_hw_down);
1596
1597 del_timer_sync(&sde->err_progress_check_timer);
1598
1599
1600
1601
1602
1603
1604 sdma_finalput(&sde->state);
1605 }
1606 sdma_clean(dd, dd->num_sdma);
1607 rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
1608}
1609
1610
1611
1612
1613static inline void sdma_unmap_desc(
1614 struct hfi1_devdata *dd,
1615 struct sdma_desc *descp)
1616{
1617 switch (sdma_mapping_type(descp)) {
1618 case SDMA_MAP_SINGLE:
1619 dma_unmap_single(
1620 &dd->pcidev->dev,
1621 sdma_mapping_addr(descp),
1622 sdma_mapping_len(descp),
1623 DMA_TO_DEVICE);
1624 break;
1625 case SDMA_MAP_PAGE:
1626 dma_unmap_page(
1627 &dd->pcidev->dev,
1628 sdma_mapping_addr(descp),
1629 sdma_mapping_len(descp),
1630 DMA_TO_DEVICE);
1631 break;
1632 }
1633}
1634
1635
1636
1637
1638
1639static inline u8 ahg_mode(struct sdma_txreq *tx)
1640{
1641 return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
1642 >> SDMA_DESC1_HEADER_MODE_SHIFT;
1643}
1644

/**
 * __sdma_txclean() - release a tx request's DMA mappings and buffers
 * @dd: hfi1_devdata used for unmapping
 * @tx: tx request to clean
 *
 * Used by the progress/flush paths to retire a request and by ULPs to
 * tear down a partially built request.  AHG header-update descriptors
 * that carry no mapping are skipped.  Safe to call more than once on
 * the same tx.
 */
1656void __sdma_txclean(
1657 struct hfi1_devdata *dd,
1658 struct sdma_txreq *tx)
1659{
1660 u16 i;
1661
1662 if (tx->num_desc) {
1663 u8 skip = 0, mode = ahg_mode(tx);
1664
1665
1666 sdma_unmap_desc(dd, &tx->descp[0]);
1667
1668 if (mode > SDMA_AHG_APPLY_UPDATE1)
1669 skip = mode >> 1;
1670 for (i = 1 + skip; i < tx->num_desc; i++)
1671 sdma_unmap_desc(dd, &tx->descp[i]);
1672 tx->num_desc = 0;
1673 }
1674 kfree(tx->coalesce_buf);
1675 tx->coalesce_buf = NULL;
1676
1677 if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
1678 tx->desc_limit = ARRAY_SIZE(tx->descs);
1679 kfree(tx->descp);
1680 }
1681}
1682
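/*
 * Return the current hardware head index.  When the device DMAs head
 * updates to host memory and the engine is running, use the cached value
 * at *sde->head_dma; otherwise read the HEAD CSR.  With SDMA_HEAD_CHECK
 * enabled, the value is sanity-checked against the software head/tail
 * and the software head is used if the hardware value looks bogus.
 */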
1683static inline u16 sdma_gethead(struct sdma_engine *sde)
1684{
1685 struct hfi1_devdata *dd = sde->dd;
1686 int use_dmahead;
1687 u16 hwhead;
1688
1689#ifdef CONFIG_SDMA_VERBOSITY
1690 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1691 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1692#endif
1693
1694retry:
1695 use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
1696 (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
1697 hwhead = use_dmahead ?
1698 (u16)le64_to_cpu(*sde->head_dma) :
1699 (u16)read_sde_csr(sde, SD(HEAD));
1700
1701 if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
1702 u16 cnt;
1703 u16 swtail;
1704 u16 swhead;
1705 int sane;
1706
1707 swhead = sde->descq_head & sde->sdma_mask;
1708
1709 swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
1710 cnt = sde->descq_cnt;
1711
1712 if (swhead < swtail)
1713
1714 sane = (hwhead >= swhead) & (hwhead <= swtail);
1715 else if (swhead > swtail)
1716
1717 sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
1718 (hwhead <= swtail);
1719 else
1720
1721 sane = (hwhead == swhead);
1722
1723 if (unlikely(!sane)) {
1724 dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
1725 sde->this_idx,
1726 use_dmahead ? "dma" : "kreg",
1727 hwhead, swhead, swtail, cnt);
1728 if (use_dmahead) {
1729
1730 use_dmahead = 0;
1731 goto retry;
1732 }
1733
1734 hwhead = swhead;
1735 }
1736 }
1737 return hwhead;
1738}
1739

/*
 * sdma_desc_avail() - wake iowaits now that descriptors are available
 *
 * Walks sde->dmawait under the device iowait seqlock and dequeues as many
 * waiters (up to SDMA_WAIT_BATCH_SIZE) as the freed descriptor count can
 * satisfy, then invokes each one's wakeup callback outside the lock.
 */
1746static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
1747{
1748 struct iowait *wait, *nw;
1749 struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
1750 unsigned i, n = 0, seq;
1751 struct sdma_txreq *stx;
1752 struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
1753
1754#ifdef CONFIG_SDMA_VERBOSITY
1755 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
1756 slashstrip(__FILE__), __LINE__, __func__);
1757 dd_dev_err(sde->dd, "avail: %u\n", avail);
1758#endif
1759
1760 do {
1761 seq = read_seqbegin(&dev->iowait_lock);
1762 if (!list_empty(&sde->dmawait)) {
1763
1764 write_seqlock(&dev->iowait_lock);
1765
1766 list_for_each_entry_safe(
1767 wait,
1768 nw,
1769 &sde->dmawait,
1770 list) {
1771 u16 num_desc = 0;
1772
1773 if (!wait->wakeup)
1774 continue;
1775 if (n == ARRAY_SIZE(waits))
1776 break;
1777 if (!list_empty(&wait->tx_head)) {
1778 stx = list_first_entry(
1779 &wait->tx_head,
1780 struct sdma_txreq,
1781 list);
1782 num_desc = stx->num_desc;
1783 }
1784 if (num_desc > avail)
1785 break;
1786 avail -= num_desc;
1787 list_del_init(&wait->list);
1788 waits[n++] = wait;
1789 }
1790 write_sequnlock(&dev->iowait_lock);
1791 break;
1792 }
1793 } while (read_seqretry(&dev->iowait_lock, seq));
1794
1795 for (i = 0; i < n; i++)
1796 waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
1797}
1798
/*
 * Retire completed descriptors by walking the ring from the software
 * head up to the hardware head.
 */
1800static void sdma_make_progress(struct sdma_engine *sde, u64 status)
1801{
1802 struct sdma_txreq *txp = NULL;
1803 int progress = 0;
1804 u16 hwhead, swhead;
1805 int idle_check_done = 0;
1806
1807 hwhead = sdma_gethead(sde);
1808
1809
1810
1811
1812
1813
1814
1815retry:
1816 txp = get_txhead(sde);
1817 swhead = sde->descq_head & sde->sdma_mask;
1818 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1819 while (swhead != hwhead) {
1820
1821 swhead = ++sde->descq_head & sde->sdma_mask;
1822
1823
1824 if (txp && txp->next_descq_idx == swhead) {
1825
1826 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
1827 complete_tx(sde, txp, SDMA_TXREQ_S_OK);
1828
1829 txp = get_txhead(sde);
1830 }
1831 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1832 progress++;
1833 }
1834

	/*
	 * The idle interrupt is not guaranteed to be ordered with the
	 * DMA'd head update in host memory, so the head read above may be
	 * stale.  If descriptors still appear outstanding and this was an
	 * idle interrupt, re-read the head from the CSR once (and only
	 * once, to avoid livelock) and retire anything it exposes.
	 */
1844 if ((status & sde->idle_mask) && !idle_check_done) {
1845 u16 swtail;
1846
1847 swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
1848 if (swtail != hwhead) {
1849 hwhead = (u16)read_sde_csr(sde, SD(HEAD));
1850 idle_check_done = 1;
1851 goto retry;
1852 }
1853 }
1854
1855 sde->last_status = status;
1856 if (progress)
1857 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
1858}
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
1870{
1871 trace_hfi1_sdma_engine_interrupt(sde, status);
1872 write_seqlock(&sde->head_lock);
1873 sdma_set_desc_cnt(sde, sdma_desct_intr);
1874 if (status & sde->idle_mask)
1875 sde->idle_int_cnt++;
1876 else if (status & sde->progress_mask)
1877 sde->progress_int_cnt++;
1878 else if (status & sde->int_mask)
1879 sde->sdma_int_cnt++;
1880 sdma_make_progress(sde, status);
1881 write_sequnlock(&sde->head_lock);
1882}
1883
1884
1885
1886
1887
1888
1889void sdma_engine_error(struct sdma_engine *sde, u64 status)
1890{
1891 unsigned long flags;
1892
1893#ifdef CONFIG_SDMA_VERBOSITY
1894 dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
1895 sde->this_idx,
1896 (unsigned long long)status,
1897 sdma_state_names[sde->state.current_state]);
1898#endif
1899 spin_lock_irqsave(&sde->tail_lock, flags);
1900 write_seqlock(&sde->head_lock);
1901 if (status & ALL_SDMA_ENG_HALT_ERRS)
1902 __sdma_process_event(sde, sdma_event_e60_hw_halted);
1903 if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
1904 dd_dev_err(sde->dd,
1905 "SDMA (%u) engine error: 0x%llx state %s\n",
1906 sde->this_idx,
1907 (unsigned long long)status,
1908 sdma_state_names[sde->state.current_state]);
1909 dump_sdma_state(sde);
1910 }
1911 write_sequnlock(&sde->head_lock);
1912 spin_unlock_irqrestore(&sde->tail_lock, flags);
1913}
1914
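/*
 * Apply the requested enable/interrupt/halt operations to the engine's
 * SendDmaCtrl CSR.  The shadow copy p_senddmactrl is maintained under
 * senddmactrl_lock; the CLEANUP bit is written as a one-shot and is not
 * kept in the shadow.
 */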
1915static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
1916{
1917 u64 set_senddmactrl = 0;
1918 u64 clr_senddmactrl = 0;
1919 unsigned long flags;
1920
1921#ifdef CONFIG_SDMA_VERBOSITY
1922 dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
1923 sde->this_idx,
1924 (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
1925 (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
1926 (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
1927 (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
1928#endif
1929
1930 if (op & SDMA_SENDCTRL_OP_ENABLE)
1931 set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1932 else
1933 clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1934
1935 if (op & SDMA_SENDCTRL_OP_INTENABLE)
1936 set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1937 else
1938 clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1939
1940 if (op & SDMA_SENDCTRL_OP_HALT)
1941 set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1942 else
1943 clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1944
1945 spin_lock_irqsave(&sde->senddmactrl_lock, flags);
1946
1947 sde->p_senddmactrl |= set_senddmactrl;
1948 sde->p_senddmactrl &= ~clr_senddmactrl;
1949
1950 if (op & SDMA_SENDCTRL_OP_CLEANUP)
1951 write_sde_csr(sde, SD(CTRL),
1952 sde->p_senddmactrl |
1953 SD(CTRL_SDMA_CLEANUP_SMASK));
1954 else
1955 write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
1956
1957 spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);
1958
1959#ifdef CONFIG_SDMA_VERBOSITY
1960 sdma_dumpstate(sde);
1961#endif
1962}
1963
1964static void sdma_setlengen(struct sdma_engine *sde)
1965{
1966#ifdef CONFIG_SDMA_VERBOSITY
1967 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1968 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1969#endif

	/*
	 * Program the ring length, then write it again with generation
	 * bit 2 (value 4) set; the second write appears to load the
	 * engine's internal generation counter and enable generation
	 * checking.
	 */
1976 write_sde_csr(sde, SD(LEN_GEN),
1977 (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
1978 write_sde_csr(sde, SD(LEN_GEN),
1979 ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
1980 (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
1981}
1982
1983static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
1984{
	/* make descriptor writes visible before ringing the tail doorbell */
	smp_wmb();
1987 writeq(tail, sde->tail_csr);
1988}
1989
1990
1991
1992
1993
1994static void sdma_hw_start_up(struct sdma_engine *sde)
1995{
1996 u64 reg;
1997
1998#ifdef CONFIG_SDMA_VERBOSITY
1999 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
2000 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
2001#endif
2002
2003 sdma_setlengen(sde);
2004 sdma_update_tail(sde, 0);
2005 *sde->head_dma = 0;
2006
2007 reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
2008 SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
2009 write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
2010}
2011
2012
2013
2014
2015
2016
2017static void set_sdma_integrity(struct sdma_engine *sde)
2018{
2019 struct hfi1_devdata *dd = sde->dd;
2020
2021 write_sde_csr(sde, SD(CHECK_ENABLE),
2022 hfi1_pkt_base_sdma_integrity(dd));
2023}
2024
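/*
 * Program the per-engine CSRs at start-up: ring base address,
 * length/generation, reload (idle interrupt) count, the host address for
 * DMA'd head updates, this engine's slice of SDMA packet memory, error
 * masks, and the integrity and opcode checks.
 */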
2025static void init_sdma_regs(
2026 struct sdma_engine *sde,
2027 u32 credits,
2028 uint idle_cnt)
2029{
2030 u8 opval, opmask;
2031#ifdef CONFIG_SDMA_VERBOSITY
2032 struct hfi1_devdata *dd = sde->dd;
2033
2034 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
2035 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
2036#endif
2037
2038 write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
2039 sdma_setlengen(sde);
2040 sdma_update_tail(sde, 0);
2041 write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
2042 write_sde_csr(sde, SD(DESC_CNT), 0);
2043 write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
2044 write_sde_csr(sde, SD(MEMORY),
2045 ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
2046 ((u64)(credits * sde->this_idx) <<
2047 SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
2048 write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
2049 set_sdma_integrity(sde);
2050 opmask = OPCODE_CHECK_MASK_DISABLED;
2051 opval = OPCODE_CHECK_VAL_DISABLED;
2052 write_sde_csr(sde, SD(CHECK_OPCODE),
2053 (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
2054 (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
2055}
2056
2057#ifdef CONFIG_SDMA_VERBOSITY
2058
2059#define sdma_dumpstate_helper0(reg) do { \
2060 csr = read_csr(sde->dd, reg); \
2061 dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
2062 } while (0)
2063
2064#define sdma_dumpstate_helper(reg) do { \
2065 csr = read_sde_csr(sde, reg); \
2066 dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
2067 #reg, sde->this_idx, csr); \
2068 } while (0)
2069
2070#define sdma_dumpstate_helper2(reg) do { \
2071 csr = read_csr(sde->dd, reg + (8 * i)); \
2072 dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
2073 #reg, i, csr); \
2074 } while (0)
2075
2076void sdma_dumpstate(struct sdma_engine *sde)
2077{
2078 u64 csr;
2079 unsigned i;
2080
2081 sdma_dumpstate_helper(SD(CTRL));
2082 sdma_dumpstate_helper(SD(STATUS));
2083 sdma_dumpstate_helper0(SD(ERR_STATUS));
2084 sdma_dumpstate_helper0(SD(ERR_MASK));
2085 sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
2086 sdma_dumpstate_helper(SD(ENG_ERR_MASK));
2087
2088 for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
2089 sdma_dumpstate_helper2(CCE_INT_STATUS);
2090 sdma_dumpstate_helper2(CCE_INT_MASK);
2091 sdma_dumpstate_helper2(CCE_INT_BLOCKED);
2092 }
2093
2094 sdma_dumpstate_helper(SD(TAIL));
2095 sdma_dumpstate_helper(SD(HEAD));
2096 sdma_dumpstate_helper(SD(PRIORITY_THLD));
2097 sdma_dumpstate_helper(SD(IDLE_CNT));
2098 sdma_dumpstate_helper(SD(RELOAD_CNT));
2099 sdma_dumpstate_helper(SD(DESC_CNT));
2100 sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
2101 sdma_dumpstate_helper(SD(MEMORY));
2102 sdma_dumpstate_helper0(SD(ENGINES));
2103 sdma_dumpstate_helper0(SD(MEM_SIZE));
2104
2105 sdma_dumpstate_helper(SD(BASE_ADDR));
2106 sdma_dumpstate_helper(SD(LEN_GEN));
2107 sdma_dumpstate_helper(SD(HEAD_ADDR));
2108 sdma_dumpstate_helper(SD(CHECK_ENABLE));
2109 sdma_dumpstate_helper(SD(CHECK_VL));
2110 sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
2111 sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
2112 sdma_dumpstate_helper(SD(CHECK_SLID));
2113 sdma_dumpstate_helper(SD(CHECK_OPCODE));
2114}
2115#endif
2116
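/*
 * Log the software view of the descriptor ring (head, tail, free count)
 * and decode every in-flight descriptor.  Called from the engine-error
 * path when a non-halt error is reported.
 */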
2117static void dump_sdma_state(struct sdma_engine *sde)
2118{
2119 struct hw_sdma_desc *descq;
2120 struct hw_sdma_desc *descqp;
2121 u64 desc[2];
2122 u64 addr;
2123 u8 gen;
2124 u16 len;
2125 u16 head, tail, cnt;
2126
2127 head = sde->descq_head & sde->sdma_mask;
2128 tail = sde->descq_tail & sde->sdma_mask;
2129 cnt = sdma_descq_freecnt(sde);
2130 descq = sde->descq;
2131
2132 dd_dev_err(sde->dd,
2133 "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
2134 sde->this_idx, head, tail, cnt,
2135 !list_empty(&sde->flushlist));
2136
2137
2138 while (head != tail) {
2139 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2140
2141 descqp = &sde->descq[head];
2142 desc[0] = le64_to_cpu(descqp->qw[0]);
2143 desc[1] = le64_to_cpu(descqp->qw[1]);
2144 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2145 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2146 'H' : '-';
2147 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2148 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2149 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2150 & SDMA_DESC0_PHY_ADDR_MASK;
2151 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2152 & SDMA_DESC1_GENERATION_MASK;
2153 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2154 & SDMA_DESC0_BYTE_COUNT_MASK;
2155 dd_dev_err(sde->dd,
2156 "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2157 head, flags, addr, gen, len);
2158 dd_dev_err(sde->dd,
2159 "\tdesc0:0x%016llx desc1 0x%016llx\n",
2160 desc[0], desc[1]);
2161 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2162 dd_dev_err(sde->dd,
2163 "\taidx: %u amode: %u alen: %u\n",
2164 (u8)((desc[1] &
2165 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2166 SDMA_DESC1_HEADER_INDEX_SHIFT),
2167 (u8)((desc[1] &
2168 SDMA_DESC1_HEADER_MODE_SMASK) >>
2169 SDMA_DESC1_HEADER_MODE_SHIFT),
2170 (u8)((desc[1] &
2171 SDMA_DESC1_HEADER_DWS_SMASK) >>
2172 SDMA_DESC1_HEADER_DWS_SHIFT));
2173 head++;
2174 head &= sde->sdma_mask;
2175 }
2176}
2177
2178#define SDE_FMT \
2179 "SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"
2180
2181
2182
2183
2184
2185
2186
2187void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
2188{
2189 u16 head, tail;
2190 struct hw_sdma_desc *descqp;
2191 u64 desc[2];
2192 u64 addr;
2193 u8 gen;
2194 u16 len;
2195
2196 head = sde->descq_head & sde->sdma_mask;
2197 tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
2198 seq_printf(s, SDE_FMT, sde->this_idx,
2199 sde->cpu,
2200 sdma_state_name(sde->state.current_state),
2201 (unsigned long long)read_sde_csr(sde, SD(CTRL)),
2202 (unsigned long long)read_sde_csr(sde, SD(STATUS)),
2203 (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
2204 (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
2205 (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
2206 (unsigned long long)le64_to_cpu(*sde->head_dma),
2207 (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
2208 (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
2209 (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
2210 (unsigned long long)sde->last_status,
2211 (unsigned long long)sde->ahg_bits,
2212 sde->tx_tail,
2213 sde->tx_head,
2214 sde->descq_tail,
2215 sde->descq_head,
2216 !list_empty(&sde->flushlist),
2217 sde->descq_full_count,
2218 (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
2219
2220
2221 while (head != tail) {
2222 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2223
2224 descqp = &sde->descq[head];
2225 desc[0] = le64_to_cpu(descqp->qw[0]);
2226 desc[1] = le64_to_cpu(descqp->qw[1]);
2227 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2228 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2229 'H' : '-';
2230 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2231 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2232 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2233 & SDMA_DESC0_PHY_ADDR_MASK;
2234 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2235 & SDMA_DESC1_GENERATION_MASK;
2236 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2237 & SDMA_DESC0_BYTE_COUNT_MASK;
2238 seq_printf(s,
2239 "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2240 head, flags, addr, gen, len);
2241 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2242 seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
2243 (u8)((desc[1] &
2244 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2245 SDMA_DESC1_HEADER_INDEX_SHIFT),
2246 (u8)((desc[1] &
2247 SDMA_DESC1_HEADER_MODE_SMASK) >>
2248 SDMA_DESC1_HEADER_MODE_SHIFT));
2249 head = (head + 1) & sde->sdma_mask;
2250 }
2251}
2252

/*
 * Insert the current generation into descriptor qword 1.  The generation
 * is the two bits of descq_tail just above the ring index, so it changes
 * each time the ring wraps and lets the hardware detect stale entries.
 */
2257static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
2258{
2259 u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;
2260
2261 qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
2262 qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
2263 << SDMA_DESC1_GENERATION_SHIFT;
2264 return qw1;
2265}
2266

/*
 * Copy a tx request's descriptors into the hardware ring.
 *
 * The caller holds tail_lock and has already verified there is room, so
 * this routine only writes descriptors, records the request in tx_ring,
 * and returns the new tail for the caller to write to the tail CSR.
 *
 * For AHG modes the generation is deliberately not inserted into the
 * header-update descriptors that immediately follow the first one.
 */
2283static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
2284{
2285 int i;
2286 u16 tail;
2287 struct sdma_desc *descp = tx->descp;
2288 u8 skip = 0, mode = ahg_mode(tx);
2289
2290 tail = sde->descq_tail & sde->sdma_mask;
2291 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2292 sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
2293 trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
2294 tail, &sde->descq[tail]);
2295 tail = ++sde->descq_tail & sde->sdma_mask;
2296 descp++;
2297 if (mode > SDMA_AHG_APPLY_UPDATE1)
2298 skip = mode >> 1;
2299 for (i = 1; i < tx->num_desc; i++, descp++) {
2300 u64 qw1;
2301
2302 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2303 if (skip) {
2304
2305 qw1 = descp->qw[1];
2306 skip--;
2307 } else {
2308
2309 qw1 = add_gen(sde, descp->qw[1]);
2310 }
2311 sde->descq[tail].qw[1] = cpu_to_le64(qw1);
2312 trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
2313 tail, &sde->descq[tail]);
2314 tail = ++sde->descq_tail & sde->sdma_mask;
2315 }
2316 tx->next_descq_idx = tail;
2317#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2318 tx->sn = sde->tail_sn++;
2319 trace_hfi1_sdma_in_sn(sde, tx->sn);
2320 WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
2321#endif
2322 sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
2323 sde->desc_avail -= tx->num_desc;
2324 return tail;
2325}

/*
 * Refresh desc_avail and, if there is still not enough room for this tx,
 * put the caller to sleep on its iowait (when a sleep handler exists) or
 * return -EBUSY.  Returns -EAGAIN once enough descriptors are free.
 */
2330static int sdma_check_progress(
2331 struct sdma_engine *sde,
2332 struct iowait *wait,
2333 struct sdma_txreq *tx)
2334{
2335 int ret;
2336
2337 sde->desc_avail = sdma_descq_freecnt(sde);
2338 if (tx->num_desc <= sde->desc_avail)
2339 return -EAGAIN;
2340
2341 if (wait && wait->sleep) {
2342 unsigned seq;
2343
2344 seq = raw_seqcount_begin(
2345 (const seqcount_t *)&sde->head_lock.seqcount);
2346 ret = wait->sleep(sde, wait, tx, seq);
2347 if (ret == -EAGAIN)
2348 sde->desc_avail = sdma_descq_freecnt(sde);
2349 } else {
2350 ret = -EBUSY;
2351 }
2352 return ret;
2353}
2354

/**
 * sdma_send_txreq() - submit a tx request to the ring
 * @sde: sdma engine to use
 * @wait: iowait structure to use when the ring is full (may be NULL)
 * @tx: sdma_txreq to submit
 *
 * Return:
 * 0 - success,
 * -EINVAL - the request is incomplete (tx->tlen != 0),
 * -EBUSY - no descriptor space and no wait/sleep handler was supplied,
 * -ECOMM - the engine is not running; the request was queued for flush,
 * or the return value of the wait->sleep() callback.
 */
2369int sdma_send_txreq(struct sdma_engine *sde,
2370 struct iowait *wait,
2371 struct sdma_txreq *tx)
2372{
2373 int ret = 0;
2374 u16 tail;
2375 unsigned long flags;
2376
2377
2378 if (unlikely(tx->tlen))
2379 return -EINVAL;
2380 tx->wait = wait;
2381 spin_lock_irqsave(&sde->tail_lock, flags);
2382retry:
2383 if (unlikely(!__sdma_running(sde)))
2384 goto unlock_noconn;
2385 if (unlikely(tx->num_desc > sde->desc_avail))
2386 goto nodesc;
2387 tail = submit_tx(sde, tx);
2388 if (wait)
2389 iowait_sdma_inc(wait);
2390 sdma_update_tail(sde, tail);
2391unlock:
2392 spin_unlock_irqrestore(&sde->tail_lock, flags);
2393 return ret;
2394unlock_noconn:
2395 if (wait)
2396 iowait_sdma_inc(wait);
2397 tx->next_descq_idx = 0;
2398#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2399 tx->sn = sde->tail_sn++;
2400 trace_hfi1_sdma_in_sn(sde, tx->sn);
2401#endif
2402 spin_lock(&sde->flushlist_lock);
2403 list_add_tail(&tx->list, &sde->flushlist);
2404 spin_unlock(&sde->flushlist_lock);
2405 if (wait) {
2406 wait->tx_count++;
2407 wait->count += tx->num_desc;
2408 }
2409 schedule_work(&sde->flush_worker);
2410 ret = -ECOMM;
2411 goto unlock;
2412nodesc:
2413 ret = sdma_check_progress(sde, wait, tx);
2414 if (ret == -EAGAIN) {
2415 ret = 0;
2416 goto retry;
2417 }
2418 sde->descq_full_count++;
2419 goto unlock;
2420}
2421
2449
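/**
 * sdma_send_txlist() - submit a list of tx reqs to the ring
 * @sde: sdma engine to use
 * @wait: iowait structure to use when the ring is full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: set to the number of requests consumed, i.e. those submitted
 *             to the ring plus those moved to the flush list
 *
 * Submits the whole list under a single acquisition of the tail lock and
 * writes the hardware tail only periodically (gated by
 * SDMA_TAIL_UPDATE_THRESH) to limit MMIO traffic.  Requests still on
 * @tx_list on return were neither submitted nor flushed.
 *
 * Return: 0 on success, -EINVAL if a tx is incomplete, -ECOMM if the engine
 * is not running (remaining requests are moved to the flush list), -EBUSY if
 * the ring is full and no wait structure was supplied, or the value returned
 * by the iowait sleep callback.
 */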
2450int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
2451 struct list_head *tx_list, u32 *count_out)
2452{
2453 struct sdma_txreq *tx, *tx_next;
2454 int ret = 0;
2455 unsigned long flags;
2456 u16 tail = INVALID_TAIL;
2457 u32 submit_count = 0, flush_count = 0, total_count;
2458
2459 spin_lock_irqsave(&sde->tail_lock, flags);
2460retry:
2461 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2462 tx->wait = wait;
2463 if (unlikely(!__sdma_running(sde)))
2464 goto unlock_noconn;
2465 if (unlikely(tx->num_desc > sde->desc_avail))
2466 goto nodesc;
2467 if (unlikely(tx->tlen)) {
2468 ret = -EINVAL;
2469 goto update_tail;
2470 }
2471 list_del_init(&tx->list);
2472 tail = submit_tx(sde, tx);
2473 submit_count++;
2474 if (tail != INVALID_TAIL &&
2475 (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
2476 sdma_update_tail(sde, tail);
2477 tail = INVALID_TAIL;
2478 }
2479 }
2480update_tail:
2481 total_count = submit_count + flush_count;
2482 if (wait)
2483 iowait_sdma_add(wait, total_count);
2484 if (tail != INVALID_TAIL)
2485 sdma_update_tail(sde, tail);
2486 spin_unlock_irqrestore(&sde->tail_lock, flags);
2487 *count_out = total_count;
2488 return ret;
2489unlock_noconn:
2490 spin_lock(&sde->flushlist_lock);
2491 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2492 tx->wait = wait;
2493 list_del_init(&tx->list);
2494 tx->next_descq_idx = 0;
2495#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2496 tx->sn = sde->tail_sn++;
2497 trace_hfi1_sdma_in_sn(sde, tx->sn);
2498#endif
2499 list_add_tail(&tx->list, &sde->flushlist);
2500 flush_count++;
2501 if (wait) {
2502 wait->tx_count++;
2503 wait->count += tx->num_desc;
2504 }
2505 }
2506 spin_unlock(&sde->flushlist_lock);
2507 schedule_work(&sde->flush_worker);
2508 ret = -ECOMM;
2509 goto update_tail;
2510nodesc:
2511 ret = sdma_check_progress(sde, wait, tx);
2512 if (ret == -EAGAIN) {
2513 ret = 0;
2514 goto retry;
2515 }
2516 sde->descq_full_count++;
2517 goto update_tail;
2518}
2519
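/*
 * Locked wrapper around __sdma_process_event(): takes the tail lock and head
 * seqlock, runs the state machine, and if the engine ends up in the running
 * state, processes any newly available descriptor space.
 */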
2520static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
2521{
2522 unsigned long flags;
2523
2524 spin_lock_irqsave(&sde->tail_lock, flags);
2525 write_seqlock(&sde->head_lock);
2526
2527 __sdma_process_event(sde, event);
2528
2529 if (sde->state.current_state == sdma_state_s99_running)
2530 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
2531
2532 write_sequnlock(&sde->head_lock);
2533 spin_unlock_irqrestore(&sde->tail_lock, flags);
2534}
2535
2536static void __sdma_process_event(struct sdma_engine *sde,
2537 enum sdma_events event)
2538{
2539 struct sdma_state *ss = &sde->state;
2540 int need_progress = 0;
2541
2542
2543#ifdef CONFIG_SDMA_VERBOSITY
2544 dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
2545 sdma_state_names[ss->current_state],
2546 sdma_event_names[event]);
2547#endif
2548
2549 switch (ss->current_state) {
2550 case sdma_state_s00_hw_down:
2551 switch (event) {
2552 case sdma_event_e00_go_hw_down:
2553 break;
2554 case sdma_event_e30_go_running:
			/*
			 * If down, but running requested (usually the
			 * result of a chip reset), start the hardware
			 * bring-up as if e10_go_hw_start had been seen.
			 */
2562 ss->go_s99_running = 1;
			/* fall through -- and start the DMA engine */
2564 case sdma_event_e10_go_hw_start:
			/* this reference means the state machine is started */
2566 sdma_get(&sde->state);
2567 sdma_set_state(sde,
2568 sdma_state_s10_hw_start_up_halt_wait);
2569 break;
2570 case sdma_event_e15_hw_halt_done:
2571 break;
2572 case sdma_event_e25_hw_clean_up_done:
2573 break;
2574 case sdma_event_e40_sw_cleaned:
2575 sdma_sw_tear_down(sde);
2576 break;
2577 case sdma_event_e50_hw_cleaned:
2578 break;
2579 case sdma_event_e60_hw_halted:
2580 break;
2581 case sdma_event_e70_go_idle:
2582 break;
2583 case sdma_event_e80_hw_freeze:
2584 break;
2585 case sdma_event_e81_hw_frozen:
2586 break;
2587 case sdma_event_e82_hw_unfreeze:
2588 break;
2589 case sdma_event_e85_link_down:
2590 break;
2591 case sdma_event_e90_sw_halted:
2592 break;
2593 }
2594 break;
2595
2596 case sdma_state_s10_hw_start_up_halt_wait:
2597 switch (event) {
2598 case sdma_event_e00_go_hw_down:
2599 sdma_set_state(sde, sdma_state_s00_hw_down);
2600 sdma_sw_tear_down(sde);
2601 break;
2602 case sdma_event_e10_go_hw_start:
2603 break;
2604 case sdma_event_e15_hw_halt_done:
2605 sdma_set_state(sde,
2606 sdma_state_s15_hw_start_up_clean_wait);
2607 sdma_start_hw_clean_up(sde);
2608 break;
2609 case sdma_event_e25_hw_clean_up_done:
2610 break;
2611 case sdma_event_e30_go_running:
2612 ss->go_s99_running = 1;
2613 break;
2614 case sdma_event_e40_sw_cleaned:
2615 break;
2616 case sdma_event_e50_hw_cleaned:
2617 break;
2618 case sdma_event_e60_hw_halted:
2619 schedule_work(&sde->err_halt_worker);
2620 break;
2621 case sdma_event_e70_go_idle:
2622 ss->go_s99_running = 0;
2623 break;
2624 case sdma_event_e80_hw_freeze:
2625 break;
2626 case sdma_event_e81_hw_frozen:
2627 break;
2628 case sdma_event_e82_hw_unfreeze:
2629 break;
2630 case sdma_event_e85_link_down:
2631 break;
2632 case sdma_event_e90_sw_halted:
2633 break;
2634 }
2635 break;
2636
2637 case sdma_state_s15_hw_start_up_clean_wait:
2638 switch (event) {
2639 case sdma_event_e00_go_hw_down:
2640 sdma_set_state(sde, sdma_state_s00_hw_down);
2641 sdma_sw_tear_down(sde);
2642 break;
2643 case sdma_event_e10_go_hw_start:
2644 break;
2645 case sdma_event_e15_hw_halt_done:
2646 break;
2647 case sdma_event_e25_hw_clean_up_done:
2648 sdma_hw_start_up(sde);
2649 sdma_set_state(sde, ss->go_s99_running ?
2650 sdma_state_s99_running :
2651 sdma_state_s20_idle);
2652 break;
2653 case sdma_event_e30_go_running:
2654 ss->go_s99_running = 1;
2655 break;
2656 case sdma_event_e40_sw_cleaned:
2657 break;
2658 case sdma_event_e50_hw_cleaned:
2659 break;
2660 case sdma_event_e60_hw_halted:
2661 break;
2662 case sdma_event_e70_go_idle:
2663 ss->go_s99_running = 0;
2664 break;
2665 case sdma_event_e80_hw_freeze:
2666 break;
2667 case sdma_event_e81_hw_frozen:
2668 break;
2669 case sdma_event_e82_hw_unfreeze:
2670 break;
2671 case sdma_event_e85_link_down:
2672 break;
2673 case sdma_event_e90_sw_halted:
2674 break;
2675 }
2676 break;
2677
2678 case sdma_state_s20_idle:
2679 switch (event) {
2680 case sdma_event_e00_go_hw_down:
2681 sdma_set_state(sde, sdma_state_s00_hw_down);
2682 sdma_sw_tear_down(sde);
2683 break;
2684 case sdma_event_e10_go_hw_start:
2685 break;
2686 case sdma_event_e15_hw_halt_done:
2687 break;
2688 case sdma_event_e25_hw_clean_up_done:
2689 break;
2690 case sdma_event_e30_go_running:
2691 sdma_set_state(sde, sdma_state_s99_running);
2692 ss->go_s99_running = 1;
2693 break;
2694 case sdma_event_e40_sw_cleaned:
2695 break;
2696 case sdma_event_e50_hw_cleaned:
2697 break;
2698 case sdma_event_e60_hw_halted:
2699 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2700 schedule_work(&sde->err_halt_worker);
2701 break;
2702 case sdma_event_e70_go_idle:
2703 break;
2704 case sdma_event_e85_link_down:
			/* fall through */
2706 case sdma_event_e80_hw_freeze:
2707 sdma_set_state(sde, sdma_state_s80_hw_freeze);
2708 atomic_dec(&sde->dd->sdma_unfreeze_count);
2709 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2710 break;
2711 case sdma_event_e81_hw_frozen:
2712 break;
2713 case sdma_event_e82_hw_unfreeze:
2714 break;
2715 case sdma_event_e90_sw_halted:
2716 break;
2717 }
2718 break;
2719
2720 case sdma_state_s30_sw_clean_up_wait:
2721 switch (event) {
2722 case sdma_event_e00_go_hw_down:
2723 sdma_set_state(sde, sdma_state_s00_hw_down);
2724 break;
2725 case sdma_event_e10_go_hw_start:
2726 break;
2727 case sdma_event_e15_hw_halt_done:
2728 break;
2729 case sdma_event_e25_hw_clean_up_done:
2730 break;
2731 case sdma_event_e30_go_running:
2732 ss->go_s99_running = 1;
2733 break;
2734 case sdma_event_e40_sw_cleaned:
2735 sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
2736 sdma_start_hw_clean_up(sde);
2737 break;
2738 case sdma_event_e50_hw_cleaned:
2739 break;
2740 case sdma_event_e60_hw_halted:
2741 break;
2742 case sdma_event_e70_go_idle:
2743 ss->go_s99_running = 0;
2744 break;
2745 case sdma_event_e80_hw_freeze:
2746 break;
2747 case sdma_event_e81_hw_frozen:
2748 break;
2749 case sdma_event_e82_hw_unfreeze:
2750 break;
2751 case sdma_event_e85_link_down:
2752 ss->go_s99_running = 0;
2753 break;
2754 case sdma_event_e90_sw_halted:
2755 break;
2756 }
2757 break;
2758
2759 case sdma_state_s40_hw_clean_up_wait:
2760 switch (event) {
2761 case sdma_event_e00_go_hw_down:
2762 sdma_set_state(sde, sdma_state_s00_hw_down);
2763 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2764 break;
2765 case sdma_event_e10_go_hw_start:
2766 break;
2767 case sdma_event_e15_hw_halt_done:
2768 break;
2769 case sdma_event_e25_hw_clean_up_done:
2770 sdma_hw_start_up(sde);
2771 sdma_set_state(sde, ss->go_s99_running ?
2772 sdma_state_s99_running :
2773 sdma_state_s20_idle);
2774 break;
2775 case sdma_event_e30_go_running:
2776 ss->go_s99_running = 1;
2777 break;
2778 case sdma_event_e40_sw_cleaned:
2779 break;
2780 case sdma_event_e50_hw_cleaned:
2781 break;
2782 case sdma_event_e60_hw_halted:
2783 break;
2784 case sdma_event_e70_go_idle:
2785 ss->go_s99_running = 0;
2786 break;
2787 case sdma_event_e80_hw_freeze:
2788 break;
2789 case sdma_event_e81_hw_frozen:
2790 break;
2791 case sdma_event_e82_hw_unfreeze:
2792 break;
2793 case sdma_event_e85_link_down:
2794 ss->go_s99_running = 0;
2795 break;
2796 case sdma_event_e90_sw_halted:
2797 break;
2798 }
2799 break;
2800
2801 case sdma_state_s50_hw_halt_wait:
2802 switch (event) {
2803 case sdma_event_e00_go_hw_down:
2804 sdma_set_state(sde, sdma_state_s00_hw_down);
2805 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2806 break;
2807 case sdma_event_e10_go_hw_start:
2808 break;
2809 case sdma_event_e15_hw_halt_done:
2810 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2811 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2812 break;
2813 case sdma_event_e25_hw_clean_up_done:
2814 break;
2815 case sdma_event_e30_go_running:
2816 ss->go_s99_running = 1;
2817 break;
2818 case sdma_event_e40_sw_cleaned:
2819 break;
2820 case sdma_event_e50_hw_cleaned:
2821 break;
2822 case sdma_event_e60_hw_halted:
2823 schedule_work(&sde->err_halt_worker);
2824 break;
2825 case sdma_event_e70_go_idle:
2826 ss->go_s99_running = 0;
2827 break;
2828 case sdma_event_e80_hw_freeze:
2829 break;
2830 case sdma_event_e81_hw_frozen:
2831 break;
2832 case sdma_event_e82_hw_unfreeze:
2833 break;
2834 case sdma_event_e85_link_down:
2835 ss->go_s99_running = 0;
2836 break;
2837 case sdma_event_e90_sw_halted:
2838 break;
2839 }
2840 break;
2841
2842 case sdma_state_s60_idle_halt_wait:
2843 switch (event) {
2844 case sdma_event_e00_go_hw_down:
2845 sdma_set_state(sde, sdma_state_s00_hw_down);
2846 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2847 break;
2848 case sdma_event_e10_go_hw_start:
2849 break;
2850 case sdma_event_e15_hw_halt_done:
2851 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2852 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2853 break;
2854 case sdma_event_e25_hw_clean_up_done:
2855 break;
2856 case sdma_event_e30_go_running:
2857 ss->go_s99_running = 1;
2858 break;
2859 case sdma_event_e40_sw_cleaned:
2860 break;
2861 case sdma_event_e50_hw_cleaned:
2862 break;
2863 case sdma_event_e60_hw_halted:
2864 schedule_work(&sde->err_halt_worker);
2865 break;
2866 case sdma_event_e70_go_idle:
2867 ss->go_s99_running = 0;
2868 break;
2869 case sdma_event_e80_hw_freeze:
2870 break;
2871 case sdma_event_e81_hw_frozen:
2872 break;
2873 case sdma_event_e82_hw_unfreeze:
2874 break;
2875 case sdma_event_e85_link_down:
2876 break;
2877 case sdma_event_e90_sw_halted:
2878 break;
2879 }
2880 break;
2881
2882 case sdma_state_s80_hw_freeze:
2883 switch (event) {
2884 case sdma_event_e00_go_hw_down:
2885 sdma_set_state(sde, sdma_state_s00_hw_down);
2886 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2887 break;
2888 case sdma_event_e10_go_hw_start:
2889 break;
2890 case sdma_event_e15_hw_halt_done:
2891 break;
2892 case sdma_event_e25_hw_clean_up_done:
2893 break;
2894 case sdma_event_e30_go_running:
2895 ss->go_s99_running = 1;
2896 break;
2897 case sdma_event_e40_sw_cleaned:
2898 break;
2899 case sdma_event_e50_hw_cleaned:
2900 break;
2901 case sdma_event_e60_hw_halted:
2902 break;
2903 case sdma_event_e70_go_idle:
2904 ss->go_s99_running = 0;
2905 break;
2906 case sdma_event_e80_hw_freeze:
2907 break;
2908 case sdma_event_e81_hw_frozen:
2909 sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
2910 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2911 break;
2912 case sdma_event_e82_hw_unfreeze:
2913 break;
2914 case sdma_event_e85_link_down:
2915 break;
2916 case sdma_event_e90_sw_halted:
2917 break;
2918 }
2919 break;
2920
2921 case sdma_state_s82_freeze_sw_clean:
2922 switch (event) {
2923 case sdma_event_e00_go_hw_down:
2924 sdma_set_state(sde, sdma_state_s00_hw_down);
2925 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2926 break;
2927 case sdma_event_e10_go_hw_start:
2928 break;
2929 case sdma_event_e15_hw_halt_done:
2930 break;
2931 case sdma_event_e25_hw_clean_up_done:
2932 break;
2933 case sdma_event_e30_go_running:
2934 ss->go_s99_running = 1;
2935 break;
2936 case sdma_event_e40_sw_cleaned:
			/* notify caller this engine is done cleaning */
2938 atomic_dec(&sde->dd->sdma_unfreeze_count);
2939 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2940 break;
2941 case sdma_event_e50_hw_cleaned:
2942 break;
2943 case sdma_event_e60_hw_halted:
2944 break;
2945 case sdma_event_e70_go_idle:
2946 ss->go_s99_running = 0;
2947 break;
2948 case sdma_event_e80_hw_freeze:
2949 break;
2950 case sdma_event_e81_hw_frozen:
2951 break;
2952 case sdma_event_e82_hw_unfreeze:
2953 sdma_hw_start_up(sde);
2954 sdma_set_state(sde, ss->go_s99_running ?
2955 sdma_state_s99_running :
2956 sdma_state_s20_idle);
2957 break;
2958 case sdma_event_e85_link_down:
2959 break;
2960 case sdma_event_e90_sw_halted:
2961 break;
2962 }
2963 break;
2964
2965 case sdma_state_s99_running:
2966 switch (event) {
2967 case sdma_event_e00_go_hw_down:
2968 sdma_set_state(sde, sdma_state_s00_hw_down);
2969 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2970 break;
2971 case sdma_event_e10_go_hw_start:
2972 break;
2973 case sdma_event_e15_hw_halt_done:
2974 break;
2975 case sdma_event_e25_hw_clean_up_done:
2976 break;
2977 case sdma_event_e30_go_running:
2978 break;
2979 case sdma_event_e40_sw_cleaned:
2980 break;
2981 case sdma_event_e50_hw_cleaned:
2982 break;
2983 case sdma_event_e60_hw_halted:
2984 need_progress = 1;
2985 sdma_err_progress_check_schedule(sde);
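			/* fall through */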
2986 case sdma_event_e90_sw_halted:
			/*
			 * SW initiated halt does not perform the engine
			 * progress check
			 */
2991 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2992 schedule_work(&sde->err_halt_worker);
2993 break;
2994 case sdma_event_e70_go_idle:
2995 sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
2996 break;
2997 case sdma_event_e85_link_down:
2998 ss->go_s99_running = 0;
			/* fall through */
3000 case sdma_event_e80_hw_freeze:
3001 sdma_set_state(sde, sdma_state_s80_hw_freeze);
3002 atomic_dec(&sde->dd->sdma_unfreeze_count);
3003 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
3004 break;
3005 case sdma_event_e81_hw_frozen:
3006 break;
3007 case sdma_event_e82_hw_unfreeze:
3008 break;
3009 }
3010 break;
3011 }
3012
3013 ss->last_event = event;
3014 if (need_progress)
3015 sdma_make_progress(sde, 0);
3016}
3017
3030
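/*
 * _extend_sdma_tx_descs() - extend a txreq's descriptor array
 *
 * Called once the nominal allocation of descriptors embedded in the
 * sdma_txreq is exhausted.  The array is bumped straight to MAX_DESC
 * entries, with the last entry reserved for a coalesce buffer so that
 * packets with more than MAX_DESC fragments can still be sent.
 */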
3031static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3032{
3033 int i;
3034
	/* Handle the last descriptor specially */
3036 if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
		/* if tlen is 0, it is for padding: release the last descriptor */
3038 if (!tx->tlen) {
3039 tx->desc_limit = MAX_DESC;
3040 } else if (!tx->coalesce_buf) {
			/* allocate coalesce buffer with space for padding */
3042 tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
3043 GFP_ATOMIC);
3044 if (!tx->coalesce_buf)
3045 goto enomem;
3046 tx->coalesce_idx = 0;
3047 }
3048 return 0;
3049 }
3050
3051 if (unlikely(tx->num_desc == MAX_DESC))
3052 goto enomem;
3053
3054 tx->descp = kmalloc_array(
3055 MAX_DESC,
3056 sizeof(struct sdma_desc),
3057 GFP_ATOMIC);
3058 if (!tx->descp)
3059 goto enomem;
3060
	/* reserve the last descriptor for the coalesce buffer */
3062 tx->desc_limit = MAX_DESC - 1;
	/* copy the descriptors already built */
3064 for (i = 0; i < tx->num_desc; i++)
3065 tx->descp[i] = tx->descs[i];
3066 return 0;
3067enomem:
3068 __sdma_txclean(dd, tx);
3069 return -ENOMEM;
3070}
3071
3087
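/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * Extends the descriptor array via _extend_sdma_tx_descs().  If a coalesce
 * buffer is in use, the fragment described by @kvaddr or @page/@offset and
 * @len is copied into it; once the whole packet has been gathered, the
 * buffer is padded to a dword boundary, DMA mapped and added as a single
 * descriptor.
 *
 * Return: a negative errno on error, 0 when coalescing (the caller must not
 * add a descriptor for this fragment), or 1 when the caller should populate
 * the descriptor as usual.
 */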
3088int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
3089 int type, void *kvaddr, struct page *page,
3090 unsigned long offset, u16 len)
3091{
3092 int pad_len, rval;
3093 dma_addr_t addr;
3094
3095 rval = _extend_sdma_tx_descs(dd, tx);
3096 if (rval) {
3097 __sdma_txclean(dd, tx);
3098 return rval;
3099 }
3100
	/* If a coalesce buffer is allocated, copy the data into it */
3102 if (tx->coalesce_buf) {
3103 if (type == SDMA_MAP_NONE) {
3104 __sdma_txclean(dd, tx);
3105 return -EINVAL;
3106 }
3107
3108 if (type == SDMA_MAP_PAGE) {
3109 kvaddr = kmap(page);
3110 kvaddr += offset;
3111 } else if (WARN_ON(!kvaddr)) {
3112 __sdma_txclean(dd, tx);
3113 return -EINVAL;
3114 }
3115
3116 memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
3117 tx->coalesce_idx += len;
3118 if (type == SDMA_MAP_PAGE)
3119 kunmap(page);
3120
		/* If there is more data to gather, return for the next fragment */
3122 if (tx->tlen - tx->coalesce_idx)
3123 return 0;
3124
		/* Whole packet is received; pad it to a dword boundary */
3126 pad_len = tx->packet_len & (sizeof(u32) - 1);
3127 if (pad_len) {
3128 pad_len = sizeof(u32) - pad_len;
3129 memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
			/* padding for the coalesced packet is handled here */
3131 tx->packet_len += pad_len;
3132 tx->tlen += pad_len;
3133 }
3134
		/* dma map the coalesce buffer */
3136 addr = dma_map_single(&dd->pcidev->dev,
3137 tx->coalesce_buf,
3138 tx->tlen,
3139 DMA_TO_DEVICE);
3140
3141 if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
3142 __sdma_txclean(dd, tx);
3143 return -ENOSPC;
3144 }
3145
		/* add a single descriptor for the coalesce buffer */
3147 tx->desc_limit = MAX_DESC;
3148 return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
3149 addr, tx->tlen);
3150 }
3151
3152 return 1;
3153}
3154
3155
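/* Update each engine's SLID check CSR when the LID or LMC changes */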
3156void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
3157{
3158 struct sdma_engine *sde;
3159 int i;
3160 u64 sreg;
3161
3162 sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
3163 SD(CHECK_SLID_MASK_SHIFT)) |
3164 (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
3165 SD(CHECK_SLID_VALUE_SHIFT));
3166
3167 for (i = 0; i < dd->num_sdma; i++) {
3168 hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
3169 i, (u32)sreg);
3170 sde = &dd->per_sdma[i];
3171 write_sde_csr(sde, SD(CHECK_SLID), sreg);
3172 }
3173}
3174
3175
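/* tx is not dword sized - add a pad descriptor and close the request */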
3176int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3177{
3178 int rval = 0;
3179
3180 tx->num_desc++;
3181 if ((unlikely(tx->num_desc == tx->desc_limit))) {
3182 rval = _extend_sdma_tx_descs(dd, tx);
3183 if (rval) {
3184 __sdma_txclean(dd, tx);
3185 return rval;
3186 }
3187 }
	/* finish the descriptor counted above with the pad mapping */
3189 make_tx_sdma_desc(
3190 tx,
3191 SDMA_MAP_NONE,
3192 dd->sdma_pad_phys,
3193 sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
3194 _sdma_close_tx(dd, tx);
3195 return rval;
3196}
3197
3198
3199
3200
3201
3202
3203
3204
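/*
 * Add AHG (automatic header generation) updates to a sdma_txreq.
 *
 * Depending on the number of update words, this consumes up to three
 * descriptors at the beginning of the request.
 */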
3205void _sdma_txreq_ahgadd(
3206 struct sdma_txreq *tx,
3207 u8 num_ahg,
3208 u8 ahg_entry,
3209 u32 *ahg,
3210 u8 ahg_hlen)
3211{
3212 u32 i, shift = 0, desc = 0;
3213 u8 mode;
3214
3215 WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
3216
3217 if (num_ahg == 1)
3218 mode = SDMA_AHG_APPLY_UPDATE1;
3219 else if (num_ahg <= 5)
3220 mode = SDMA_AHG_APPLY_UPDATE2;
3221 else
3222 mode = SDMA_AHG_APPLY_UPDATE3;
3223 tx->num_desc++;
3224
3225 switch (mode) {
3226 case SDMA_AHG_APPLY_UPDATE3:
3227 tx->num_desc++;
3228 tx->descs[2].qw[0] = 0;
3229 tx->descs[2].qw[1] = 0;
		/* fall through */
3231 case SDMA_AHG_APPLY_UPDATE2:
3232 tx->num_desc++;
3233 tx->descs[1].qw[0] = 0;
3234 tx->descs[1].qw[1] = 0;
3235 break;
3236 }
3237 ahg_hlen >>= 2;
3238 tx->descs[0].qw[1] |=
3239 (((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
3240 << SDMA_DESC1_HEADER_INDEX_SHIFT) |
3241 (((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
3242 << SDMA_DESC1_HEADER_DWS_SHIFT) |
3243 (((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
3244 << SDMA_DESC1_HEADER_MODE_SHIFT) |
3245 (((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
3246 << SDMA_DESC1_HEADER_UPDATE1_SHIFT);
3247 for (i = 0; i < (num_ahg - 1); i++) {
3248 if (!shift && !(i & 2))
3249 desc++;
3250 tx->descs[desc].qw[!!(i & 2)] |=
3251 (((u64)ahg[i + 1])
3252 << shift);
3253 shift = (shift + 32) & 63;
3254 }
3255}
3256
3257
3258
3259
3260
3261
3262
3263
3264
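/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return: 0-31 when successful, -EINVAL when @sde is NULL, -ENOSPC when no
 * free entry is available.
 *
 * Illustrative pairing with sdma_ahg_free() (hypothetical caller code; the
 * entry would normally be used when building the request, e.g. via
 * sdma_txinit_ahg()):
 *
 *	int ahg = sdma_ahg_alloc(sde);
 *
 *	if (ahg >= 0) {
 *		... build AHG-based headers using index ahg ...
 *		sdma_ahg_free(sde, ahg);
 *	}
 */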
3265int sdma_ahg_alloc(struct sdma_engine *sde)
3266{
3267 int nr;
3268 int oldbit;
3269
3270 if (!sde) {
3271 trace_hfi1_ahg_allocate(sde, -EINVAL);
3272 return -EINVAL;
3273 }
3274 while (1) {
3275 nr = ffz(ACCESS_ONCE(sde->ahg_bits));
3276 if (nr > 31) {
3277 trace_hfi1_ahg_allocate(sde, -ENOSPC);
3278 return -ENOSPC;
3279 }
3280 oldbit = test_and_set_bit(nr, &sde->ahg_bits);
3281 if (!oldbit)
3282 break;
3283 cpu_relax();
3284 }
3285 trace_hfi1_ahg_allocate(sde, nr);
3286 return nr;
3287}
3288
3289
3290
3291
3292
3293
3294
3295
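/**
 * sdma_ahg_free - release an AHG entry
 * @sde: engine the entry was allocated from
 * @ahg_index: index to free (out-of-range values are ignored)
 */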
3296void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
3297{
3298 if (!sde)
3299 return;
3300 trace_hfi1_ahg_deallocate(sde, ahg_index);
3301 if (ahg_index < 0 || ahg_index > 31)
3302 return;
3303 clear_bit(ahg_index, &sde->ahg_bits);
3304}
3305
3306
3307
3308
3309
3310
3311
3312
3313
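/*
 * SPC freeze handling for SDMA engines.  Called when the driver knows the
 * SPC is going into a freeze but before the freeze is fully settled
 * (generally from an error interrupt).  Every engine is pulled out of the
 * running state so no more entries can be queued; the wait for the engines
 * to actually stop happens later, in sdma_freeze().
 */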
3314void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
3315{
3316 int i;
3317 enum sdma_events event = link_down ? sdma_event_e85_link_down :
3318 sdma_event_e80_hw_freeze;
3319
	/* set up the count for the wait done in sdma_freeze(); do not wait here */
3321 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3322
	/* tell all engines to stop running */
3324 for (i = 0; i < dd->num_sdma; i++)
3325 sdma_process_event(&dd->per_sdma[i], event);
3326
	/* sdma_freeze() will wait for all engines to have stopped */
3328}
3329
3330
3331
3332
3333
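/*
 * SPC freeze handling for SDMA engines.  Called after the SPC is fully
 * frozen.
 */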
3334void sdma_freeze(struct hfi1_devdata *dd)
3335{
3336 int i;
3337 int ret;
3338
	/*
	 * Make sure all engines have moved out of the running state before
	 * continuing.
	 */
3343 ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
3344 atomic_read(&dd->sdma_unfreeze_count) <=
3345 0);
	/* interrupted, or the count went negative (unloading): just exit */
3347 if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
3348 return;
3349
	/* set up the count for the next wait */
3351 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3352
	/* tell all engines the SPC is frozen so they can start cleaning */
3354 for (i = 0; i < dd->num_sdma; i++)
3355 sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);
3356
	/*
	 * Wait for every engine to finish its software clean (signalled by
	 * the e40_sw_cleaned handler) before returning.
	 */
3362 (void)wait_event_interruptible(dd->sdma_unfreeze_wq,
3363 atomic_read(&dd->sdma_unfreeze_count) <= 0);
	/* no need to check the result - we are done either way */
3365}
3366
3367
3368
3369
3370
3371
3372
3373
3374
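/*
 * SPC unfreeze handling for SDMA engines.  Called after the SPC is
 * unfrozen.  Each engine's state machine is notified so a frozen engine can
 * restart its hardware and return to the idle or running state.
 */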
3375void sdma_unfreeze(struct hfi1_devdata *dd)
3376{
3377 int i;
3378
	/* tell all engines to start their freeze clean up */
3380 for (i = 0; i < dd->num_sdma; i++)
3381 sdma_process_event(&dd->per_sdma[i],
3382 sdma_event_e82_hw_unfreeze);
3383}
3384
3385
3386
3387
3388
3389
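/*
 * _sdma_engine_progress_schedule() - force a progress interrupt
 * @sde: sdma_engine to schedule progress on
 *
 * Writes the engine's progress mask to the CCE_INT_FORCE CSR so the
 * engine's interrupt handler runs and makes progress on the ring.
 */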
3390void _sdma_engine_progress_schedule(
3391 struct sdma_engine *sde)
3392{
3393 trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
3394
3395 write_csr(sde->dd,
3396 CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
3397 sde->progress_mask);
3398}
3399