#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

/* must be a power of 2 >= 64 <= 32768 */
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");

static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before an interrupt");

#define SDMA_WAIT_BATCH_SIZE 20

/* max wait time, in ms, for an SDMA engine to halt */
#define SDMA_ERR_HALT_TIMEOUT 10

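/* shorthand: SD(name) expands to the SEND_DMA_<name> CSR symbol */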
#define SD(name) SEND_DMA_##name
/* all engine-error status bits that require the halt/restart sequence */
#define ALL_SDMA_ENG_HALT_ERRS \
	(SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))

/* sdma_sendctrl operations */
#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
#define SDMA_SENDCTRL_OP_HALT      BIT(2)
#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)

/* handy shorthands for the send-egress occupancy fields */
#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT

static const char * const sdma_state_names[] = {
	[sdma_state_s00_hw_down] = "s00_HwDown",
	[sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
	[sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
	[sdma_state_s20_idle] = "s20_Idle",
	[sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
	[sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
	[sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
	[sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
	[sdma_state_s80_hw_freeze] = "s80_HwFreeze",
	[sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
	[sdma_state_s99_running] = "s99_Running",
};

#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = {
	[sdma_event_e00_go_hw_down] = "e00_GoHwDown",
	[sdma_event_e10_go_hw_start] = "e10_GoHwStart",
	[sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
	[sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
	[sdma_event_e30_go_running] = "e30_GoRunning",
	[sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
	[sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
	[sdma_event_e60_hw_halted] = "e60_HwHalted",
	[sdma_event_e70_go_idle] = "e70_GoIdle",
	[sdma_event_e80_hw_freeze] = "e80_HwFreeze",
	[sdma_event_e81_hw_frozen] = "e81_HwFrozen",
	[sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
	[sdma_event_e85_link_down] = "e85_LinkDown",
	[sdma_event_e90_sw_halted] = "e90_SwHalted",
};
#endif

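/*
 * Per-state hardware actions: on entry to a state, sdma_set_state()
 * applies these flags to the SendDmaCtrl CSR via sdma_sendctrl() and
 * latches whether s99_running remains the goal state.
 */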
static const struct sdma_set_state_action sdma_action_table[] = {
	[sdma_state_s00_hw_down] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s10_hw_start_up_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s15_hw_start_up_clean_wait] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s20_idle] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s30_sw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s40_hw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s50_hw_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s60_idle_halt_wait] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s80_hw_freeze] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s82_freeze_sw_clean] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s99_running] = {
		.op_enable = 1,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
		.go_s99_running_totrue = 1,
	},
};

#define SDMA_TAIL_UPDATE_THRESH 0x1F

static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
static void sdma_hw_clean_up_task(unsigned long);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
static void sdma_sw_clean_up_task(unsigned long);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void __sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);

static const char *sdma_state_name(enum sdma_states state)
{
	return sdma_state_names[state];
}

static void sdma_get(struct sdma_state *ss)
{
	kref_get(&ss->kref);
}

static void sdma_complete(struct kref *kref)
{
	struct sdma_state *ss =
		container_of(kref, struct sdma_state, kref);

	complete(&ss->comp);
}

static void sdma_put(struct sdma_state *ss)
{
	kref_put(&ss->kref, sdma_complete);
}

static void sdma_finalput(struct sdma_state *ss)
{
	sdma_put(ss);
	wait_for_completion(&ss->comp);
}

static inline void write_sde_csr(
	struct sdma_engine *sde,
	u32 offset0,
	u64 value)
{
	write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
}

static inline u64 read_sde_csr(
	struct sdma_engine *sde,
	u32 offset0)
{
	return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
}

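/*
 * sdma_wait_for_packet_egress() - wait for this engine's packets to
 * drain from the send egress FIFO.  The poll counter resets whenever
 * the occupancy changes; if no progress is seen for ~500 polls, give
 * up and bounce the link.
 */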
static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
					int pause)
{
	u64 off = 8 * sde->this_idx;
	struct hfi1_devdata *dd = sde->dd;
	int lcnt = 0;
	u64 reg_prev;
	u64 reg = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);

		reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
		reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
		if (reg == 0)
			break;
		/* counter is reset if occupancy count changes */
		if (reg != reg_prev)
			lcnt = 0;
		if (lcnt++ > 500) {
			/* timed out - bounce the link */
			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sde->this_idx, (u32)reg);
			queue_work(dd->pport->link_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		udelay(1);
	}
}

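/*
 * sdma_wait() - wait for packet egress to complete for all SDMA engines
 */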
void sdma_wait(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->num_sdma; i++) {
		struct sdma_engine *sde = &dd->per_sdma[i];

		sdma_wait_for_packet_egress(sde, 0);
	}
}

static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
{
	u64 reg;

	if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
		return;
	reg = cnt;
	reg &= SD(DESC_CNT_CNT_MASK);
	reg <<= SD(DESC_CNT_CNT_SHIFT);
	write_sde_csr(sde, SD(DESC_CNT), reg);
}

static inline void complete_tx(struct sdma_engine *sde,
			       struct sdma_txreq *tx,
			       int res)
{
	/* protect against double complete */
	struct iowait *wait = tx->wait;
	callback_t complete = tx->complete;

#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	trace_hfi1_sdma_out_sn(sde, tx->sn);
	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
		dd_dev_err(sde->dd, "expected %llu got %llu\n",
			   sde->head_sn, tx->sn);
	sde->head_sn++;
#endif
	__sdma_txclean(sde->dd, tx);
	if (complete)
		(*complete)(tx, res);
	if (iowait_sdma_dec(wait))
		iowait_drain_wakeup(wait);
}

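/*
 * sdma_flush() - abort all outstanding work on an engine
 *
 * Completes (with SDMA_TXREQ_S_ABORTED) every txreq still in the
 * descriptor ring and on the flush list, then wakes any iowaits parked
 * on the dmawait list, so no stale submissions survive a clean-up or
 * restart of the engine.
 */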
static void sdma_flush(struct sdma_engine *sde)
{
	struct sdma_txreq *txp, *txp_next;
	LIST_HEAD(flushlist);
	unsigned long flags;
	uint seq;

	/* flush from head to tail */
	sdma_flush_descq(sde);
	spin_lock_irqsave(&sde->flushlist_lock, flags);
	/* copy flush list */
	list_splice_init(&sde->flushlist, &flushlist);
	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
	/* flush from flush list */
	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
	/* wakeup QPs orphaned on the dmawait list */
	do {
		struct iowait *w, *nw;

		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			write_seqlock(&sde->waitlock);
			list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
				if (w->wakeup) {
					w->wakeup(w, SDMA_AVAIL_REASON);
					list_del_init(&w->list);
				}
			}
			write_sequnlock(&sde->waitlock);
		}
	} while (read_seqretry(&sde->waitlock, seq));
}

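/*
 * Fields a work request for flushing the descq ring
 * and the flush list
 *
 * If the engine has been brought back to running during down,
 * the flushing is skipped.
 */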
static void sdma_field_flush(struct work_struct *work)
{
	unsigned long flags;
	struct sdma_engine *sde =
		container_of(work, struct sdma_engine, flush_worker);

	write_seqlock_irqsave(&sde->head_lock, flags);
	if (!__sdma_running(sde))
		sdma_flush(sde);
	write_sequnlock_irqrestore(&sde->head_lock, flags);
}

static void sdma_err_halt_wait(struct work_struct *work)
{
	struct sdma_engine *sde = container_of(work, struct sdma_engine,
					       err_halt_worker);
	u64 statuscsr;
	unsigned long timeout;

	timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
	while (1) {
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
		if (statuscsr)
			break;
		if (time_after(jiffies, timeout)) {
			dd_dev_err(sde->dd,
				   "SDMA engine %d - timeout waiting for engine to halt\n",
				   sde->this_idx);
			/*
			 * Continue anyway.  This could happen if there
			 * was an uncorrectable error in the wrong spot.
			 */
			break;
		}
		usleep_range(80, 120);
	}

	sdma_process_event(sde, sdma_event_e15_hw_halt_done);
}

static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
		unsigned index;
		struct hfi1_devdata *dd = sde->dd;

		for (index = 0; index < dd->num_sdma; index++) {
			struct sdma_engine *curr_sdma = &dd->per_sdma[index];

			if (curr_sdma != sde)
				curr_sdma->progress_check_head =
							curr_sdma->descq_head;
		}
		dd_dev_err(sde->dd,
			   "SDMA engine %d - check scheduled\n",
			   sde->this_idx);
		mod_timer(&sde->err_progress_check_timer, jiffies + 10);
	}
}

static void sdma_err_progress_check(struct timer_list *t)
{
	unsigned index;
	struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);

	dd_dev_err(sde->dd, "SDE progress check event\n");
	for (index = 0; index < sde->dd->num_sdma; index++) {
		struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
		unsigned long flags;

		/* check progress on each engine except the current one */
		if (curr_sde == sde)
			continue;
		/*
		 * We must lock interrupts when acquiring sde->lock,
		 * to avoid a deadlock if interrupt triggers and spins on
		 * the same lock on same CPU
		 */
		spin_lock_irqsave(&curr_sde->tail_lock, flags);
		write_seqlock(&curr_sde->head_lock);

		/* skip non-running queues */
		if (curr_sde->state.current_state != sdma_state_s99_running) {
			write_sequnlock(&curr_sde->head_lock);
			spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
			continue;
		}

		if ((curr_sde->descq_head != curr_sde->descq_tail) &&
		    (curr_sde->descq_head ==
		     curr_sde->progress_check_head))
			__sdma_process_event(curr_sde,
					     sdma_event_e90_sw_halted);
		write_sequnlock(&curr_sde->head_lock);
		spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
	}
	schedule_work(&sde->err_halt_worker);
}

static void sdma_hw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	u64 statuscsr;

	while (1) {
#ifdef CONFIG_SDMA_VERBOSITY
		dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
			   sde->this_idx, slashstrip(__FILE__), __LINE__,
			   __func__);
#endif
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
		if (statuscsr)
			break;
		udelay(10);
	}

	sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
}

static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
	return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}

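/*
 * flush ring for recovery: walk the descriptor ring from head to tail,
 * completing every outstanding txreq with SDMA_TXREQ_S_ABORTED
 */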
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	/* The reason for some of the complexity of this code is that
	 * not all descriptors have corresponding txps.  So, we have to
	 * be able to skip over descs until we wander into the range of
	 * the next txp on the list.
	 */
	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	while (head != tail) {
		/* advance head, wrap if needed */
		head = ++sde->descq_head & sde->sdma_mask;
		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == head) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
			trace_hfi1_sdma_progress(sde, head, tail, txp);
			txp = get_txhead(sde);
		}
		progress++;
	}
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

static void sdma_sw_clean_up_task(unsigned long opaque)
{
	struct sdma_engine *sde = (struct sdma_engine *)opaque;
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	/*
	 * At this point, the following should always be true:
	 * - We are halted, so no more descriptors are getting retired.
	 * - We are not running, so no one is submitting new work.
	 * - Only we can send the e40_sw_cleaned, so we can't start
	 *   running again until we say so.  So, the active list and
	 *   descq are ours to play with.
	 */

	/*
	 * In the error clean up sequence, software clean must be called
	 * before the hardware clean so we can use the hardware head in
	 * the progress routine.  A hardware clean or an SPC unfreeze will
	 * reset the hardware head.
	 *
	 * Process all retired requests.
	 */
	sdma_make_progress(sde, 0);

	sdma_flush(sde);

	/*
	 * Reset our notion of head and tail.
	 * Note that the HW registers have been reset via an earlier
	 * clean up.
	 */
	sde->descq_tail = 0;
	sde->descq_head = 0;
	sde->desc_avail = sdma_descq_freecnt(sde);
	*sde->head_dma = 0;

	__sdma_process_event(sde, sdma_event_e40_sw_cleaned);

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sw_tear_down(struct sdma_engine *sde)
{
	struct sdma_state *ss = &sde->state;

	/* Releasing this reference means the state machine has stopped. */
	sdma_put(ss);

	/* stop waiting for all unfreeze events to complete */
	atomic_set(&sde->dd->sdma_unfreeze_count, -1);
	wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
}

static void sdma_start_hw_clean_up(struct sdma_engine *sde)
{
	tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
}

static void sdma_set_state(struct sdma_engine *sde,
			   enum sdma_states next_state)
{
	struct sdma_state *ss = &sde->state;
	const struct sdma_set_state_action *action = sdma_action_table;
	unsigned op = 0;

	trace_hfi1_sdma_state(
		sde,
		sdma_state_names[ss->current_state],
		sdma_state_names[next_state]);

	/* debugging bookkeeping */
	ss->previous_state = ss->current_state;
	ss->previous_op = ss->current_op;
	ss->current_state = next_state;

	if (ss->previous_state != sdma_state_s99_running &&
	    next_state == sdma_state_s99_running)
		sdma_flush(sde);

	if (action[next_state].op_enable)
		op |= SDMA_SENDCTRL_OP_ENABLE;

	if (action[next_state].op_intenable)
		op |= SDMA_SENDCTRL_OP_INTENABLE;

	if (action[next_state].op_halt)
		op |= SDMA_SENDCTRL_OP_HALT;

	if (action[next_state].op_cleanup)
		op |= SDMA_SENDCTRL_OP_CLEANUP;

	if (action[next_state].go_s99_running_tofalse)
		ss->go_s99_running = 0;

	if (action[next_state].go_s99_running_totrue)
		ss->go_s99_running = 1;

	ss->current_op = op;
	sdma_sendctrl(sde, ss->current_op);
}

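/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.
 *
 * This is currently only used in the verbs initialization to build the tx
 * ring.  An invalid module parameter value falls back to the default.
 */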
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;
	/*
	 * Count must be a power of 2 in [64, 32768]; otherwise
	 * return the default.
	 */
	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}

/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found.  The mapping fields are protected by RCU.
 */
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	struct sdma_vl_map *m;
	u8 vl;

	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
		return -EINVAL;

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return -EINVAL;
	}
	vl = m->engine_to_vl[sde->this_idx];
	rcu_read_unlock();

	return vl;
}

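/**
 * sdma_select_engine_vl() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an engine based on the selector and a vl.
 * The mapping fields are protected by RCU.
 */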
struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl)
{
	struct sdma_vl_map *m;
	struct sdma_map_elem *e;
	struct sdma_engine *rval;

	/* a vl outside the valid range falls back to engine 0 */
	if (vl >= num_vls) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return &dd->per_sdma[0];
	}
	e = m->map[vl & m->mask];
	rval = e->sde[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? &dd->per_sdma[0] : rval;
	trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
	return rval;
}

/**
 * sdma_select_engine_sc() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns an engine based on the selector and an sc.
 */
struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return sdma_select_engine_vl(dd, selector, vl);
}

struct sdma_rht_map_elem {
	u32 mask;
	u8 ctr;
	struct sdma_engine *sde[0];
};

struct sdma_rht_node {
	unsigned long cpu_id;
	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
	struct rhash_head node;
};

#define NR_CPUS_HINT 192

static const struct rhashtable_params sdma_rht_params = {
	.nelem_hint = NR_CPUS_HINT,
	.head_offset = offsetof(struct sdma_rht_node, node),
	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
	.max_size = NR_CPUS,
	.min_size = 8,
	.automatic_shrinking = true,
};

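/*
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * A user-defined sdma engine affinity setting is honored when applicable;
 * otherwise the system default sdma engine mapping is used.
 */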
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	struct sdma_rht_node *rht_node;
	struct sdma_engine *sde = NULL;
	const struct cpumask *current_mask = &current->cpus_allowed;
	unsigned long cpu_id;

	/*
	 * To ensure that always the same sdma engine(s) will be
	 * selected make sure the process is pinned to this CPU only.
	 */
	if (cpumask_weight(current_mask) != 1)
		goto out;

	cpu_id = smp_processor_id();
	rcu_read_lock();
	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
					  sdma_rht_params);

	if (rht_node && rht_node->map[vl]) {
		struct sdma_rht_map_elem *map = rht_node->map[vl];

		sde = map->sde[selector & map->mask];
	}
	rcu_read_unlock();

	if (sde)
		return sde;

out:
	return sdma_select_engine_vl(dd, selector, vl);
}

static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	int i;

	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
		map->sde[map->ctr + i] = map->sde[i];
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
				 struct sdma_engine *sde)
{
	unsigned int i, pow;

	/* only need to check the first ctr entries for a match */
	for (i = 0; i < map->ctr; i++) {
		if (map->sde[i] == sde) {
			memmove(&map->sde[i], &map->sde[i + 1],
				(map->ctr - i - 1) * sizeof(map->sde[0]));
			map->ctr--;
			pow = roundup_pow_of_two(map->ctr ? : 1);
			map->mask = pow - 1;
			sdma_populate_sde_map(map);
			break;
		}
	}
}

/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count)
{
	struct hfi1_devdata *dd = sde->dd;
	cpumask_var_t mask, new_mask;
	unsigned long cpu;
	int ret, vl, sz;
	struct sdma_rht_node *rht_node;

	vl = sdma_engine_get_vl(sde);
	if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
		return -EINVAL;

	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
	if (!ret) {
		free_cpumask_var(mask);
		return -ENOMEM;
	}
	ret = cpulist_parse(buf, mask);
	if (ret)
		goto out_free;

	if (!cpumask_subset(mask, cpu_online_mask)) {
		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
		ret = -EINVAL;
		goto out_free;
	}

	sz = sizeof(struct sdma_rht_map_elem) +
			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));

	mutex_lock(&process_to_sde_mutex);

	for_each_cpu(cpu, mask) {
		/* Check if we have this already mapped */
		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
			cpumask_set_cpu(cpu, new_mask);
			continue;
		}

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (!rht_node) {
			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
			if (!rht_node) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
			if (!rht_node->map[vl]) {
				kfree(rht_node);
				ret = -ENOMEM;
				goto out;
			}
			rht_node->cpu_id = cpu;
			rht_node->map[vl]->mask = 0;
			rht_node->map[vl]->ctr = 1;
			rht_node->map[vl]->sde[0] = sde;

			ret = rhashtable_insert_fast(dd->sdma_rht,
						     &rht_node->node,
						     sdma_rht_params);
			if (ret) {
				kfree(rht_node->map[vl]);
				kfree(rht_node);
				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
					   cpu);
				goto out;
			}

		} else {
			int ctr, pow;

			/* Add new user mappings */
			if (!rht_node->map[vl])
				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);

			if (!rht_node->map[vl]) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl]->ctr++;
			ctr = rht_node->map[vl]->ctr;
			rht_node->map[vl]->sde[ctr - 1] = sde;
			pow = roundup_pow_of_two(ctr);
			rht_node->map[vl]->mask = pow - 1;

			/* Populate the sde map table */
			sdma_populate_sde_map(rht_node->map[vl]);
		}
		cpumask_set_cpu(cpu, new_mask);
	}

	/* Clean up old mappings */
	for_each_cpu(cpu, cpu_online_mask) {
		struct sdma_rht_node *rht_node;

		/* Don't cleanup sdes that are set in the new mask */
		if (cpumask_test_cpu(cpu, mask))
			continue;

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (rht_node) {
			bool empty = true;
			int i;

			/* Remove mappings for old sde */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
				if (rht_node->map[i])
					sdma_cleanup_sde_map(rht_node->map[i],
							     sde);

			/* Free empty hash table entries */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
				if (!rht_node->map[i])
					continue;

				if (rht_node->map[i]->ctr) {
					empty = false;
					break;
				}
			}

			if (empty) {
				ret = rhashtable_remove_fast(dd->sdma_rht,
							     &rht_node->node,
							     sdma_rht_params);
				WARN_ON(ret);

				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
					kfree(rht_node->map[i]);

				kfree(rht_node);
			}
		}
	}

	cpumask_copy(&sde->cpu_mask, new_mask);
out:
	mutex_unlock(&process_to_sde_mutex);
out_free:
	free_cpumask_var(mask);
	free_cpumask_var(new_mask);
	return ret ? : strnlen(buf, PAGE_SIZE);
}

ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	mutex_lock(&process_to_sde_mutex);
	if (cpumask_empty(&sde->cpu_mask))
		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
	else
		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
	mutex_unlock(&process_to_sde_mutex);
	return strnlen(buf, PAGE_SIZE);
}

static void sdma_rht_free(void *ptr, void *arg)
{
	struct sdma_rht_node *rht_node = ptr;
	int i;

	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
		kfree(rht_node->map[i]);

	kfree(rht_node);
}

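/**
 * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * This routine dumps the process to sde mappings per cpu
 */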
void sdma_seqfile_dump_cpu_list(struct seq_file *s,
				struct hfi1_devdata *dd,
				unsigned long cpuid)
{
	struct sdma_rht_node *rht_node;
	int i, j;

	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
					  sdma_rht_params);
	if (!rht_node)
		return;

	seq_printf(s, "cpu%3lu: ", cpuid);
	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
		if (!rht_node->map[i] || !rht_node->map[i]->ctr)
			continue;

		seq_printf(s, " vl%d: [", i);

		for (j = 0; j < rht_node->map[i]->ctr; j++) {
			if (!rht_node->map[i]->sde[j])
				continue;

			if (j > 0)
				seq_puts(s, ",");

			seq_printf(s, " sdma%2d",
				   rht_node->map[i]->sde[j]->this_idx);
		}
		seq_puts(s, " ]");
	}

	seq_puts(s, "\n");
}

/*
 * Free the indicated map struct
 */
static void sdma_map_free(struct sdma_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

/*
 * Handle RCU callback when a new struct is installed via sdma_map_init.
 */
static void sdma_map_rcu_callback(struct rcu_head *list)
{
	struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);

	sdma_map_free(m);
}

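/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to allow for a specification of the number of
 * engines per vl; if NULL, the engines are divided as evenly as
 * possible among the vls.
 *
 * Each map[] entry is sized to a power of two so the lookup is a cheap
 * "selector & mask"; the extra slots wrap back to the first engine
 * assigned to that vl.  RCU protects lookups against the map being
 * replaced while in use.
 */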
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	int i, j;
	int extra, sde_per_vl;
	int engine = 0;
	u8 lvl_engines[OPA_MAX_VLS];
	struct sdma_vl_map *oldmap, *newmap;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return 0;

	if (!vl_engines) {
		/* truncate divide */
		sde_per_vl = dd->num_sdma / num_vls;
		/* extras */
		extra = dd->num_sdma % num_vls;
		vl_engines = lvl_engines;
		/* add extras from last vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(
		sizeof(struct sdma_vl_map) +
			roundup_pow_of_two(num_vls) *
			sizeof(struct sdma_map_elem *),
		GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	/* initialize back-map */
	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
		newmap->engine_to_vl[i] = -1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_engine = engine;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_engines[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(
				sizeof(struct sdma_map_elem) +
					sz * sizeof(struct sdma_engine *),
				GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/* assign engines */
			for (j = 0; j < sz; j++) {
				newmap->map[i]->sde[j] =
					&dd->per_sdma[engine];
				if (++engine >= first_engine + vl_engines[i])
					/* wrap back to first engine */
					engine = first_engine;
			}
			/* assign back-map */
			for (j = 0; j < vl_engines[i]; j++)
				newmap->engine_to_vl[first_engine + j] = i;
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		engine = first_engine + vl_engines[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->sde_map_lock);
	oldmap = rcu_dereference_protected(dd->sdma_map,
					   lockdep_is_held(&dd->sde_map_lock));

	/* publish newmap */
	rcu_assign_pointer(dd->sdma_map, newmap);

	spin_unlock_irq(&dd->sde_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, sdma_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	sdma_map_free(newmap);
	return -ENOMEM;
}

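/**
 * sdma_clean() - clean up allocated memory
 * @dd: struct hfi1_devdata
 * @num_engines: num sdma engines
 *
 * This routine can be called regardless of the success of sdma_init()
 */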
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
	size_t i;
	struct sdma_engine *sde;

	if (dd->sdma_pad_dma) {
		dma_free_coherent(&dd->pcidev->dev, 4,
				  (void *)dd->sdma_pad_dma,
				  dd->sdma_pad_phys);
		dd->sdma_pad_dma = NULL;
		dd->sdma_pad_phys = 0;
	}
	if (dd->sdma_heads_dma) {
		dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
				  (void *)dd->sdma_heads_dma,
				  dd->sdma_heads_phys);
		dd->sdma_heads_dma = NULL;
		dd->sdma_heads_phys = 0;
	}
	for (i = 0; dd->per_sdma && i < num_engines; ++i) {
		sde = &dd->per_sdma[i];

		sde->head_dma = NULL;
		sde->head_phys = 0;

		if (sde->descq) {
			dma_free_coherent(
				&dd->pcidev->dev,
				sde->descq_cnt * sizeof(u64[2]),
				sde->descq,
				sde->descq_phys
			);
			sde->descq = NULL;
			sde->descq_phys = 0;
		}
		kvfree(sde->tx_ring);
		sde->tx_ring = NULL;
	}
	spin_lock_irq(&dd->sde_map_lock);
	sdma_map_free(rcu_access_pointer(dd->sdma_map));
	RCU_INIT_POINTER(dd->sdma_map, NULL);
	spin_unlock_irq(&dd->sde_map_lock);
	synchronize_rcu();
	kfree(dd->per_sdma);
	dd->per_sdma = NULL;

	if (dd->sdma_rht) {
		rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
		kfree(dd->sdma_rht);
		dd->sdma_rht = NULL;
	}
}

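/**
 * sdma_init() - called when device probed
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 *
 * Initializes each sde and its csrs.
 * Interrupts are not required to be enabled.
 *
 * Returns:
 * 0 - success, -errno on failure
 */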
int sdma_init(struct hfi1_devdata *dd, u8 port)
{
	unsigned this_idx;
	struct sdma_engine *sde;
	struct rhashtable *tmp_sdma_rht;
	u16 descq_cnt;
	void *curr_head;
	struct hfi1_pportdata *ppd = dd->pport + port;
	u32 per_sdma_credits;
	uint idle_cnt = sdma_idle_cnt;
	size_t num_engines = chip_sdma_engines(dd);
	int ret = -ENOMEM;

	if (!HFI1_CAP_IS_KSET(SDMA)) {
		HFI1_CAP_CLEAR(SDMA_AHG);
		return 0;
	}
	if (mod_num_sdma &&
	    /* can't exceed chip support */
	    mod_num_sdma <= chip_sdma_engines(dd) &&
	    /* count must be >= vls */
	    mod_num_sdma >= num_vls)
		num_engines = mod_num_sdma;

	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		    chip_sdma_mem_size(dd));

	per_sdma_credits =
		chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);

	/* set up freeze waitqueue */
	init_waitqueue_head(&dd->sdma_unfreeze_wq);
	atomic_set(&dd->sdma_unfreeze_count, 0);

	descq_cnt = sdma_get_descq_cnt();
	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
		    num_engines, descq_cnt);

	/* alloc memory for array of send engines */
	dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
				    GFP_KERNEL, dd->node);
	if (!dd->per_sdma)
		return ret;

	idle_cnt = ns_to_cclock(dd, idle_cnt);
	if (idle_cnt)
		dd->default_desc1 =
			SDMA_DESC1_HEAD_TO_HOST_FLAG;
	else
		dd->default_desc1 =
			SDMA_DESC1_INT_REQ_FLAG;

	if (!sdma_desct_intr)
		sdma_desct_intr = SDMA_DESC_INTR;

	/* Allocate memory for SendDMA descriptor FIFOs */
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		sde->dd = dd;
		sde->ppd = ppd;
		sde->this_idx = this_idx;
		sde->descq_cnt = descq_cnt;
		sde->desc_avail = sdma_descq_freecnt(sde);
		sde->sdma_shift = ilog2(descq_cnt);
		sde->sdma_mask = (1 << sde->sdma_shift) - 1;

		/* create masks for the 3 interrupt sources */
		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
					   this_idx);
		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
						this_idx);
		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
					    this_idx);
		/* a combined mask covering all of them */
		sde->imask = sde->int_mask | sde->progress_mask |
			     sde->idle_mask;

		spin_lock_init(&sde->tail_lock);
		seqlock_init(&sde->head_lock);
		spin_lock_init(&sde->senddmactrl_lock);
		spin_lock_init(&sde->flushlist_lock);
		seqlock_init(&sde->waitlock);
		/* insure there is always a zero bit */
		sde->ahg_bits = 0xfffffffe00000000ULL;

		sdma_set_state(sde, sdma_state_s00_hw_down);

		/* set up reference counting */
		kref_init(&sde->state.kref);
		init_completion(&sde->state.comp);

		INIT_LIST_HEAD(&sde->flushlist);
		INIT_LIST_HEAD(&sde->dmawait);

		sde->tail_csr =
			get_kctxt_csr_addr(dd, this_idx, SD(TAIL));

		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
			     (unsigned long)sde);

		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
			     (unsigned long)sde);
		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
		INIT_WORK(&sde->flush_worker, sdma_field_flush);

		sde->progress_check_head = 0;

		timer_setup(&sde->err_progress_check_timer,
			    sdma_err_progress_check, 0);

		sde->descq = dma_alloc_coherent(&dd->pcidev->dev,
						descq_cnt * sizeof(u64[2]),
						&sde->descq_phys, GFP_KERNEL);
		if (!sde->descq)
			goto bail;
		sde->tx_ring =
			kvzalloc_node(array_size(descq_cnt,
						 sizeof(struct sdma_txreq *)),
				      GFP_KERNEL, dd->node);
		if (!sde->tx_ring)
			goto bail;
	}

	dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
	/* Allocate memory for DMA of head registers to memory */
	dd->sdma_heads_dma = dma_alloc_coherent(&dd->pcidev->dev,
						dd->sdma_heads_size,
						&dd->sdma_heads_phys,
						GFP_KERNEL);
	if (!dd->sdma_heads_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
		goto bail;
	}

	/* Allocate memory for pad */
	dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
					      &dd->sdma_pad_phys, GFP_KERNEL);
	if (!dd->sdma_pad_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
		goto bail;
	}

	/* assign each engine to a different cacheline and init registers */
	curr_head = (void *)dd->sdma_heads_dma;
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		unsigned long phys_offset;

		sde = &dd->per_sdma[this_idx];

		sde->head_dma = curr_head;
		curr_head += L1_CACHE_BYTES;
		phys_offset = (unsigned long)sde->head_dma -
			      (unsigned long)dd->sdma_heads_dma;
		sde->head_phys = dd->sdma_heads_phys + phys_offset;
		init_sdma_regs(sde, per_sdma_credits, idle_cnt);
	}
	dd->flags |= HFI1_HAS_SEND_DMA;
	dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
	dd->num_sdma = num_engines;
	ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
	if (ret < 0)
		goto bail;

	tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
	if (!tmp_sdma_rht) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
	if (ret < 0) {
		kfree(tmp_sdma_rht);
		goto bail;
	}
	dd->sdma_rht = tmp_sdma_rht;

	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
	return 0;

bail:
	sdma_clean(dd, num_engines);
	return ret;
}


/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */
void sdma_all_running(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* move all engines to running */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e30_go_running);
	}
}

/**
 * sdma_all_idle() - called when the link goes down
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the idle state.
 */
void sdma_all_idle(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* idle all engines */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e70_go_idle);
	}
}

/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine is for kicking off the state processing for all required
 * sdma engines.  Interrupts need to be working at this point.
 */
void sdma_start(struct hfi1_devdata *dd)
{
	unsigned i;
	struct sdma_engine *sde;

	/* kick off the engines state processing */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e10_go_hw_start);
	}
}

/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */
void sdma_exit(struct hfi1_devdata *dd)
{
	unsigned this_idx;
	struct sdma_engine *sde;

	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
	     ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		if (!list_empty(&sde->dmawait))
			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
				   sde->this_idx);
		sdma_process_event(sde, sdma_event_e00_go_hw_down);

		del_timer_sync(&sde->err_progress_check_timer);

		/*
		 * Drop the engine's final state reference and wait for
		 * all holders to finish, so the state machine is fully
		 * quiesced before teardown continues.
		 */
		sdma_finalput(&sde->state);
	}
}

/*
 * unmap the indicated descriptor
 */
static inline void sdma_unmap_desc(
	struct hfi1_devdata *dd,
	struct sdma_desc *descp)
{
	switch (sdma_mapping_type(descp)) {
	case SDMA_MAP_SINGLE:
		dma_unmap_single(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	case SDMA_MAP_PAGE:
		dma_unmap_page(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	}
}

/*
 * return the mode as indicated by the first
 * descriptor in the tx.
 */
static inline u8 ahg_mode(struct sdma_txreq *tx)
{
	return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
		>> SDMA_DESC1_HEADER_MODE_SHIFT;
}

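/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: hfi1_devdata for unmapping
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 */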
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	u16 i;

	if (tx->num_desc) {
		u8 skip = 0, mode = ahg_mode(tx);

		/* unmap first */
		sdma_unmap_desc(dd, &tx->descp[0]);
		/* determine number of AHG descriptors to skip */
		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
		for (i = 1 + skip; i < tx->num_desc; i++)
			sdma_unmap_desc(dd, &tx->descp[i]);
		tx->num_desc = 0;
	}
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;
	/* kmalloc'ed descp */
	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}

static inline u16 sdma_gethead(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	int use_dmahead;
	u16 hwhead;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

retry:
	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
		      (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
	hwhead = use_dmahead ?
		(u16)le64_to_cpu(*sde->head_dma) :
		(u16)read_sde_csr(sde, SD(HEAD));

	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
		u16 cnt;
		u16 swtail;
		u16 swhead;
		int sane;

		swhead = sde->descq_head & sde->sdma_mask;
		/* this code is really bad for cache line trading */
		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		cnt = sde->descq_cnt;

		if (swhead < swtail)
			/* not wrapped */
			sane = (hwhead >= swhead) & (hwhead <= swtail);
		else if (swhead > swtail)
			/* wrapped around */
			sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
			       (hwhead <= swtail);
		else
			/* empty */
			sane = (hwhead == swhead);

		if (unlikely(!sane)) {
			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
				   sde->this_idx,
				   use_dmahead ? "dma" : "kreg",
				   hwhead, swhead, swtail, cnt);
			if (use_dmahead) {
				/* try one more time, using csr */
				use_dmahead = 0;
				goto retry;
			}
			/* proceed as if no progress */
			hwhead = swhead;
		}
	}
	return hwhead;
}

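/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with head_lock held.
 */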
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	struct iowait *wait, *nw, *twait;
	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
	uint i, n = 0, seq, tidx = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
		   slashstrip(__FILE__), __LINE__, __func__);
	dd_dev_err(sde->dd, "avail: %u\n", avail);
#endif

	do {
		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			/* at least one item */
			write_seqlock(&sde->waitlock);
			/* Harvest waiters wanting DMA descriptors */
			list_for_each_entry_safe(
				wait,
				nw,
				&sde->dmawait,
				list) {
				u32 num_desc;

				if (!wait->wakeup)
					continue;
				if (n == ARRAY_SIZE(waits))
					break;
				iowait_init_priority(wait);
				num_desc = iowait_get_all_desc(wait);
				if (num_desc > avail)
					break;
				avail -= num_desc;
				/* Find the top-priority wait entry */
				if (n) {
					twait = waits[tidx];
					tidx =
						iowait_priority_update_top(wait,
									   twait,
									   n,
									   tidx);
				}
				list_del_init(&wait->list);
				waits[n++] = wait;
			}
			write_sequnlock(&sde->waitlock);
			break;
		}
	} while (read_seqretry(&sde->waitlock, seq));

	/* Schedule the top-priority entry first */
	if (n)
		waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);

	for (i = 0; i < n; i++)
		if (i != tidx)
			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

/* head_lock must be held */
static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
	struct sdma_txreq *txp = NULL;
	int progress = 0;
	u16 hwhead, swhead;
	int idle_check_done = 0;

	hwhead = sdma_gethead(sde);

	/* The reason for some of the complexity of this code is that
	 * not all descriptors have corresponding txps.  So, we have to
	 * be able to skip over descs until we wander into the range of
	 * the next txp on the list.
	 */
retry:
	txp = get_txhead(sde);
	swhead = sde->descq_head & sde->sdma_mask;
	trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
	while (swhead != hwhead) {
		/* advance head, wrap if needed */
		swhead = ++sde->descq_head & sde->sdma_mask;

		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == swhead) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
			/* see if there is another txp */
			txp = get_txhead(sde);
		}
		trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
		progress++;
	}

	/*
	 * The SDMA idle interrupt is not guaranteed to be ordered with
	 * respect to updates of the in-memory head shadow.  If this was
	 * an idle interrupt and descriptors are still outstanding by the
	 * shadow's account, re-read the head from the CSR once and make
	 * another pass so no completion is missed.
	 */
	if ((status & sde->idle_mask) && !idle_check_done) {
		u16 swtail;

		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		if (swtail != hwhead) {
			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
			idle_check_done = 1;
			goto retry;
		}
	}

	sde->last_status = status;
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

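/*
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupt sources.
 */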
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	trace_hfi1_sdma_engine_interrupt(sde, status);
	write_seqlock(&sde->head_lock);
	sdma_set_desc_cnt(sde, sdma_desct_intr);
	if (status & sde->idle_mask)
		sde->idle_int_cnt++;
	else if (status & sde->progress_mask)
		sde->progress_int_cnt++;
	else if (status & sde->int_mask)
		sde->sdma_int_cnt++;
	sdma_make_progress(sde, status);
	write_sequnlock(&sde->head_lock);
}

/**
 * sdma_engine_error() - error handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 */
void sdma_engine_error(struct sdma_engine *sde, u64 status)
{
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
		   sde->this_idx,
		   (unsigned long long)status,
		   sdma_state_names[sde->state.current_state]);
#endif
	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);
	if (status & ALL_SDMA_ENG_HALT_ERRS)
		__sdma_process_event(sde, sdma_event_e60_hw_halted);
	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
		dd_dev_err(sde->dd,
			   "SDMA (%u) engine error: 0x%llx state %s\n",
			   sde->this_idx,
			   (unsigned long long)status,
			   sdma_state_names[sde->state.current_state]);
		dump_sdma_state(sde);
	}
	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
{
	u64 set_senddmactrl = 0;
	u64 clr_senddmactrl = 0;
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
		   sde->this_idx,
		   (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
#endif

	if (op & SDMA_SENDCTRL_OP_ENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_INTENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_HALT)
		set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);

	spin_lock_irqsave(&sde->senddmactrl_lock, flags);

	sde->p_senddmactrl |= set_senddmactrl;
	sde->p_senddmactrl &= ~clr_senddmactrl;

	if (op & SDMA_SENDCTRL_OP_CLEANUP)
		write_sde_csr(sde, SD(CTRL),
			      sde->p_senddmactrl |
			      SD(CTRL_SDMA_CLEANUP_SMASK));
	else
		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);

	spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);

#ifdef CONFIG_SDMA_VERBOSITY
	sdma_dumpstate(sde);
#endif
}

static void sdma_setlengen(struct sdma_engine *sde)
{
#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	/*
	 * Set SendDmaLenGen and clear-then-set the MSB of the generation
	 * count to enable generation checking and load the internal
	 * generation counter.
	 */
	write_sde_csr(sde, SD(LEN_GEN),
		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
	write_sde_csr(sde, SD(LEN_GEN),
		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}

static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	/* Commit writes to memory and advance the tail on the chip */
	smp_wmb();
	writeq(tail, sde->tail_csr);
}

/*
 * This is called when changing to state s10_hw_start_up_halt_wait as
 * a result of send buffer errors or send DMA descriptor errors.
 */
static void sdma_hw_start_up(struct sdma_engine *sde)
{
	u64 reg;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	*sde->head_dma = 0;

	reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
	      SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
}

/*
 * set_sdma_integrity
 *
 * Set the SEND_DMA_CHECK_ENABLE register for send DMA engine 'sde'.
 */
static void set_sdma_integrity(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;

	write_sde_csr(sde, SD(CHECK_ENABLE),
		      hfi1_pkt_base_sdma_integrity(dd));
}

static void init_sdma_regs(
	struct sdma_engine *sde,
	u32 credits,
	uint idle_cnt)
{
	u8 opval, opmask;
#ifdef CONFIG_SDMA_VERBOSITY
	struct hfi1_devdata *dd = sde->dd;

	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
	write_sde_csr(sde, SD(DESC_CNT), 0);
	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
	write_sde_csr(sde, SD(MEMORY),
		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
		      ((u64)(credits * sde->this_idx) <<
		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
	set_sdma_integrity(sde);
	opmask = OPCODE_CHECK_MASK_DISABLED;
	opval = OPCODE_CHECK_VAL_DISABLED;
	write_sde_csr(sde, SD(CHECK_OPCODE),
		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
}

#ifdef CONFIG_SDMA_VERBOSITY

#define sdma_dumpstate_helper0(reg) do { \
		csr = read_csr(sde->dd, reg); \
		dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
	} while (0)

#define sdma_dumpstate_helper(reg) do { \
		csr = read_sde_csr(sde, reg); \
		dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
			   #reg, sde->this_idx, csr); \
	} while (0)

#define sdma_dumpstate_helper2(reg) do { \
		csr = read_csr(sde->dd, reg + (8 * i)); \
		dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
			   #reg, i, csr); \
	} while (0)

void sdma_dumpstate(struct sdma_engine *sde)
{
	u64 csr;
	unsigned i;

	sdma_dumpstate_helper(SD(CTRL));
	sdma_dumpstate_helper(SD(STATUS));
	sdma_dumpstate_helper0(SD(ERR_STATUS));
	sdma_dumpstate_helper0(SD(ERR_MASK));
	sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
	sdma_dumpstate_helper(SD(ENG_ERR_MASK));

	for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
		sdma_dumpstate_helper2(CCE_INT_STATUS);
		sdma_dumpstate_helper2(CCE_INT_MASK);
		sdma_dumpstate_helper2(CCE_INT_BLOCKED);
	}

	sdma_dumpstate_helper(SD(TAIL));
	sdma_dumpstate_helper(SD(HEAD));
	sdma_dumpstate_helper(SD(PRIORITY_THLD));
	sdma_dumpstate_helper(SD(IDLE_CNT));
	sdma_dumpstate_helper(SD(RELOAD_CNT));
	sdma_dumpstate_helper(SD(DESC_CNT));
	sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
	sdma_dumpstate_helper(SD(MEMORY));
	sdma_dumpstate_helper0(SD(ENGINES));
	sdma_dumpstate_helper0(SD(MEM_SIZE));

	sdma_dumpstate_helper(SD(BASE_ADDR));
	sdma_dumpstate_helper(SD(LEN_GEN));
	sdma_dumpstate_helper(SD(HEAD_ADDR));
	sdma_dumpstate_helper(SD(CHECK_ENABLE));
	sdma_dumpstate_helper(SD(CHECK_VL));
	sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
	sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
	sdma_dumpstate_helper(SD(CHECK_SLID));
	sdma_dumpstate_helper(SD(CHECK_OPCODE));
}
#endif

static void dump_sdma_state(struct sdma_engine *sde)
{
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;
	u16 head, tail, cnt;

	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	cnt = sdma_descq_freecnt(sde);

	dd_dev_err(sde->dd,
		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
		   sde->this_idx, head, tail, cnt,
		   !list_empty(&sde->flushlist));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		dd_dev_err(sde->dd,
			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		dd_dev_err(sde->dd,
			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
			   desc[0], desc[1]);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			dd_dev_err(sde->dd,
				   "\taidx: %u amode: %u alen: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_DWS_SMASK) >>
					SDMA_DESC1_HEADER_DWS_SHIFT));
		head++;
		head &= sde->sdma_mask;
	}
}

#define SDE_FMT \
	"SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"
/**
 * sdma_seqfile_dump_sde() - debugfs dump of sde
 * @s: seq file
 * @sde: send dma engine to dump
 *
 * This routine dumps the sde to the indicated seq file.
 */
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
{
	u16 head, tail;
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;

	head = sde->descq_head & sde->sdma_mask;
	tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
	seq_printf(s, SDE_FMT, sde->this_idx,
		   sde->cpu,
		   sdma_state_name(sde->state.current_state),
		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
		   (unsigned long long)le64_to_cpu(*sde->head_dma),
		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
		   (unsigned long long)sde->last_status,
		   (unsigned long long)sde->ahg_bits,
		   sde->tx_tail,
		   sde->tx_head,
		   sde->descq_tail,
		   sde->descq_head,
		   !list_empty(&sde->flushlist),
		   sde->descq_full_count,
		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
			   'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		seq_printf(s,
			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT));
		head = (head + 1) & sde->sdma_mask;
	}
}

/*
 * add the generation number into
 * the qw1 and return
 */
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;

	qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
	qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
		<< SDMA_DESC1_GENERATION_SHIFT;
	return qw1;
}

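/*
 * This routine submits the indicated tx
 *
 * Space has already been guaranteed and
 * tail side of ring is locked.
 *
 * The hardware tail update is done
 * in the caller and that is facilitated
 * by returning the new tail.
 *
 * There is special case logic for ahg
 * to not add the generation number for
 * up to 2 descriptors that follow the
 * first descriptor.
 */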
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	int i;
	u16 tail;
	struct sdma_desc *descp = tx->descp;
	u8 skip = 0, mode = ahg_mode(tx);

	tail = sde->descq_tail & sde->sdma_mask;
	sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
	sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
	trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
				   tail, &sde->descq[tail]);
	tail = ++sde->descq_tail & sde->sdma_mask;
	descp++;
	if (mode > SDMA_AHG_APPLY_UPDATE1)
		skip = mode >> 1;
	for (i = 1; i < tx->num_desc; i++, descp++) {
		u64 qw1;

		sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
		if (skip) {
			/* edits don't have generation */
			qw1 = descp->qw[1];
			skip--;
		} else {
			/* replace generation with real one for non-edits */
			qw1 = add_gen(sde, descp->qw[1]);
		}
		sde->descq[tail].qw[1] = cpu_to_le64(qw1);
		trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
					   tail, &sde->descq[tail]);
		tail = ++sde->descq_tail & sde->sdma_mask;
	}
	tx->next_descq_idx = tail;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
	WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
#endif
	sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
	sde->desc_avail -= tx->num_desc;
	return tail;
}

/*
 * Check for progress
 */
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	int ret;

	sde->desc_avail = sdma_descq_freecnt(sde);
	if (tx->num_desc <= sde->desc_avail)
		return -EAGAIN;
	/* pulse the head_lock */
	if (wait && iowait_ioww_to_iow(wait)->sleep) {
		unsigned seq;

		seq = raw_seqcount_begin(
			(const seqcount_t *)&sde->head_lock.seqcount);
		ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
		if (ret == -EAGAIN)
			sde->desc_avail = sdma_descq_freecnt(sde);
	} else {
		ret = -EBUSY;
	}
	return ret;
}

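/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring.  If a iowait structure is non-NULL
 * the packet will be queued to the list in wait.
 *
 * Return:
 * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in
 * ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */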
2392int sdma_send_txreq(struct sdma_engine *sde,
2393 struct iowait_work *wait,
2394 struct sdma_txreq *tx,
2395 bool pkts_sent)
2396{
2397 int ret = 0;
2398 u16 tail;
2399 unsigned long flags;

	/* the user should have supplied the entire packet */
	if (unlikely(tx->tlen))
		return -EINVAL;
2404 tx->wait = iowait_ioww_to_iow(wait);
2405 spin_lock_irqsave(&sde->tail_lock, flags);
2406retry:
2407 if (unlikely(!__sdma_running(sde)))
2408 goto unlock_noconn;
2409 if (unlikely(tx->num_desc > sde->desc_avail))
2410 goto nodesc;
2411 tail = submit_tx(sde, tx);
2412 if (wait)
2413 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2414 sdma_update_tail(sde, tail);
2415unlock:
2416 spin_unlock_irqrestore(&sde->tail_lock, flags);
2417 return ret;
2418unlock_noconn:
2419 if (wait)
2420 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2421 tx->next_descq_idx = 0;
2422#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2423 tx->sn = sde->tail_sn++;
2424 trace_hfi1_sdma_in_sn(sde, tx->sn);
2425#endif
2426 spin_lock(&sde->flushlist_lock);
2427 list_add_tail(&tx->list, &sde->flushlist);
2428 spin_unlock(&sde->flushlist_lock);
2429 iowait_inc_wait_count(wait, tx->num_desc);
2430 queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
2431 ret = -ECOMM;
2432 goto unlock;
2433nodesc:
2434 ret = sdma_check_progress(sde, wait, tx, pkts_sent);
2435 if (ret == -EAGAIN) {
2436 ret = 0;
2437 goto retry;
2438 }
2439 sde->descq_full_count++;
2440 goto unlock;
2441}
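
/*
 * Usage sketch (illustrative only, not driver code): a caller builds a
 * txreq, attaches DMA data, and submits it.  sdma_txinit() and
 * sdma_txadd_daddr() are assumed here from the sdma API in sdma.h; the
 * callback and lengths are placeholders.
 *
 *	struct sdma_txreq tx;
 *	int ret;
 *
 *	ret = sdma_txinit(&tx, 0, tlen, complete_cb);
 *	if (!ret)
 *		ret = sdma_txadd_daddr(dd, &tx, dma_addr, tlen);
 *	if (!ret)
 *		ret = sdma_send_txreq(sde, wait, &tx, false);
 *
 * A -ECOMM return means the engine was not running and the tx was
 * queued to the flush list; its callback still completes.
 */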
2442
/**
 * sdma_send_txlist() - submit a list of tx reqs to the ring
 * @sde: sdma engine to use
 * @wait: iowait structure to use when the ring is full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: returns the total number of sdma_txreqs removed from
 *             tx_list, whether submitted to the ring or queued for
 *             flush
 *
 * The call submits the list into the ring while holding the tail lock
 * once, batching hardware tail updates, which is cheaper than
 * submitting the packets one at a time.
 *
 * On return, tx_list holds whatever could not be consumed.
 *
 * Return:
 * 0 - Success,
 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL),
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM - engine not running
 */
2471int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
2472 struct list_head *tx_list, u16 *count_out)
2473{
2474 struct sdma_txreq *tx, *tx_next;
2475 int ret = 0;
2476 unsigned long flags;
2477 u16 tail = INVALID_TAIL;
2478 u32 submit_count = 0, flush_count = 0, total_count;
2479
2480 spin_lock_irqsave(&sde->tail_lock, flags);
2481retry:
2482 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2483 tx->wait = iowait_ioww_to_iow(wait);
2484 if (unlikely(!__sdma_running(sde)))
2485 goto unlock_noconn;
2486 if (unlikely(tx->num_desc > sde->desc_avail))
2487 goto nodesc;
2488 if (unlikely(tx->tlen)) {
2489 ret = -EINVAL;
2490 goto update_tail;
2491 }
2492 list_del_init(&tx->list);
2493 tail = submit_tx(sde, tx);
2494 submit_count++;
2495 if (tail != INVALID_TAIL &&
2496 (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
2497 sdma_update_tail(sde, tail);
2498 tail = INVALID_TAIL;
2499 }
2500 }
2501update_tail:
2502 total_count = submit_count + flush_count;
2503 if (wait) {
2504 iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
2505 iowait_starve_clear(submit_count > 0,
2506 iowait_ioww_to_iow(wait));
2507 }
2508 if (tail != INVALID_TAIL)
2509 sdma_update_tail(sde, tail);
2510 spin_unlock_irqrestore(&sde->tail_lock, flags);
2511 *count_out = total_count;
2512 return ret;
2513unlock_noconn:
2514 spin_lock(&sde->flushlist_lock);
2515 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2516 tx->wait = iowait_ioww_to_iow(wait);
2517 list_del_init(&tx->list);
2518 tx->next_descq_idx = 0;
2519#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2520 tx->sn = sde->tail_sn++;
2521 trace_hfi1_sdma_in_sn(sde, tx->sn);
2522#endif
2523 list_add_tail(&tx->list, &sde->flushlist);
2524 flush_count++;
2525 iowait_inc_wait_count(wait, tx->num_desc);
2526 }
2527 spin_unlock(&sde->flushlist_lock);
2528 queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
2529 ret = -ECOMM;
2530 goto update_tail;
2531nodesc:
2532 ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
2533 if (ret == -EAGAIN) {
2534 ret = 0;
2535 goto retry;
2536 }
2537 sde->descq_full_count++;
2538 goto update_tail;
2539}
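
/*
 * Usage sketch (illustrative only, not driver code): callers collect
 * several prepared txreqs on a local list and submit them in one
 * batch; prepare_tx() is a hypothetical helper.
 *
 *	LIST_HEAD(txq);
 *	u16 count;
 *	int i, ret;
 *
 *	for (i = 0; i < npkts; i++)
 *		list_add_tail(&prepare_tx(i)->list, &txq);
 *	ret = sdma_send_txlist(sde, wait, &txq, &count);
 *
 * On return, count covers both submitted and flushed txreqs, and txq
 * holds whatever could not be consumed.
 */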

/*
 * Run an event through the engine's state machine, taking both the
 * tail lock and the head seqlock, and kick descriptor processing if
 * the engine lands in the running state.
 */
static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
2542{
2543 unsigned long flags;
2544
2545 spin_lock_irqsave(&sde->tail_lock, flags);
2546 write_seqlock(&sde->head_lock);
2547
2548 __sdma_process_event(sde, event);
2549
2550 if (sde->state.current_state == sdma_state_s99_running)
2551 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
2552
2553 write_sequnlock(&sde->head_lock);
2554 spin_unlock_irqrestore(&sde->tail_lock, flags);
2555}

/*
 * Process an event through the state machine proper; callers hold the
 * per-engine locks (see sdma_process_event()).
 */
static void __sdma_process_event(struct sdma_engine *sde,
2558 enum sdma_events event)
2559{
2560 struct sdma_state *ss = &sde->state;
2561 int need_progress = 0;
2562
2563
2564#ifdef CONFIG_SDMA_VERBOSITY
2565 dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
2566 sdma_state_names[ss->current_state],
2567 sdma_event_names[event]);
2568#endif
2569
2570 switch (ss->current_state) {
2571 case sdma_state_s00_hw_down:
2572 switch (event) {
2573 case sdma_event_e00_go_hw_down:
2574 break;
		case sdma_event_e30_go_running:
			/*
			 * If down, but running requested (usual case),
			 * remember the request and start the hardware;
			 * the start-up sequence finishes the transition
			 * to s99_Running.
			 */
			ss->go_s99_running = 1;
			fallthrough;	/* and start dma engine */
		case sdma_event_e10_go_hw_start:
			/* This reference means the state machine is started */
			sdma_get(&sde->state);
			sdma_set_state(sde,
				       sdma_state_s10_hw_start_up_halt_wait);
			break;
2591 case sdma_event_e15_hw_halt_done:
2592 break;
2593 case sdma_event_e25_hw_clean_up_done:
2594 break;
2595 case sdma_event_e40_sw_cleaned:
2596 sdma_sw_tear_down(sde);
2597 break;
2598 case sdma_event_e50_hw_cleaned:
2599 break;
2600 case sdma_event_e60_hw_halted:
2601 break;
2602 case sdma_event_e70_go_idle:
2603 break;
2604 case sdma_event_e80_hw_freeze:
2605 break;
2606 case sdma_event_e81_hw_frozen:
2607 break;
2608 case sdma_event_e82_hw_unfreeze:
2609 break;
2610 case sdma_event_e85_link_down:
2611 break;
2612 case sdma_event_e90_sw_halted:
2613 break;
2614 }
2615 break;
2616
2617 case sdma_state_s10_hw_start_up_halt_wait:
2618 switch (event) {
2619 case sdma_event_e00_go_hw_down:
2620 sdma_set_state(sde, sdma_state_s00_hw_down);
2621 sdma_sw_tear_down(sde);
2622 break;
2623 case sdma_event_e10_go_hw_start:
2624 break;
2625 case sdma_event_e15_hw_halt_done:
2626 sdma_set_state(sde,
2627 sdma_state_s15_hw_start_up_clean_wait);
2628 sdma_start_hw_clean_up(sde);
2629 break;
2630 case sdma_event_e25_hw_clean_up_done:
2631 break;
2632 case sdma_event_e30_go_running:
2633 ss->go_s99_running = 1;
2634 break;
2635 case sdma_event_e40_sw_cleaned:
2636 break;
2637 case sdma_event_e50_hw_cleaned:
2638 break;
2639 case sdma_event_e60_hw_halted:
2640 schedule_work(&sde->err_halt_worker);
2641 break;
2642 case sdma_event_e70_go_idle:
2643 ss->go_s99_running = 0;
2644 break;
2645 case sdma_event_e80_hw_freeze:
2646 break;
2647 case sdma_event_e81_hw_frozen:
2648 break;
2649 case sdma_event_e82_hw_unfreeze:
2650 break;
2651 case sdma_event_e85_link_down:
2652 break;
2653 case sdma_event_e90_sw_halted:
2654 break;
2655 }
2656 break;
2657
2658 case sdma_state_s15_hw_start_up_clean_wait:
2659 switch (event) {
2660 case sdma_event_e00_go_hw_down:
2661 sdma_set_state(sde, sdma_state_s00_hw_down);
2662 sdma_sw_tear_down(sde);
2663 break;
2664 case sdma_event_e10_go_hw_start:
2665 break;
2666 case sdma_event_e15_hw_halt_done:
2667 break;
2668 case sdma_event_e25_hw_clean_up_done:
2669 sdma_hw_start_up(sde);
2670 sdma_set_state(sde, ss->go_s99_running ?
2671 sdma_state_s99_running :
2672 sdma_state_s20_idle);
2673 break;
2674 case sdma_event_e30_go_running:
2675 ss->go_s99_running = 1;
2676 break;
2677 case sdma_event_e40_sw_cleaned:
2678 break;
2679 case sdma_event_e50_hw_cleaned:
2680 break;
2681 case sdma_event_e60_hw_halted:
2682 break;
2683 case sdma_event_e70_go_idle:
2684 ss->go_s99_running = 0;
2685 break;
2686 case sdma_event_e80_hw_freeze:
2687 break;
2688 case sdma_event_e81_hw_frozen:
2689 break;
2690 case sdma_event_e82_hw_unfreeze:
2691 break;
2692 case sdma_event_e85_link_down:
2693 break;
2694 case sdma_event_e90_sw_halted:
2695 break;
2696 }
2697 break;
2698
2699 case sdma_state_s20_idle:
2700 switch (event) {
2701 case sdma_event_e00_go_hw_down:
2702 sdma_set_state(sde, sdma_state_s00_hw_down);
2703 sdma_sw_tear_down(sde);
2704 break;
2705 case sdma_event_e10_go_hw_start:
2706 break;
2707 case sdma_event_e15_hw_halt_done:
2708 break;
2709 case sdma_event_e25_hw_clean_up_done:
2710 break;
2711 case sdma_event_e30_go_running:
2712 sdma_set_state(sde, sdma_state_s99_running);
2713 ss->go_s99_running = 1;
2714 break;
2715 case sdma_event_e40_sw_cleaned:
2716 break;
2717 case sdma_event_e50_hw_cleaned:
2718 break;
2719 case sdma_event_e60_hw_halted:
2720 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2721 schedule_work(&sde->err_halt_worker);
2722 break;
2723 case sdma_event_e70_go_idle:
2724 break;
		case sdma_event_e85_link_down:
		case sdma_event_e80_hw_freeze:
2728 sdma_set_state(sde, sdma_state_s80_hw_freeze);
2729 atomic_dec(&sde->dd->sdma_unfreeze_count);
2730 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2731 break;
2732 case sdma_event_e81_hw_frozen:
2733 break;
2734 case sdma_event_e82_hw_unfreeze:
2735 break;
2736 case sdma_event_e90_sw_halted:
2737 break;
2738 }
2739 break;
2740
2741 case sdma_state_s30_sw_clean_up_wait:
2742 switch (event) {
2743 case sdma_event_e00_go_hw_down:
2744 sdma_set_state(sde, sdma_state_s00_hw_down);
2745 break;
2746 case sdma_event_e10_go_hw_start:
2747 break;
2748 case sdma_event_e15_hw_halt_done:
2749 break;
2750 case sdma_event_e25_hw_clean_up_done:
2751 break;
2752 case sdma_event_e30_go_running:
2753 ss->go_s99_running = 1;
2754 break;
2755 case sdma_event_e40_sw_cleaned:
2756 sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
2757 sdma_start_hw_clean_up(sde);
2758 break;
2759 case sdma_event_e50_hw_cleaned:
2760 break;
2761 case sdma_event_e60_hw_halted:
2762 break;
2763 case sdma_event_e70_go_idle:
2764 ss->go_s99_running = 0;
2765 break;
2766 case sdma_event_e80_hw_freeze:
2767 break;
2768 case sdma_event_e81_hw_frozen:
2769 break;
2770 case sdma_event_e82_hw_unfreeze:
2771 break;
2772 case sdma_event_e85_link_down:
2773 ss->go_s99_running = 0;
2774 break;
2775 case sdma_event_e90_sw_halted:
2776 break;
2777 }
2778 break;
2779
2780 case sdma_state_s40_hw_clean_up_wait:
2781 switch (event) {
2782 case sdma_event_e00_go_hw_down:
2783 sdma_set_state(sde, sdma_state_s00_hw_down);
2784 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2785 break;
2786 case sdma_event_e10_go_hw_start:
2787 break;
2788 case sdma_event_e15_hw_halt_done:
2789 break;
2790 case sdma_event_e25_hw_clean_up_done:
2791 sdma_hw_start_up(sde);
2792 sdma_set_state(sde, ss->go_s99_running ?
2793 sdma_state_s99_running :
2794 sdma_state_s20_idle);
2795 break;
2796 case sdma_event_e30_go_running:
2797 ss->go_s99_running = 1;
2798 break;
2799 case sdma_event_e40_sw_cleaned:
2800 break;
2801 case sdma_event_e50_hw_cleaned:
2802 break;
2803 case sdma_event_e60_hw_halted:
2804 break;
2805 case sdma_event_e70_go_idle:
2806 ss->go_s99_running = 0;
2807 break;
2808 case sdma_event_e80_hw_freeze:
2809 break;
2810 case sdma_event_e81_hw_frozen:
2811 break;
2812 case sdma_event_e82_hw_unfreeze:
2813 break;
2814 case sdma_event_e85_link_down:
2815 ss->go_s99_running = 0;
2816 break;
2817 case sdma_event_e90_sw_halted:
2818 break;
2819 }
2820 break;
2821
2822 case sdma_state_s50_hw_halt_wait:
2823 switch (event) {
2824 case sdma_event_e00_go_hw_down:
2825 sdma_set_state(sde, sdma_state_s00_hw_down);
2826 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2827 break;
2828 case sdma_event_e10_go_hw_start:
2829 break;
2830 case sdma_event_e15_hw_halt_done:
2831 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2832 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2833 break;
2834 case sdma_event_e25_hw_clean_up_done:
2835 break;
2836 case sdma_event_e30_go_running:
2837 ss->go_s99_running = 1;
2838 break;
2839 case sdma_event_e40_sw_cleaned:
2840 break;
2841 case sdma_event_e50_hw_cleaned:
2842 break;
2843 case sdma_event_e60_hw_halted:
2844 schedule_work(&sde->err_halt_worker);
2845 break;
2846 case sdma_event_e70_go_idle:
2847 ss->go_s99_running = 0;
2848 break;
2849 case sdma_event_e80_hw_freeze:
2850 break;
2851 case sdma_event_e81_hw_frozen:
2852 break;
2853 case sdma_event_e82_hw_unfreeze:
2854 break;
2855 case sdma_event_e85_link_down:
2856 ss->go_s99_running = 0;
2857 break;
2858 case sdma_event_e90_sw_halted:
2859 break;
2860 }
2861 break;
2862
2863 case sdma_state_s60_idle_halt_wait:
2864 switch (event) {
2865 case sdma_event_e00_go_hw_down:
2866 sdma_set_state(sde, sdma_state_s00_hw_down);
2867 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2868 break;
2869 case sdma_event_e10_go_hw_start:
2870 break;
2871 case sdma_event_e15_hw_halt_done:
2872 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2873 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2874 break;
2875 case sdma_event_e25_hw_clean_up_done:
2876 break;
2877 case sdma_event_e30_go_running:
2878 ss->go_s99_running = 1;
2879 break;
2880 case sdma_event_e40_sw_cleaned:
2881 break;
2882 case sdma_event_e50_hw_cleaned:
2883 break;
2884 case sdma_event_e60_hw_halted:
2885 schedule_work(&sde->err_halt_worker);
2886 break;
2887 case sdma_event_e70_go_idle:
2888 ss->go_s99_running = 0;
2889 break;
2890 case sdma_event_e80_hw_freeze:
2891 break;
2892 case sdma_event_e81_hw_frozen:
2893 break;
2894 case sdma_event_e82_hw_unfreeze:
2895 break;
2896 case sdma_event_e85_link_down:
2897 break;
2898 case sdma_event_e90_sw_halted:
2899 break;
2900 }
2901 break;
2902
2903 case sdma_state_s80_hw_freeze:
2904 switch (event) {
2905 case sdma_event_e00_go_hw_down:
2906 sdma_set_state(sde, sdma_state_s00_hw_down);
2907 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2908 break;
2909 case sdma_event_e10_go_hw_start:
2910 break;
2911 case sdma_event_e15_hw_halt_done:
2912 break;
2913 case sdma_event_e25_hw_clean_up_done:
2914 break;
2915 case sdma_event_e30_go_running:
2916 ss->go_s99_running = 1;
2917 break;
2918 case sdma_event_e40_sw_cleaned:
2919 break;
2920 case sdma_event_e50_hw_cleaned:
2921 break;
2922 case sdma_event_e60_hw_halted:
2923 break;
2924 case sdma_event_e70_go_idle:
2925 ss->go_s99_running = 0;
2926 break;
2927 case sdma_event_e80_hw_freeze:
2928 break;
2929 case sdma_event_e81_hw_frozen:
2930 sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
2931 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2932 break;
2933 case sdma_event_e82_hw_unfreeze:
2934 break;
2935 case sdma_event_e85_link_down:
2936 break;
2937 case sdma_event_e90_sw_halted:
2938 break;
2939 }
2940 break;
2941
2942 case sdma_state_s82_freeze_sw_clean:
2943 switch (event) {
2944 case sdma_event_e00_go_hw_down:
2945 sdma_set_state(sde, sdma_state_s00_hw_down);
2946 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2947 break;
2948 case sdma_event_e10_go_hw_start:
2949 break;
2950 case sdma_event_e15_hw_halt_done:
2951 break;
2952 case sdma_event_e25_hw_clean_up_done:
2953 break;
2954 case sdma_event_e30_go_running:
2955 ss->go_s99_running = 1;
2956 break;
		case sdma_event_e40_sw_cleaned:
			/* notify the caller that this engine is done cleaning */
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
2962 case sdma_event_e50_hw_cleaned:
2963 break;
2964 case sdma_event_e60_hw_halted:
2965 break;
2966 case sdma_event_e70_go_idle:
2967 ss->go_s99_running = 0;
2968 break;
2969 case sdma_event_e80_hw_freeze:
2970 break;
2971 case sdma_event_e81_hw_frozen:
2972 break;
2973 case sdma_event_e82_hw_unfreeze:
2974 sdma_hw_start_up(sde);
2975 sdma_set_state(sde, ss->go_s99_running ?
2976 sdma_state_s99_running :
2977 sdma_state_s20_idle);
2978 break;
2979 case sdma_event_e85_link_down:
2980 break;
2981 case sdma_event_e90_sw_halted:
2982 break;
2983 }
2984 break;
2985
2986 case sdma_state_s99_running:
2987 switch (event) {
2988 case sdma_event_e00_go_hw_down:
2989 sdma_set_state(sde, sdma_state_s00_hw_down);
2990 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2991 break;
2992 case sdma_event_e10_go_hw_start:
2993 break;
2994 case sdma_event_e15_hw_halt_done:
2995 break;
2996 case sdma_event_e25_hw_clean_up_done:
2997 break;
2998 case sdma_event_e30_go_running:
2999 break;
3000 case sdma_event_e40_sw_cleaned:
3001 break;
3002 case sdma_event_e50_hw_cleaned:
3003 break;
		case sdma_event_e60_hw_halted:
			need_progress = 1;
			sdma_err_progress_check_schedule(sde);
			fallthrough;
		case sdma_event_e90_sw_halted:
			/*
			 * SW initiated halt does not perform the engine
			 * progress check
			 */
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
3016 case sdma_event_e70_go_idle:
3017 sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
3018 break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			fallthrough;
		case sdma_event_e80_hw_freeze:
3023 sdma_set_state(sde, sdma_state_s80_hw_freeze);
3024 atomic_dec(&sde->dd->sdma_unfreeze_count);
3025 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
3026 break;
3027 case sdma_event_e81_hw_frozen:
3028 break;
3029 case sdma_event_e82_hw_unfreeze:
3030 break;
3031 }
3032 break;
3033 }
3034
3035 ss->last_event = event;
3036 if (need_progress)
3037 sdma_make_progress(sde, 0);
3038}
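
/*
 * Example event flow (illustrative): a first-time engine start
 * typically walks
 *
 *	s00_HwDown --e30_go_running--> s10_HwStartUpHaltWait
 *	           --e15_hw_halt_done--> s15_HwStartUpCleanWait
 *	           --e25_hw_clean_up_done--> s99_Running
 *
 * because e30 latches go_s99_running before falling through into the
 * e10 hardware-start path.
 */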
3039
/*
 * _extend_sdma_tx_descs() - helper to extend a txreq
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * The allocation is bumped up to MAX_DESC descriptors in one step.
 * The last descriptor slot is reserved for a coalesce buffer so that
 * packets with more fragments than descriptors can still be sent.
 */
3053static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3054{
3055 int i;
3056
	/* Handle the last descriptor */
	if (unlikely(tx->num_desc == (MAX_DESC - 1))) {
		/* if tlen is 0, it is for padding: release the last descriptor */
		if (!tx->tlen) {
			tx->desc_limit = MAX_DESC;
		} else if (!tx->coalesce_buf) {
			/* allocate a coalesce buffer with room for padding */
			tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
						   GFP_ATOMIC);
			if (!tx->coalesce_buf)
				goto enomem;
			tx->coalesce_idx = 0;
		}
3070 return 0;
3071 }
3072
3073 if (unlikely(tx->num_desc == MAX_DESC))
3074 goto enomem;
3075
3076 tx->descp = kmalloc_array(
3077 MAX_DESC,
3078 sizeof(struct sdma_desc),
3079 GFP_ATOMIC);
3080 if (!tx->descp)
3081 goto enomem;

	/* reserve the last descriptor for coalescing */
	tx->desc_limit = MAX_DESC - 1;
3085
3086 for (i = 0; i < tx->num_desc; i++)
3087 tx->descp[i] = tx->descs[i];
3088 return 0;
3089enomem:
3090 __sdma_txclean(dd, tx);
3091 return -ENOMEM;
3092}
3093
/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * It calls _extend_sdma_tx_descs() to extend the descriptor array or
 * allocate a coalesce buffer.  If a coalesce buffer exists, the input
 * packet data is copied into it, and a single descriptor covering the
 * whole (padded) buffer is added once the last fragment has arrived.
 *
 * Return:
 * <0 - error
 *  0 - coalescing, don't populate the descriptor
 *  1 - continue with populating the descriptor
 */
3110int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
3111 int type, void *kvaddr, struct page *page,
3112 unsigned long offset, u16 len)
3113{
3114 int pad_len, rval;
3115 dma_addr_t addr;
3116
3117 rval = _extend_sdma_tx_descs(dd, tx);
3118 if (rval) {
3119 __sdma_txclean(dd, tx);
3120 return rval;
3121 }

	/* If a coalesce buffer is allocated, copy the data into it */
	if (tx->coalesce_buf) {
		if (type == SDMA_MAP_NONE) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		if (type == SDMA_MAP_PAGE) {
			kvaddr = kmap(page);
			kvaddr += offset;
		} else if (WARN_ON(!kvaddr)) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
		tx->coalesce_idx += len;
		if (type == SDMA_MAP_PAGE)
			kunmap(page);

		/* If there is more data, return */
		if (tx->tlen - tx->coalesce_idx)
			return 0;

		/* Whole packet is received; add any padding */
		pad_len = tx->packet_len & (sizeof(u32) - 1);
		if (pad_len) {
			pad_len = sizeof(u32) - pad_len;
			memset(tx->coalesce_buf + tx->coalesce_idx, 0,
			       pad_len);
			/* padding is taken care of for the coalescing case */
			tx->packet_len += pad_len;
			tx->tlen += pad_len;
		}

		/* dma map the coalesce buffer */
		addr = dma_map_single(&dd->pcidev->dev,
				      tx->coalesce_buf,
				      tx->tlen,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
			__sdma_txclean(dd, tx);
			return -ENOSPC;
		}

		/* add a descriptor for the coalesce buffer */
		tx->desc_limit = MAX_DESC;
		return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
					 addr, tx->tlen);
	}
3173
3174 return 1;
3175}
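
/*
 * Padding example (illustrative): a packet_len of 13 leaves
 * 13 & 3 == 1 residual byte, so pad_len becomes 4 - 1 = 3 and the
 * coalesce buffer is zero-filled out to a dword-aligned 16 bytes
 * before the single SDMA_MAP_SINGLE descriptor is added.
 */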
3176
/* Update each engine's source-LID check registers when the LMC changes */
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
3179{
3180 struct sdma_engine *sde;
3181 int i;
3182 u64 sreg;
3183
3184 sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
3185 SD(CHECK_SLID_MASK_SHIFT)) |
3186 (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
3187 SD(CHECK_SLID_VALUE_SHIFT));
3188
3189 for (i = 0; i < dd->num_sdma; i++) {
3190 hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
3191 i, (u32)sreg);
3192 sde = &dd->per_sdma[i];
3193 write_sde_csr(sde, SD(CHECK_SLID), sreg);
3194 }
3195}
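
/*
 * Example (illustrative): with LMC 2 the low two bits of the LID are
 * path bits, so mask = ~0x3.  For lid 0x1004 each engine is programmed
 * with a CHECK_SLID value of 0x1004 and mask 0xfffc, accepting source
 * LIDs 0x1004 through 0x1007.
 */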
3196
/* the tx is not dword sized - pad it out with the shared pad buffer */
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3199{
3200 int rval = 0;
3201
3202 tx->num_desc++;
	if (unlikely(tx->num_desc == tx->desc_limit)) {
3204 rval = _extend_sdma_tx_descs(dd, tx);
3205 if (rval) {
3206 __sdma_txclean(dd, tx);
3207 return rval;
3208 }
3209 }

	/* finish with a dword-aligning pad from the shared pad buffer */
	make_tx_sdma_desc(
		tx,
		SDMA_MAP_NONE,
		dd->sdma_pad_phys,
		sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
3216 _sdma_close_tx(dd, tx);
3217 return rval;
3218}
3219
/*
 * Add AHG (automatic header generation) updates to the sdma_txreq.
 *
 * The logic consumes up to three descriptors at the start of the
 * sdma_txreq, depending on how many header update words are supplied.
 */
3227void _sdma_txreq_ahgadd(
3228 struct sdma_txreq *tx,
3229 u8 num_ahg,
3230 u8 ahg_entry,
3231 u32 *ahg,
3232 u8 ahg_hlen)
3233{
3234 u32 i, shift = 0, desc = 0;
3235 u8 mode;
3236
3237 WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
3238
3239 if (num_ahg == 1)
3240 mode = SDMA_AHG_APPLY_UPDATE1;
3241 else if (num_ahg <= 5)
3242 mode = SDMA_AHG_APPLY_UPDATE2;
3243 else
3244 mode = SDMA_AHG_APPLY_UPDATE3;
3245 tx->num_desc++;
3246
	switch (mode) {
	case SDMA_AHG_APPLY_UPDATE3:
		tx->num_desc++;
		tx->descs[2].qw[0] = 0;
		tx->descs[2].qw[1] = 0;
		fallthrough;
	case SDMA_AHG_APPLY_UPDATE2:
		tx->num_desc++;
		tx->descs[1].qw[0] = 0;
		tx->descs[1].qw[1] = 0;
		break;
	}
3259 ahg_hlen >>= 2;
3260 tx->descs[0].qw[1] |=
3261 (((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
3262 << SDMA_DESC1_HEADER_INDEX_SHIFT) |
3263 (((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
3264 << SDMA_DESC1_HEADER_DWS_SHIFT) |
3265 (((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
3266 << SDMA_DESC1_HEADER_MODE_SHIFT) |
3267 (((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
3268 << SDMA_DESC1_HEADER_UPDATE1_SHIFT);
3269 for (i = 0; i < (num_ahg - 1); i++) {
3270 if (!shift && !(i & 2))
3271 desc++;
3272 tx->descs[desc].qw[!!(i & 2)] |=
3273 (((u64)ahg[i + 1])
3274 << shift);
3275 shift = (shift + 32) & 63;
3276 }
3277}
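
/*
 * Example (illustrative): num_ahg = 4 selects SDMA_AHG_APPLY_UPDATE2;
 * descriptor 0 carries ahg[0] in its UPDATE1 field, and descriptor 1
 * packs ahg[1..3] into its two quadwords, 32 bits at a time.
 */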
3278
/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return:
 * 0-31 when successful, -EINVAL when @sde is null, -ENOSPC when no
 * entries are available
 */
3287int sdma_ahg_alloc(struct sdma_engine *sde)
3288{
3289 int nr;
3290 int oldbit;
3291
3292 if (!sde) {
3293 trace_hfi1_ahg_allocate(sde, -EINVAL);
3294 return -EINVAL;
3295 }
3296 while (1) {
3297 nr = ffz(READ_ONCE(sde->ahg_bits));
3298 if (nr > 31) {
3299 trace_hfi1_ahg_allocate(sde, -ENOSPC);
3300 return -ENOSPC;
3301 }
3302 oldbit = test_and_set_bit(nr, &sde->ahg_bits);
3303 if (!oldbit)
3304 break;
3305 cpu_relax();
3306 }
3307 trace_hfi1_ahg_allocate(sde, nr);
3308 return nr;
3309}
3310
/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine to return the AHG entry to
 * @ahg_index: AHG entry index to release (0-31)
 */
3318void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
3319{
3320 if (!sde)
3321 return;
3322 trace_hfi1_ahg_deallocate(sde, ahg_index);
3323 if (ahg_index < 0 || ahg_index > 31)
3324 return;
3325 clear_bit(ahg_index, &sde->ahg_bits);
3326}
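
/*
 * Usage sketch (illustrative only): an AHG index is typically held for
 * the lifetime of a header template and released afterwards.
 *
 *	int ahg_index = sdma_ahg_alloc(sde);
 *
 *	if (ahg_index >= 0) {
 *		(program the template, send with AHG updates)
 *		sdma_ahg_free(sde, ahg_index);
 *	}
 */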
3327
/*
 * SPC freeze handling for SDMA engines.  Called when the driver knows
 * the SPC is going into a freeze but before the freeze is fully
 * settled.  Generally an error interrupt.
 *
 * This event pulls each engine out of running so no more entries can
 * be added to its queue.
 */
3336void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
3337{
3338 int i;
3339 enum sdma_events event = link_down ? sdma_event_e85_link_down :
3340 sdma_event_e80_hw_freeze;

	/* set up the wait but do not wait here */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines to stop running and wait */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], event);

	/* sdma_freeze() will wait for all engines to have stopped */
}
3351
/*
 * SPC freeze handling for the SDMA engines.  Called after the SPC is
 * fully frozen.  Waits for every engine to stop processing, then tells
 * them the hardware is frozen and waits for the per-engine software
 * clean-up to complete.
 */
3356void sdma_freeze(struct hfi1_devdata *dd)
3357{
3358 int i;
3359 int ret;

	/*
	 * Make sure all engines have moved out of the running state before
	 * continuing.
	 */
	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <=
				       0);
	/* interrupted or the count is negative (unloading) - just exit */
	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
		return;

	/* set up the count for the next wait */
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);

	/* tell all engines that the SPC is frozen, they can start cleaning */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);

	/*
	 * Wait for everyone to finish the software clean before exiting.
	 * The clean reads engine CSRs, so it must complete before the
	 * unfreeze path clears them.
	 */
	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <= 0);
	/* no need to check the result - proceed no matter what */
}
3388
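/*
 * Handshake sketch (illustrative): with num_sdma engines, each freeze
 * phase sets sdma_unfreeze_count to num_sdma; every engine decrements
 * it as it reaches the target state (s80_HwFreeze, then
 * s82_FreezeSwClean) and wakes sdma_unfreeze_wq, so the waiter in
 * sdma_freeze() proceeds once the count reaches zero.
 */
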
/*
 * SPC unfreeze handling for the SDMA engines.  Called after the SPC
 * is unfrozen so the engines can be restarted.
 */
3397void sdma_unfreeze(struct hfi1_devdata *dd)
3398{
3399 int i;

	/* tell all engines to start the post-freeze clean up */
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i],
				   sdma_event_e82_hw_unfreeze);
3405}
3406
/**
 * _sdma_engine_progress_schedule() - schedule progress on an engine
 * @sde: sdma_engine to schedule progress on
 *
 * Forces the engine's interrupt source so the handler runs and makes
 * progress on the ring.
 */
3412void _sdma_engine_progress_schedule(
3413 struct sdma_engine *sde)
3414{
3415 trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);

	/* force the engine's interrupt source via CCE_INT_FORCE */
	write_csr(sde->dd,
		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
		  sde->progress_mask);
3420}
3421