#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))

static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
static uint sdma_idle_cnt = 250;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_idle_cnt, "sdma interrupt idle delay (ns, default 250)");

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number of SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before interrupt");

#define SDMA_WAIT_BATCH_SIZE 20

#define SDMA_ERR_HALT_TIMEOUT 10

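/*
 * SD() is shorthand for the SEND_DMA_* CSR field names; the mask below
 * collects every engine error status bit that halts the engine.
 */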
#define SD(name) SEND_DMA_##name
#define ALL_SDMA_ENG_HALT_ERRS \
	(SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))

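/* sdma_sendctrl operations */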
#define SDMA_SENDCTRL_OP_ENABLE BIT(0)
#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
#define SDMA_SENDCTRL_OP_HALT BIT(2)
#define SDMA_SENDCTRL_OP_CLEANUP BIT(3)

#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
	SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
	SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT

static const char * const sdma_state_names[] = {
	[sdma_state_s00_hw_down] = "s00_HwDown",
	[sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
	[sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
	[sdma_state_s20_idle] = "s20_Idle",
	[sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
	[sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
	[sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
	[sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
	[sdma_state_s80_hw_freeze] = "s80_HwFreeze",
	[sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
	[sdma_state_s99_running] = "s99_Running",
};

#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = {
	[sdma_event_e00_go_hw_down] = "e00_GoHwDown",
	[sdma_event_e10_go_hw_start] = "e10_GoHwStart",
	[sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
	[sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
	[sdma_event_e30_go_running] = "e30_GoRunning",
	[sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
	[sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
	[sdma_event_e60_hw_halted] = "e60_HwHalted",
	[sdma_event_e70_go_idle] = "e70_GoIdle",
	[sdma_event_e80_hw_freeze] = "e80_HwFreeze",
	[sdma_event_e81_hw_frozen] = "e81_HwFrozen",
	[sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
	[sdma_event_e85_link_down] = "e85_LinkDown",
	[sdma_event_e90_sw_halted] = "e90_SwHalted",
};
#endif

static const struct sdma_set_state_action sdma_action_table[] = {
	[sdma_state_s00_hw_down] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s10_hw_start_up_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s15_hw_start_up_clean_wait] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s20_idle] = {
		.op_enable = 0,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s30_sw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s40_hw_clean_up_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 1,
	},
	[sdma_state_s50_hw_halt_wait] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s60_idle_halt_wait] = {
		.go_s99_running_tofalse = 1,
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 1,
		.op_cleanup = 0,
	},
	[sdma_state_s80_hw_freeze] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s82_freeze_sw_clean] = {
		.op_enable = 0,
		.op_intenable = 0,
		.op_halt = 0,
		.op_cleanup = 0,
	},
	[sdma_state_s99_running] = {
		.op_enable = 1,
		.op_intenable = 1,
		.op_halt = 0,
		.op_cleanup = 0,
		.go_s99_running_totrue = 1,
	},
};

#define SDMA_TAIL_UPDATE_THRESH 0x1F

static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void __sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);

static const char *sdma_state_name(enum sdma_states state)
{
	return sdma_state_names[state];
}

static void sdma_get(struct sdma_state *ss)
{
	kref_get(&ss->kref);
}

static void sdma_complete(struct kref *kref)
{
	struct sdma_state *ss =
		container_of(kref, struct sdma_state, kref);

	complete(&ss->comp);
}

static void sdma_put(struct sdma_state *ss)
{
	kref_put(&ss->kref, sdma_complete);
}

static void sdma_finalput(struct sdma_state *ss)
{
	sdma_put(ss);
	wait_for_completion(&ss->comp);
}

static inline void write_sde_csr(
	struct sdma_engine *sde,
	u32 offset0,
	u64 value)
{
	write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
}

static inline u64 read_sde_csr(
	struct sdma_engine *sde,
	u32 offset0)
{
	return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
}

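/*
 * Poll the engine's egress packet occupancy until it drains to zero.
 * If no forward progress is seen for an extended period, log the stall
 * and schedule a link bounce to recover.
 */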
static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
					int pause)
{
	u64 off = 8 * sde->this_idx;
	struct hfi1_devdata *dd = sde->dd;
	int lcnt = 0;
	u64 reg_prev;
	u64 reg = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);

		reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
		reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
		if (reg == 0)
			break;

		if (reg != reg_prev)
			lcnt = 0;
		if (lcnt++ > 500) {
			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sde->this_idx, (u32)reg);
			queue_work(dd->pport->link_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		udelay(1);
	}
}

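/**
 * sdma_wait() - wait for packet egress to complete for all SDMA engines
 * @dd: hfi1_devdata
 */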
void sdma_wait(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->num_sdma; i++) {
		struct sdma_engine *sde = &dd->per_sdma[i];

		sdma_wait_for_packet_egress(sde, 0);
	}
}

static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
{
	u64 reg;

	if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
		return;
	reg = cnt;
	reg &= SD(DESC_CNT_CNT_MASK);
	reg <<= SD(DESC_CNT_CNT_SHIFT);
	write_sde_csr(sde, SD(DESC_CNT), reg);
}

static inline void complete_tx(struct sdma_engine *sde,
			       struct sdma_txreq *tx,
			       int res)
{
	struct iowait *wait = tx->wait;
	callback_t complete = tx->complete;

#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	trace_hfi1_sdma_out_sn(sde, tx->sn);
	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
		dd_dev_err(sde->dd, "expected %llu got %llu\n",
			   sde->head_sn, tx->sn);
	sde->head_sn++;
#endif
	__sdma_txclean(sde->dd, tx);
	if (complete)
		(*complete)(tx, res);
	if (iowait_sdma_dec(wait))
		iowait_drain_wakeup(wait);
}

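/*
 * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
 *
 * Depending on timing there can be txreqs in two places:
 * - in the descq ring
 * - in the flush list
 *
 * To avoid ordering issues the descq ring needs to be flushed
 * first followed by the flush list.
 *
 * This routine is called from two places:
 * - from a work queue item
 * - directly from the state machine just before setting the
 *   state to running
 *
 * Must be called with head_lock held.
 */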
static void sdma_flush(struct sdma_engine *sde)
{
	struct sdma_txreq *txp, *txp_next;
	LIST_HEAD(flushlist);
	unsigned long flags;
	uint seq;

	/* flush from head to tail */
	sdma_flush_descq(sde);
	spin_lock_irqsave(&sde->flushlist_lock, flags);
	/* copy flush list */
	list_splice_init(&sde->flushlist, &flushlist);
	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
	/* flush from flush list */
	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
	/* wakeup QPs orphaned on the dmawait list */
	do {
		struct iowait *w, *nw;

		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			write_seqlock(&sde->waitlock);
			list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
				if (w->wakeup) {
					w->wakeup(w, SDMA_AVAIL_REASON);
					list_del_init(&w->list);
				}
			}
			write_sequnlock(&sde->waitlock);
		}
	} while (read_seqretry(&sde->waitlock, seq));
}

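/*
 * Workqueue entry (flush_worker): abort all outstanding requests if the
 * engine is still not running.
 */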
static void sdma_field_flush(struct work_struct *work)
{
	unsigned long flags;
	struct sdma_engine *sde =
		container_of(work, struct sdma_engine, flush_worker);

	write_seqlock_irqsave(&sde->head_lock, flags);
	if (!__sdma_running(sde))
		sdma_flush(sde);
	write_sequnlock_irqrestore(&sde->head_lock, flags);
}

static void sdma_err_halt_wait(struct work_struct *work)
{
	struct sdma_engine *sde = container_of(work, struct sdma_engine,
					       err_halt_worker);
	u64 statuscsr;
	unsigned long timeout;

	timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
	while (1) {
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
		if (statuscsr)
			break;
		if (time_after(jiffies, timeout)) {
			dd_dev_err(sde->dd,
				   "SDMA engine %d - timeout waiting for engine to halt\n",
				   sde->this_idx);
			/*
			 * continue anyway; the halt-done event is raised
			 * regardless
			 */
			break;
		}
		usleep_range(80, 120);
	}

	sdma_process_event(sde, sdma_event_e15_hw_halt_done);
}

static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
		unsigned index;
		struct hfi1_devdata *dd = sde->dd;

		for (index = 0; index < dd->num_sdma; index++) {
			struct sdma_engine *curr_sdma = &dd->per_sdma[index];

			if (curr_sdma != sde)
				curr_sdma->progress_check_head =
							curr_sdma->descq_head;
		}
		dd_dev_err(sde->dd,
			   "SDMA engine %d - check scheduled\n",
			   sde->this_idx);
		mod_timer(&sde->err_progress_check_timer, jiffies + 10);
	}
}

static void sdma_err_progress_check(struct timer_list *t)
{
	unsigned index;
	struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);

	dd_dev_err(sde->dd, "SDE progress check event\n");
	for (index = 0; index < sde->dd->num_sdma; index++) {
		struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
		unsigned long flags;

		/* check progress on each engine except the current one */
		if (curr_sde == sde)
			continue;
		/*
		 * We must lock interrupts when acquiring sde->lock,
		 * to avoid a deadlock if interrupt triggers and spins on
		 * the same lock on same CPU
		 */
		spin_lock_irqsave(&curr_sde->tail_lock, flags);
		write_seqlock(&curr_sde->head_lock);

		/* skip non-running queues */
		if (curr_sde->state.current_state != sdma_state_s99_running) {
			write_sequnlock(&curr_sde->head_lock);
			spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
			continue;
		}

		if ((curr_sde->descq_head != curr_sde->descq_tail) &&
		    (curr_sde->descq_head ==
		     curr_sde->progress_check_head))
			__sdma_process_event(curr_sde,
					     sdma_event_e90_sw_halted);
		write_sequnlock(&curr_sde->head_lock);
		spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
	}
	schedule_work(&sde->err_halt_worker);
}

static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
	struct sdma_engine *sde = from_tasklet(sde, t,
					       sdma_hw_clean_up_task);
	u64 statuscsr;

	while (1) {
#ifdef CONFIG_SDMA_VERBOSITY
		dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
			   sde->this_idx, slashstrip(__FILE__), __LINE__,
			   __func__);
#endif
		statuscsr = read_sde_csr(sde, SD(STATUS));
		statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
		if (statuscsr)
			break;
		udelay(10);
	}

	sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
}

static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
	return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}

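/*
 * flush ring for recovery
 */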
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	/*
	 * Not all descriptors have corresponding txps, so we have to be
	 * able to skip over descs until we wander into the range of the
	 * next txp on the list.
	 */
	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	while (head != tail) {
		/* advance head, wrap if needed */
		head = ++sde->descq_head & sde->sdma_mask;
		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == head) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
			trace_hfi1_sdma_progress(sde, head, tail, txp);
			txp = get_txhead(sde);
		}
		progress++;
	}
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
	struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	/*
	 * At this point, the following should always be true:
	 * - We are halted, so no more descriptors are getting retired.
	 * - We are not running, so no one is submitting new work.
	 * - Only we can send the e40_sw_cleaned, so we can't start
	 *   running again until we say so.  So, the active list and
	 *   descq are ours to play with.
	 */

	/*
	 * In the error clean up sequence, software clean must be called
	 * before the hardware clean so we can use the hardware head in
	 * the progress routine.  A hardware clean or an SPC unfreeze will
	 * reset the hardware head.
	 *
	 * Process all retired requests.
	 */
	sdma_make_progress(sde, 0);

	sdma_flush(sde);

	/*
	 * Reset our notion of head and tail.
	 * Note that the HW registers have been reset via an earlier
	 * clean up.
	 */
	sde->descq_tail = 0;
	sde->descq_head = 0;
	sde->desc_avail = sdma_descq_freecnt(sde);
	*sde->head_dma = 0;

	__sdma_process_event(sde, sdma_event_e40_sw_cleaned);

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sw_tear_down(struct sdma_engine *sde)
{
	struct sdma_state *ss = &sde->state;

	/* Releasing this reference means the state machine has stopped. */
	sdma_put(ss);

	/* state machine is stopped */
	atomic_set(&sde->dd->sdma_unfreeze_count, -1);
	wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
}

static void sdma_start_hw_clean_up(struct sdma_engine *sde)
{
	tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
}

static void sdma_set_state(struct sdma_engine *sde,
			   enum sdma_states next_state)
{
	struct sdma_state *ss = &sde->state;
	const struct sdma_set_state_action *action = sdma_action_table;
	unsigned op = 0;

	trace_hfi1_sdma_state(
		sde,
		sdma_state_names[ss->current_state],
		sdma_state_names[next_state]);

	/* debugging bookkeeping */
	ss->previous_state = ss->current_state;
	ss->previous_op = ss->current_op;
	ss->current_state = next_state;

	if (ss->previous_state != sdma_state_s99_running &&
	    next_state == sdma_state_s99_running)
		sdma_flush(sde);

	if (action[next_state].op_enable)
		op |= SDMA_SENDCTRL_OP_ENABLE;

	if (action[next_state].op_intenable)
		op |= SDMA_SENDCTRL_OP_INTENABLE;

	if (action[next_state].op_halt)
		op |= SDMA_SENDCTRL_OP_HALT;

	if (action[next_state].op_cleanup)
		op |= SDMA_SENDCTRL_OP_CLEANUP;

	if (action[next_state].go_s99_running_tofalse)
		ss->go_s99_running = 0;

	if (action[next_state].go_s99_running_totrue)
		ss->go_s99_running = 1;

	ss->current_op = op;
	sdma_sendctrl(sde, ss->current_op);
}

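/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.  The module parameter is used unless it
 * is not a power of two in the range [64, 32768], in which case the
 * default SDMA_DESCQ_CNT is returned.
 */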
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;

	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}

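/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found.  The mapping fields are protected by RCU.
 */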
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	struct sdma_vl_map *m;
	u8 vl;

	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
		return -EINVAL;

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return -EINVAL;
	}
	vl = m->engine_to_vl[sde->this_idx];
	rcu_read_unlock();

	return vl;
}

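/**
 * sdma_select_engine_vl() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an engine based on the selector and a vl.
 * The mapping fields are protected by RCU.
 */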
struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl)
{
	struct sdma_vl_map *m;
	struct sdma_map_elem *e;
	struct sdma_engine *rval;

	/*
	 * NOTE This should only happen if SC->VL changed after the initial
	 * checks on the QP/AH.
	 * Default will return engine 0 below.
	 */
	if (vl >= num_vls) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return &dd->per_sdma[0];
	}
	e = m->map[vl & m->mask];
	rval = e->sde[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? &dd->per_sdma[0] : rval;
	trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
	return rval;
}

/**
 * sdma_select_engine_sc() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns an engine based on the selector and an sc.
 */
struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return sdma_select_engine_vl(dd, selector, vl);
}

struct sdma_rht_map_elem {
	u32 mask;
	u8 ctr;
	struct sdma_engine *sde[];
};

struct sdma_rht_node {
	unsigned long cpu_id;
	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
	struct rhash_head node;
};

#define NR_CPUS_HINT 192

static const struct rhashtable_params sdma_rht_params = {
	.nelem_hint = NR_CPUS_HINT,
	.head_offset = offsetof(struct sdma_rht_node, node),
	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
	.key_len = sizeof_field(struct sdma_rht_node, cpu_id),
	.max_size = NR_CPUS,
	.min_size = 8,
	.automatic_shrinking = true,
};

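/**
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * A user-defined sdma engine affinity setting is honored when applicable,
 * otherwise the system default sdma engine mapping is used.
 */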
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	struct sdma_rht_node *rht_node;
	struct sdma_engine *sde = NULL;
	unsigned long cpu_id;

	/*
	 * To ensure that always the same sdma engine(s) will be
	 * selected make sure the process is pinned to this CPU only.
	 */
	if (current->nr_cpus_allowed != 1)
		goto out;

	cpu_id = smp_processor_id();
	rcu_read_lock();
	rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
				     sdma_rht_params);

	if (rht_node && rht_node->map[vl]) {
		struct sdma_rht_map_elem *map = rht_node->map[vl];

		sde = map->sde[selector & map->mask];
	}
	rcu_read_unlock();

	if (sde)
		return sde;

out:
	return sdma_select_engine_vl(dd, selector, vl);
}

static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	int i;

	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
		map->sde[map->ctr + i] = map->sde[i];
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
				 struct sdma_engine *sde)
{
	unsigned int i, pow;

	/* only need to check the first ctr entries for a match */
	for (i = 0; i < map->ctr; i++) {
		if (map->sde[i] == sde) {
			memmove(&map->sde[i], &map->sde[i + 1],
				(map->ctr - i - 1) * sizeof(map->sde[0]));
			map->ctr--;
			pow = roundup_pow_of_two(map->ctr ? : 1);
			map->mask = pow - 1;
			sdma_populate_sde_map(map);
			break;
		}
	}
}

/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count)
{
	struct hfi1_devdata *dd = sde->dd;
	cpumask_var_t mask, new_mask;
	unsigned long cpu;
	int ret, vl, sz;
	struct sdma_rht_node *rht_node;

	vl = sdma_engine_get_vl(sde);
	if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
		return -EINVAL;

	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
	if (!ret) {
		free_cpumask_var(mask);
		return -ENOMEM;
	}
	ret = cpulist_parse(buf, mask);
	if (ret)
		goto out_free;

	if (!cpumask_subset(mask, cpu_online_mask)) {
		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
		ret = -EINVAL;
		goto out_free;
	}

	sz = sizeof(struct sdma_rht_map_elem) +
			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));

	mutex_lock(&process_to_sde_mutex);

	for_each_cpu(cpu, mask) {
		/* Check if we have this already mapped */
		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
			cpumask_set_cpu(cpu, new_mask);
			continue;
		}

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (!rht_node) {
			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
			if (!rht_node) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
			if (!rht_node->map[vl]) {
				kfree(rht_node);
				ret = -ENOMEM;
				goto out;
			}
			rht_node->cpu_id = cpu;
			rht_node->map[vl]->mask = 0;
			rht_node->map[vl]->ctr = 1;
			rht_node->map[vl]->sde[0] = sde;

			ret = rhashtable_insert_fast(dd->sdma_rht,
						     &rht_node->node,
						     sdma_rht_params);
			if (ret) {
				kfree(rht_node->map[vl]);
				kfree(rht_node);
				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
					   cpu);
				goto out;
			}

		} else {
			int ctr, pow;

			/* Add new user mappings */
			if (!rht_node->map[vl])
				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);

			if (!rht_node->map[vl]) {
				ret = -ENOMEM;
				goto out;
			}

			rht_node->map[vl]->ctr++;
			ctr = rht_node->map[vl]->ctr;
			rht_node->map[vl]->sde[ctr - 1] = sde;
			pow = roundup_pow_of_two(ctr);
			rht_node->map[vl]->mask = pow - 1;

			/* Populate the sde map table */
			sdma_populate_sde_map(rht_node->map[vl]);
		}
		cpumask_set_cpu(cpu, new_mask);
	}

	/* Clean up old mappings */
	for_each_cpu(cpu, cpu_online_mask) {
		struct sdma_rht_node *rht_node;

		/* Don't cleanup sdes that are set in the new mask */
		if (cpumask_test_cpu(cpu, mask))
			continue;

		rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
						  sdma_rht_params);
		if (rht_node) {
			bool empty = true;
			int i;

			/* Remove mappings for old sde */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
				if (rht_node->map[i])
					sdma_cleanup_sde_map(rht_node->map[i],
							     sde);

			/* Free empty hash table entries */
			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
				if (!rht_node->map[i])
					continue;

				if (rht_node->map[i]->ctr) {
					empty = false;
					break;
				}
			}

			if (empty) {
				ret = rhashtable_remove_fast(dd->sdma_rht,
							     &rht_node->node,
							     sdma_rht_params);
				WARN_ON(ret);

				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
					kfree(rht_node->map[i]);

				kfree(rht_node);
			}
		}
	}

	cpumask_copy(&sde->cpu_mask, new_mask);
out:
	mutex_unlock(&process_to_sde_mutex);
out_free:
	free_cpumask_var(mask);
	free_cpumask_var(new_mask);
	return ret ? : strnlen(buf, PAGE_SIZE);
}

ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	mutex_lock(&process_to_sde_mutex);
	if (cpumask_empty(&sde->cpu_mask))
		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
	else
		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
	mutex_unlock(&process_to_sde_mutex);
	return strnlen(buf, PAGE_SIZE);
}

static void sdma_rht_free(void *ptr, void *arg)
{
	struct sdma_rht_node *rht_node = ptr;
	int i;

	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
		kfree(rht_node->map[i]);

	kfree(rht_node);
}

/**
 * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * This routine dumps the process to sde mappings per cpu
 */
void sdma_seqfile_dump_cpu_list(struct seq_file *s,
				struct hfi1_devdata *dd,
				unsigned long cpuid)
{
	struct sdma_rht_node *rht_node;
	int i, j;

	rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
					  sdma_rht_params);
	if (!rht_node)
		return;

	seq_printf(s, "cpu%3lu: ", cpuid);
	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
		if (!rht_node->map[i] || !rht_node->map[i]->ctr)
			continue;

		seq_printf(s, " vl%d: [", i);

		for (j = 0; j < rht_node->map[i]->ctr; j++) {
			if (!rht_node->map[i]->sde[j])
				continue;

			if (j > 0)
				seq_puts(s, ",");

			seq_printf(s, " sdma%2d",
				   rht_node->map[i]->sde[j]->this_idx);
		}
		seq_puts(s, " ]");
	}

	seq_puts(s, "\n");
}

/*
 * Free the indicated map struct
 */
static void sdma_map_free(struct sdma_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

/*
 * Handle RCU callback
 */
static void sdma_map_rcu_callback(struct rcu_head *list)
{
	struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);

	sdma_map_free(m);
}

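/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to allow for engine splitting across vls; if NULL,
 * a uniform distribution of engines per vl is computed.  The map is a
 * power-of-two sized table of sdma_map_elem entries, published with RCU
 * so that hot-path readers see either the old or the new map.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */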
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	int i, j;
	int extra, sde_per_vl;
	int engine = 0;
	u8 lvl_engines[OPA_MAX_VLS];
	struct sdma_vl_map *oldmap, *newmap;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return 0;

	if (!vl_engines) {
		/* truncate divide */
		sde_per_vl = dd->num_sdma / num_vls;
		/* extras */
		extra = dd->num_sdma % num_vls;
		vl_engines = lvl_engines;
		/* add extras from last vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(
		sizeof(struct sdma_vl_map) +
			roundup_pow_of_two(num_vls) *
			sizeof(struct sdma_map_elem *),
		GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	/* initialize back-map */
	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
		newmap->engine_to_vl[i] = -1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_engine = engine;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_engines[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(
				sizeof(struct sdma_map_elem) +
					sz * sizeof(struct sdma_engine *),
				GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/* assign engines */
			for (j = 0; j < sz; j++) {
				newmap->map[i]->sde[j] =
					&dd->per_sdma[engine];
				if (++engine >= first_engine + vl_engines[i])
					/* wrap back to first engine */
					engine = first_engine;
			}
			/* assign back-map */
			for (j = 0; j < vl_engines[i]; j++)
				newmap->engine_to_vl[first_engine + j] = i;
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		engine = first_engine + vl_engines[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->sde_map_lock);
	oldmap = rcu_dereference_protected(dd->sdma_map,
					   lockdep_is_held(&dd->sde_map_lock));

	/* publish newmap */
	rcu_assign_pointer(dd->sdma_map, newmap);

	spin_unlock_irq(&dd->sde_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, sdma_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	sdma_map_free(newmap);
	return -ENOMEM;
}

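/**
 * sdma_clean - halt the engines and release resources
 * @dd: hfi1_devdata
 * @num_engines: how many engines were allocated
 *
 * This routine unconditionally frees all per-engine and global SDMA
 * resources; it is safe to call on a partially initialized device.
 */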
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
	size_t i;
	struct sdma_engine *sde;

	if (dd->sdma_pad_dma) {
		dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
				  (void *)dd->sdma_pad_dma,
				  dd->sdma_pad_phys);
		dd->sdma_pad_dma = NULL;
		dd->sdma_pad_phys = 0;
	}
	if (dd->sdma_heads_dma) {
		dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
				  (void *)dd->sdma_heads_dma,
				  dd->sdma_heads_phys);
		dd->sdma_heads_dma = NULL;
		dd->sdma_heads_phys = 0;
	}
	for (i = 0; dd->per_sdma && i < num_engines; ++i) {
		sde = &dd->per_sdma[i];

		sde->head_dma = NULL;
		sde->head_phys = 0;

		if (sde->descq) {
			dma_free_coherent(
				&dd->pcidev->dev,
				sde->descq_cnt * sizeof(u64[2]),
				sde->descq,
				sde->descq_phys
			);
			sde->descq = NULL;
			sde->descq_phys = 0;
		}
		kvfree(sde->tx_ring);
		sde->tx_ring = NULL;
	}
	spin_lock_irq(&dd->sde_map_lock);
	sdma_map_free(rcu_access_pointer(dd->sdma_map));
	RCU_INIT_POINTER(dd->sdma_map, NULL);
	spin_unlock_irq(&dd->sde_map_lock);
	synchronize_rcu();
	kfree(dd->per_sdma);
	dd->per_sdma = NULL;

	if (dd->sdma_rht) {
		rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
		kfree(dd->sdma_rht);
		dd->sdma_rht = NULL;
	}
}

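/**
 * sdma_init() - called when device probed
 * @dd: hfi1_devdata
 * @port: port number
 *
 * Initializes each sde and its csrs.
 * Interrupts are not required to be enabled.
 *
 * Returns:
 * 0 - success, -errno on failure
 */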
int sdma_init(struct hfi1_devdata *dd, u8 port)
{
	unsigned this_idx;
	struct sdma_engine *sde;
	struct rhashtable *tmp_sdma_rht;
	u16 descq_cnt;
	void *curr_head;
	struct hfi1_pportdata *ppd = dd->pport + port;
	u32 per_sdma_credits;
	uint idle_cnt = sdma_idle_cnt;
	size_t num_engines = chip_sdma_engines(dd);
	int ret = -ENOMEM;

	if (!HFI1_CAP_IS_KSET(SDMA)) {
		HFI1_CAP_CLEAR(SDMA_AHG);
		return 0;
	}
	if (mod_num_sdma &&
	    /* can't exceed chip support */
	    mod_num_sdma <= chip_sdma_engines(dd) &&
	    /* count must be >= vls */
	    mod_num_sdma >= num_vls)
		num_engines = mod_num_sdma;

	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		    chip_sdma_mem_size(dd));

	per_sdma_credits =
		chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);

	/* set up freeze waitqueue */
	init_waitqueue_head(&dd->sdma_unfreeze_wq);
	atomic_set(&dd->sdma_unfreeze_count, 0);

	descq_cnt = sdma_get_descq_cnt();
	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
		    num_engines, descq_cnt);

	/* alloc memory for array of send engines */
	dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
				    GFP_KERNEL, dd->node);
	if (!dd->per_sdma)
		return ret;

	idle_cnt = ns_to_cclock(dd, idle_cnt);
	if (idle_cnt)
		dd->default_desc1 =
			SDMA_DESC1_HEAD_TO_HOST_FLAG;
	else
		dd->default_desc1 =
			SDMA_DESC1_INT_REQ_FLAG;

	if (!sdma_desct_intr)
		sdma_desct_intr = SDMA_DESC_INTR;

	/* Allocate memory for SendDMA descriptor FIFOs */
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		sde = &dd->per_sdma[this_idx];
		sde->dd = dd;
		sde->ppd = ppd;
		sde->this_idx = this_idx;
		sde->descq_cnt = descq_cnt;
		sde->desc_avail = sdma_descq_freecnt(sde);
		sde->sdma_shift = ilog2(descq_cnt);
		sde->sdma_mask = (1 << sde->sdma_shift) - 1;

		/* create a mask for csr interrupt sources for this engine */
		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
					   this_idx);
		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
						this_idx);
		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
					    this_idx);
		/* combined mask */
		sde->imask = sde->int_mask | sde->progress_mask |
			     sde->idle_mask;

		spin_lock_init(&sde->tail_lock);
		seqlock_init(&sde->head_lock);
		spin_lock_init(&sde->senddmactrl_lock);
		spin_lock_init(&sde->flushlist_lock);
		seqlock_init(&sde->waitlock);
		/* insure there is always a zero bit */
		sde->ahg_bits = 0xfffffffe00000000ULL;

		sdma_set_state(sde, sdma_state_s00_hw_down);

		/* set up reference counting */
		kref_init(&sde->state.kref);
		init_completion(&sde->state.comp);

		INIT_LIST_HEAD(&sde->flushlist);
		INIT_LIST_HEAD(&sde->dmawait);

		sde->tail_csr =
			get_kctxt_csr_addr(dd, this_idx, SD(TAIL));

		tasklet_setup(&sde->sdma_hw_clean_up_task,
			      sdma_hw_clean_up_task);
		tasklet_setup(&sde->sdma_sw_clean_up_task,
			      sdma_sw_clean_up_task);
		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
		INIT_WORK(&sde->flush_worker, sdma_field_flush);

		sde->progress_check_head = 0;

		timer_setup(&sde->err_progress_check_timer,
			    sdma_err_progress_check, 0);

		sde->descq = dma_alloc_coherent(&dd->pcidev->dev,
						descq_cnt * sizeof(u64[2]),
						&sde->descq_phys, GFP_KERNEL);
		if (!sde->descq)
			goto bail;
		sde->tx_ring =
			kvzalloc_node(array_size(descq_cnt,
						 sizeof(struct sdma_txreq *)),
				      GFP_KERNEL, dd->node);
		if (!sde->tx_ring)
			goto bail;
	}

	dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
	/* Allocate memory for DMA of head registers to memory */
	dd->sdma_heads_dma = dma_alloc_coherent(&dd->pcidev->dev,
						dd->sdma_heads_size,
						&dd->sdma_heads_phys,
						GFP_KERNEL);
	if (!dd->sdma_heads_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
		goto bail;
	}

	/* Allocate memory for pad */
	dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
					      &dd->sdma_pad_phys, GFP_KERNEL);
	if (!dd->sdma_pad_dma) {
		dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
		goto bail;
	}

	/* assign each engine to a different cacheline and init registers */
	curr_head = (void *)dd->sdma_heads_dma;
	for (this_idx = 0; this_idx < num_engines; ++this_idx) {
		unsigned long phys_offset;

		sde = &dd->per_sdma[this_idx];

		sde->head_dma = curr_head;
		curr_head += L1_CACHE_BYTES;
		phys_offset = (unsigned long)sde->head_dma -
			      (unsigned long)dd->sdma_heads_dma;
		sde->head_phys = dd->sdma_heads_phys + phys_offset;
		init_sdma_regs(sde, per_sdma_credits, idle_cnt);
	}
	dd->flags |= HFI1_HAS_SEND_DMA;
	dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
	dd->num_sdma = num_engines;
	ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
	if (ret < 0)
		goto bail;

	tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
	if (!tmp_sdma_rht) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
	if (ret < 0) {
		kfree(tmp_sdma_rht);
		goto bail;
	}

	dd->sdma_rht = tmp_sdma_rht;

	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
	return 0;

bail:
	sdma_clean(dd, num_engines);
	return ret;
}

/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */
void sdma_all_running(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* move all engines to running */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e30_go_running);
	}
}

/**
 * sdma_all_idle() - called when the link goes down
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the idle state.
 */
void sdma_all_idle(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* idle all engines */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e70_go_idle);
	}
}

/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine kicks off the state processing for all required sdma
 * engines.  Interrupts need to be working at this point.
 */
void sdma_start(struct hfi1_devdata *dd)
{
	unsigned i;
	struct sdma_engine *sde;

	/* kick off the engines state processing */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e10_go_hw_start);
	}
}

/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */
void sdma_exit(struct hfi1_devdata *dd)
{
	unsigned this_idx;
	struct sdma_engine *sde;

	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
			++this_idx) {
		sde = &dd->per_sdma[this_idx];
		if (!list_empty(&sde->dmawait))
			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
				   sde->this_idx);
		sdma_process_event(sde, sdma_event_e00_go_hw_down);

		del_timer_sync(&sde->err_progress_check_timer);

		/*
		 * This waits for the state machine to exit so it is not
		 * necessary to kill the sdma_sw_clean_up_task to make sure
		 * it is not running.
		 */
		sdma_finalput(&sde->state);
	}
}

/*
 * unmap the indicated descriptor
 */
static inline void sdma_unmap_desc(
	struct hfi1_devdata *dd,
	struct sdma_desc *descp)
{
	switch (sdma_mapping_type(descp)) {
	case SDMA_MAP_SINGLE:
		dma_unmap_single(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	case SDMA_MAP_PAGE:
		dma_unmap_page(
			&dd->pcidev->dev,
			sdma_mapping_addr(descp),
			sdma_mapping_len(descp),
			DMA_TO_DEVICE);
		break;
	}
}

/*
 * return the mode as indicated by the first
 * descriptor in the tx.
 */
static inline u8 ahg_mode(struct sdma_txreq *tx)
{
	return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
		>> SDMA_DESC1_HEADER_MODE_SHIFT;
}

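/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: hfi1_devdata for unmapping
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 */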
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	u16 i;

	if (tx->num_desc) {
		u8 skip = 0, mode = ahg_mode(tx);

		/* unmap first */
		sdma_unmap_desc(dd, &tx->descp[0]);
		/* determine number of AHG descriptors to skip */
		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
		for (i = 1 + skip; i < tx->num_desc; i++)
			sdma_unmap_desc(dd, &tx->descp[i]);
		tx->num_desc = 0;
	}
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;
	/* kmalloc'ed descp */
	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}

static inline u16 sdma_gethead(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	int use_dmahead;
	u16 hwhead;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

retry:
	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
		      (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
	hwhead = use_dmahead ?
		(u16)le64_to_cpu(*sde->head_dma) :
		(u16)read_sde_csr(sde, SD(HEAD));

	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
		u16 cnt;
		u16 swtail;
		u16 swhead;
		int sane;

		swhead = sde->descq_head & sde->sdma_mask;
		/* this code is really bad for cache line trading */
		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		cnt = sde->descq_cnt;

		if (swhead < swtail)
			/* not wrapped */
			sane = (hwhead >= swhead) & (hwhead <= swtail);
		else if (swhead > swtail)
			/* wrapped around */
			sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
				(hwhead <= swtail);
		else
			/* empty */
			sane = (hwhead == swhead);

		if (unlikely(!sane)) {
			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
				   sde->this_idx,
				   use_dmahead ? "dma" : "kreg",
				   hwhead, swhead, swtail, cnt);
			if (use_dmahead) {
				/* try one more time, using csr */
				use_dmahead = 0;
				goto retry;
			}
			/* proceed as if no progress */
			hwhead = swhead;
		}
	}
	return hwhead;
}

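/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with head_lock held.
 */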
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	struct iowait *wait, *nw, *twait;
	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
	uint i, n = 0, seq, tidx = 0;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
		   slashstrip(__FILE__), __LINE__, __func__);
	dd_dev_err(sde->dd, "avail: %u\n", avail);
#endif

	do {
		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			/* at least one item */
			write_seqlock(&sde->waitlock);
			/* Harvest waiters wanting DMA descriptors */
			list_for_each_entry_safe(
					wait,
					nw,
					&sde->dmawait,
					list) {
				u32 num_desc;

				if (!wait->wakeup)
					continue;
				if (n == ARRAY_SIZE(waits))
					break;
				iowait_init_priority(wait);
				num_desc = iowait_get_all_desc(wait);
				if (num_desc > avail)
					break;
				avail -= num_desc;
				/* Find the top-priority wait member */
				if (n) {
					twait = waits[tidx];
					tidx =
					    iowait_priority_update_top(wait,
								       twait,
								       n,
								       tidx);
				}
				list_del_init(&wait->list);
				waits[n++] = wait;
			}
			write_sequnlock(&sde->waitlock);
			break;
		}
	} while (read_seqretry(&sde->waitlock, seq));

	/* Schedule the top-priority entry first */
	if (n)
		waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);

	for (i = 0; i < n; i++)
		if (i != tidx)
			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

/* head_lock must be held */
static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
	struct sdma_txreq *txp = NULL;
	int progress = 0;
	u16 hwhead, swhead;
	int idle_check_done = 0;

	hwhead = sdma_gethead(sde);

	/*
	 * Not all descriptors have corresponding txps, so we have to be
	 * able to skip over descs until we wander into the range of the
	 * next txp on the list.
	 */

retry:
	txp = get_txhead(sde);
	swhead = sde->descq_head & sde->sdma_mask;
	trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
	while (swhead != hwhead) {
		/* advance head, wrap if needed */
		swhead = ++sde->descq_head & sde->sdma_mask;

		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == swhead) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
			/* see if there is another txp */
			txp = get_txhead(sde);
		}
		trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
		progress++;
	}

	/*
	 * The SDMA idle interrupt is not guaranteed to be ordered with
	 * respect to updates to the dma_head location in host memory.
	 * The HEAD register, however, is guaranteed to be ordered with
	 * respect to the idle interrupt, so re-read the HEAD register
	 * when the idle interrupt is seen and work appears outstanding.
	 */
	if ((status & sde->idle_mask) && !idle_check_done) {
		u16 swtail;

		swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
		if (swtail != hwhead) {
			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
			idle_check_done = 1;
			goto retry;
		}
	}

	sde->last_status = status;
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}

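/**
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupt sources.
 */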
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	trace_hfi1_sdma_engine_interrupt(sde, status);
	write_seqlock(&sde->head_lock);
	sdma_set_desc_cnt(sde, sdma_desct_intr);
	if (status & sde->idle_mask)
		sde->idle_int_cnt++;
	else if (status & sde->progress_mask)
		sde->progress_int_cnt++;
	else if (status & sde->int_mask)
		sde->sdma_int_cnt++;
	sdma_make_progress(sde, status);
	write_sequnlock(&sde->head_lock);
}

/**
 * sdma_engine_error() - error handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 */
void sdma_engine_error(struct sdma_engine *sde, u64 status)
{
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
		   sde->this_idx,
		   (unsigned long long)status,
		   sdma_state_names[sde->state.current_state]);
#endif
	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);
	if (status & ALL_SDMA_ENG_HALT_ERRS)
		__sdma_process_event(sde, sdma_event_e60_hw_halted);
	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
		dd_dev_err(sde->dd,
			   "SDMA (%u) engine error: 0x%llx state %s\n",
			   sde->this_idx,
			   (unsigned long long)status,
			   sdma_state_names[sde->state.current_state]);
		dump_sdma_state(sde);
	}
	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
{
	u64 set_senddmactrl = 0;
	u64 clr_senddmactrl = 0;
	unsigned long flags;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
		   sde->this_idx,
		   (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
		   (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
#endif

	if (op & SDMA_SENDCTRL_OP_ENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_INTENABLE)
		set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);

	if (op & SDMA_SENDCTRL_OP_HALT)
		set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
	else
		clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);

	spin_lock_irqsave(&sde->senddmactrl_lock, flags);

	sde->p_senddmactrl |= set_senddmactrl;
	sde->p_senddmactrl &= ~clr_senddmactrl;

	if (op & SDMA_SENDCTRL_OP_CLEANUP)
		write_sde_csr(sde, SD(CTRL),
			      sde->p_senddmactrl |
			      SD(CTRL_SDMA_CLEANUP_SMASK));
	else
		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);

	spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);

#ifdef CONFIG_SDMA_VERBOSITY
	sdma_dumpstate(sde);
#endif
}

static void sdma_setlengen(struct sdma_engine *sde)
{
#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	/*
	 * Set SendDmaLenGen and clear-then-set the MSB of the generation
	 * count to enable generation checking and load the internal
	 * generation counter.
	 */
	write_sde_csr(sde, SD(LEN_GEN),
		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
	write_sde_csr(sde, SD(LEN_GEN),
		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}

static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	/* Commit writes to memory and advance the tail on the chip */
	smp_wmb(); /* see get_txhead() */
	writeq(tail, sde->tail_csr);
}

/*
 * This is called when changing to state s10_hw_start_up_halt_wait as
 * a result of send buffer errors or send DMA descriptor errors.
 */
static void sdma_hw_start_up(struct sdma_engine *sde)
{
	u64 reg;

#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	*sde->head_dma = 0;

	reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
	      SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
}

/*
 * set_sdma_integrity
 *
 * Set the SEND_DMA_CHECK_ENABLE register for send DMA engine 'sde'.
 */
static void set_sdma_integrity(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;

	write_sde_csr(sde, SD(CHECK_ENABLE),
		      hfi1_pkt_base_sdma_integrity(dd));
}

static void init_sdma_regs(
	struct sdma_engine *sde,
	u32 credits,
	uint idle_cnt)
{
	u8 opval, opmask;
#ifdef CONFIG_SDMA_VERBOSITY
	struct hfi1_devdata *dd = sde->dd;

	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
		   sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
#endif

	write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
	sdma_setlengen(sde);
	sdma_update_tail(sde, 0); /* Set SendDmaTail */
	write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
	write_sde_csr(sde, SD(DESC_CNT), 0);
	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
	write_sde_csr(sde, SD(MEMORY),
		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
		      ((u64)(credits * sde->this_idx) <<
		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
	set_sdma_integrity(sde);
	opmask = OPCODE_CHECK_MASK_DISABLED;
	opval = OPCODE_CHECK_VAL_DISABLED;
	write_sde_csr(sde, SD(CHECK_OPCODE),
		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
}

#ifdef CONFIG_SDMA_VERBOSITY

#define sdma_dumpstate_helper0(reg) do { \
		csr = read_csr(sde->dd, reg); \
		dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
	} while (0)

#define sdma_dumpstate_helper(reg) do { \
		csr = read_sde_csr(sde, reg); \
		dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
			   #reg, sde->this_idx, csr); \
	} while (0)

#define sdma_dumpstate_helper2(reg) do { \
		csr = read_csr(sde->dd, reg + (8 * i)); \
		dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
			   #reg, i, csr); \
	} while (0)

void sdma_dumpstate(struct sdma_engine *sde)
{
	u64 csr;
	unsigned i;

	sdma_dumpstate_helper(SD(CTRL));
	sdma_dumpstate_helper(SD(STATUS));
	sdma_dumpstate_helper0(SD(ERR_STATUS));
	sdma_dumpstate_helper0(SD(ERR_MASK));
	sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
	sdma_dumpstate_helper(SD(ENG_ERR_MASK));

	for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
		sdma_dumpstate_helper2(CCE_INT_STATUS);
		sdma_dumpstate_helper2(CCE_INT_MASK);
		sdma_dumpstate_helper2(CCE_INT_BLOCKED);
	}

	sdma_dumpstate_helper(SD(TAIL));
	sdma_dumpstate_helper(SD(HEAD));
	sdma_dumpstate_helper(SD(PRIORITY_THLD));
	sdma_dumpstate_helper(SD(IDLE_CNT));
	sdma_dumpstate_helper(SD(RELOAD_CNT));
	sdma_dumpstate_helper(SD(DESC_CNT));
	sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
	sdma_dumpstate_helper(SD(MEMORY));
	sdma_dumpstate_helper0(SD(ENGINES));
	sdma_dumpstate_helper0(SD(MEM_SIZE));

	sdma_dumpstate_helper(SD(BASE_ADDR));
	sdma_dumpstate_helper(SD(LEN_GEN));
	sdma_dumpstate_helper(SD(HEAD_ADDR));
	sdma_dumpstate_helper(SD(CHECK_ENABLE));
	sdma_dumpstate_helper(SD(CHECK_VL));
	sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
	sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
	sdma_dumpstate_helper(SD(CHECK_SLID));
	sdma_dumpstate_helper(SD(CHECK_OPCODE));
}
#endif

static void dump_sdma_state(struct sdma_engine *sde)
{
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;
	u16 head, tail, cnt;

	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	cnt = sdma_descq_freecnt(sde);

	dd_dev_err(sde->dd,
		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
		   sde->this_idx, head, tail, cnt,
		   !list_empty(&sde->flushlist));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
				'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		dd_dev_err(sde->dd,
			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		dd_dev_err(sde->dd,
			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
			   desc[0], desc[1]);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			dd_dev_err(sde->dd,
				   "\taidx: %u amode: %u alen: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_DWS_SMASK) >>
					SDMA_DESC1_HEADER_DWS_SHIFT));
		head++;
		head &= sde->sdma_mask;
	}
}

#define SDE_FMT \
	"SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"

/**
 * sdma_seqfile_dump_sde() - debugfs dump of sde
 * @s: seq file
 * @sde: send dma engine to dump
 *
 * This routine dumps the sde to the indicated seq file.
 */
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
{
	u16 head, tail;
	struct hw_sdma_desc *descqp;
	u64 desc[2];
	u64 addr;
	u8 gen;
	u16 len;

	head = sde->descq_head & sde->sdma_mask;
	tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
	seq_printf(s, SDE_FMT, sde->this_idx,
		   sde->cpu,
		   sdma_state_name(sde->state.current_state),
		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
		   (unsigned long long)le64_to_cpu(*sde->head_dma),
		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
		   (unsigned long long)sde->last_status,
		   (unsigned long long)sde->ahg_bits,
		   sde->tx_tail,
		   sde->tx_head,
		   sde->descq_tail,
		   sde->descq_head,
		   !list_empty(&sde->flushlist),
		   sde->descq_full_count,
		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));

	/* print info for each entry in the descriptor queue */
	while (head != tail) {
		char flags[6] = { 'x', 'x', 'x', 'x', 0 };

		descqp = &sde->descq[head];
		desc[0] = le64_to_cpu(descqp->qw[0]);
		desc[1] = le64_to_cpu(descqp->qw[1]);
		flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
		flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
				'H' : '-';
		flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
		flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
		addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
			& SDMA_DESC0_PHY_ADDR_MASK;
		gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
			& SDMA_DESC1_GENERATION_MASK;
		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
			& SDMA_DESC0_BYTE_COUNT_MASK;
		seq_printf(s,
			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
			   head, flags, addr, gen, len);
		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
					SDMA_DESC1_HEADER_INDEX_SHIFT),
				   (u8)((desc[1] &
					 SDMA_DESC1_HEADER_MODE_SMASK) >>
					SDMA_DESC1_HEADER_MODE_SHIFT));
		head = (head + 1) & sde->sdma_mask;
	}
}

/*
 * add the generation number into
 * the qw1 and return
 */
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;

	qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
	qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
		<< SDMA_DESC1_GENERATION_SHIFT;
	return qw1;
}

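/*
 * This routine submits the indicated tx
 *
 * Space has already been guaranteed and
 * the tail side of ring is locked.
 *
 * The hardware tail update is done
 * in the caller and that is facilitated
 * by returning the new tail.
 *
 * There is special case logic for ahg
 * to not add the generation number for
 * up to 2 descriptors that follow the
 * first descriptor.
 */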
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	int i;
	u16 tail;
	struct sdma_desc *descp = tx->descp;
	u8 skip = 0, mode = ahg_mode(tx);

	tail = sde->descq_tail & sde->sdma_mask;
	sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
	sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
	trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
				   tail, &sde->descq[tail]);
	tail = ++sde->descq_tail & sde->sdma_mask;
	descp++;
	if (mode > SDMA_AHG_APPLY_UPDATE1)
		skip = mode >> 1;
	for (i = 1; i < tx->num_desc; i++, descp++) {
		u64 qw1;

		sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
		if (skip) {
			/* edits don't have generation */
			qw1 = descp->qw[1];
			skip--;
		} else {
			/* replace generation with real one for non-edits */
			qw1 = add_gen(sde, descp->qw[1]);
		}
		sde->descq[tail].qw[1] = cpu_to_le64(qw1);
		trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
					   tail, &sde->descq[tail]);
		tail = ++sde->descq_tail & sde->sdma_mask;
	}
	tx->next_descq_idx = tail;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
	WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
#endif
	sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
	sde->desc_avail -= tx->num_desc;
	return tail;
}

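/*
 * Check for progress
 */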
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	int ret;

	sde->desc_avail = sdma_descq_freecnt(sde);
	if (tx->num_desc <= sde->desc_avail)
		return -EAGAIN;
	/* pulse the head_lock */
	if (wait && iowait_ioww_to_iow(wait)->sleep) {
		unsigned seq;

		seq = raw_seqcount_begin(
			(const seqcount_t *)&sde->head_lock.seqcount);
		ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
		if (ret == -EAGAIN)
			sde->desc_avail = sdma_descq_freecnt(sde);
	} else {
		ret = -EBUSY;
	}
	return ret;
}

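/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring.  If the ring is full and a wait
 * structure with a sleep callback is supplied, the callback decides how
 * to wait; if the engine is not running the tx is queued to the flushlist.
 *
 * Return: 0 on success; -EINVAL if the txreq is incomplete; -EBUSY if the
 * ring is full and wait is NULL; -ECOMM if the engine is in a bad state.
 */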
2353int sdma_send_txreq(struct sdma_engine *sde,
2354 struct iowait_work *wait,
2355 struct sdma_txreq *tx,
2356 bool pkts_sent)
2357{
2358 int ret = 0;
2359 u16 tail;
2360 unsigned long flags;
2361
2362
	if (unlikely(tx->tlen))
		return -EINVAL;
	tx->wait = iowait_ioww_to_iow(wait);
	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	if (unlikely(!__sdma_running(sde)))
		goto unlock_noconn;
	if (unlikely(tx->num_desc > sde->desc_avail))
		goto nodesc;
	tail = submit_tx(sde, tx);
	if (wait)
		iowait_sdma_inc(iowait_ioww_to_iow(wait));
	sdma_update_tail(sde, tail);
unlock:
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	return ret;
unlock_noconn:
	if (wait)
		iowait_sdma_inc(iowait_ioww_to_iow(wait));
	tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
	spin_lock(&sde->flushlist_lock);
	list_add_tail(&tx->list, &sde->flushlist);
	spin_unlock(&sde->flushlist_lock);
	iowait_inc_wait_count(wait, tx->num_desc);
	queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
	ret = -ECOMM;
	goto unlock;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, pkts_sent);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto unlock;
}
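
/**
 * sdma_send_txlist() - submit a list of tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: pointer to a u16 which, after return, will contain the
 *             total number of sdma_txreqs removed from the tx_list
 *
 * The call submits the list into the ring.
 *
 * If the iowait structure is non-NULL and not equal to the iowait list
 * the unprocessed part of the list is appended to the list in wait.
 *
 * In all cases, the tx_list will be updated so the head of the tx_list is
 * the list of descriptors that have yet to be transmitted.
 *
 * The intent of this call is to provide a more efficient
 * way of submitting multiple packets to SDMA while holding the tail
 * side locking.
 *
 * Return:
 * 0 - Success,
 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */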
int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
		     struct list_head *tx_list, u16 *count_out)
{
	struct sdma_txreq *tx, *tx_next;
	int ret = 0;
	unsigned long flags;
	u16 tail = INVALID_TAIL;
	u32 submit_count = 0, flush_count = 0, total_count;

	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = iowait_ioww_to_iow(wait);
		if (unlikely(!__sdma_running(sde)))
			goto unlock_noconn;
		if (unlikely(tx->num_desc > sde->desc_avail))
			goto nodesc;
		if (unlikely(tx->tlen)) {
			ret = -EINVAL;
			goto update_tail;
		}
		list_del_init(&tx->list);
		tail = submit_tx(sde, tx);
		submit_count++;
		if (tail != INVALID_TAIL &&
		    (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
			sdma_update_tail(sde, tail);
			tail = INVALID_TAIL;
		}
	}
update_tail:
	total_count = submit_count + flush_count;
	if (wait) {
		iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
		iowait_starve_clear(submit_count > 0,
				    iowait_ioww_to_iow(wait));
	}
	if (tail != INVALID_TAIL)
		sdma_update_tail(sde, tail);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	*count_out = total_count;
	return ret;
unlock_noconn:
	spin_lock(&sde->flushlist_lock);
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = iowait_ioww_to_iow(wait);
		list_del_init(&tx->list);
		tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
		tx->sn = sde->tail_sn++;
		trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
		list_add_tail(&tx->list, &sde->flushlist);
		flush_count++;
		iowait_inc_wait_count(wait, tx->num_desc);
	}
	spin_unlock(&sde->flushlist_lock);
	queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
	ret = -ECOMM;
	goto update_tail;
nodesc:
	ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto update_tail;
}

static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
{
	unsigned long flags;

	spin_lock_irqsave(&sde->tail_lock, flags);
	write_seqlock(&sde->head_lock);

	__sdma_process_event(sde, event);

	if (sde->state.current_state == sdma_state_s99_running)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));

	write_sequnlock(&sde->head_lock);
	spin_unlock_irqrestore(&sde->tail_lock, flags);
}

static void __sdma_process_event(struct sdma_engine *sde,
				 enum sdma_events event)
{
	struct sdma_state *ss = &sde->state;
	int need_progress = 0;
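
	/* CONFIG SDMA temporary */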
#ifdef CONFIG_SDMA_VERBOSITY
	dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
		   sdma_state_names[ss->current_state],
		   sdma_event_names[event]);
#endif

	switch (ss->current_state) {
	case sdma_state_s00_hw_down:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			break;
		case sdma_event_e30_go_running:
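			/*
			 * If down, but running requested (usually result
			 * of link up, then we need to start up.
			 * This can happen when hw down is requested while
			 * bringing the link up with traffic active on
			 * an SDMA queue.
			 */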
			ss->go_s99_running = 1;
			fallthrough;	/* and start dma engine */
		case sdma_event_e10_go_hw_start:
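			/* This reference means the state machine is started */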
			sdma_get(&sde->state);
			sdma_set_state(sde,
				       sdma_state_s10_hw_start_up_halt_wait);
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s10_hw_start_up_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde,
				       sdma_state_s15_hw_start_up_clean_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s15_hw_start_up_clean_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s20_idle:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			sdma_sw_tear_down(sde);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			sdma_set_state(sde, sdma_state_s99_running);
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			break;
		case sdma_event_e85_link_down:
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s30_sw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
			sdma_start_hw_clean_up(sde);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s40_hw_clean_up_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s50_hw_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s60_idle_halt_wait:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s80_hw_freeze:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s82_freeze_sw_clean:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			ss->go_s99_running = 1;
			break;
		case sdma_event_e40_sw_cleaned:
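			/* notify caller this engine is done cleaning */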
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			break;
		case sdma_event_e70_go_idle:
			ss->go_s99_running = 0;
			break;
		case sdma_event_e80_hw_freeze:
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			sdma_hw_start_up(sde);
			sdma_set_state(sde, ss->go_s99_running ?
				       sdma_state_s99_running :
				       sdma_state_s20_idle);
			break;
		case sdma_event_e85_link_down:
			break;
		case sdma_event_e90_sw_halted:
			break;
		}
		break;

	case sdma_state_s99_running:
		switch (event) {
		case sdma_event_e00_go_hw_down:
			sdma_set_state(sde, sdma_state_s00_hw_down);
			tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
			break;
		case sdma_event_e10_go_hw_start:
			break;
		case sdma_event_e15_hw_halt_done:
			break;
		case sdma_event_e25_hw_clean_up_done:
			break;
		case sdma_event_e30_go_running:
			break;
		case sdma_event_e40_sw_cleaned:
			break;
		case sdma_event_e50_hw_cleaned:
			break;
		case sdma_event_e60_hw_halted:
			need_progress = 1;
			sdma_err_progress_check_schedule(sde);
			fallthrough;
		case sdma_event_e90_sw_halted:
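			/*
			 * SW initiated halt does not perform engines
			 * progress check
			 */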
			sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
			schedule_work(&sde->err_halt_worker);
			break;
		case sdma_event_e70_go_idle:
			sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
			break;
		case sdma_event_e85_link_down:
			ss->go_s99_running = 0;
			fallthrough;
		case sdma_event_e80_hw_freeze:
			sdma_set_state(sde, sdma_state_s80_hw_freeze);
			atomic_dec(&sde->dd->sdma_unfreeze_count);
			wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
			break;
		case sdma_event_e81_hw_frozen:
			break;
		case sdma_event_e82_hw_unfreeze:
			break;
		}
		break;
	}

	ss->last_event = event;
	if (need_progress)
		sdma_make_progress(sde, 0);
}
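
/*
 * _extend_sdma_tx_descs() - helper to extend txreq
 *
 * This is called once the initial nominal allocation
 * of descriptors in the sdma_txreq is exhausted.
 *
 * The code will bump the allocation up to the max
 * of MAX_DESC (64) descriptors. There doesn't seem
 * much point in an interim step. The last descriptor
 * is reserved for coalesce buffer in order to support
 * cases where input packet has >MAX_DESC iovecs.
 */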
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int i;
	struct sdma_desc *descp;
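
	/* Handle last descriptor */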
	if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
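		/* if tlen is 0, it is for padding, release last descriptor */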
		if (!tx->tlen) {
			tx->desc_limit = MAX_DESC;
		} else if (!tx->coalesce_buf) {
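			/* allocate coalesce buffer with space for padding */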
			tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
						   GFP_ATOMIC);
			if (!tx->coalesce_buf)
				goto enomem;
			tx->coalesce_idx = 0;
		}
		return 0;
	}

	if (unlikely(tx->num_desc == MAX_DESC))
		goto enomem;

	descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC);
	if (!descp)
		goto enomem;
	tx->descp = descp;
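
	/* reserve last descriptor for coalescing */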
	tx->desc_limit = MAX_DESC - 1;
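	/* copy ones already built */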
	for (i = 0; i < tx->num_desc; i++)
		tx->descp[i] = tx->descs[i];
	return 0;
enomem:
	__sdma_txclean(dd, tx);
	return -ENOMEM;
}
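
/**
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * This function calls _extend_sdma_tx_descs to extend or allocate
 * coalesce buffer. If there is an allocated coalesce buffer, it will
 * copy the input packet data into the coalesce buffer. It also adds
 * coalesce buffer descriptor once when whole packet is received.
 *
 * Return:
 * <0 - error
 * 0 - coalescing, don't populate descriptor
 * 1 - continue with populating descriptor
 */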
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len)
{
	int pad_len, rval;
	dma_addr_t addr;

	rval = _extend_sdma_tx_descs(dd, tx);
	if (rval) {
		__sdma_txclean(dd, tx);
		return rval;
	}
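
	/* If coalesce buffer is allocated, copy data into it */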
	if (tx->coalesce_buf) {
		if (type == SDMA_MAP_NONE) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		if (type == SDMA_MAP_PAGE) {
			kvaddr = kmap_local_page(page);
			kvaddr += offset;
		} else if (WARN_ON(!kvaddr)) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
		tx->coalesce_idx += len;
		if (type == SDMA_MAP_PAGE)
			kunmap_local(kvaddr);
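
		/* If there is more data, return */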
		if (tx->tlen - tx->coalesce_idx)
			return 0;
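
		/* Whole packet is received; add any padding */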
		pad_len = tx->packet_len & (sizeof(u32) - 1);
		if (pad_len) {
			pad_len = sizeof(u32) - pad_len;
			memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
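			/* padding is taken care of for coalescing case */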
			tx->packet_len += pad_len;
			tx->tlen += pad_len;
		}
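
		/* dma map the coalesce buffer */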
		addr = dma_map_single(&dd->pcidev->dev,
				      tx->coalesce_buf,
				      tx->tlen,
				      DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
			__sdma_txclean(dd, tx);
			return -ENOSPC;
		}
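
		/* Add descriptor for coalesce buffer */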
		tx->desc_limit = MAX_DESC;
		return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
					 addr, tx->tlen);
	}

	return 1;
}
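
/* Update sdes when the lmc changes */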
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
{
	struct sdma_engine *sde;
	int i;
	u64 sreg;

	sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
		SD(CHECK_SLID_MASK_SHIFT)) |
	       (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
		SD(CHECK_SLID_VALUE_SHIFT));

	for (i = 0; i < dd->num_sdma; i++) {
		hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
			  i, (u32)sreg);
		sde = &dd->per_sdma[i];
		write_sde_csr(sde, SD(CHECK_SLID), sreg);
	}
}
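
/* tx not dword sized - pad */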
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int rval = 0;

	tx->num_desc++;
	if ((unlikely(tx->num_desc == tx->desc_limit))) {
		rval = _extend_sdma_tx_descs(dd, tx);
		if (rval) {
			__sdma_txclean(dd, tx);
			return rval;
		}
	}
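
	/* finish the one just added */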
	make_tx_sdma_desc(
		tx,
		SDMA_MAP_NONE,
		dd->sdma_pad_phys,
		sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
	_sdma_close_tx(dd, tx);
	return rval;
}
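
/*
 * Add ahg to the sdma_txreq
 *
 * The logic will consume up to 3
 * descriptors at the beginning of
 * sdma_txreq.
 */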
void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen)
{
	u32 i, shift = 0, desc = 0;
	u8 mode;

	WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);

	if (num_ahg == 1)
		mode = SDMA_AHG_APPLY_UPDATE1;
	else if (num_ahg <= 5)
		mode = SDMA_AHG_APPLY_UPDATE2;
	else
		mode = SDMA_AHG_APPLY_UPDATE3;
	tx->num_desc++;
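	/* initialize to consumed descriptors to zero */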
	switch (mode) {
	case SDMA_AHG_APPLY_UPDATE3:
		tx->num_desc++;
		tx->descs[2].qw[0] = 0;
		tx->descs[2].qw[1] = 0;
		fallthrough;
	case SDMA_AHG_APPLY_UPDATE2:
		tx->num_desc++;
		tx->descs[1].qw[0] = 0;
		tx->descs[1].qw[1] = 0;
		break;
	}
	ahg_hlen >>= 2;
	tx->descs[0].qw[1] |=
		(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
			<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
		(((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
			<< SDMA_DESC1_HEADER_DWS_SHIFT) |
		(((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
			<< SDMA_DESC1_HEADER_MODE_SHIFT) |
		(((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
			<< SDMA_DESC1_HEADER_UPDATE1_SHIFT);
	for (i = 0; i < (num_ahg - 1); i++) {
		if (!shift && !(i & 2))
			desc++;
		tx->descs[desc].qw[!!(i & 2)] |=
			(((u64)ahg[i + 1])
				<< shift);
		shift = (shift + 32) & 63;
	}
}
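
/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return:
 * 0-31 when successful, -EINVAL for a NULL engine,
 * -ENOSPC if an entry is not available
 */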
int sdma_ahg_alloc(struct sdma_engine *sde)
{
	int nr;
	int oldbit;

	if (!sde) {
		trace_hfi1_ahg_allocate(sde, -EINVAL);
		return -EINVAL;
	}
	while (1) {
		nr = ffz(READ_ONCE(sde->ahg_bits));
		if (nr > 31) {
			trace_hfi1_ahg_allocate(sde, -ENOSPC);
			return -ENOSPC;
		}
		oldbit = test_and_set_bit(nr, &sde->ahg_bits);
		if (!oldbit)
			break;
		cpu_relax();
	}
	trace_hfi1_ahg_allocate(sde, nr);
	return nr;
}
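
/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine to return AHG entry
 * @ahg_index: index to free
 *
 * This routine frees the indicated AHG entry.
 */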
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
{
	if (!sde)
		return;
	trace_hfi1_ahg_deallocate(sde, ahg_index);
	if (ahg_index < 0 || ahg_index > 31)
		return;
	clear_bit(ahg_index, &sde->ahg_bits);
}
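
/*
 * SPC freeze handling for SDMA engines.  Called when the driver knows
 * the SPC is going into a freeze but before the freeze is fully
 * settled.  Generally an error interrupt.
 *
 * This event will pull the engine out of running so no more entries can be
 * added to the engine's queue.
 */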
void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
{
	int i;
	enum sdma_events event = link_down ? sdma_event_e85_link_down :
					     sdma_event_e80_hw_freeze;
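
	/* set up the wait but do not wait here */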
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
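
	/* tell all engines to stop running and wait */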
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], event);
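
	/* sdma_freeze() will wait for all engines to have stopped */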
}
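
/*
 * SPC freeze handling for SDMA engines.  Called after the SPC is fully
 * frozen.
 */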
void sdma_freeze(struct hfi1_devdata *dd)
{
	int i;
	int ret;
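
	/*
	 * Make sure all engines have moved out of the running state before
	 * continuing.
	 */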
	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <=
				       0);
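	/* interrupted or count is negative, then unloading - just exit */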
	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
		return;
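
	/* set up the count for the next wait */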
	atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
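
	/* tell all engines that the SPC is frozen, they can start cleaning */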
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);
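
	/*
	 * Wait for everyone to finish software clean before exiting.  The
	 * software clean will read engine CSRs, so must be completed before
	 * the next step, which will clear the engine CSRs.
	 */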
	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
				       atomic_read(&dd->sdma_unfreeze_count) <= 0);
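	/* no need to check results - done no matter what */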
}
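
/*
 * SPC unfreeze handling for SDMA engines.  Called after the SPC is fully
 * unfrozen.
 */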
void sdma_unfreeze(struct hfi1_devdata *dd)
{
	int i;
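
	/* tell all engines start freeze clean up */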
	for (i = 0; i < dd->num_sdma; i++)
		sdma_process_event(&dd->per_sdma[i],
				   sdma_event_e82_hw_unfreeze);
}
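
/**
 * _sdma_engine_progress_schedule() - schedule progress on engine
 * @sde: sdma_engine to schedule progress
 */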
void _sdma_engine_progress_schedule(
	struct sdma_engine *sde)
{
	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
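	/* assume we have selected a good cpu */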
	write_csr(sde->dd,
		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
		  sde->progress_mask);
}