6#include <linux/delay.h>
7#include "hfi.h"
8#include "qp.h"
9#include "trace.h"
10
11#define SC(name) SEND_CTXT_##name
12
13
14
15static void sc_wait_for_packet_egress(struct send_context *sc, int pause);
16
17
18
19
20
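/*
 * Set the CM reset bit and wait for it to clear.  Use the provided
 * sendctrl register.  This routine assumes the caller is holding
 * the sendctrl lock.
 */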
21void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
22{
23 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK);
24 while (1) {
25 udelay(1);
26 sendctrl = read_csr(dd, SEND_CTRL);
27 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0)
28 break;
29 }
30}
31
32
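/* global control of PIO send */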
33void pio_send_control(struct hfi1_devdata *dd, int op)
34{
35 u64 reg, mask;
36 unsigned long flags;
	int write = 1;	/* write sendctrl back */
	int flush = 0;	/* re-read sendctrl to make sure it is flushed */
39 int i;
40
41 spin_lock_irqsave(&dd->sendctrl_lock, flags);
42
43 reg = read_csr(dd, SEND_CTRL);
44 switch (op) {
45 case PSC_GLOBAL_ENABLE:
46 reg |= SEND_CTRL_SEND_ENABLE_SMASK;
47 fallthrough;
48 case PSC_DATA_VL_ENABLE:
49 mask = 0;
50 for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
51 if (!dd->vld[i].mtu)
52 mask |= BIT_ULL(i);
53
54 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
55 SEND_CTRL_UNSUPPORTED_VL_SHIFT;
56 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
57 break;
58 case PSC_GLOBAL_DISABLE:
59 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK;
60 break;
61 case PSC_GLOBAL_VLARB_ENABLE:
62 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
63 break;
64 case PSC_GLOBAL_VLARB_DISABLE:
65 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
66 break;
67 case PSC_CM_RESET:
68 __cm_reset(dd, reg);
		write = 0; /* CSR already written (and flushed) */
70 break;
71 case PSC_DATA_VL_DISABLE:
72 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK;
73 flush = 1;
74 break;
75 default:
76 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op);
77 break;
78 }
79
80 if (write) {
81 write_csr(dd, SEND_CTRL, reg);
82 if (flush)
83 (void)read_csr(dd, SEND_CTRL);
84 }
85
86 spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
87}
88
89
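/* number of send context memory pools */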
90#define NUM_SC_POOLS 2
91
92
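/* Send Context Size (SCS) wildcards */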
93#define SCS_POOL_0 -1
94#define SCS_POOL_1 -2
95
96
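/* Send Context Count (SCC) wildcards */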
97#define SCC_PER_VL -1
98#define SCC_PER_CPU -2
99#define SCC_PER_KRCVQ -3
100
101
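/* Send Context Size (SCS) constants */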
102#define SCS_ACK_CREDITS 32
#define SCS_VL15_CREDITS 102	/* 3 pkts of 2048B data + 128B header */
104
105#define PIO_THRESHOLD_CEILING 4096
106
107#define PIO_WAIT_BATCH_SIZE 5
108
109
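/* default send context sizes */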
110static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
111 [SC_KERNEL] = { .size = SCS_POOL_0,
112 .count = SCC_PER_VL },
113 [SC_ACK] = { .size = SCS_ACK_CREDITS,
114 .count = SCC_PER_KRCVQ },
115 [SC_USER] = { .size = SCS_POOL_0,
116 .count = SCC_PER_CPU },
117 [SC_VL15] = { .size = SCS_VL15_CREDITS,
118 .count = 1 },
119
120};
121
122
123struct mem_pool_config {
	int centipercent;	/* % of memory, in 100th's of 1% */
	int absolute_blocks;	/* absolute block count */
126};
127
128
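/* default memory pool configuration: 100% in pool 0 */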
129static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
130
131 { 10000, -1 },
132 { 0, -1 },
133};
134
135
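/* memory pool information, used when calculating the pool sizes */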
136struct mem_pool_info {
	int centipercent;	/*
				 * 100th of 1% of memory to use, -1 if blocks
				 * already set
				 */
	int count;		/* count of contexts in the pool */
	int blocks;		/* block size of the pool */
	int size;		/* context size, in blocks */
144};
145
146
147
148
149
150
151
152
153
154
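/*
 * Convert a pool wildcard to a valid pool index.  The wildcards start
 * at -1 and increase negatively: -1 maps to pool 0, -2 to pool 1, etc.
 *
 * Return -1 on non-wildcard input, otherwise convert to a pool number.
 */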
155static int wildcard_to_pool(int wc)
156{
157 if (wc >= 0)
158 return -1;
159 return -wc - 1;
160}
161
162static const char *sc_type_names[SC_MAX] = {
163 "kernel",
164 "ack",
165 "user",
166 "vl15"
167};
168
169static const char *sc_type_name(int index)
170{
171 if (index < 0 || index >= SC_MAX)
172 return "unknown";
173 return sc_type_names[index];
174}
175
176
177
178
179
180
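/*
 * Read the send context memory pool configuration and send context
 * configuration.  Sanity check both.
 *
 * Returns the number of send contexts used on success, -errno on failure.
 */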
181int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
182{
183 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
184 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1;
185 int total_contexts = 0;
186 int fixed_blocks;
187 int pool_blocks;
188 int used_blocks;
189 int cp_total;
190 int ab_total;
191 int extra;
192 int i;
193
194
195
196
197
198
199
200
201
202
203 if (HFI1_CAP_IS_KSET(SDMA)) {
204 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
205 piothreshold : PIO_THRESHOLD_CEILING;
206 sc_config_sizes[SC_KERNEL].size =
207 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
208 }
209
210
211
212
213
214
215
216
217 cp_total = 0;
218 ab_total = 0;
219 for (i = 0; i < NUM_SC_POOLS; i++) {
220 int cp = sc_mem_pool_config[i].centipercent;
221 int ab = sc_mem_pool_config[i].absolute_blocks;
222
223
224
225
226
227 if (cp >= 0) {
228 cp_total += cp;
229 } else if (ab >= 0) {
230 ab_total += ab;
231 } else {
232 dd_dev_err(
233 dd,
234 "Send context memory pool %d: both the block count and centipercent are invalid\n",
235 i);
236 return -EINVAL;
237 }
238
239 mem_pool_info[i].centipercent = cp;
240 mem_pool_info[i].blocks = ab;
241 }
242
243
244 if (cp_total != 0 && ab_total != 0) {
245 dd_dev_err(
246 dd,
247 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n");
248 return -EINVAL;
249 }
250
251
252 if (cp_total != 0 && cp_total != 10000) {
253 dd_dev_err(
254 dd,
255 "Send context memory pool centipercent is %d, expecting 10000\n",
256 cp_total);
257 return -EINVAL;
258 }
259
260
261 if (ab_total > total_blocks) {
262 dd_dev_err(
263 dd,
264 "Send context memory pool absolute block count %d is larger than the memory size %d\n",
265 ab_total, total_blocks);
266 return -EINVAL;
267 }
268
269
270
271
272
273
274
275
276 fixed_blocks = 0;
277 for (i = 0; i < SC_MAX; i++) {
278 int count = sc_config_sizes[i].count;
279 int size = sc_config_sizes[i].size;
280 int pool;
281
282
283
284
285
286
287
288 if (i == SC_ACK) {
289 count = dd->n_krcv_queues;
290 } else if (i == SC_KERNEL) {
291 count = INIT_SC_PER_VL * num_vls;
292 } else if (count == SCC_PER_CPU) {
293 count = dd->num_rcv_contexts - dd->n_krcv_queues;
294 } else if (count < 0) {
295 dd_dev_err(
296 dd,
297 "%s send context invalid count wildcard %d\n",
298 sc_type_name(i), count);
299 return -EINVAL;
300 }
301 if (total_contexts + count > chip_send_contexts(dd))
302 count = chip_send_contexts(dd) - total_contexts;
303
304 total_contexts += count;
305
306
307
308
309
310
311
312 pool = wildcard_to_pool(size);
313 if (pool == -1) {
314 fixed_blocks += size * count;
315 } else if (pool < NUM_SC_POOLS) {
316 mem_pool_info[pool].count += count;
317 } else {
318 dd_dev_err(
319 dd,
320 "%s send context invalid pool wildcard %d\n",
321 sc_type_name(i), size);
322 return -EINVAL;
323 }
324
325 dd->sc_sizes[i].count = count;
326 dd->sc_sizes[i].size = size;
327 }
328 if (fixed_blocks > total_blocks) {
329 dd_dev_err(
330 dd,
331 "Send context fixed block count, %u, larger than total block count %u\n",
332 fixed_blocks, total_blocks);
333 return -EINVAL;
334 }
335
336
337 pool_blocks = total_blocks - fixed_blocks;
338 if (ab_total > pool_blocks) {
339 dd_dev_err(
340 dd,
341 "Send context fixed pool sizes, %u, larger than pool block count %u\n",
342 ab_total, pool_blocks);
343 return -EINVAL;
344 }
345
346 pool_blocks -= ab_total;
347
348 for (i = 0; i < NUM_SC_POOLS; i++) {
349 struct mem_pool_info *pi = &mem_pool_info[i];
350
351
352 if (pi->centipercent >= 0)
353 pi->blocks = (pool_blocks * pi->centipercent) / 10000;
354
355 if (pi->blocks == 0 && pi->count != 0) {
356 dd_dev_err(
357 dd,
358 "Send context memory pool %d has %u contexts, but no blocks\n",
359 i, pi->count);
360 return -EINVAL;
361 }
362 if (pi->count == 0) {
363
364 if (pi->blocks != 0)
365 dd_dev_err(
366 dd,
367 "Send context memory pool %d has %u blocks, but zero contexts\n",
368 i, pi->blocks);
369 pi->size = 0;
370 } else {
371 pi->size = pi->blocks / pi->count;
372 }
373 }
374
375
376 used_blocks = 0;
377 for (i = 0; i < SC_MAX; i++) {
378 if (dd->sc_sizes[i].size < 0) {
379 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size);
380
381 WARN_ON_ONCE(pool >= NUM_SC_POOLS);
382 dd->sc_sizes[i].size = mem_pool_info[pool].size;
383 }
384
385#define PIO_MAX_BLOCKS 1024
386 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS)
387 dd->sc_sizes[i].size = PIO_MAX_BLOCKS;
388
389
390 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count;
391 }
392 extra = total_blocks - used_blocks;
393 if (extra != 0)
394 dd_dev_info(dd, "unused send context blocks: %d\n", extra);
395
396 return total_contexts;
397}
398
399int init_send_contexts(struct hfi1_devdata *dd)
400{
401 u16 base;
402 int ret, i, j, context;
403
404 ret = init_credit_return(dd);
405 if (ret)
406 return ret;
407
408 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8),
409 GFP_KERNEL);
410 dd->send_contexts = kcalloc(dd->num_send_contexts,
411 sizeof(struct send_context_info),
412 GFP_KERNEL);
413 if (!dd->send_contexts || !dd->hw_to_sw) {
414 kfree(dd->hw_to_sw);
415 kfree(dd->send_contexts);
416 free_credit_return(dd);
417 return -ENOMEM;
418 }
419
420
421 for (i = 0; i < TXE_NUM_CONTEXTS; i++)
422 dd->hw_to_sw[i] = INVALID_SCI;
423
424
425
426
427
428 context = 0;
429 base = 1;
430 for (i = 0; i < SC_MAX; i++) {
431 struct sc_config_sizes *scs = &dd->sc_sizes[i];
432
433 for (j = 0; j < scs->count; j++) {
434 struct send_context_info *sci =
435 &dd->send_contexts[context];
436 sci->type = i;
437 sci->base = base;
438 sci->credits = scs->size;
439
440 context++;
441 base += scs->size;
442 }
443 }
444
445 return 0;
446}
447
448
449
450
451
452
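/*
 * Allocate a software index and hardware context of the given type.
 *
 * Must be called with dd->sc_lock held.
 */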
453static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
454 u32 *hw_context)
455{
456 struct send_context_info *sci;
457 u32 index;
458 u32 context;
459
460 for (index = 0, sci = &dd->send_contexts[0];
461 index < dd->num_send_contexts; index++, sci++) {
462 if (sci->type == type && sci->allocated == 0) {
463 sci->allocated = 1;
464
465 context = chip_send_contexts(dd) - index - 1;
466 dd->hw_to_sw[context] = index;
467 *sw_index = index;
468 *hw_context = context;
469 return 0;
470 }
471 }
472 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type);
473 return -ENOSPC;
474}
475
476
477
478
479
480
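/*
 * Free the send context given by its software index.
 *
 * Must be called with dd->sc_lock held.
 */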
481static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
482{
483 struct send_context_info *sci;
484
485 sci = &dd->send_contexts[sw_index];
486 if (!sci->allocated) {
487 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
488 __func__, sw_index, hw_context);
489 }
490 sci->allocated = 0;
491 dd->hw_to_sw[hw_context] = INVALID_SCI;
492}
493
494
495static inline u32 group_context(u32 context, u32 group)
496{
497 return (context >> group) << group;
498}
499
500
501static inline u32 group_size(u32 group)
502{
503 return 1 << group;
504}
505
506
507
508
509
510
511
512
513
514
515
516
517
518
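/*
 * Obtain the credit return addresses, kernel virtual and bus, for the
 * given sc.  Each send context looks at its own relative slot within the
 * group's credit_return structure, and every context in a group programs
 * its credit return address CSR with the address of the group's first
 * context.
 */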
519static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
520{
521 u32 gc = group_context(sc->hw_context, sc->group);
522 u32 index = sc->hw_context & 0x7;
523
524 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
525 *dma = (unsigned long)
526 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
527}
528
529
530
531
532
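/*
 * Work queue function triggered in error interrupt routine for
 * halted contexts.
 */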
533static void sc_halted(struct work_struct *work)
534{
535 struct send_context *sc;
536
537 sc = container_of(work, struct send_context, halt_work);
538 sc_restart(sc);
539}
540
541
542
543
544
545
546
547
548
549
550
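/*
 * Calculate PIO block threshold for this send context using the given MTU.
 * Trigger a return when one MTU plus optional header of credits remain.
 */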
551u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
552{
553 u32 release_credits;
554 u32 threshold;
555
556
557 mtu += hdrqentsize << 2;
558 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE);
559
560
561 if (sc->credits <= release_credits)
562 threshold = 1;
563 else
564 threshold = sc->credits - release_credits;
565
566 return threshold;
567}
568
569
570
571
572
573
574
575
576u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
577{
578 return (sc->credits * percent) / 100;
579}
580
581
582
583
584void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
585{
586 unsigned long flags;
587 u32 old_threshold;
588 int force_return = 0;
589
590 spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
591
592 old_threshold = (sc->credit_ctrl >>
593 SC(CREDIT_CTRL_THRESHOLD_SHIFT))
594 & SC(CREDIT_CTRL_THRESHOLD_MASK);
595
596 if (new_threshold != old_threshold) {
597 sc->credit_ctrl =
598 (sc->credit_ctrl
599 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK))
600 | ((new_threshold
601 & SC(CREDIT_CTRL_THRESHOLD_MASK))
602 << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
603 write_kctxt_csr(sc->dd, sc->hw_context,
604 SC(CREDIT_CTRL), sc->credit_ctrl);
605
606
607 force_return = 1;
608 }
609
610 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
611
612 if (force_return)
613 sc_return_credits(sc);
614}
615
616
617
618
619
620
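/*
 * set_pio_integrity
 *
 * Set the CHECK_ENABLE register for the send context 'sc'.
 */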
621void set_pio_integrity(struct send_context *sc)
622{
623 struct hfi1_devdata *dd = sc->dd;
624 u32 hw_context = sc->hw_context;
625 int type = sc->type;
626
627 write_kctxt_csr(dd, hw_context,
628 SC(CHECK_ENABLE),
629 hfi1_pkt_default_send_ctxt_mask(dd, type));
630}
631
632static u32 get_buffers_allocated(struct send_context *sc)
633{
634 int cpu;
635 u32 ret = 0;
636
637 for_each_possible_cpu(cpu)
638 ret += *per_cpu_ptr(sc->buffers_allocated, cpu);
639 return ret;
640}
641
642static void reset_buffers_allocated(struct send_context *sc)
643{
644 int cpu;
645
646 for_each_possible_cpu(cpu)
647 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0;
648}
649
650
651
652
653
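/*
 * Allocate a NUMA relative send context structure of the given type along
 * with a HW context.
 */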
654struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
655 uint hdrqentsize, int numa)
656{
657 struct send_context_info *sci;
658 struct send_context *sc = NULL;
659 dma_addr_t dma;
660 unsigned long flags;
661 u64 reg;
662 u32 thresh;
663 u32 sw_index;
664 u32 hw_context;
665 int ret;
666 u8 opval, opmask;
667
668
669 if (dd->flags & HFI1_FROZEN)
670 return NULL;
671
672 sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
673 if (!sc)
674 return NULL;
675
676 sc->buffers_allocated = alloc_percpu(u32);
677 if (!sc->buffers_allocated) {
678 kfree(sc);
679 dd_dev_err(dd,
680 "Cannot allocate buffers_allocated per cpu counters\n"
681 );
682 return NULL;
683 }
684
685 spin_lock_irqsave(&dd->sc_lock, flags);
686 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
687 if (ret) {
688 spin_unlock_irqrestore(&dd->sc_lock, flags);
689 free_percpu(sc->buffers_allocated);
690 kfree(sc);
691 return NULL;
692 }
693
694 sci = &dd->send_contexts[sw_index];
695 sci->sc = sc;
696
697 sc->dd = dd;
698 sc->node = numa;
699 sc->type = type;
700 spin_lock_init(&sc->alloc_lock);
701 spin_lock_init(&sc->release_lock);
702 spin_lock_init(&sc->credit_ctrl_lock);
703 seqlock_init(&sc->waitlock);
704 INIT_LIST_HEAD(&sc->piowait);
705 INIT_WORK(&sc->halt_work, sc_halted);
706 init_waitqueue_head(&sc->halt_wait);
707
708
709 sc->group = 0;
710
711 sc->sw_index = sw_index;
712 sc->hw_context = hw_context;
713 cr_group_addresses(sc, &dma);
714 sc->credits = sci->credits;
715 sc->size = sc->credits * PIO_BLOCK_SIZE;
716
717
718#define PIO_ADDR_CONTEXT_MASK 0xfful
719#define PIO_ADDR_CONTEXT_SHIFT 16
720 sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK)
721 << PIO_ADDR_CONTEXT_SHIFT);
722
723
724 reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK))
725 << SC(CTRL_CTXT_DEPTH_SHIFT))
726 | ((sci->base & SC(CTRL_CTXT_BASE_MASK))
727 << SC(CTRL_CTXT_BASE_SHIFT));
728 write_kctxt_csr(dd, hw_context, SC(CTRL), reg);
729
730 set_pio_integrity(sc);
731
732
733 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1);
734
735
736 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
737 (SC(CHECK_PARTITION_KEY_VALUE_MASK) &
738 DEFAULT_PKEY) <<
739 SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
740
741
742 if (type == SC_USER) {
743 opval = USER_OPCODE_CHECK_VAL;
744 opmask = USER_OPCODE_CHECK_MASK;
745 } else {
746 opval = OPCODE_CHECK_VAL_DISABLED;
747 opmask = OPCODE_CHECK_MASK_DISABLED;
748 }
749
750
751 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
752 ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
753 ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
754
755
756 reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
757 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
758
759
760
761
762
763
764
765
766
767
768
769
770 if (type == SC_ACK) {
771 thresh = sc_percent_to_threshold(sc, 50);
772 } else if (type == SC_USER) {
773 thresh = sc_percent_to_threshold(sc,
774 user_credit_return_threshold);
775 } else {
776 thresh = min(sc_percent_to_threshold(sc, 50),
777 sc_mtu_to_threshold(sc, hfi1_max_mtu,
778 hdrqentsize));
779 }
780 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
781
782 if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN))
783 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
784 else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN))
785 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
786
787
788 sc->credit_ctrl = reg;
789 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg);
790
791
792 if (type == SC_USER) {
793 reg = 1ULL << 15;
794 write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg);
795 }
796
797 spin_unlock_irqrestore(&dd->sc_lock, flags);
798
799
800
801
802
803
804
805
806
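	/*
	 * Allocate shadow ring to track outstanding PIO buffers _after_
	 * unlocking.  We don't know the size until the lock is held and
	 * we can't allocate while the lock is held.  No one is using
	 * the context yet, so allocate it now.
	 *
	 * User contexts do not get a shadow ring.
	 */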
807 if (type != SC_USER) {
808
809
810
811
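		/*
		 * Size the shadow ring 1 larger than the number of credits
		 * so head == tail can mean empty.
		 */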
812 sc->sr_size = sci->credits + 1;
813 sc->sr = kcalloc_node(sc->sr_size,
814 sizeof(union pio_shadow_ring),
815 GFP_KERNEL, numa);
816 if (!sc->sr) {
817 sc_free(sc);
818 return NULL;
819 }
820 }
821
822 hfi1_cdbg(PIO,
823 "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n",
824 sw_index,
825 hw_context,
826 sc_type_name(type),
827 sc->group,
828 sc->credits,
829 sc->credit_ctrl,
830 thresh);
831
832 return sc;
833}
834
835
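/* free a per-NUMA send context structure */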
836void sc_free(struct send_context *sc)
837{
838 struct hfi1_devdata *dd;
839 unsigned long flags;
840 u32 sw_index;
841 u32 hw_context;
842
843 if (!sc)
844 return;
845
846 sc->flags |= SCF_IN_FREE;
847 dd = sc->dd;
848 if (!list_empty(&sc->piowait))
849 dd_dev_err(dd, "piowait list not empty!\n");
850 sw_index = sc->sw_index;
851 hw_context = sc->hw_context;
852 sc_disable(sc);
853 flush_work(&sc->halt_work);
854
855 spin_lock_irqsave(&dd->sc_lock, flags);
856 dd->send_contexts[sw_index].sc = NULL;
857
858
859 write_kctxt_csr(dd, hw_context, SC(CTRL), 0);
860 write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0);
861 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0);
862 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0);
863 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0);
864 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0);
865 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0);
866
867
868 sc_hw_free(dd, sw_index, hw_context);
869 spin_unlock_irqrestore(&dd->sc_lock, flags);
870
871 kfree(sc->sr);
872 free_percpu(sc->buffers_allocated);
873 kfree(sc);
874}
875
876
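/* disable the context */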
877void sc_disable(struct send_context *sc)
878{
879 u64 reg;
880 struct pio_buf *pbuf;
881 LIST_HEAD(wake_list);
882
883 if (!sc)
884 return;
885
886
887 spin_lock_irq(&sc->alloc_lock);
888 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
889 reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
890 sc->flags &= ~SCF_ENABLED;
891 sc_wait_for_packet_egress(sc, 1);
892 write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
893
894
895
896
897
898
899
900
901 udelay(1);
902 spin_lock(&sc->release_lock);
903 if (sc->sr) {
904 while (sc->sr_tail != sc->sr_head) {
905 pbuf = &sc->sr[sc->sr_tail].pbuf;
906 if (pbuf->cb)
907 (*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE);
908 sc->sr_tail++;
909 if (sc->sr_tail >= sc->sr_size)
910 sc->sr_tail = 0;
911 }
912 }
913 spin_unlock(&sc->release_lock);
914
915 write_seqlock(&sc->waitlock);
916 if (!list_empty(&sc->piowait))
917 list_move(&sc->piowait, &wake_list);
918 write_sequnlock(&sc->waitlock);
919 while (!list_empty(&wake_list)) {
920 struct iowait *wait;
921 struct rvt_qp *qp;
922 struct hfi1_qp_priv *priv;
923
924 wait = list_first_entry(&wake_list, struct iowait, list);
925 qp = iowait_to_qp(wait);
926 priv = qp->priv;
927 list_del_init(&priv->s_iowait.list);
928 priv->s_iowait.lock = NULL;
929 hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
930 }
931
932 spin_unlock_irq(&sc->alloc_lock);
933}
934
935
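/* return SendEgressCtxtStatus.PacketOccupancy */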
936static u64 packet_occupancy(u64 reg)
937{
938 return (reg &
939 SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
940 >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
941}
942
943
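/* is egress halted on the context? */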
944static bool egress_halted(u64 reg)
945{
946 return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
947}
948
949
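/* is the send context halted? */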
950static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
951{
952 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) &
953 SC(STATUS_CTXT_HALTED_SMASK));
954}
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
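/*
 * Wait for packet egress, optionally pause for credit return.
 *
 * Poll SendEgressCtxtStatus until the packet occupancy goes to zero, the
 * context halts, or the wait times out (in which case the link is bounced
 * to recover).
 */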
970static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
971{
972 struct hfi1_devdata *dd = sc->dd;
973 u64 reg = 0;
974 u64 reg_prev;
975 u32 loop = 0;
976
977 while (1) {
978 reg_prev = reg;
979 reg = read_csr(dd, sc->hw_context * 8 +
980 SEND_EGRESS_CTXT_STATUS);
981
982 if (sc->flags & SCF_HALTED ||
983 is_sc_halted(dd, sc->hw_context) || egress_halted(reg))
984 break;
985 reg = packet_occupancy(reg);
986 if (reg == 0)
987 break;
988
989 if (reg != reg_prev)
990 loop = 0;
991 if (loop > 50000) {
992
993 dd_dev_err(dd,
994 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
995 __func__, sc->sw_index,
996 sc->hw_context, (u32)reg);
997 queue_work(dd->pport->link_wq,
998 &dd->pport->link_bounce_work);
999 break;
1000 }
1001 loop++;
1002 udelay(1);
1003 }
1004
1005 if (pause)
1006
1007 pause_for_credit_return(dd);
1008}
1009
1010void sc_wait(struct hfi1_devdata *dd)
1011{
1012 int i;
1013
1014 for (i = 0; i < dd->num_send_contexts; i++) {
1015 struct send_context *sc = dd->send_contexts[i].sc;
1016
1017 if (!sc)
1018 continue;
1019 sc_wait_for_packet_egress(sc, 0);
1020 }
1021}
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
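/*
 * Restart a context after it has been halted due to error.
 *
 * If the first step fails - wait for the halt to be asserted, return early.
 * Otherwise complain about timeouts but keep going.
 *
 * It is expected that allocations (enabled flag bit) have been shut off
 * already (only applies to kernel contexts).
 */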
1032int sc_restart(struct send_context *sc)
1033{
1034 struct hfi1_devdata *dd = sc->dd;
1035 u64 reg;
1036 u32 loop;
1037 int count;
1038
1039
1040 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE))
1041 return -EINVAL;
1042
1043 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
1044 sc->hw_context);
1045
1046
1047
1048
1049
1050
1051
1052 loop = 0;
1053 while (1) {
1054 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS));
1055 if (reg & SC(STATUS_CTXT_HALTED_SMASK))
1056 break;
1057 if (loop > 100) {
1058 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
1059 __func__, sc->sw_index, sc->hw_context);
1060 return -ETIME;
1061 }
1062 loop++;
1063 udelay(1);
1064 }
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 if (sc->type != SC_USER) {
1077
1078 loop = 0;
1079 while (1) {
1080 count = get_buffers_allocated(sc);
1081 if (count == 0)
1082 break;
1083 if (loop > 100) {
1084 dd_dev_err(dd,
1085 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
1086 __func__, sc->sw_index,
1087 sc->hw_context, count);
1088 }
1089 loop++;
1090 udelay(1);
1091 }
1092 }
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103 sc_disable(sc);
1104
1105
1106
1107
1108
1109
1110
1111 return sc_enable(sc);
1112}
1113
1114
1115
1116
1117
1118
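/*
 * PIO freeze processing.  To be called after the TXE block is fully frozen.
 * Go through all frozen send contexts and disable them.  The contexts are
 * already disabled by the hardware, but need to be re-done in software to
 * clean up the software state.  User contexts are skipped; they are
 * disabled when the owning process resets its context.
 */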
1119void pio_freeze(struct hfi1_devdata *dd)
1120{
1121 struct send_context *sc;
1122 int i;
1123
1124 for (i = 0; i < dd->num_send_contexts; i++) {
1125 sc = dd->send_contexts[i].sc;
1126
1127
1128
1129
1130
1131 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
1132 continue;
1133
1134
1135 sc_disable(sc);
1136 }
1137}
1138
1139
1140
1141
1142
1143
1144
1145
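/*
 * Unfreeze PIO for kernel send contexts.  The precondition for calling this
 * function (per the driver unfreeze procedure) is that the PIO send contexts
 * have been disabled via pio_freeze() and the SPC freeze has been cleared.
 * Contexts marked SCF_LINK_DOWN are left for pio_kernel_linkup() to
 * re-enable.
 */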
1146void pio_kernel_unfreeze(struct hfi1_devdata *dd)
1147{
1148 struct send_context *sc;
1149 int i;
1150
1151 for (i = 0; i < dd->num_send_contexts; i++) {
1152 sc = dd->send_contexts[i].sc;
1153 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
1154 continue;
1155 if (sc->flags & SCF_LINK_DOWN)
1156 continue;
1157
1158 sc_enable(sc);
1159 }
1160}
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
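/*
 * pio_kernel_linkup() - Re-enable kernel send contexts after a link-up event
 *
 * A link-down takes the freeze path, but those contexts are marked with
 * SCF_LINK_DOWN and stay disabled until the link comes back up, at which
 * point this routine re-enables them.
 */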
1174void pio_kernel_linkup(struct hfi1_devdata *dd)
1175{
1176 struct send_context *sc;
1177 int i;
1178
1179 for (i = 0; i < dd->num_send_contexts; i++) {
1180 sc = dd->send_contexts[i].sc;
1181 if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
1182 continue;
1183
1184 sc_enable(sc);
1185 }
1186}
1187
1188
1189
1190
1191
1192
1193
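/*
 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
 * Returns:
 *	-ETIMEDOUT - if we wait too long
 *	-EIO	   - if there was an error
 */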
1194static int pio_init_wait_progress(struct hfi1_devdata *dd)
1195{
1196 u64 reg;
1197 int max, count = 0;
1198
1199
1200 max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5;
1201 while (1) {
1202 reg = read_csr(dd, SEND_PIO_INIT_CTXT);
1203 if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK))
1204 break;
1205 if (count >= max)
1206 return -ETIMEDOUT;
1207 udelay(5);
1208 count++;
1209 }
1210
1211 return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0;
1212}
1213
1214
1215
1216
1217
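/*
 * Reset all of the send contexts to their power-on state.  Used
 * only during manual init - no lock against sc_enable needed.
 */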
1218void pio_reset_all(struct hfi1_devdata *dd)
1219{
1220 int ret;
1221
1222
1223 ret = pio_init_wait_progress(dd);
1224
1225 if (ret == -EIO) {
1226
1227 write_csr(dd, SEND_PIO_ERR_CLEAR,
1228 SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
1229 }
1230
1231
1232 write_csr(dd, SEND_PIO_INIT_CTXT,
1233 SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
1234 udelay(2);
1235 ret = pio_init_wait_progress(dd);
1236 if (ret < 0) {
1237 dd_dev_err(dd,
1238 "PIO send context init %s while initializing all PIO blocks\n",
1239 ret == -ETIMEDOUT ? "is stuck" : "had an error");
1240 }
1241}
1242
1243
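/* enable the context */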
1244int sc_enable(struct send_context *sc)
1245{
1246 u64 sc_ctrl, reg, pio;
1247 struct hfi1_devdata *dd;
1248 unsigned long flags;
1249 int ret = 0;
1250
1251 if (!sc)
1252 return -EINVAL;
1253 dd = sc->dd;
1254
1255
1256
1257
1258
1259
1260
1261
1262 spin_lock_irqsave(&sc->alloc_lock, flags);
1263 sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
1264 if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK)))
1265 goto unlock;
1266
1267
1268
1269 *sc->hw_free = 0;
1270 sc->free = 0;
1271 sc->alloc_free = 0;
1272 sc->fill = 0;
1273 sc->fill_wrap = 0;
1274 sc->sr_head = 0;
1275 sc->sr_tail = 0;
1276 sc->flags = 0;
1277
1278 reset_buffers_allocated(sc);
1279
1280
1281
1282
1283
1284
1285
1286 reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
1287 if (reg)
1288 write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);
1289
1290
1291
1292
1293
1294 spin_lock(&dd->sc_init_lock);
1295
1296
1297
1298
1299
1300
1301
1302 pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) <<
1303 SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) |
1304 SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK;
1305 write_csr(dd, SEND_PIO_INIT_CTXT, pio);
1306
1307
1308
1309
1310 udelay(2);
1311 ret = pio_init_wait_progress(dd);
1312 spin_unlock(&dd->sc_init_lock);
1313 if (ret) {
1314 dd_dev_err(dd,
1315 "sctxt%u(%u): Context not enabled due to init failure %d\n",
1316 sc->sw_index, sc->hw_context, ret);
1317 goto unlock;
1318 }
1319
1320
1321
1322
1323 sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK);
1324 write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl);
1325
1326
1327
1328
1329 read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
1330 sc->flags |= SCF_ENABLED;
1331
1332unlock:
1333 spin_unlock_irqrestore(&sc->alloc_lock, flags);
1334
1335 return ret;
1336}
1337
1338
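/* force a credit return on the context */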
1339void sc_return_credits(struct send_context *sc)
1340{
1341 if (!sc)
1342 return;
1343
1344
1345 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
1346 SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
1347
1348
1349
1350
1351 read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE));
1352
1353 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0);
1354}
1355
1356
1357void sc_flush(struct send_context *sc)
1358{
1359 if (!sc)
1360 return;
1361
1362 sc_wait_for_packet_egress(sc, 1);
1363}
1364
1365
1366void sc_drop(struct send_context *sc)
1367{
1368 if (!sc)
1369 return;
1370
1371 dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
1372 __func__, sc->sw_index, sc->hw_context);
1373}
1374
1375
1376
1377
1378
1379
1380
1381
1382
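/*
 * Start the software reaction to a context halt or SPC freeze:
 *	- mark the context as halted or frozen
 *	- stop buffer allocations
 *
 * Called from the error interrupt.  Other work is deferred until
 * out of the interrupt.
 */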
1383void sc_stop(struct send_context *sc, int flag)
1384{
1385 unsigned long flags;
1386
1387
1388 spin_lock_irqsave(&sc->alloc_lock, flags);
1389
1390 sc->flags |= flag;
1391 sc->flags &= ~SCF_ENABLED;
1392 spin_unlock_irqrestore(&sc->alloc_lock, flags);
1393 wake_up(&sc->halt_wait);
1394}
1395
1396#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
1397#define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
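/*
 * The send context buffer "allocator".
 *
 * @sc: the PIO send context we are allocating from
 * @dw_len: dword length of the whole packet, including the PBC
 * @cb: optional callback to call when the buffer is finished sending
 * @arg: argument for cb
 *
 * Return a pointer to a PIO buffer, NULL if not enough room, or
 * ERR_PTR(-ECOMM) if the context is not enabled.
 */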
1410struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
1411 pio_release_cb cb, void *arg)
1412{
1413 struct pio_buf *pbuf = NULL;
1414 unsigned long flags;
1415 unsigned long avail;
1416 unsigned long blocks = dwords_to_blocks(dw_len);
1417 u32 fill_wrap;
1418 int trycount = 0;
1419 u32 head, next;
1420
1421 spin_lock_irqsave(&sc->alloc_lock, flags);
1422 if (!(sc->flags & SCF_ENABLED)) {
1423 spin_unlock_irqrestore(&sc->alloc_lock, flags);
1424 return ERR_PTR(-ECOMM);
1425 }
1426
1427retry:
1428 avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free);
1429 if (blocks > avail) {
1430
1431 if (unlikely(trycount)) {
1432 spin_unlock_irqrestore(&sc->alloc_lock, flags);
1433 goto done;
1434 }
1435
1436 sc->alloc_free = READ_ONCE(sc->free);
1437 avail =
1438 (unsigned long)sc->credits -
1439 (sc->fill - sc->alloc_free);
1440 if (blocks > avail) {
1441
1442 sc_release_update(sc);
1443 sc->alloc_free = READ_ONCE(sc->free);
1444 trycount++;
1445 goto retry;
1446 }
1447 }
1448
1449
1450
1451 preempt_disable();
1452 this_cpu_inc(*sc->buffers_allocated);
1453
1454
1455 head = sc->sr_head;
1456
1457
1458 sc->fill += blocks;
1459 fill_wrap = sc->fill_wrap;
1460 sc->fill_wrap += blocks;
1461 if (sc->fill_wrap >= sc->credits)
1462 sc->fill_wrap = sc->fill_wrap - sc->credits;
1463
1464
1465
1466
1467
1468
1469
1470
1471 pbuf = &sc->sr[head].pbuf;
1472 pbuf->sent_at = sc->fill;
1473 pbuf->cb = cb;
1474 pbuf->arg = arg;
1475 pbuf->sc = sc;
1476
1477
1478
1479 next = head + 1;
1480 if (next >= sc->sr_size)
1481 next = 0;
1482
1483
1484
1485
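	/*
	 * update the head - must be last! - the releaser can look at fields
	 * in pbuf once we move the head
	 */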
1486 smp_wmb();
1487 sc->sr_head = next;
1488 spin_unlock_irqrestore(&sc->alloc_lock, flags);
1489
1490
1491 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
1492 pbuf->end = sc->base_addr + sc->size;
1493 pbuf->qw_written = 0;
1494 pbuf->carry_bytes = 0;
1495 pbuf->carry.val64 = 0;
1496done:
1497 return pbuf;
1498}
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
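/*
 * More than one entity may want credit return interrupts turned on.  Keep
 * a count of interested parties; enable the interrupt on the first request
 * and let sc_del_credit_return_intr() disable it when the count drops to
 * zero.
 */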
1511void sc_add_credit_return_intr(struct send_context *sc)
1512{
1513 unsigned long flags;
1514
1515
1516 spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
1517 if (sc->credit_intr_count == 0) {
1518 sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
1519 write_kctxt_csr(sc->dd, sc->hw_context,
1520 SC(CREDIT_CTRL), sc->credit_ctrl);
1521 }
1522 sc->credit_intr_count++;
1523 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
1524}
1525
1526
1527
1528
1529
1530void sc_del_credit_return_intr(struct send_context *sc)
1531{
1532 unsigned long flags;
1533
1534 WARN_ON(sc->credit_intr_count == 0);
1535
1536
1537 spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
1538 sc->credit_intr_count--;
1539 if (sc->credit_intr_count == 0) {
1540 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
1541 write_kctxt_csr(sc->dd, sc->hw_context,
1542 SC(CREDIT_CTRL), sc->credit_ctrl);
1543 }
1544 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
1545}
1546
1547
1548
1549
1550
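/*
 * The caller must be careful when calling this.  All needint calls
 * must be paired with !needint.
 */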
1551void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
1552{
1553 if (needint)
1554 sc_add_credit_return_intr(sc);
1555 else
1556 sc_del_credit_return_intr(sc);
1557 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
1558 if (needint)
1559 sc_return_credits(sc);
1560}
1561
1562
1563
1564
1565
1566
1567
1568
1569
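/*
 * sc_piobufavail - callback when a PIO buffer is available
 * @sc: the send context
 *
 * This is called from the interrupt handler when a PIO buffer is
 * available after hfi1_verbs_send() returned an error that no buffers were
 * available.  Disable the interrupt if there are no waiting QPs.
 */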
1570static void sc_piobufavail(struct send_context *sc)
1571{
1572 struct hfi1_devdata *dd = sc->dd;
1573 struct list_head *list;
1574 struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
1575 struct rvt_qp *qp;
1576 struct hfi1_qp_priv *priv;
1577 unsigned long flags;
1578 uint i, n = 0, top_idx = 0;
1579
1580 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
1581 dd->send_contexts[sc->sw_index].type != SC_VL15)
1582 return;
1583 list = &sc->piowait;
1584
1585
1586
1587
1588
1589
1590 write_seqlock_irqsave(&sc->waitlock, flags);
1591 while (!list_empty(list)) {
1592 struct iowait *wait;
1593
1594 if (n == ARRAY_SIZE(qps))
1595 break;
1596 wait = list_first_entry(list, struct iowait, list);
1597 iowait_get_priority(wait);
1598 qp = iowait_to_qp(wait);
1599 priv = qp->priv;
1600 list_del_init(&priv->s_iowait.list);
1601 priv->s_iowait.lock = NULL;
1602 if (n) {
1603 priv = qps[top_idx]->priv;
1604 top_idx = iowait_priority_update_top(wait,
1605 &priv->s_iowait,
1606 n, top_idx);
1607 }
1608
1609
1610 qps[n++] = qp;
1611 }
1612
1613
1614
1615
1616 if (n) {
1617 hfi1_sc_wantpiobuf_intr(sc, 0);
1618 if (!list_empty(list))
1619 hfi1_sc_wantpiobuf_intr(sc, 1);
1620 }
1621 write_sequnlock_irqrestore(&sc->waitlock, flags);
1622
1623
1624 if (n)
1625 hfi1_qp_wakeup(qps[top_idx],
1626 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
1627 for (i = 0; i < n; i++)
1628 if (i != top_idx)
1629 hfi1_qp_wakeup(qps[i],
1630 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
1631}
1632
1633
1634static inline int fill_code(u64 hw_free)
1635{
1636 int code = 0;
1637
1638 if (hw_free & CR_STATUS_SMASK)
1639 code |= PRC_STATUS_ERR;
1640 if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK)
1641 code |= PRC_PBC;
1642 if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK)
1643 code |= PRC_THRESHOLD;
1644 if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK)
1645 code |= PRC_FILL_ERR;
1646 if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK)
1647 code |= PRC_SC_DISABLE;
1648 return code;
1649}
1650
1651
1652#define sent_before(a, b) time_before(a, b)
1653
1654
1655
1656
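/*
 * The send context buffer "releaser": update the free count from the
 * hardware credit return, invoke callbacks for buffers that have been
 * sent, and kick any QPs waiting for PIO buffers.
 */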
1657void sc_release_update(struct send_context *sc)
1658{
1659 struct pio_buf *pbuf;
1660 u64 hw_free;
1661 u32 head, tail;
1662 unsigned long old_free;
1663 unsigned long free;
1664 unsigned long extra;
1665 unsigned long flags;
1666 int code;
1667
1668 if (!sc)
1669 return;
1670
1671 spin_lock_irqsave(&sc->release_lock, flags);
1672
1673 hw_free = le64_to_cpu(*sc->hw_free);
1674 old_free = sc->free;
1675 extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT)
1676 - (old_free & CR_COUNTER_MASK))
1677 & CR_COUNTER_MASK;
1678 free = old_free + extra;
1679 trace_hfi1_piofree(sc, extra);
1680
1681
1682 code = -1;
1683 head = READ_ONCE(sc->sr_head);
1684 tail = sc->sr_tail;
1685 while (head != tail) {
1686 pbuf = &sc->sr[tail].pbuf;
1687
1688 if (sent_before(free, pbuf->sent_at)) {
1689
1690 break;
1691 }
1692 if (pbuf->cb) {
1693 if (code < 0)
1694 code = fill_code(hw_free);
1695 (*pbuf->cb)(pbuf->arg, code);
1696 }
1697
1698 tail++;
1699 if (tail >= sc->sr_size)
1700 tail = 0;
1701 }
1702 sc->sr_tail = tail;
1703
1704 smp_wmb();
1705 sc->free = free;
1706 spin_unlock_irqrestore(&sc->release_lock, flags);
1707 sc_piobufavail(sc);
1708}
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
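/*
 * Send context group releaser.  Argument is the send context that caused
 * the interrupt.  Called from the send context interrupt handler.
 *
 * Call release on all contexts in the group.
 *
 * This routine takes the sc_lock without an irqsave because it is only
 * called from an interrupt handler.  Adjust if that changes.
 */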
1719void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
1720{
1721 struct send_context *sc;
1722 u32 sw_index;
1723 u32 gc, gc_end;
1724
1725 spin_lock(&dd->sc_lock);
1726 sw_index = dd->hw_to_sw[hw_context];
1727 if (unlikely(sw_index >= dd->num_send_contexts)) {
1728 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
1729 __func__, hw_context, sw_index);
1730 goto done;
1731 }
1732 sc = dd->send_contexts[sw_index].sc;
1733 if (unlikely(!sc))
1734 goto done;
1735
1736 gc = group_context(hw_context, sc->group);
1737 gc_end = gc + group_size(sc->group);
1738 for (; gc < gc_end; gc++) {
1739 sw_index = dd->hw_to_sw[gc];
1740 if (unlikely(sw_index >= dd->num_send_contexts)) {
1741 dd_dev_err(dd,
1742 "%s: invalid hw (%u) to sw (%u) mapping\n",
1743 __func__, hw_context, sw_index);
1744 continue;
1745 }
1746 sc_release_update(dd->send_contexts[sw_index].sc);
1747 }
1748done:
1749 spin_unlock(&dd->sc_lock);
1750}
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
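/*
 * pio_select_send_context_vl() - select send context
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns a send context based on the selector and a vl.
 * The mapping fields are protected by RCU.
 */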
1761struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
1762 u32 selector, u8 vl)
1763{
1764 struct pio_vl_map *m;
1765 struct pio_map_elem *e;
1766 struct send_context *rval;
1767
1768
1769
1770
1771
1772
1773 if (unlikely(vl >= num_vls)) {
1774 rval = NULL;
1775 goto done;
1776 }
1777
1778 rcu_read_lock();
1779 m = rcu_dereference(dd->pio_map);
1780 if (unlikely(!m)) {
1781 rcu_read_unlock();
1782 return dd->vld[0].sc;
1783 }
1784 e = m->map[vl & m->mask];
1785 rval = e->ksc[selector & e->mask];
1786 rcu_read_unlock();
1787
1788done:
1789 rval = !rval ? dd->vld[0].sc : rval;
1790 return rval;
1791}
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
1802 u32 selector, u8 sc5)
1803{
1804 u8 vl = sc_to_vlt(dd, sc5);
1805
1806 return pio_select_send_context_vl(dd, selector, vl);
1807}
1808
1809
1810
1811
1812static void pio_map_free(struct pio_vl_map *m)
1813{
1814 int i;
1815
1816 for (i = 0; m && i < m->actual_vls; i++)
1817 kfree(m->map[i]);
1818 kfree(m);
1819}
1820
1821
1822
1823
1824static void pio_map_rcu_callback(struct rcu_head *list)
1825{
1826 struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);
1827
1828 pio_map_free(m);
1829}
1830
1831
1832
1833
1834static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
1835{
1836 u32 thres;
1837
1838 thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
1839 50),
1840 sc_mtu_to_threshold(dd->kernel_send_context[scontext],
1841 dd->vld[i].mtu,
1842 dd->rcd[0]->rcvhdrqentsize));
1843 sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
1844}
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
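/*
 * pio_map_init - called when #vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_scontexts: per-vl send context count (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_scontexts is used to specify a non-uniform vl/send context
 * loading.  NULL implies auto computing the loading and giving each
 * VL a uniform distribution of send contexts per VL; any extras are
 * added from the last VL down.
 *
 * If either num_vls or a per-VL context count is not a power of 2, the
 * array sizes are rounded up to the next power of 2 and entries are
 * reused round robin.
 *
 * RCU protects readers of the mapping; on error the old mapping is left
 * unchanged.
 */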
1874int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
1875{
1876 int i, j;
1877 int extra, sc_per_vl;
1878 int scontext = 1;
1879 int num_kernel_send_contexts = 0;
1880 u8 lvl_scontexts[OPA_MAX_VLS];
1881 struct pio_vl_map *oldmap, *newmap;
1882
1883 if (!vl_scontexts) {
1884 for (i = 0; i < dd->num_send_contexts; i++)
1885 if (dd->send_contexts[i].type == SC_KERNEL)
1886 num_kernel_send_contexts++;
1887
1888 sc_per_vl = num_kernel_send_contexts / num_vls;
1889
1890 extra = num_kernel_send_contexts % num_vls;
1891 vl_scontexts = lvl_scontexts;
1892
1893 for (i = num_vls - 1; i >= 0; i--, extra--)
1894 vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
1895 }
1896
1897 newmap = kzalloc(sizeof(*newmap) +
1898 roundup_pow_of_two(num_vls) *
1899 sizeof(struct pio_map_elem *),
1900 GFP_KERNEL);
1901 if (!newmap)
1902 goto bail;
1903 newmap->actual_vls = num_vls;
1904 newmap->vls = roundup_pow_of_two(num_vls);
1905 newmap->mask = (1 << ilog2(newmap->vls)) - 1;
1906 for (i = 0; i < newmap->vls; i++) {
1907
1908 int first_scontext = scontext;
1909
1910 if (i < newmap->actual_vls) {
1911 int sz = roundup_pow_of_two(vl_scontexts[i]);
1912
1913
1914 newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
1915 sz * sizeof(struct
1916 send_context *),
1917 GFP_KERNEL);
1918 if (!newmap->map[i])
1919 goto bail;
1920 newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
1921
1922
1923
1924
1925 for (j = 0; j < sz; j++) {
1926 if (dd->kernel_send_context[scontext]) {
1927 newmap->map[i]->ksc[j] =
1928 dd->kernel_send_context[scontext];
1929 set_threshold(dd, scontext, i);
1930 }
1931 if (++scontext >= first_scontext +
1932 vl_scontexts[i])
1933
1934 scontext = first_scontext;
1935 }
1936 } else {
1937
1938 newmap->map[i] = newmap->map[i % num_vls];
1939 }
1940 scontext = first_scontext + vl_scontexts[i];
1941 }
1942
1943 spin_lock_irq(&dd->pio_map_lock);
1944 oldmap = rcu_dereference_protected(dd->pio_map,
1945 lockdep_is_held(&dd->pio_map_lock));
1946
1947
1948 rcu_assign_pointer(dd->pio_map, newmap);
1949
1950 spin_unlock_irq(&dd->pio_map_lock);
1951
1952 if (oldmap)
1953 call_rcu(&oldmap->list, pio_map_rcu_callback);
1954 return 0;
1955bail:
1956
1957 pio_map_free(newmap);
1958 return -ENOMEM;
1959}
1960
1961void free_pio_map(struct hfi1_devdata *dd)
1962{
1963
1964 if (rcu_access_pointer(dd->pio_map)) {
1965 spin_lock_irq(&dd->pio_map_lock);
1966 pio_map_free(rcu_access_pointer(dd->pio_map));
1967 RCU_INIT_POINTER(dd->pio_map, NULL);
1968 spin_unlock_irq(&dd->pio_map_lock);
1969 synchronize_rcu();
1970 }
1971 kfree(dd->kernel_send_context);
1972 dd->kernel_send_context = NULL;
1973}
1974
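/*
 * Allocate and enable the kernel send contexts: one VL15 context plus
 * INIT_SC_PER_VL contexts per data VL, then build the VL to send context
 * map with pio_map_init().
 */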
1975int init_pervl_scs(struct hfi1_devdata *dd)
1976{
1977 int i;
1978 u64 mask, all_vl_mask = (u64)0x80ff;
1979 u64 data_vls_mask = (u64)0x00ff;
1980 u32 ctxt;
1981 struct hfi1_pportdata *ppd = dd->pport;
1982
1983 dd->vld[15].sc = sc_alloc(dd, SC_VL15,
1984 dd->rcd[0]->rcvhdrqentsize, dd->node);
1985 if (!dd->vld[15].sc)
1986 return -ENOMEM;
1987
1988 hfi1_init_ctxt(dd->vld[15].sc);
1989 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
1990
1991 dd->kernel_send_context = kcalloc_node(dd->num_send_contexts,
1992 sizeof(struct send_context *),
1993 GFP_KERNEL, dd->node);
1994 if (!dd->kernel_send_context)
1995 goto freesc15;
1996
1997 dd->kernel_send_context[0] = dd->vld[15].sc;
1998
1999 for (i = 0; i < num_vls; i++) {
2000
2001
2002
2003
2004
2005
2006
2007 dd->vld[i].sc = sc_alloc(dd, SC_KERNEL,
2008 dd->rcd[0]->rcvhdrqentsize, dd->node);
2009 if (!dd->vld[i].sc)
2010 goto nomem;
2011 dd->kernel_send_context[i + 1] = dd->vld[i].sc;
2012 hfi1_init_ctxt(dd->vld[i].sc);
2013
2014 dd->vld[i].mtu = hfi1_max_mtu;
2015 }
2016 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
2017 dd->kernel_send_context[i + 1] =
2018 sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
2019 if (!dd->kernel_send_context[i + 1])
2020 goto nomem;
2021 hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
2022 }
2023
2024 sc_enable(dd->vld[15].sc);
2025 ctxt = dd->vld[15].sc->hw_context;
2026 mask = all_vl_mask & ~(1LL << 15);
2027 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
2028 dd_dev_info(dd,
2029 "Using send context %u(%u) for VL15\n",
2030 dd->vld[15].sc->sw_index, ctxt);
2031
2032 for (i = 0; i < num_vls; i++) {
2033 sc_enable(dd->vld[i].sc);
2034 ctxt = dd->vld[i].sc->hw_context;
2035 mask = all_vl_mask & ~(data_vls_mask);
2036 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
2037 }
2038 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
2039 sc_enable(dd->kernel_send_context[i + 1]);
2040 ctxt = dd->kernel_send_context[i + 1]->hw_context;
2041 mask = all_vl_mask & ~(data_vls_mask);
2042 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
2043 }
2044
2045 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
2046 goto nomem;
2047 return 0;
2048
2049nomem:
2050 for (i = 0; i < num_vls; i++) {
2051 sc_free(dd->vld[i].sc);
2052 dd->vld[i].sc = NULL;
2053 }
2054
2055 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
2056 sc_free(dd->kernel_send_context[i + 1]);
2057
2058 kfree(dd->kernel_send_context);
2059 dd->kernel_send_context = NULL;
2060
2061freesc15:
2062 sc_free(dd->vld[15].sc);
2063 return -ENOMEM;
2064}
2065
2066int init_credit_return(struct hfi1_devdata *dd)
2067{
2068 int ret;
2069 int i;
2070
2071 dd->cr_base = kcalloc(
2072 node_affinity.num_possible_nodes,
2073 sizeof(struct credit_return_base),
2074 GFP_KERNEL);
2075 if (!dd->cr_base) {
2076 ret = -ENOMEM;
2077 goto done;
2078 }
2079 for_each_node_with_cpus(i) {
2080 int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
2081
2082 set_dev_node(&dd->pcidev->dev, i);
2083 dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev,
2084 bytes,
2085 &dd->cr_base[i].dma,
2086 GFP_KERNEL);
2087 if (!dd->cr_base[i].va) {
2088 set_dev_node(&dd->pcidev->dev, dd->node);
2089 dd_dev_err(dd,
2090 "Unable to allocate credit return DMA range for NUMA %d\n",
2091 i);
2092 ret = -ENOMEM;
2093 goto done;
2094 }
2095 }
2096 set_dev_node(&dd->pcidev->dev, dd->node);
2097
2098 ret = 0;
2099done:
2100 return ret;
2101}
2102
2103void free_credit_return(struct hfi1_devdata *dd)
2104{
2105 int i;
2106
2107 if (!dd->cr_base)
2108 return;
2109 for (i = 0; i < node_affinity.num_possible_nodes; i++) {
2110 if (dd->cr_base[i].va) {
2111 dma_free_coherent(&dd->pcidev->dev,
2112 TXE_NUM_CONTEXTS *
2113 sizeof(struct credit_return),
2114 dd->cr_base[i].va,
2115 dd->cr_base[i].dma);
2116 }
2117 }
2118 kfree(dd->cr_base);
2119 dd->cr_base = NULL;
2120}
2121
2122void seqfile_dump_sci(struct seq_file *s, u32 i,
2123 struct send_context_info *sci)
2124{
2125 struct send_context *sc = sci->sc;
2126 u64 reg;
2127
2128 seq_printf(s, "SCI %u: type %u base %u credits %u\n",
2129 i, sci->type, sci->base, sci->credits);
2130 seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
2131 sc->flags, sc->sw_index, sc->hw_context, sc->group);
2132 seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n",
2133 sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
2134 seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n",
2135 sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
2136 seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n",
2137 sc->credit_intr_count, sc->credit_ctrl);
2138 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
2139 seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n",
2140 (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
2141 CR_COUNTER_SHIFT,
2142 (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
2143 SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
2144 reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
2145}
2146