/*
 * PIO buffer and send context support for the hfi1 driver.
 */

#include <linux/delay.h>
#include "hfi.h"
#include "qp.h"
#include "trace.h"

#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */

#define SC(name) SEND_CTXT_##name

/*
 * Send Context functions
 */
static void sc_wait_for_packet_egress(struct send_context *sc, int pause);
/*
 * Set the CM reset bit and wait for it to clear.  Use the provided
 * sendctrl register.  This routine has no locking.
 */
void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
{
	write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK);
	while (1) {
		udelay(1);
		sendctrl = read_csr(dd, SEND_CTRL);
		if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0)
			break;
	}
}

/* defined in header release 48 and higher */
#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
		<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#endif

/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{
	u64 reg, mask;
	unsigned long flags;
	int write = 1;	/* write sendctrl back */
	int flush = 0;	/* re-read sendctrl to make sure it is flushed */

	spin_lock_irqsave(&dd->sendctrl_lock, flags);

	reg = read_csr(dd, SEND_CTRL);
	switch (op) {
	case PSC_GLOBAL_ENABLE:
		reg |= SEND_CTRL_SEND_ENABLE_SMASK;
	/* Fall through */
	case PSC_DATA_VL_ENABLE:
		/* Disallow sending on VLs not enabled */
		mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
			SEND_CTRL_UNSUPPORTED_VL_SHIFT;
		reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
		break;
	case PSC_GLOBAL_DISABLE:
		reg &= ~SEND_CTRL_SEND_ENABLE_SMASK;
		break;
	case PSC_GLOBAL_VLARB_ENABLE:
		reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
		break;
	case PSC_GLOBAL_VLARB_DISABLE:
		reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
		break;
	case PSC_CM_RESET:
		__cm_reset(dd, reg);
		write = 0; /* CSR already written (and flushed) */
		break;
	case PSC_DATA_VL_DISABLE:
		reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK;
		flush = 1;
		break;
	default:
		dd_dev_err(dd, "%s: invalid control %d\n", __func__, op);
		break;
	}

	if (write) {
		write_csr(dd, SEND_CTRL, reg);
		if (flush)
			(void)read_csr(dd, SEND_CTRL); /* flush write */
	}

	spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
}

/* number of send context memory pools */
#define NUM_SC_POOLS 2

/* Send Context Size (SCS) wildcards */
#define SCS_POOL_0 -1
#define SCS_POOL_1 -2

/* Send Context Count (SCC) wildcards */
#define SCC_PER_VL -1
#define SCC_PER_CPU -2
#define SCC_PER_KRCVQ -3

/* Send Context Size (SCS) constants */
#define SCS_ACK_CREDITS 32
#define SCS_VL15_CREDITS 102	/* 3 pkts of 2048B data + 128B header */

#define PIO_THRESHOLD_CEILING 4096	/* MTU above which PIO is never used */

#define PIO_WAIT_BATCH_SIZE 5

/* default send context sizes */
static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
	[SC_KERNEL] = { .size  = SCS_POOL_0,	/* even divide, pool 0 */
			.count = SCC_PER_VL },	/* one per VL */
	[SC_ACK]    = { .size  = SCS_ACK_CREDITS,
			.count = SCC_PER_KRCVQ },
	[SC_USER]   = { .size  = SCS_POOL_0,	/* even divide, pool 0 */
			.count = SCC_PER_CPU },	/* one per CPU */
	[SC_VL15]   = { .size  = SCS_VL15_CREDITS,
			.count = 1 },

};

/* send context memory pool configuration */
struct mem_pool_config {
	int centipercent;	/* % of memory, in 100ths of 1% */
	int absolute_blocks;	/* absolute block count */
};

/* default memory pool configuration: 100% in pool 0 */
static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
	/* centi%, abs blocks */
	{  10000,     -1 },	/* pool 0 */
	{      0,     -1 },	/* pool 1 */
};

/* memory pool information, used when calculating final sizes */
struct mem_pool_info {
	int centipercent;	/*
				 * 100th of 1% of memory to use, -1 if blocks
				 * already set
				 */
	int count;		/* count of contexts in the pool */
	int blocks;		/* block size of the pool */
	int size;		/* context size, in blocks */
};

/*
 * Convert a pool wildcard to a valid pool index.  The wildcards
 * start at -1 and increase negatively.  Map them as:
 *	-1 => 0
 *	-2 => 1
 *	etc.
 *
 * Return -1 on non-wildcard input, otherwise convert to a pool number.
 */
static int wildcard_to_pool(int wc)
{
	if (wc >= 0)
		return -1;	/* non-wildcard */
	return -wc - 1;
}

static const char *sc_type_names[SC_MAX] = {
	"kernel",
	"ack",
	"user",
	"vl15"
};

static const char *sc_type_name(int index)
{
	if (index < 0 || index >= SC_MAX)
		return "unknown";
	return sc_type_names[index];
}

/*
 * Read the send context memory pool configuration and send context
 * configuration.  Replace any wildcard values with the actual values
 * at this point.
 *
 * Returns the total number of send contexts on success, -errno on failure.
 */
int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
{
	struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
	int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1;
	int total_contexts = 0;
	int fixed_blocks;
	int pool_blocks;
	int used_blocks;
	int cp_total;		/* centipercent total */
	int ab_total;		/* absolute block total */
	int extra;
	int i;

	/*
	 * When SDMA is enabled, kernel context pio packet size is capped by
	 * "piothreshold".  Reduce pio buffer allocation for kernel context by
	 * setting it to a fixed size.  The allocation allows 3-deep buffering
	 * of the largest pio packet plus up to 128 bytes header, sufficient
	 * to maintain verbs performance.
	 *
	 * When SDMA is disabled, keep the default pooling allocation.
	 */
	if (HFI1_CAP_IS_KSET(SDMA)) {
		u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
					piothreshold : PIO_THRESHOLD_CEILING;
		sc_config_sizes[SC_KERNEL].size =
			3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
	}

	/*
	 * Step 1: sanity check the memory pool configuration.
	 *	- copy the centipercents/absolute sizes from the pool config
	 *	- add up centipercents, then later check for full value
	 *	- add up absolute blocks, then later check for over-commit
	 */
	cp_total = 0;
	ab_total = 0;
	for (i = 0; i < NUM_SC_POOLS; i++) {
		int cp = sc_mem_pool_config[i].centipercent;
		int ab = sc_mem_pool_config[i].absolute_blocks;

		/*
		 * A negative value marks the field unused.  A pool must be
		 * described by exactly one of centipercent or blocks, so
		 * both fields cannot be negative at once.
		 */
		if (cp >= 0) {
			cp_total += cp;
		} else if (ab >= 0) {
			ab_total += ab;
		} else {
			dd_dev_err(
				dd,
				"Send context memory pool %d: both the block count and centipercent are invalid\n",
				i);
			return -EINVAL;
		}

		mem_pool_info[i].centipercent = cp;
		mem_pool_info[i].blocks = ab;
	}

	/* do not use both % and absolute blocks for different pools */
	if (cp_total != 0 && ab_total != 0) {
		dd_dev_err(
			dd,
			"All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n");
		return -EINVAL;
	}

	/* if any percentages are present, they must add up to 100% x 100 */
	if (cp_total != 0 && cp_total != 10000) {
		dd_dev_err(
			dd,
			"Send context memory pool centipercent is %d, expecting 10000\n",
			cp_total);
		return -EINVAL;
	}

	/* the absolute pool total cannot be more than the mem total */
	if (ab_total > total_blocks) {
		dd_dev_err(
			dd,
			"Send context memory pool absolute block count %d is larger than the memory size %d\n",
			ab_total, total_blocks);
		return -EINVAL;
	}

	/*
	 * Step 2: go through the context configuration.
	 *	- replace context type wildcard counts with real values
	 *	- add up non-memory pool block sizes
	 *	- add up memory pool user counts
	 */
	fixed_blocks = 0;
	for (i = 0; i < SC_MAX; i++) {
		int count = sc_config_sizes[i].count;
		int size = sc_config_sizes[i].size;
		int pool;

		/*
		 * Sanity check count: either a positive value or one of
		 * the expected wildcards is valid.  The positive value is
		 * checked later when we compare against total memory
		 * available.
		 */
		if (i == SC_ACK) {
			count = dd->n_krcv_queues;
		} else if (i == SC_KERNEL) {
			count = INIT_SC_PER_VL * num_vls;
		} else if (count == SCC_PER_CPU) {
			count = dd->num_rcv_contexts - dd->n_krcv_queues;
		} else if (count < 0) {
			dd_dev_err(
				dd,
				"%s send context invalid count wildcard %d\n",
				sc_type_name(i), count);
			return -EINVAL;
		}
		if (total_contexts + count > dd->chip_send_contexts)
			count = dd->chip_send_contexts - total_contexts;

		total_contexts += count;

		/*
		 * Sanity check pool: the conversion returns a pool number
		 * or -1 for a fixed (non-negative) value.  The fixed value
		 * is checked later when we compare against total memory
		 * available.
		 */
		pool = wildcard_to_pool(size);
		if (pool == -1) {			/* non-wildcard */
			fixed_blocks += size * count;
		} else if (pool < NUM_SC_POOLS) {	/* valid wildcard */
			mem_pool_info[pool].count += count;
		} else {				/* invalid wildcard */
			dd_dev_err(
				dd,
				"%s send context invalid pool wildcard %d\n",
				sc_type_name(i), size);
			return -EINVAL;
		}

		dd->sc_sizes[i].count = count;
		dd->sc_sizes[i].size = size;
	}
	if (fixed_blocks > total_blocks) {
		dd_dev_err(
			dd,
			"Send context fixed block count, %u, larger than total block count %u\n",
			fixed_blocks, total_blocks);
		return -EINVAL;
	}

	/* step 3: allocate the pool blocks and divide them among the pools */
	pool_blocks = total_blocks - fixed_blocks;
	if (ab_total > pool_blocks) {
		dd_dev_err(
			dd,
			"Send context fixed pool sizes, %u, larger than pool block count %u\n",
			ab_total, pool_blocks);
		return -EINVAL;
	}
	/* subtract off the fixed pool blocks */
	pool_blocks -= ab_total;

	for (i = 0; i < NUM_SC_POOLS; i++) {
		struct mem_pool_info *pi = &mem_pool_info[i];

		/* % beats absolute blocks */
		if (pi->centipercent >= 0)
			pi->blocks = (pool_blocks * pi->centipercent) / 10000;

		if (pi->blocks == 0 && pi->count != 0) {
			dd_dev_err(
				dd,
				"Send context memory pool %d has %u contexts, but no blocks\n",
				i, pi->count);
			return -EINVAL;
		}
		if (pi->count == 0) {
			/* warn about wasted blocks */
			if (pi->blocks != 0)
				dd_dev_err(
					dd,
					"Send context memory pool %d has %u blocks, but zero contexts\n",
					i, pi->blocks);
			pi->size = 0;
		} else {
			pi->size = pi->blocks / pi->count;
		}
	}

	/* step 4: fill in the context type sizes from the pool sizes */
	used_blocks = 0;
	for (i = 0; i < SC_MAX; i++) {
		if (dd->sc_sizes[i].size < 0) {
			unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size);

			WARN_ON_ONCE(pool >= NUM_SC_POOLS);
			dd->sc_sizes[i].size = mem_pool_info[pool].size;
		}
		/* make sure we are not larger than what is allowed by the HW */
#define PIO_MAX_BLOCKS 1024
		if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS)
			dd->sc_sizes[i].size = PIO_MAX_BLOCKS;

		/* calculate our total usage */
		used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count;
	}
	extra = total_blocks - used_blocks;
	if (extra != 0)
		dd_dev_info(dd, "unused send context blocks: %d\n", extra);

	return total_contexts;
}
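
/*
 * Worked example of the pool sizing above (illustrative numbers only, not
 * taken from real hardware): with 16384 total blocks, 16 ACK contexts of
 * 32 credits each (512 blocks) and one VL15 context of 102 credits,
 * fixed_blocks = 614.  The default configuration puts 100% (10000
 * centipercent) of the remaining 15770 blocks in pool 0, so every pool-0
 * context (kernel and user) gets 15770 / <pool-0 context count> blocks,
 * capped at PIO_MAX_BLOCKS.
 */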

int init_send_contexts(struct hfi1_devdata *dd)
{
	u16 base;
	int ret, i, j, context;

	ret = init_credit_return(dd);
	if (ret)
		return ret;

	dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8),
				     GFP_KERNEL);
	dd->send_contexts = kcalloc(dd->num_send_contexts,
				    sizeof(struct send_context_info),
				    GFP_KERNEL);
	if (!dd->send_contexts || !dd->hw_to_sw) {
		kfree(dd->hw_to_sw);
		kfree(dd->send_contexts);
		free_credit_return(dd);
		return -ENOMEM;
	}

	/* hardware context map starts with invalid send context indices */
	for (i = 0; i < TXE_NUM_CONTEXTS; i++)
		dd->hw_to_sw[i] = INVALID_SCI;

	/*
	 * All send contexts have their credit sizes.  Allocate credits
	 * for each context one after another from the global space.
	 */
	context = 0;
	base = 1;
	for (i = 0; i < SC_MAX; i++) {
		struct sc_config_sizes *scs = &dd->sc_sizes[i];

		for (j = 0; j < scs->count; j++) {
			struct send_context_info *sci =
						&dd->send_contexts[context];
			sci->type = i;
			sci->base = base;
			sci->credits = scs->size;

			context++;
			base += scs->size;
		}
	}

	return 0;
}

/*
 * Allocate a software index and hardware context of the given type.
 *
 * Must be called with dd->sc_lock held.
 */
static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
		       u32 *hw_context)
{
	struct send_context_info *sci;
	u32 index;
	u32 context;

	for (index = 0, sci = &dd->send_contexts[0];
			index < dd->num_send_contexts; index++, sci++) {
		if (sci->type == type && sci->allocated == 0) {
			sci->allocated = 1;
			/* use a 1:1 mapping, but make them non-equal */
			context = dd->chip_send_contexts - index - 1;
			dd->hw_to_sw[context] = index;
			*sw_index = index;
			*hw_context = context;
			return 0; /* success */
		}
	}
	dd_dev_err(dd, "Unable to locate a free type %d send context\n", type);
	return -ENOSPC;
}

/*
 * Free the send context given by its software index.
 *
 * Must be called with dd->sc_lock held.
 */
static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
{
	struct send_context_info *sci;

	sci = &dd->send_contexts[sw_index];
	if (!sci->allocated) {
		dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
			   __func__, sw_index, hw_context);
	}
	sci->allocated = 0;
	dd->hw_to_sw[hw_context] = INVALID_SCI;
}

/* return the base context of a context in a group */
static inline u32 group_context(u32 context, u32 group)
{
	return (context >> group) << group;
}

/* return the size of a group */
static inline u32 group_size(u32 group)
{
	return 1 << group;
}
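
/*
 * Grouping example (illustrative): with group = 2, group_size() = 4 and
 * group_context() clears the low 2 bits, so hardware contexts 8..11 all
 * map to base context 8 and share one credit return address.  The current
 * code always allocates with sc->group = 0, i.e. groups of one.
 */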

/*
 * Obtain the credit return addresses, kernel virtual and dma, for the
 * given sc.
 *
 * To understand this routine:
 * o va and dma are arrays of struct credit_return.  One for each physical
 *   send context, per NUMA.
 * o Each send context always looks in its relative location in a struct
 *   credit_return for its credit return.
 * o Each send context in a group must have its return address CSR programmed
 *   with the same value.  Use the address of the first send context in the
 *   group.
 */
static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
{
	u32 gc = group_context(sc->hw_context, sc->group);
	u32 index = sc->hw_context & 0x7;

	sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
	*dma = (unsigned long)
	       &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
}

/*
 * Work queue function triggered in error interrupt routine for
 * halted contexts.
 */
static void sc_halted(struct work_struct *work)
{
	struct send_context *sc;

	sc = container_of(work, struct send_context, halt_work);
	sc_restart(sc);
}

/*
 * Calculate PIO block threshold for this send context using the given MTU.
 * Trigger a return when one MTU plus optional header of credits remain.
 *
 * Parameter mtu is in bytes.
 * Parameter hdrqentsize is in DWORDs.
 *
 * Return value is what to write into the CSR: trigger return when
 * unreturned credits pass this count.
 */
u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
{
	u32 release_credits;
	u32 threshold;

	/* add in the header size, then divide by the PIO block size */
	mtu += hdrqentsize << 2;
	release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE);

	/* check against this context's credits */
	if (sc->credits <= release_credits)
		threshold = 1;
	else
		threshold = sc->credits - release_credits;

	return threshold;
}
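
/*
 * Worked example (illustrative numbers): with a 4096 byte MTU, a
 * hdrqentsize of 32 DWORDs (128 bytes) and 64 byte PIO blocks,
 * release_credits = DIV_ROUND_UP(4096 + 128, 64) = 66.  A context with
 * 128 credits is then given a threshold of 128 - 66 = 62, so a credit
 * return triggers while there is still room for one full-sized packet.
 */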

/*
 * Calculate credit threshold in terms of percent of the allocated credits.
 * Trigger when unreturned credits equal or exceed the percentage of the whole.
 *
 * Return value is what to write into the CSR: trigger return when
 * unreturned credits pass this count.
 */
u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
{
	return (sc->credits * percent) / 100;
}

/* set the credit return threshold */
void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
{
	unsigned long flags;
	u32 old_threshold;
	int force_return = 0;

	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);

	old_threshold = (sc->credit_ctrl >>
				SC(CREDIT_CTRL_THRESHOLD_SHIFT))
			 & SC(CREDIT_CTRL_THRESHOLD_MASK);

	if (new_threshold != old_threshold) {
		sc->credit_ctrl =
			(sc->credit_ctrl
				& ~SC(CREDIT_CTRL_THRESHOLD_SMASK))
			| ((new_threshold
				& SC(CREDIT_CTRL_THRESHOLD_MASK))
			   << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
		write_kctxt_csr(sc->dd, sc->hw_context,
				SC(CREDIT_CTRL), sc->credit_ctrl);

		/* force a credit return on change to avoid a possible stall */
		force_return = 1;
	}

	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);

	if (force_return)
		sc_return_credits(sc);
}

/*
 * set_pio_integrity
 *
 * Set the CHECK_ENABLE register for the send context 'sc'.
 */
void set_pio_integrity(struct send_context *sc)
{
	struct hfi1_devdata *dd = sc->dd;
	u32 hw_context = sc->hw_context;
	int type = sc->type;

	write_kctxt_csr(dd, hw_context,
			SC(CHECK_ENABLE),
			hfi1_pkt_default_send_ctxt_mask(dd, type));
}

/* sum the per-cpu outstanding PIO buffer counters */
static u32 get_buffers_allocated(struct send_context *sc)
{
	int cpu;
	u32 ret = 0;

	for_each_possible_cpu(cpu)
		ret += *per_cpu_ptr(sc->buffers_allocated, cpu);
	return ret;
}

static void reset_buffers_allocated(struct send_context *sc)
{
	int cpu;

	for_each_possible_cpu(cpu)
		(*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0;
}

/*
 * Allocate a NUMA relative send context structure of the given type along
 * with a HW context.
 */
struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
			      uint hdrqentsize, int numa)
{
	struct send_context_info *sci;
	struct send_context *sc = NULL;
	int req_type = type;
	dma_addr_t dma;
	unsigned long flags;
	u64 reg;
	u32 thresh;
	u32 sw_index;
	u32 hw_context;
	int ret;
	u8 opval, opmask;

	/* do not allocate while frozen */
	if (dd->flags & HFI1_FROZEN)
		return NULL;

	sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
	if (!sc)
		return NULL;

	sc->buffers_allocated = alloc_percpu(u32);
	if (!sc->buffers_allocated) {
		kfree(sc);
		dd_dev_err(dd,
			   "Cannot allocate buffers_allocated per cpu counters\n");
		return NULL;
	}

	/*
	 * VNIC contexts are dynamically allocated.
	 * Pick a VNIC context from the reserved user context pool.
	 */
	if (type == SC_VNIC)
		type = SC_USER;

	spin_lock_irqsave(&dd->sc_lock, flags);
	ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
	if (ret) {
		spin_unlock_irqrestore(&dd->sc_lock, flags);
		free_percpu(sc->buffers_allocated);
		kfree(sc);
		return NULL;
	}

	/*
	 * VNIC contexts are used by the kernel driver.
	 * Set the type accordingly so the rest of the setup and the
	 * rest of the driver treat it as a kernel context.
	 */
	if (req_type == SC_VNIC) {
		dd->send_contexts[sw_index].type = SC_KERNEL;
		type = SC_KERNEL;
	}

	sci = &dd->send_contexts[sw_index];
	sci->sc = sc;

	sc->dd = dd;
	sc->node = numa;
	sc->type = type;
	spin_lock_init(&sc->alloc_lock);
	spin_lock_init(&sc->release_lock);
	spin_lock_init(&sc->credit_ctrl_lock);
	INIT_LIST_HEAD(&sc->piowait);
	INIT_WORK(&sc->halt_work, sc_halted);
	init_waitqueue_head(&sc->halt_wait);

	/* grouping is always single context for now */
	sc->group = 0;

	sc->sw_index = sw_index;
	sc->hw_context = hw_context;
	cr_group_addresses(sc, &dma);
	sc->credits = sci->credits;
	sc->size = sc->credits * PIO_BLOCK_SIZE;

/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
#define PIO_ADDR_CONTEXT_SHIFT 16
	sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK)
					<< PIO_ADDR_CONTEXT_SHIFT);

	/* set base and credits */
	reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK))
					<< SC(CTRL_CTXT_DEPTH_SHIFT))
		| ((sci->base & SC(CTRL_CTXT_BASE_MASK))
					<< SC(CTRL_CTXT_BASE_SHIFT));
	write_kctxt_csr(dd, hw_context, SC(CTRL), reg);

	set_pio_integrity(sc);

	/* unmask all errors */
	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1);

	/* set the default partition key */
	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
			(SC(CHECK_PARTITION_KEY_VALUE_MASK) &
			 DEFAULT_PKEY) <<
			SC(CHECK_PARTITION_KEY_VALUE_SHIFT));

	/* per context type checks */
	if (type == SC_USER) {
		opval = USER_OPCODE_CHECK_VAL;
		opmask = USER_OPCODE_CHECK_MASK;
	} else {
		opval = OPCODE_CHECK_VAL_DISABLED;
		opmask = OPCODE_CHECK_MASK_DISABLED;
	}

	/* set the send context check opcode mask and value */
	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
			((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));

	/* set up credit return */
	reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);

	/*
	 * Calculate the initial credit return threshold.
	 *
	 * For Ack contexts, set a threshold for half the credits.
	 * For User contexts use the given percentage.  This has been
	 * sanitized on driver start-up.
	 * For Kernel contexts, use the default MTU plus a header or half
	 * the credits, whichever is smaller.  This should work for both
	 * the 3-deep buffering allocation and the pooling allocation.
	 */
	if (type == SC_ACK) {
		thresh = sc_percent_to_threshold(sc, 50);
	} else if (type == SC_USER) {
		thresh = sc_percent_to_threshold(sc,
						 user_credit_return_threshold);
	} else { /* kernel */
		thresh = min(sc_percent_to_threshold(sc, 50),
			     sc_mtu_to_threshold(sc, hfi1_max_mtu,
						 hdrqentsize));
	}
	reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
	/* add in early return */
	if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN))
		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
	else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */
		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);

	/* set up write-through credit_ctrl */
	sc->credit_ctrl = reg;
	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg);

	/* User send contexts should not allow sending on VL15 */
	if (type == SC_USER) {
		reg = 1ULL << 15;
		write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg);
	}

	spin_unlock_irqrestore(&dd->sc_lock, flags);

	/*
	 * Allocate shadow ring to track outstanding PIO buffers _after_
	 * unlocking.  We don't know the size until the lock is held and
	 * we can't allocate while the lock is held.  No one is using
	 * the context yet, so allocate it now.
	 *
	 * User contexts do not get a shadow ring.
	 */
	if (type != SC_USER) {
		/*
		 * Size the shadow ring 1 larger than the number of credits
		 * so head == tail can mean empty.
		 */
		sc->sr_size = sci->credits + 1;
		sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) *
				      sc->sr_size, GFP_KERNEL, numa);
		if (!sc->sr) {
			sc_free(sc);
			return NULL;
		}
	}

	hfi1_cdbg(PIO,
		  "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n",
		  sw_index,
		  hw_context,
		  sc_type_name(type),
		  sc->group,
		  sc->credits,
		  sc->credit_ctrl,
		  thresh);

	return sc;
}
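
/*
 * Typical kernel-side lifecycle (a sketch only; see init_pervl_scs()
 * below for the real call sequence, and note that "rcd" here is just an
 * illustrative receive-context variable):
 *
 *	sc = sc_alloc(dd, SC_KERNEL, rcd->rcvhdrqentsize, dd->node);
 *	if (!sc)
 *		return -ENOMEM;
 *	sc_enable(sc);		// runs PIO init, then enables the context
 *	...			// sc_buffer_alloc() / writes / credit returns
 *	sc_free(sc);		// disables and releases the context
 */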

/* free a per-NUMA send context structure */
void sc_free(struct send_context *sc)
{
	struct hfi1_devdata *dd;
	unsigned long flags;
	u32 sw_index;
	u32 hw_context;

	if (!sc)
		return;

	sc->flags |= SCF_IN_FREE;	/* ensure no restarts */
	dd = sc->dd;
	if (!list_empty(&sc->piowait))
		dd_dev_err(dd, "piowait list not empty!\n");
	sw_index = sc->sw_index;
	hw_context = sc->hw_context;
	sc_disable(sc);	/* make sure the HW is disabled */
	flush_work(&sc->halt_work);

	spin_lock_irqsave(&dd->sc_lock, flags);
	dd->send_contexts[sw_index].sc = NULL;

	/* clear/disable all registers set in sc_alloc */
	write_kctxt_csr(dd, hw_context, SC(CTRL), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0);
	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0);

	/* release the index and context for re-use */
	sc_hw_free(dd, sw_index, hw_context);
	spin_unlock_irqrestore(&dd->sc_lock, flags);

	kfree(sc->sr);
	free_percpu(sc->buffers_allocated);
	kfree(sc);
}

/* disable the context */
void sc_disable(struct send_context *sc)
{
	u64 reg;
	unsigned long flags;
	struct pio_buf *pbuf;

	if (!sc)
		return;

	/* do all steps, even if already disabled */
	spin_lock_irqsave(&sc->alloc_lock, flags);
	reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
	reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
	sc->flags &= ~SCF_ENABLED;
	sc_wait_for_packet_egress(sc, 1);
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
	spin_unlock_irqrestore(&sc->alloc_lock, flags);

	/*
	 * Flush any waiters.  Once the context is disabled, credit
	 * return interrupts are stopped (although there could be one
	 * in-process when the context is disabled).  Wait one
	 * microsecond for any lingering interrupt, then proceed with
	 * the flush.
	 */
	udelay(1);
	spin_lock_irqsave(&sc->release_lock, flags);
	if (sc->sr) {	/* this context has a shadow ring */
		while (sc->sr_tail != sc->sr_head) {
			pbuf = &sc->sr[sc->sr_tail].pbuf;
			if (pbuf->cb)
				(*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE);
			sc->sr_tail++;
			if (sc->sr_tail >= sc->sr_size)
				sc->sr_tail = 0;
		}
	}
	spin_unlock_irqrestore(&sc->release_lock, flags);
}

/* return SendEgressCtxtStatus.PacketOccupancy */
#define packet_occupancy(r) \
	(((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
	>> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)

/* is egress halted on the context? */
#define egress_halted(r) \
	((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)

/* wait for packet egress, optionally pause for credit return */
static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
{
	struct hfi1_devdata *dd = sc->dd;
	u64 reg = 0;
	u64 reg_prev;
	u32 loop = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, sc->hw_context * 8 +
			       SEND_EGRESS_CTXT_STATUS);
		/* done if egress is halted */
		if (egress_halted(reg))
			break;
		reg = packet_occupancy(reg);
		if (reg == 0)
			break;
		/* counter is reset if occupancy count changes */
		if (reg != reg_prev)
			loop = 0;
		if (loop > 50000) {
			/*
			 * timed out: no forward progress for roughly 50ms
			 * (50000 iterations of udelay(1)) - bounce the link
			 */
			dd_dev_err(dd,
				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sc->sw_index,
				   sc->hw_context, (u32)reg);
			queue_work(dd->pport->hfi1_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		loop++;
		udelay(1);
	}

	if (pause)
		/* add additional delay to ensure chip returns all credits */
		pause_for_credit_return(dd);
}

void sc_wait(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		struct send_context *sc = dd->send_contexts[i].sc;

		if (!sc)
			continue;
		sc_wait_for_packet_egress(sc, 0);
	}
}

/*
 * Restart a context after it has been halted due to error.
 *
 * If the first step fails - wait for the halt to be asserted, return early.
 * Otherwise complain about timeouts but keep going.
 *
 * It is expected that allocations (enabled flag bit) have been shut off
 * already (only applies to kernel contexts).
 */
int sc_restart(struct send_context *sc)
{
	struct hfi1_devdata *dd = sc->dd;
	u64 reg;
	u32 loop;
	int count;

	/* bounce off if not halted, or being free'd */
	if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE))
		return -EINVAL;

	dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
		    sc->hw_context);

	/*
	 * Step 1: Wait for the context to actually halt.
	 *
	 * The error interrupt is asynchronous to actually setting halt
	 * on the context.
	 */
	loop = 0;
	while (1) {
		reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS));
		if (reg & SC(STATUS_CTXT_HALTED_SMASK))
			break;
		if (loop > 100) {
			dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
				   __func__, sc->sw_index, sc->hw_context);
			return -ETIME;
		}
		loop++;
		udelay(1);
	}

	/*
	 * Step 2: Ensure no users are still trying to write to PIO.
	 *
	 * For kernel contexts, we have already turned off buffer allocation.
	 * Now wait for the buffer count to go to zero.
	 *
	 * For user contexts, the user handling code needs to keep track of
	 * the buffers it is writing into and flush them on its own.
	 */
	if (sc->type != SC_USER) {
		/* kernel context */
		loop = 0;
		while (1) {
			count = get_buffers_allocated(sc);
			if (count == 0)
				break;
			if (loop > 100) {
				dd_dev_err(dd,
					   "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
					   __func__, sc->sw_index,
					   sc->hw_context, count);
			}
			loop++;
			udelay(1);
		}
	}

	/*
	 * Step 3: Wait for all packets to egress.
	 * This is done while disabling the send context.
	 *
	 * Step 4: Disable the context.
	 *
	 * This is a superset of the halt.  All context CSRs are cleared.
	 */
	sc_disable(sc);

	/*
	 * Step 5: Enable the context.
	 *
	 * This enable will clear the halted flag and per-send context
	 * error flags.
	 */
	return sc_enable(sc);
}

/*
 * PIO freeze processing.  To be called after the TXE block is fully frozen.
 * Go through all frozen send contexts and disable them.  The contexts are
 * already stopped by the freeze.
 */
void pio_freeze(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		/*
		 * Don't disable unallocated, unfrozen, or user send contexts.
		 * User send contexts will be disabled when the process
		 * calls into the driver to reset its context.
		 */
		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
			continue;

		/* only need to disable, the context is already stopped */
		sc_disable(sc);
	}
}

/*
 * Unfreeze PIO for kernel send contexts.  The precondition for calling this
 * function is that all kernel send contexts have been disabled and the SPC
 * freeze has been cleared.  Now perform the last step and re-enable each
 * kernel context.  User (PSM) processing will occur when PSM calls into the
 * kernel to acknowledge the freeze.
 */
void pio_kernel_unfreeze(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
			continue;

		sc_enable(sc);	/* will clear the sc frozen flag */
	}
}

/*
 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
 * Returns:
 *	-ETIMEDOUT - if we wait too long
 *	-EIO	   - if there was an error
 */
static int pio_init_wait_progress(struct hfi1_devdata *dd)
{
	u64 reg;
	int max, count = 0;

	/* max is the longest possible HW init time / delay */
	max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5;
	while (1) {
		reg = read_csr(dd, SEND_PIO_INIT_CTXT);
		if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK))
			break;
		if (count >= max)
			return -ETIMEDOUT;
		udelay(5);
		count++;
	}

	return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0;
}

/*
 * Reset all of the send contexts to their power-on state.  Used
 * only during manual init - no lock against sc_enable needed.
 */
void pio_reset_all(struct hfi1_devdata *dd)
{
	int ret;

	/* make sure the init engine is not busy */
	ret = pio_init_wait_progress(dd);
	/* ignore any timeout */
	if (ret == -EIO) {
		/* clear the error */
		write_csr(dd, SEND_PIO_ERR_CLEAR,
			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
	}

	/* reset init all */
	write_csr(dd, SEND_PIO_INIT_CTXT,
		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
	udelay(2);
	ret = pio_init_wait_progress(dd);
	if (ret < 0) {
		dd_dev_err(dd,
			   "PIO send context init %s while initializing all PIO blocks\n",
			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
	}
}

/* enable the context */
int sc_enable(struct send_context *sc)
{
	u64 sc_ctrl, reg, pio;
	struct hfi1_devdata *dd;
	unsigned long flags;
	int ret = 0;

	if (!sc)
		return -EINVAL;
	dd = sc->dd;

	/*
	 * Obtain the allocator lock to guard against any allocation
	 * attempts (which should not happen prior to context being
	 * enabled).  On the release/disable side we don't need to
	 * worry about locking since the releaser will not do anything
	 * if the context accounting values have not changed.
	 */
	spin_lock_irqsave(&sc->alloc_lock, flags);
	sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
	if (sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK))
		goto unlock; /* already enabled */

	/* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */

	*sc->hw_free = 0;
	sc->free = 0;
	sc->alloc_free = 0;
	sc->fill = 0;
	sc->fill_wrap = 0;
	sc->sr_head = 0;
	sc->sr_tail = 0;
	sc->flags = 0;
	/* the alloc lock ensures no fast path allocation */
	reset_buffers_allocated(sc);

	/*
	 * Clear all per-context errors.  Some of these will be set when
	 * we are re-enabling after a context halt.  Now that the context
	 * is disabled, the halt will not clear until after the PIO init
	 * engine runs below.
	 */
	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
	if (reg)
		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);

	/*
	 * The HW PIO initialization engine can handle only one init
	 * request at a time.  Serialize access to each device's engine.
	 */
	spin_lock(&dd->sc_init_lock);
	/*
	 * Since access to this code block is serialized and each access
	 * waits for the initialization to complete before releasing the
	 * lock, the PIO initialization engine should not be in use, so
	 * we don't have to wait for the InProgress bit to go down.
	 */
	pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) <<
	       SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) |
		SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK;
	write_csr(dd, SEND_PIO_INIT_CTXT, pio);
	/*
	 * Wait until the engine is done.  Give the chip the required time
	 * so, hopefully, we read the register just once.
	 */
	udelay(2);
	ret = pio_init_wait_progress(dd);
	spin_unlock(&dd->sc_init_lock);
	if (ret) {
		dd_dev_err(dd,
			   "sctxt%u(%u): Context not enabled due to init failure %d\n",
			   sc->sw_index, sc->hw_context, ret);
		goto unlock;
	}

	/*
	 * All is well.  Enable the context.
	 */
	sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK);
	write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl);
	/*
	 * Read SendCtxtCtrl to force the write out and prevent a timing
	 * hazard where a PIO write may reach the context before the enable.
	 */
	read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
	sc->flags |= SCF_ENABLED;

unlock:
	spin_unlock_irqrestore(&sc->alloc_lock, flags);

	return ret;
}

/* force a credit return on the context */
void sc_return_credits(struct send_context *sc)
{
	if (!sc)
		return;

	/* a 0->1 transition schedules a credit return */
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
			SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
	/*
	 * Ensure that the write is flushed and the credit return is
	 * scheduled.  We care more about the 0 -> 1 transition.
	 */
	read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE));
	/* set back to 0 for next time */
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0);
}

/* allow all in-flight packets to drain on the context */
void sc_flush(struct send_context *sc)
{
	if (!sc)
		return;

	sc_wait_for_packet_egress(sc, 1);
}

/* drop all packets on the context, no waiting until they are sent */
void sc_drop(struct send_context *sc)
{
	if (!sc)
		return;

	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
		    __func__, sc->sw_index, sc->hw_context);
}

/*
 * Start the software reaction to a context halt or SPC freeze:
 *	- mark the context as halted or frozen
 *	- stop buffer allocations
 *
 * Called from the error interrupt.  Other work is deferred until
 * out of the interrupt.
 */
void sc_stop(struct send_context *sc, int flag)
{
	unsigned long flags;

	/* mark the context */
	sc->flags |= flag;

	/* stop buffer allocations */
	spin_lock_irqsave(&sc->alloc_lock, flags);
	sc->flags &= ~SCF_ENABLED;
	spin_unlock_irqrestore(&sc->alloc_lock, flags);
	wake_up(&sc->halt_wait);
}

#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
#define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
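
/*
 * Block math example: PIO_BLOCK_SIZE is 64 bytes, so BLOCK_DWORDS = 16.
 * A request of dw_len = 18 DWORDs (e.g. PBC plus a small header/payload)
 * therefore consumes dwords_to_blocks(18) = 2 blocks, i.e. 2 credits.
 */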

/*
 * The send context buffer "allocator".
 *
 * @sc: the PIO send context we are allocating from
 * @dw_len: length of whole packet - including PBC - in dwords
 * @cb: optional callback to call when the buffer is finished sending
 * @arg: argument for cb
 *
 * Return a pointer to a PIO buffer if successful, NULL if not enough room.
 */
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
				pio_release_cb cb, void *arg)
{
	struct pio_buf *pbuf = NULL;
	unsigned long flags;
	unsigned long avail;
	unsigned long blocks = dwords_to_blocks(dw_len);
	u32 fill_wrap;
	int trycount = 0;
	u32 head, next;

	spin_lock_irqsave(&sc->alloc_lock, flags);
	if (!(sc->flags & SCF_ENABLED)) {
		spin_unlock_irqrestore(&sc->alloc_lock, flags);
		goto done;
	}

retry:
	avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free);
	if (blocks > avail) {
		/* not enough room */
		if (unlikely(trycount))	{ /* already tried to get more room */
			spin_unlock_irqrestore(&sc->alloc_lock, flags);
			goto done;
		}
		/* copy from receiver cache line and recalculate */
		sc->alloc_free = ACCESS_ONCE(sc->free);
		avail =
			(unsigned long)sc->credits -
			(sc->fill - sc->alloc_free);
		if (blocks > avail) {
			/* still no room, actively update */
			sc_release_update(sc);
			sc->alloc_free = ACCESS_ONCE(sc->free);
			trycount++;
			goto retry;
		}
	}

	/* there is enough room */

	preempt_disable();
	this_cpu_inc(*sc->buffers_allocated);

	/* read this once */
	head = sc->sr_head;

	/* "allocate" the buffer */
	sc->fill += blocks;
	fill_wrap = sc->fill_wrap;
	sc->fill_wrap += blocks;
	if (sc->fill_wrap >= sc->credits)
		sc->fill_wrap = sc->fill_wrap - sc->credits;

	/*
	 * Fill the parts that the releaser looks at before moving the head.
	 * The only necessary piece for allocation is sent_at.  The sent_at
	 * field is the fill count of the last block of this allocation.
	 */
	pbuf = &sc->sr[head].pbuf;
	pbuf->sent_at = sc->fill;
	pbuf->cb = cb;
	pbuf->arg = arg;
	pbuf->sc = sc;	/* could be filled in at sc->sr init time */
	/* make sure this is in memory before updating the head */

	/* calculate next head index, do not store */
	next = head + 1;
	if (next >= sc->sr_size)
		next = 0;
	/*
	 * update the head - must be last! - the releaser can look at fields
	 * in pbuf once we move the head
	 */
	smp_wmb();
	sc->sr_head = next;
	spin_unlock_irqrestore(&sc->alloc_lock, flags);

	/* finish filling in the buffer outside the lock */
	pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
	pbuf->end = sc->base_addr + sc->size;
	pbuf->qw_written = 0;
	pbuf->carry_bytes = 0;
	pbuf->carry.val64 = 0;
done:
	return pbuf;
}

/*
 * Start receiving credit return interrupts on the context.  The requests
 * nest: the interrupt is enabled on the first request and disabled only
 * when the count drops back to zero in sc_del_credit_return_intr().
 */
void sc_add_credit_return_intr(struct send_context *sc)
{
	unsigned long flags;

	/* lock must surround both the count change and the CSR update */
	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
	if (sc->credit_intr_count == 0) {
		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
		write_kctxt_csr(sc->dd, sc->hw_context,
				SC(CREDIT_CTRL), sc->credit_ctrl);
	}
	sc->credit_intr_count++;
	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
}

/*
 * Stop receiving credit return interrupts.  See the comment above
 * sc_add_credit_return_intr() for the nesting behavior.
 */
void sc_del_credit_return_intr(struct send_context *sc)
{
	unsigned long flags;

	WARN_ON(sc->credit_intr_count == 0);

	/* lock must surround both the count change and the CSR update */
	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
	sc->credit_intr_count--;
	if (sc->credit_intr_count == 0) {
		sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
		write_kctxt_csr(sc->dd, sc->hw_context,
				SC(CREDIT_CTRL), sc->credit_ctrl);
	}
	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
}

/*
 * Toggle want-PIO-buffer interrupts on or off.  When turning them on,
 * also force a credit return so a pending return is not missed.
 */
void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
{
	if (needint)
		sc_add_credit_return_intr(sc);
	else
		sc_del_credit_return_intr(sc);
	trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
	if (needint) {
		mmiowb();
		sc_return_credits(sc);
	}
}

/**
 * sc_piobufavail - callback when a PIO buffer is available
 * @sc: the send context
 *
 * This is called from the interrupt handler when a PIO buffer is
 * available after hfi1_verbs_send() returned an error that no buffers were
 * available.  Disable the interrupt if there are no waiting QPs.
 */
static void sc_piobufavail(struct send_context *sc)
{
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct list_head *list;
	struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
	struct rvt_qp *qp;
	struct hfi1_qp_priv *priv;
	unsigned long flags;
	unsigned i, n = 0;

	if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
	    dd->send_contexts[sc->sw_index].type != SC_VL15)
		return;
	list = &sc->piowait;
	/*
	 * Note: checking that the piowait list is empty and clearing
	 * the buffer available interrupt needs to be atomic or we
	 * could end up with QPs on the wait list with the interrupt
	 * disabled.
	 */
	write_seqlock_irqsave(&dev->iowait_lock, flags);
	while (!list_empty(list)) {
		struct iowait *wait;

		if (n == ARRAY_SIZE(qps))
			break;
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		/* refcount held until actual wake up */
		qps[n++] = qp;
	}
	/*
	 * If there had been waiters and there are more, ensure that we
	 * re-enable the interrupt to avoid a potential hang.
	 */
	if (n) {
		hfi1_sc_wantpiobuf_intr(sc, 0);
		if (!list_empty(list))
			hfi1_sc_wantpiobuf_intr(sc, 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	for (i = 0; i < n; i++)
		hfi1_qp_wakeup(qps[i],
			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}

/* translate a send credit update to a bit code of reasons */
static inline int fill_code(u64 hw_free)
{
	int code = 0;

	if (hw_free & CR_STATUS_SMASK)
		code |= PRC_STATUS_ERR;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK)
		code |= PRC_PBC;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK)
		code |= PRC_THRESHOLD;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK)
		code |= PRC_FILL_ERR;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK)
		code |= PRC_SC_DISABLE;
	return code;
}

/* use the jiffies compare to get the wrap right */
#define sent_before(a, b) time_before(a, b)	/* a < b */

/*
 * The send context buffer "releaser".
 */
void sc_release_update(struct send_context *sc)
{
	struct pio_buf *pbuf;
	u64 hw_free;
	u32 head, tail;
	unsigned long old_free;
	unsigned long free;
	unsigned long extra;
	unsigned long flags;
	int code;

	if (!sc)
		return;

	spin_lock_irqsave(&sc->release_lock, flags);
	/* update free */
	hw_free = le64_to_cpu(*sc->hw_free);		/* volatile read */
	old_free = sc->free;
	extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT)
			- (old_free & CR_COUNTER_MASK))
				& CR_COUNTER_MASK;
	free = old_free + extra;
	trace_hfi1_piofree(sc, extra);

	/* call sent buffer callbacks */
	code = -1;				/* code not yet set */
	head = ACCESS_ONCE(sc->sr_head);	/* snapshot the head */
	tail = sc->sr_tail;
	while (head != tail) {
		pbuf = &sc->sr[tail].pbuf;

		if (sent_before(free, pbuf->sent_at)) {
			/* not sent yet */
			break;
		}
		if (pbuf->cb) {
			if (code < 0)
				code = fill_code(hw_free);
			(*pbuf->cb)(pbuf->arg, code);
		}

		tail++;
		if (tail >= sc->sr_size)
			tail = 0;
	}
	sc->sr_tail = tail;
	/* make sure tail is updated before free */
	smp_wmb();
	sc->free = free;
	spin_unlock_irqrestore(&sc->release_lock, flags);
	sc_piobufavail(sc);
}
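
/*
 * The hardware credit counter is narrower than the software sc->free
 * counter, so the delta above is computed modulo CR_COUNTER_MASK.  For
 * illustration with an 8-bit counter: old_free & mask = 250 and a
 * hardware count of 4 give extra = (4 - 250) & 0xff = 10 newly returned
 * credits, which is correct across the counter wrap.
 */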

/*
 * Send context group releaser.  Argument is the send context that caused
 * the interrupt.  Called from the send context interrupt handler.
 *
 * Call release on all contexts in the group.
 *
 * This routine takes the sc_lock without an irqsave because it is only
 * called from an interrupt handler.  Adjust if that changes.
 */
void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
{
	struct send_context *sc;
	u32 sw_index;
	u32 gc, gc_end;

	spin_lock(&dd->sc_lock);
	sw_index = dd->hw_to_sw[hw_context];
	if (unlikely(sw_index >= dd->num_send_contexts)) {
		dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
			   __func__, hw_context, sw_index);
		goto done;
	}
	sc = dd->send_contexts[sw_index].sc;
	if (unlikely(!sc))
		goto done;

	gc = group_context(hw_context, sc->group);
	gc_end = gc + group_size(sc->group);
	for (; gc < gc_end; gc++) {
		sw_index = dd->hw_to_sw[gc];
		if (unlikely(sw_index >= dd->num_send_contexts)) {
			dd_dev_err(dd,
				   "%s: invalid hw (%u) to sw (%u) mapping\n",
				   __func__, hw_context, sw_index);
			continue;
		}
		sc_release_update(dd->send_contexts[sw_index].sc);
	}
done:
	spin_unlock(&dd->sc_lock);
}

/*
 * pio_select_send_context_vl() - select send context
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns a send context based on the selector and a vl.
 * The mapping fields are protected by RCU.
 */
struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
						u32 selector, u8 vl)
{
	struct pio_vl_map *m;
	struct pio_map_elem *e;
	struct send_context *rval;

	/*
	 * NOTE This should only happen if SC->VL changed after the initial
	 * checks on the QP/AH.
	 * Default will return VL0's send context below.
	 */
	if (unlikely(vl >= num_vls)) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->pio_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return dd->vld[0].sc;
	}
	e = m->map[vl & m->mask];
	rval = e->ksc[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? dd->vld[0].sc : rval;
	return rval;
}

/*
 * pio_select_send_context_sc() - select send context
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns a send context based on the selector and an sc.
 */
struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
						u32 selector, u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return pio_select_send_context_vl(dd, selector, vl);
}
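
/*
 * Lookup example (illustrative): with num_vls = 4 the VL map mask is 3,
 * so m->map[vl & 3] picks the per-VL element.  If that VL owns 3 kernel
 * send contexts, the element is rounded up to 4 slots with the first
 * context reused, and e->ksc[selector & 3] spreads senders across them.
 */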

/*
 * Free the indicated map struct.
 */
static void pio_map_free(struct pio_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

/*
 * Handle RCU callback.
 */
static void pio_map_rcu_callback(struct rcu_head *list)
{
	struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);

	pio_map_free(m);
}

/*
 * Set credit return threshold for the kernel send context.
 */
static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
{
	u32 thres;

	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
					    50),
		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
					dd->vld[i].mtu,
					dd->rcd[0]->rcvhdrqentsize));
	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
}

/*
 * pio_map_init - called when #vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_scontexts: per-vl send contexts (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_scontexts is used to specify a non-uniform vl/send context
 * loading.  NULL implies auto computing the loading and giving each
 * VL a uniform distribution of send contexts per VL.
 *
 * The auto algorithm computes the sc_per_vl and the number of extra
 * send contexts.  Any extra send contexts are added from the last VL
 * on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 *
 * If either num_vls or the per-VL context count is not a power of 2,
 * the array sizes in struct pio_vl_map and struct pio_map_elem are
 * rounded up to the next highest power of 2 and the first entry is
 * reused in a round-robin fashion.
 *
 * If an error occurs the map change is not done and the mapping is not
 * changed.
 */
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
{
	int i, j;
	int extra, sc_per_vl;
	int scontext = 1;
	int num_kernel_send_contexts = 0;
	u8 lvl_scontexts[OPA_MAX_VLS];
	struct pio_vl_map *oldmap, *newmap;

	if (!vl_scontexts) {
		for (i = 0; i < dd->num_send_contexts; i++)
			if (dd->send_contexts[i].type == SC_KERNEL)
				num_kernel_send_contexts++;
		/* truncate divide */
		sc_per_vl = num_kernel_send_contexts / num_vls;
		/* extras */
		extra = num_kernel_send_contexts % num_vls;
		vl_scontexts = lvl_scontexts;
		/* add extras from last vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(sizeof(*newmap) +
			 roundup_pow_of_two(num_vls) *
			 sizeof(struct pio_map_elem *),
			 GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_scontext = scontext;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_scontexts[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
						 sz * sizeof(struct
							     send_context *),
						 GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/*
			 * assign send contexts and
			 * adjust credit return threshold
			 */
			for (j = 0; j < sz; j++) {
				if (dd->kernel_send_context[scontext]) {
					newmap->map[i]->ksc[j] =
					dd->kernel_send_context[scontext];
					set_threshold(dd, scontext, i);
				}
				if (++scontext >= first_scontext +
						  vl_scontexts[i])
					/* wrap back to first send context */
					scontext = first_scontext;
			}
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		scontext = first_scontext + vl_scontexts[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->pio_map_lock);
	oldmap = rcu_dereference_protected(dd->pio_map,
					   lockdep_is_held(&dd->pio_map_lock));

	/* publish newmap */
	rcu_assign_pointer(dd->pio_map, newmap);

	spin_unlock_irq(&dd->pio_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, pio_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	pio_map_free(newmap);
	return -ENOMEM;
}

void free_pio_map(struct hfi1_devdata *dd)
{
	/* Free PIO map if allocated */
	if (rcu_access_pointer(dd->pio_map)) {
		spin_lock_irq(&dd->pio_map_lock);
		pio_map_free(rcu_access_pointer(dd->pio_map));
		RCU_INIT_POINTER(dd->pio_map, NULL);
		spin_unlock_irq(&dd->pio_map_lock);
		synchronize_rcu();
	}
	kfree(dd->kernel_send_context);
	dd->kernel_send_context = NULL;
}

int init_pervl_scs(struct hfi1_devdata *dd)
{
	int i;
	u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
	u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
	u32 ctxt;
	struct hfi1_pportdata *ppd = dd->pport;

	dd->vld[15].sc = sc_alloc(dd, SC_VL15,
				  dd->rcd[0]->rcvhdrqentsize, dd->node);
	if (!dd->vld[15].sc)
		return -ENOMEM;

	hfi1_init_ctxt(dd->vld[15].sc);
	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);

	dd->kernel_send_context = kzalloc_node(dd->num_send_contexts *
					sizeof(struct send_context *),
					GFP_KERNEL, dd->node);
	if (!dd->kernel_send_context)
		goto freesc15;

	dd->kernel_send_context[0] = dd->vld[15].sc;

	for (i = 0; i < num_vls; i++) {
		/*
		 * Since this function does not deal with a specific
		 * receive context but we need the RcvHdrQ entry size,
		 * use the size from rcd[0].  It is guaranteed to be
		 * valid at this point and will remain the same for all
		 * receive contexts.
		 */
		dd->vld[i].sc = sc_alloc(dd, SC_KERNEL,
					 dd->rcd[0]->rcvhdrqentsize, dd->node);
		if (!dd->vld[i].sc)
			goto nomem;
		dd->kernel_send_context[i + 1] = dd->vld[i].sc;
		hfi1_init_ctxt(dd->vld[i].sc);
		/* non VL15 start with the max MTU */
		dd->vld[i].mtu = hfi1_max_mtu;
	}
	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
		dd->kernel_send_context[i + 1] =
		sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
		if (!dd->kernel_send_context[i + 1])
			goto nomem;
		hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
	}

	sc_enable(dd->vld[15].sc);
	ctxt = dd->vld[15].sc->hw_context;
	mask = all_vl_mask & ~(1LL << 15);
	write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	dd_dev_info(dd,
		    "Using send context %u(%u) for VL15\n",
		    dd->vld[15].sc->sw_index, ctxt);

	for (i = 0; i < num_vls; i++) {
		sc_enable(dd->vld[i].sc);
		ctxt = dd->vld[i].sc->hw_context;
		mask = all_vl_mask & ~(data_vls_mask);
		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	}
	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
		sc_enable(dd->kernel_send_context[i + 1]);
		ctxt = dd->kernel_send_context[i + 1]->hw_context;
		mask = all_vl_mask & ~(data_vls_mask);
		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	}

	if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
		goto nomem;
	return 0;

nomem:
	for (i = 0; i < num_vls; i++) {
		sc_free(dd->vld[i].sc);
		dd->vld[i].sc = NULL;
	}

	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
		sc_free(dd->kernel_send_context[i + 1]);

	kfree(dd->kernel_send_context);
	dd->kernel_send_context = NULL;

freesc15:
	sc_free(dd->vld[15].sc);
	return -ENOMEM;
}

int init_credit_return(struct hfi1_devdata *dd)
{
	int ret;
	int i;

	dd->cr_base = kcalloc(
		node_affinity.num_possible_nodes,
		sizeof(struct credit_return_base),
		GFP_KERNEL);
	if (!dd->cr_base) {
		ret = -ENOMEM;
		goto done;
	}
	for_each_node_with_cpus(i) {
		int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);

		set_dev_node(&dd->pcidev->dev, i);
		dd->cr_base[i].va = dma_zalloc_coherent(
			&dd->pcidev->dev,
			bytes,
			&dd->cr_base[i].dma,
			GFP_KERNEL);
		if (!dd->cr_base[i].va) {
			set_dev_node(&dd->pcidev->dev, dd->node);
			dd_dev_err(dd,
				   "Unable to allocate credit return DMA range for NUMA %d\n",
				   i);
			ret = -ENOMEM;
			goto done;
		}
	}
	set_dev_node(&dd->pcidev->dev, dd->node);

	ret = 0;
done:
	return ret;
}

void free_credit_return(struct hfi1_devdata *dd)
{
	int i;

	if (!dd->cr_base)
		return;
	for (i = 0; i < node_affinity.num_possible_nodes; i++) {
		if (dd->cr_base[i].va) {
			dma_free_coherent(&dd->pcidev->dev,
					  TXE_NUM_CONTEXTS *
					  sizeof(struct credit_return),
					  dd->cr_base[i].va,
					  dd->cr_base[i].dma);
		}
	}
	kfree(dd->cr_base);
	dd->cr_base = NULL;
}