#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/vmalloc.h>

#include "hfi.h"
#include "common.h"
#include "device.h"
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"

static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 16384;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");

#define COPY_CACHELESS 1
#define COPY_ADAPTIVE  2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");

static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status);

static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag);

#define TXREQ_NAME_LEN 24

static uint wss_threshold;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
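
/*
 * Usage note (values shown are defaults or purely illustrative): the copy
 * heuristics above are module parameters, so they can be set at load time,
 * e.g.
 *
 *   modprobe hfi1 piothreshold=256 sge_copy_mode=2 wss_threshold=80 \
 *           wss_clean_period=256
 *
 * With sge_copy_mode=2 the adaptive path in hfi1_copy_sge() decides per
 * copy whether to use a cacheless copy, based on the working set size
 * (WSS) tracking implemented below.
 */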

struct hfi1_wss {
	unsigned long *entries;
	atomic_t total_count;
	atomic_t clean_counter;
	atomic_t clean_entry;

	int threshold;
	int num_entries;
	long pages_mask;
};

static struct hfi1_wss wss;

int hfi1_wss_init(void)
{
	long llc_size;
	long llc_bits;
	long table_size;
	long table_bits;

	/* check for a valid percent range - default to 80 if none or invalid */
	if (wss_threshold < 1 || wss_threshold > 100)
		wss_threshold = 80;
	/* reject a wildly large period */
	if (wss_clean_period > 1000000)
		wss_clean_period = 256;
	/* reject a zero period */
	if (wss_clean_period == 0)
		wss_clean_period = 1;

	/*
	 * Calculate the table size - the next power of 2 larger than the
	 * LLC size.  LLC size is in KiB.
	 */
	llc_size = wss_llc_size() * 1024;
	table_size = roundup_pow_of_two(llc_size);

	/* one bit per page in the rounded-up table */
	llc_bits = llc_size / PAGE_SIZE;
	table_bits = table_size / PAGE_SIZE;
	wss.pages_mask = table_bits - 1;
	wss.num_entries = table_bits / BITS_PER_LONG;

	wss.threshold = (llc_bits * wss_threshold) / 100;
	if (wss.threshold == 0)
		wss.threshold = 1;

	atomic_set(&wss.clean_counter, wss_clean_period);

	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
			      GFP_KERNEL);
	if (!wss.entries) {
		hfi1_wss_exit();
		return -ENOMEM;
	}

	return 0;
}
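
/*
 * Worked example of the sizing above (the LLC size is hypothetical and
 * shown only for illustration): with a 30 MiB LLC, table_size rounds up
 * to 32 MiB.  With 4 KiB pages that gives table_bits = 8192, so
 * pages_mask = 0x1fff and, on a 64-bit build, num_entries = 8192 / 64 =
 * 128 longs.  llc_bits = 7680, so the default wss_threshold of 80%
 * yields wss.threshold = 6144 pages before copies go cacheless.
 */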

void hfi1_wss_exit(void)
{
	/* safe to call multiple times and on partial initialization */
	kfree(wss.entries);
	wss.entries = NULL;
}

/*
 * Advance the clean counter.  When the clean period has expired,
 * clean an entry.
 *
 * This is implemented with atomics to avoid locking.  Because several
 * variables are involved it can be slightly racy, which is acceptable
 * for a heuristic: any inaccuracy cleans itself out as the counter
 * advances.
 *
 * The clean counter counts down to zero; when zero is reached one
 * table entry is cleaned and the counter is reset.
 */
static void wss_advance_clean_counter(void)
{
	int entry;
	int weight;
	unsigned long bits;

	/* become the cleaner if we decrement the counter to zero */
	if (atomic_dec_and_test(&wss.clean_counter)) {
		/*
		 * Set, rather than add, the clean period.  A set can lose
		 * concurrent decrements and slightly slow the clean advance,
		 * but it cannot leave the counter below the clean period,
		 * which is good enough for a heuristic.
		 */
		atomic_set(&wss.clean_counter, wss_clean_period);

		/*
		 * Uniquely grab the entry to clean and advance to the next.
		 * The current entry is the low bits of wss.clean_entry;
		 * wss.num_entries is always a power of 2.
		 */
		entry = (atomic_inc_return(&wss.clean_entry) - 1)
			& (wss.num_entries - 1);

		/* clear the entry and count the bits it held */
		bits = xchg(&wss.entries[entry], 0);
		weight = hweight64((u64)bits);
		/* only touch the contended total count if needed */
		if (weight)
			atomic_sub(weight, &wss.total_count);
	}
}

/*
 * Insert the given address into the working set array.
 */
static void wss_insert(void *address)
{
	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
	u32 entry = page / BITS_PER_LONG;
	u32 nr = page & (BITS_PER_LONG - 1);

	if (!test_and_set_bit(nr, &wss.entries[entry]))
		atomic_inc(&wss.total_count);

	wss_advance_clean_counter();
}

/*
 * Is the working set larger than the threshold?
 */
static inline int wss_exceeds_threshold(void)
{
	return atomic_read(&wss.total_count) >= wss.threshold;
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * Length of header by opcode, 0 --> not supported
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12
};
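
/*
 * Reading the byte counts above: each entry is LRH (8 bytes) plus
 * BTH (12 bytes), plus the extension header(s) that follow the BTH for
 * that opcode.  For example, RETH adds 16 (RDMA WRITE FIRST/ONLY and
 * READ REQUEST), AETH adds 4 (read responses and acknowledges),
 * AtomicETH adds 28 (COMPARE_SWAP/FETCH_ADD), DETH adds 8 (UD), and
 * immediate data adds 4.
 */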

static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};

/*
 * System image GUID.
 */
__be64 ib_hfi1_sys_image_guid;

/**
 * hfi1_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 * @release: boolean to release MR
 * @copy_last: do a separate copy of the last 8 bytes
 */
void hfi1_copy_sge(
	struct rvt_sge_state *ss,
	void *data, u32 length,
	int release,
	int copy_last)
{
	struct rvt_sge *sge = &ss->sge;
	int in_last = 0;
	int i;
	int cacheless_copy = 0;

	if (sge_copy_mode == COPY_CACHELESS) {
		cacheless_copy = length >= PAGE_SIZE;
	} else if (sge_copy_mode == COPY_ADAPTIVE) {
		if (length >= PAGE_SIZE) {
			/*
			 * NOTE: this *assumes*:
			 * o The first vaddr is the dest.
			 * o If multiple pages, then vaddr is sequential.
			 */
			wss_insert(sge->vaddr);
			if (length >= (2 * PAGE_SIZE))
				wss_insert(sge->vaddr + PAGE_SIZE);

			cacheless_copy = wss_exceeds_threshold();
		} else {
			wss_advance_clean_counter();
		}
	}
	if (copy_last) {
		if (length > 8) {
			length -= 8;
		} else {
			copy_last = 0;
			in_last = 1;
		}
	}

again:
	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		if (unlikely(in_last)) {
			/* enforce byte transfer ordering */
			for (i = 0; i < len; i++)
				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
		} else if (cacheless_copy) {
			cacheless_memcpy(sge->vaddr, data, len);
		} else {
			memcpy(sge->vaddr, data, len);
		}
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				rvt_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}

	if (copy_last) {
		copy_last = 0;
		in_last = 1;
		length = 8;
		goto again;
	}
}

/**
 * hfi1_skip_sge - skip over SGE memory
 * @ss: the SGE state
 * @length: the number of bytes to skip
 * @release: boolean to release MR
 */
void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
{
	struct rvt_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				rvt_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}

/*
 * Make sure the QP is ready and able to accept the given opcode.
 */
static inline int qp_ok(int opcode, struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp;

	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
		goto dropit;
	if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
	    (opcode == IB_OPCODE_CNP))
		return 1;
dropit:
	ibp = &packet->rcd->ppd->ibport_data;
	ibp->rvp.n_pkt_drops++;
	return 0;
}

/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 *
 * Tlen is the length of the header + data + CRC in bytes.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_ib_header *hdr = packet->hdr;
	u32 tlen = packet->tlen;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	unsigned long flags;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_BTH) {
		packet->ohdr = &hdr->u.oth;
	} else if (lnh == HFI1_LRH_GRH) {
		u32 vtf;

		packet->ohdr = &hdr->u.l.oth;
		if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
		packet->rcv_flags |= HFI1_HAS_GRH;
	} else {
		goto drop;
	}

	trace_input_ibhdr(rcd->dd, hdr);

	opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
	inc_opstats(tlen, &rcd->opstats->stats[opcode]);

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
	lid = be16_to_cpu(hdr->lrh[1]);
	if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
		     (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
		struct rvt_mcast *mcast;
		struct rvt_mcast_qp *p;

		if (lnh != HFI1_LRH_GRH)
			goto drop;
		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
		if (!mcast)
			goto drop;
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			spin_lock_irqsave(&packet->qp->r_lock, flags);
			if (likely((qp_ok(opcode, packet))))
				opcode_handler_tbl[opcode](packet);
			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		}
		/*
		 * Notify rvt_multicast_detach() if it is waiting for
		 * us to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		rcu_read_lock();
		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
		if (!packet->qp) {
			rcu_read_unlock();
			goto drop;
		}
		spin_lock_irqsave(&packet->qp->r_lock, flags);
		if (likely((qp_ok(opcode, packet))))
			opcode_handler_tbl[opcode](packet);
		spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		rcu_read_unlock();
	}
	return;

drop:
	ibp->rvp.n_pkt_drops++;
}

/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(unsigned long data)
{
	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
	struct list_head *list = &dev->memwait;
	struct rvt_qp *qp = NULL;
	struct iowait *wait;
	unsigned long flags;
	struct hfi1_qp_priv *priv;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	if (qp)
		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}

void update_sge(struct rvt_sge_state *ss, u32 length)
{
	struct rvt_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr->lkey) {
		if (++sge->n >= RVT_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}

/*
 * SDMA send completion callback.
 */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status)
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
	struct rvt_qp *qp = tx->qp;

	spin_lock(&qp->s_lock);
	if (tx->wqe) {
		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		struct hfi1_ib_header *hdr;

		hdr = &tx->phdr.hdr;
		hfi1_rc_send_complete(qp, hdr);
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}

static int wait_kmem(struct hfi1_ibdev *dev,
		     struct rvt_qp *qp,
		     struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &priv->s_iowait.tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
			qp->s_flags |= RVT_S_WAIT_KMEM;
			list_add_tail(&priv->s_iowait.list, &dev->memwait);
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
			atomic_inc(&qp->refcount);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}

/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor.
 */
static noinline int build_verbs_ulp_payload(
	struct sdma_engine *sde,
	struct rvt_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx)
{
	struct rvt_sge *sg_list = ss->sg_list;
	struct rvt_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		len = ss->sge.length;
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
		update_sge(ss, len);
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}

/*
 * Build the SDMA descriptors for a verbs packet: the header (using AHG
 * when ahdr->ahgcount is set, otherwise the full PBC + header), followed
 * by the ULP payload, if any.
 */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	struct rvt_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx,
	struct ahg_ib_header *ahdr,
	u64 pbc)
{
	int ret = 0;
	struct hfi1_pio_header *phdr = &tx->phdr;
	u16 hdrbytes = tx->hdr_dwords << 2;

	if (!ahdr->ahgcount) {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			hdrbytes + length,
			ahdr->ahgidx,
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			phdr,
			hdrbytes);
		if (ret)
			goto bail_txadd;
	} else {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			length,
			ahdr->ahgidx,
			ahdr->ahgcount,
			ahdr->ahgdesc,
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}

	/* add the ulp payload - if any.  ss can be NULL for acks */
	if (ss)
		ret = build_verbs_ulp_payload(sde, ss, length, tx);
bail_txadd:
	return ret;
}

int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ahg_ib_header *ahdr = priv->s_hdr;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = qp->s_cur_sge;
	u32 len = qp->s_cur_size;
	/* packet length in dwords: header + padded payload + PBC (2 dwords) */
	u32 plen = hdrwords + ((len + 3) >> 2) + 2;
	struct hfi1_ibdev *dev = ps->dev;
	struct hfi1_pportdata *ppd = ps->ppd;
	struct verbs_txreq *tx;
	u64 pbc_flags = 0;
	u8 sc5 = priv->s_sc;
	int ret;

	tx = ps->s_txreq;
	if (!sdma_txreq_built(&tx->txreq)) {
		if (likely(pbc == 0)) {
			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);

			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;

			pbc = create_pbc(ppd,
					 pbc_flags,
					 qp->srate_mbps,
					 vl,
					 plen);
		}
		tx->wqe = qp->s_wqe;
		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
		if (unlikely(ret))
			goto bail_build;
	}
	ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
	if (unlikely(ret < 0)) {
		if (ret == -ECOMM)
			goto bail_ecomm;
		return ret;
	}
	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
				&ps->s_txreq->phdr.hdr);
	return ret;

bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
	ret = wait_kmem(dev, qp, ps);
	if (!ret) {
		/* free txreq - bad state */
		hfi1_put_txreq(ps->s_txreq);
		ps->s_txreq = NULL;
	}
	return ret;
}

/*
 * Queue the QP on the send context's PIO wait list until a buffer
 * becomes available.  Returns -EBUSY if the QP was queued, 0 if the QP
 * is no longer in a state where the send should be retried.
 */
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, sc_piobufavail()
	 * could be called. Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &priv->s_iowait.tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibdev *dev = &dd->verbs_dev;
			int was_empty;

			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
			qp->s_flags |= flag;
			was_empty = list_empty(&sc->piowait);
			list_add_tail(&priv->s_iowait.list, &sc->piowait);
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
			atomic_inc(&qp->refcount);
			/* counting: only call wantpiobuf_intr if first user */
			if (was_empty)
				hfi1_sc_wantpiobuf_intr(sc, 1);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

static void verbs_pio_complete(void *arg, int code)
{
	struct rvt_qp *qp = (struct rvt_qp *)arg;
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_pio_dec(&priv->s_iowait))
		iowait_drain_wakeup(&priv->s_iowait);
}

int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = qp->s_cur_sge;
	u32 len = qp->s_cur_size;
	u32 dwords = (len + 3) >> 2;
	u32 plen = hdrwords + dwords + 2; /* includes pbc */
	struct hfi1_pportdata *ppd = ps->ppd;
	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
	u64 pbc_flags = 0;
	u8 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
	int ret = 0;
	pio_release_cb cb = NULL;

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}

	/* vl15 special case taken care of in ud.c */
	sc5 = priv->s_sc;
	sc = ps->s_txreq->psc;

	if (likely(pbc == 0)) {
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	}
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
	if (unlikely(!pbuf)) {
		if (cb)
			verbs_pio_complete(qp, 0);
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * The PIO buffers are full and the link is not
			 * active: this request is not going to go out, so
			 * complete it with an error, otherwise a ULP or
			 * the core may be left waiting on the completion.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence: the PIO buffers are
			 * full but the link is still sending, so queue the
			 * request and wait for a buffer.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
			if (!ret)
				/* txreq not queued - free */
				goto bail;
			/* tx consumed in wait */
			return ret;
		}
	}

	if (len == 0) {
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		if (ss) {
			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
				update_sge(ss, slen);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
			seg_pio_copy_end(pbuf);
		}
	}

	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &ps->s_txreq->phdr.hdr);

pio_bail:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_send_complete(qp, qp->s_wqe, wc_status);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
}

/*
 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the partition key table), return 0 otherwise.
 * Uses the matching criteria for egress partition keys.
 */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
	u16 mkey = pkey & PKEY_LOW_15_MASK;
	u16 ment = ent & PKEY_LOW_15_MASK;

	if (mkey == ment) {
		/*
		 * If pkey[15] is set (full partition member),
		 * is bit 15 in the corresponding table element
		 * clear (limited member)?
		 */
		if (pkey & PKEY_MEMBER_MASK)
			return !!(ent & PKEY_MEMBER_MASK);
		return 1;
	}
	return 0;
}
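
/*
 * Worked example of the rule above (values are illustrative only): a
 * packet pkey of 0x8001 (full member of partition 0x0001) matches the
 * table entry 0x8001 but not 0x0001, since a full member requires the
 * entry's member bit to be set.  A packet pkey of 0x0001 (limited
 * member) matches either 0x0001 or 0x8001.
 */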

/*
 * egress_pkey_check - return 0 if the header's pkey is allowed for
 * egress on this port, 1 otherwise.
 */
static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
				    struct hfi1_ib_header *hdr,
				    struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_other_headers *ohdr;
	struct hfi1_devdata *dd;
	int i = 0;
	u16 pkey;
	u8 lnh, sc5 = priv->s_sc;

	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* locate the pkey within the header */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else
		ohdr = &hdr->u.oth;

	pkey = (u16)be32_to_cpu(ohdr->bth[0]);

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/* The most likely matching pkey has index qp->s_pkey_index */
	if (unlikely(!egress_pkey_matches_entry(pkey,
						ppd->pkeys
						[qp->s_pkey_index]))) {
		/* no match - try the whole table */
		for (; i < MAX_PKEY_VALUES; i++) {
			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
				break;
		}
	}

	if (i < MAX_PKEY_VALUES)
		return 0;
bad:
	incr_cntr64(&ppd->port_xmit_constraint_errors);
	dd = ppd->dd;
	if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) {
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK;
		dd->err_info_xmit_constraint.slid = slid;
		dd->err_info_xmit_constraint.pkey = pkey;
	}
	return 1;
}

/**
 * get_send_routine - choose an egress routine
 *
 * Choose an egress routine based on QP type
 * and size
 */
static inline send_routine get_send_routine(struct rvt_qp *qp,
					    struct verbs_txreq *tx)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ib_header *h = &tx->phdr.hdr;

	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
		return dd->process_pio_send;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return dd->process_pio_send;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		break;
	case IB_QPT_RC:
		if (piothreshold &&
		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
		    !sdma_txreq_built(&tx->txreq))
			return dd->process_pio_send;
		break;
	case IB_QPT_UC:
		if (piothreshold &&
		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
		    !sdma_txreq_built(&tx->txreq))
			return dd->process_pio_send;
		break;
	default:
		break;
	}
	return dd->process_dma_send;
}
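
/*
 * Illustration of the selection above (default piothreshold of 256;
 * other values are hypothetical): a 128 byte RC SEND_ONLY with no SDMA
 * descriptors pending and no tx descriptor already built is sent by
 * PIO, while a 4 KiB RDMA WRITE, or any packet while earlier SDMA work
 * is still in flight, goes through the SDMA engine instead.
 */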

/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
 * @ps: the state of the packet to send
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
 */
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	send_routine sr;
	int ret;

	sr = get_send_routine(qp, ps->s_txreq);
	ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller. Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event. Only do this for PIO. SDMA has its own
		 * mechanism for handling the errors. So for SDMA we can just
		 * return.
		 */
		if (sr == dd->process_pio_send) {
			unsigned long flags;

			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
			hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}
	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
		return pio_wait(qp,
				ps->s_txreq->psc,
				ps,
				RVT_S_WAIT_PIO_DRAIN);
	return sr(qp, ps, 0);
}

/**
 * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 */
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;

	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
	rdi->dparms.props.page_size_cap = PAGE_SIZE;
	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
	rdi->dparms.props.hw_ver = dd->minrev;
	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
	rdi->dparms.props.max_mr_size = ~0ULL;
	rdi->dparms.props.max_qp = hfi1_max_qps;
	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
	rdi->dparms.props.max_sge = hfi1_max_sges;
	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
	rdi->dparms.props.max_cq = hfi1_max_cqs;
	rdi->dparms.props.max_ah = hfi1_max_ahs;
	rdi->dparms.props.max_cqe = hfi1_max_cqes;
	rdi->dparms.props.max_mr = rdi->lkey_table.max;
	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
	rdi->dparms.props.max_map_per_fmr = 32767;
	rdi->dparms.props.max_pd = hfi1_max_pds;
	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	rdi->dparms.props.max_qp_init_rd_atom = 255;
	rdi->dparms.props.max_srq = hfi1_max_srqs;
	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	rdi->dparms.props.max_total_mcast_qp_attach =
					rdi->dparms.props.max_mcast_qp_attach *
					rdi->dparms.props.max_mcast_grp;
}

static inline u16 opa_speed_to_ib(u16 in)
{
	u16 out = 0;

	if (in & OPA_LINK_SPEED_25G)
		out |= IB_SPEED_EDR;
	if (in & OPA_LINK_SPEED_12_5G)
		out |= IB_SPEED_FDR;

	return out;
}

/*
 * Convert a single OPA link width (no multiple flags) to an IB value.
 * A zero OPA link width means link down, which means the IB width value
 * is a don't care.
 */
static inline u16 opa_width_to_ib(u16 in)
{
	switch (in) {
	case OPA_LINK_WIDTH_1X:
	/* map 2x and 3x to 1x as they don't exist in IB */
	case OPA_LINK_WIDTH_2X:
	case OPA_LINK_WIDTH_3X:
		return IB_WIDTH_1X;
	default: /* link down or unknown, return our largest width */
	case OPA_LINK_WIDTH_4X:
		return IB_WIDTH_4X;
	}
}

static int query_port(struct rvt_dev_info *rdi, u8 port_num,
		      struct ib_port_attr *props)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	u16 lid = ppd->lid;

	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
	props->phys_state = hfi1_ibphys_portstate(ppd);
	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
	/* see rate_show() in ib core/sysfs.c */
	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
	props->max_vl_num = ppd->vls_supported;

	/*
	 * The OPA hardware supports MTUs larger than the IB enum allows;
	 * until the core understands the OPA MTU values, advertise at most
	 * 4K here and clamp unexpected values to a sane IB enum.
	 */
	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
				      4096 : hfi1_max_mtu), IB_MTU_4096);
	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);

	return 0;
}

static int modify_device(struct ib_device *device,
			 int device_modify_mask,
			 struct ib_device_modify *device_modify)
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(device->node_desc, device_modify->node_desc, 64);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_hfi1_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_sys_guid_chg(ibp);
		}
	}

	ret = 0;

bail:
	return ret;
}

static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	int ret;

	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
			     OPA_LINKDOWN_REASON_UNKNOWN);
	ret = set_link_state(ppd, HLS_DN_DOWNDEF);
	return ret;
}

static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			    int guid_index, __be64 *guid)
{
	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	if (guid_index == 0)
		*guid = cpu_to_be64(ppd->guid);
	else if (guid_index < HFI1_GUIDS_PER_PORT)
		*guid = ibp->guids[guid_index - 1];
	else
		return -EINVAL;

	return 0;
}

/*
 * convert ah port,sl to sc
 */
u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
{
	struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);

	return ibp->sl_to_sc[ah->sl];
}

static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	/* test the mapping for validity */
	ibp = to_iport(ibdev, ah_attr->port_num);
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[ah_attr->sl];
	dd = dd_from_ppd(ppd);
	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
		return -EINVAL;
	return 0;
}

static void hfi1_notify_new_ah(struct ib_device *ibdev,
			       struct ib_ah_attr *ah_attr,
			       struct rvt_ah *ah)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	/*
	 * Do not trust reading anything from rvt_ah at this point as it is
	 * not done being set up. We can, however, modify things which we
	 * need to set.
	 */
	ibp = to_iport(ibdev, ah_attr->port_num);
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[ah->attr.sl];
	dd = dd_from_ppd(ppd);
	ah->vl = sc_to_vlt(dd, sc5);
	if (ah->vl < num_vls || ah->vl == 15)
		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}

struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
	struct ib_ah_attr attr;
	struct ib_ah *ah = ERR_PTR(-EINVAL);
	struct rvt_qp *qp0;

	memset(&attr, 0, sizeof(attr));
	attr.dlid = dlid;
	attr.port_num = ppd_from_ibp(ibp)->port;
	rcu_read_lock();
	qp0 = rcu_dereference(ibp->rvp.qp[0]);
	if (qp0)
		ah = ib_create_ah(qp0->ibqp.pd, &attr);
	rcu_read_unlock();
	return ah;
}

/**
 * hfi1_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the hfi1_ib device
 */
unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
{
	return ARRAY_SIZE(dd->pport[0].pkeys);
}

static void init_ibport(struct hfi1_pportdata *ppd)
{
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	size_t sz = ARRAY_SIZE(ibp->sl_to_sc);
	int i;

	for (i = 0; i < sz; i++) {
		ibp->sl_to_sc[i] = i;
		ibp->sc_to_sl[i] = i;
	}

	spin_lock_init(&ibp->rvp.lock);
	/* Set the prefix to the default value */
	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
	ibp->rvp.sm_lid = 0;
	/* only set bits defined in OPA PortInfo.CapabilityMask */
	ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
		IB_PORT_CAP_MASK_NOTICE_SUP;
	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}

/**
 * hfi1_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return 0 if successful, errno if unsuccessful.
 */
int hfi1_register_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct ib_device *ibdev = &dev->rdi.ibdev;
	struct hfi1_pportdata *ppd = dd->pport;
	unsigned i;
	int ret;
	size_t lcpysz = IB_DEVICE_NAME_MAX;

	for (i = 0; i < dd->num_pports; i++)
		init_ibport(ppd + i);

	/* Only need to initialize non-zero fields. */

	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);

	seqlock_init(&dev->iowait_lock);
	INIT_LIST_HEAD(&dev->txwait);
	INIT_LIST_HEAD(&dev->memwait);

	ret = verbs_txreq_init(dev);
	if (ret)
		goto err_verbs_txreq;

	/*
	 * The system image GUID is supposed to be the same for all
	 * HFIs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!ib_hfi1_sys_image_guid)
		ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
	lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
	ibdev->owner = THIS_MODULE;
	ibdev->node_guid = cpu_to_be64(ppd->guid);
	ibdev->phys_port_cnt = dd->num_pports;
	ibdev->dma_device = &dd->pcidev->dev;
	ibdev->modify_device = modify_device;

	/* keep process mad in the driver */
	ibdev->process_mad = hfi1_process_mad;

	strncpy(ibdev->node_desc, init_utsname()->nodename,
		sizeof(ibdev->node_desc));

	/*
	 * Fill in rvt info object.
	 */
	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
	dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
	dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
	dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
	dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
	dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
	/*
	 * Fill in rvt info device attributes.
	 */
	hfi1_fill_device_attr(dd);

	/* queue pair */
	dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
	dd->verbs_dev.rdi.dparms.qpn_start = 0;
	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
	dd->verbs_dev.rdi.dparms.qpn_res_end =
		dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
	dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
	dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;

	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
	dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
	dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
	dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
	dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
	dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
	dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
	dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;

	/* completion queue */
	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
		 "hfi1_cq%d", dd->unit);
	dd->verbs_dev.rdi.dparms.node = dd->node;

	/* misc settings */
	dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);

	ppd = dd->pport;
	for (i = 0; i < dd->num_pports; i++, ppd++)
		rvt_init_port(&dd->verbs_dev.rdi,
			      &ppd->ibport_data.rvp,
			      i,
			      ppd->pkeys);

	ret = rvt_register_device(&dd->verbs_dev.rdi);
	if (ret)
		goto err_verbs_txreq;

	ret = hfi1_verbs_register_sysfs(dd);
	if (ret)
		goto err_class;

	return ret;

err_class:
	rvt_unregister_device(&dd->verbs_dev.rdi);
err_verbs_txreq:
	verbs_txreq_exit(dev);
	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	return ret;
}

void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;

	hfi1_verbs_unregister_sysfs(dd);

	rvt_unregister_device(&dd->verbs_dev.rdi);

	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");

	del_timer_sync(&dev->mem_timer);
	verbs_txreq_exit(dev);
}

void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ib_header *hdr = packet->hdr;
	struct rvt_qp *qp = packet->qp;
	u32 lqpn, rqpn = 0;
	u16 rlid = 0;
	u8 sl, sc5, sc4_bit, svc_type;
	bool sc4_set = has_sc4_bit(packet);

	switch (packet->qp->ibqp.qp_type) {
	case IB_QPT_UC:
		rlid = qp->remote_ah_attr.dlid;
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_UC;
		break;
	case IB_QPT_RC:
		rlid = qp->remote_ah_attr.dlid;
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_RC;
		break;
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		svc_type = IB_CC_SVCTYPE_UD;
		break;
	default:
		ibp->rvp.n_pkt_drops++;
		return;
	}

	sc4_bit = sc4_set << 4;
	sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
	sc5 |= sc4_bit;
	sl = ibp->sc_to_sl[sc5];
	lqpn = qp->ibqp.qp_num;

	process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}