#include "hfi.h"
#include "qp.h"
#include "rc.h"
#include "verbs.h"
#include "tid_rdma.h"
#include "exp_rcv.h"
#include "trace.h"

/*
 * TID RDMA transfers qualified RC RDMA READ/WRITE payloads through the
 * receive context's expected (TID) RcvArray entries so the payload is
 * DMA'd directly into the destination buffers instead of being copied
 * out of eager buffers.  The code below manages the hardware flow
 * table, RcvArray (TID) allocation, and the TID RDMA READ/WRITE
 * protocol state machines.
 */

/* TID flow table (RcvTidFlowTable) control and status bits */
#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)

/* Maximum number of packets within a flow generation. */
#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)

#define GENERATION_MASK 0xFFFFF

static u32 mask_generation(u32 a)
{
	return a & GENERATION_MASK;
}

/* Generation value reserved for marking an unused kernel hardware flow */
#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)

/* J_KEY used by TID RDMA for kernel receive contexts */
#define TID_RDMA_JKEY 32
#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)

/* Maximum number of segments in flight per QP request. */
#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
			TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
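
/*
 * Illustrative arithmetic (documentation only): with
 * TID_RDMA_MAX_READ_SEGS_PER_REQ = 6 and TID_RDMA_MAX_WRITE_SEGS_PER_REQ = 4,
 * MAX_REQ is 6 and MAX_FLOWS is roundup_pow_of_two(6 + 1) = 8, so the
 * per-request flow ring indices (setup_head, clear_tail, flow_idx) are
 * masked with (MAX_FLOWS - 1) throughout this file.
 */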

#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)

#define TID_RDMA_DESTQP_FLOW_SHIFT 11
#define TID_RDMA_DESTQP_FLOW_MASK 0x1f

#define TID_OPFN_QP_CTXT_MASK 0xff
#define TID_OPFN_QP_CTXT_SHIFT 56
#define TID_OPFN_QP_KDETH_MASK 0xff
#define TID_OPFN_QP_KDETH_SHIFT 48
#define TID_OPFN_MAX_LEN_MASK 0x7ff
#define TID_OPFN_MAX_LEN_SHIFT 37
#define TID_OPFN_TIMEOUT_MASK 0x1f
#define TID_OPFN_TIMEOUT_SHIFT 32
#define TID_OPFN_RESERVED_MASK 0x3f
#define TID_OPFN_RESERVED_SHIFT 26
#define TID_OPFN_URG_MASK 0x1
#define TID_OPFN_URG_SHIFT 25
#define TID_OPFN_VER_MASK 0x7
#define TID_OPFN_VER_SHIFT 22
#define TID_OPFN_JKEY_MASK 0x3f
#define TID_OPFN_JKEY_SHIFT 16
#define TID_OPFN_MAX_READ_MASK 0x3f
#define TID_OPFN_MAX_READ_SHIFT 10
#define TID_OPFN_MAX_WRITE_MASK 0x3f
#define TID_OPFN_MAX_WRITE_SHIFT 4

static u32 tid_rdma_flow_wt;

static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
					 gfp_t gfp);
static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req);
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
static void hfi1_tid_timeout(struct timer_list *t);
static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
static void hfi1_tid_retry_timeout(struct timer_list *t);
static int make_tid_rdma_ack(struct rvt_qp *qp,
			     struct ib_other_headers *ohdr,
			     struct hfi1_pkt_state *ps);
static void hfi1_do_tid_send(struct rvt_qp *qp);
static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
static void tid_rdma_rcv_err(struct hfi1_packet *packet,
			     struct ib_other_headers *ohdr,
			     struct rvt_qp *qp, u32 psn, int diff, bool fecn);
static void update_r_next_psn_fecn(struct hfi1_packet *packet,
				   struct hfi1_qp_priv *priv,
				   struct hfi1_ctxtdata *rcd,
				   struct tid_rdma_flow *flow,
				   bool fecn);

139static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
140{
141 return
142 (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
143 TID_OPFN_QP_CTXT_SHIFT) |
144 ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
145 TID_OPFN_QP_KDETH_SHIFT) |
146 (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
147 TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
148 (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
149 TID_OPFN_TIMEOUT_SHIFT) |
150 (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
151 (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
152 (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
153 TID_OPFN_MAX_READ_SHIFT) |
154 (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
155 TID_OPFN_MAX_WRITE_SHIFT);
156}
157
158static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
159{
160 p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
161 TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
162 p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
163 p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
164 TID_OPFN_MAX_WRITE_MASK;
165 p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
166 TID_OPFN_MAX_READ_MASK;
167 p->qp =
168 ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
169 << 16) |
170 ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
171 p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
172 p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
173}
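
/*
 * Illustrative field layout of the 64-bit OPFN value, as implied by the
 * TID_OPFN_*_SHIFT/_MASK defines above (documentation only):
 *
 *	63..56	KDETH QP context (low byte of p->qp)
 *	55..48	KDETH QP prefix (bits 23..16 of p->qp)
 *	47..37	(max_len >> PAGE_SHIFT) - 1
 *	36..32	timeout
 *	31..26	reserved
 *	25	urg
 *	24..22	version
 *	21..16	jkey
 *	15..10	max_read
 *	 9..4	max_write
 *
 * tid_rdma_opfn_encode() does not currently populate the reserved or
 * version fields.  Worked example: with a 256KB max_len and 4KB pages,
 * (max_len >> PAGE_SHIFT) - 1 = 63 is stored in bits 47..37 and
 * tid_rdma_opfn_decode() recovers 256KB as (63 + 1) << PAGE_SHIFT.
 */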
174
175void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
176{
177 struct hfi1_qp_priv *priv = qp->priv;
178
179 p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
180 p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
181 p->jkey = priv->rcd->jkey;
182 p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
183 p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
184 p->timeout = qp->timeout;
185 p->urg = is_urg_masked(priv->rcd);
186}
187
188bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
189{
190 struct hfi1_qp_priv *priv = qp->priv;
191
192 *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
193 return true;
194}
195
196bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
197{
198 struct hfi1_qp_priv *priv = qp->priv;
199 struct tid_rdma_params *remote, *old;
200 bool ret = true;
201
202 old = rcu_dereference_protected(priv->tid_rdma.remote,
203 lockdep_is_held(&priv->opfn.lock));
204 data &= ~0xfULL;
205
206
207
208
209 if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
210 goto null;
211
212
213
214
215
216
217
218 remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
219 if (!remote) {
220 ret = false;
221 goto null;
222 }
223
224 tid_rdma_opfn_decode(remote, data);
225 priv->tid_timer_timeout_jiffies =
226 usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
227 1000UL) << 3) * 7);
228 trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
229 trace_hfi1_opfn_param(qp, 1, remote);
230 rcu_assign_pointer(priv->tid_rdma.remote, remote);
231
232
233
234
235
236
237
238
239 priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
240 priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
241 goto free;
242null:
243 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
244 priv->timeout_shift = 0;
245free:
246 if (old)
247 kfree_rcu(old, rcu_head);
248 return ret;
249}
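
/*
 * Illustrative arithmetic (documentation only): the
 * tid_timer_timeout_jiffies value computed above is 56 times the IB
 * local ACK timeout of 4.096 usec * 2^timeout, i.e.
 * (((4096 * (1 << timeout)) / 1000) << 3) * 7 microseconds converted to
 * jiffies.  For timeout = 14 that is roughly 56 * 67 ms, about 3.76 s.
 */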
250
251bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
252{
253 bool ret;
254
255 ret = tid_rdma_conn_reply(qp, *data);
256 *data = 0;
257
258
259
260
261
262 if (ret)
263 (void)tid_rdma_conn_req(qp, data);
264 return ret;
265}
266
267void tid_rdma_conn_error(struct rvt_qp *qp)
268{
269 struct hfi1_qp_priv *priv = qp->priv;
270 struct tid_rdma_params *old;
271
272 old = rcu_dereference_protected(priv->tid_rdma.remote,
273 lockdep_is_held(&priv->opfn.lock));
274 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
275 if (old)
276 kfree_rcu(old, rcu_head);
277}
278
279
280int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
281{
282 if (reinit)
283 return 0;
284
285 BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
286 BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
287 rcd->jkey = TID_RDMA_JKEY;
288 hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
289 return hfi1_alloc_ctxt_rcv_groups(rcd);
290}
291
292
293
294
295
296
297
298
299
300
301static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
302 struct rvt_qp *qp)
303{
304 struct hfi1_ibdev *verbs_dev = container_of(rdi,
305 struct hfi1_ibdev,
306 rdi);
307 struct hfi1_devdata *dd = container_of(verbs_dev,
308 struct hfi1_devdata,
309 verbs_dev);
310 unsigned int ctxt;
311
312 if (qp->ibqp.qp_num == 0)
313 ctxt = 0;
314 else
315 ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
316 return dd->rcd[ctxt];
317}
318
319int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
320 struct ib_qp_init_attr *init_attr)
321{
322 struct hfi1_qp_priv *qpriv = qp->priv;
323 int i, ret;
324
325 qpriv->rcd = qp_to_rcd(rdi, qp);
326
327 spin_lock_init(&qpriv->opfn.lock);
328 INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
329 INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
330 qpriv->flow_state.psn = 0;
331 qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
332 qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
333 qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
334 qpriv->s_state = TID_OP(WRITE_RESP);
335 qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
336 qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
337 qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
338 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
339 qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
340 qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
341 qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
342 qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
343 atomic_set(&qpriv->n_requests, 0);
344 atomic_set(&qpriv->n_tid_requests, 0);
345 timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
346 timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
347 INIT_LIST_HEAD(&qpriv->tid_wait);
348
349 if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
350 struct hfi1_devdata *dd = qpriv->rcd->dd;
351
352 qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
353 sizeof(*qpriv->pages),
354 GFP_KERNEL, dd->node);
355 if (!qpriv->pages)
356 return -ENOMEM;
357 for (i = 0; i < qp->s_size; i++) {
358 struct hfi1_swqe_priv *priv;
359 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
360
361 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
362 dd->node);
363 if (!priv)
364 return -ENOMEM;
365
366 hfi1_init_trdma_req(qp, &priv->tid_req);
367 priv->tid_req.e.swqe = wqe;
368 wqe->priv = priv;
369 }
370 for (i = 0; i < rvt_max_atomic(rdi); i++) {
371 struct hfi1_ack_priv *priv;
372
373 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
374 dd->node);
375 if (!priv)
376 return -ENOMEM;
377
378 hfi1_init_trdma_req(qp, &priv->tid_req);
379 priv->tid_req.e.ack = &qp->s_ack_queue[i];
380
381 ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
382 GFP_KERNEL);
383 if (ret) {
384 kfree(priv);
385 return ret;
386 }
387 qp->s_ack_queue[i].priv = priv;
388 }
389 }
390
391 return 0;
392}
393
394void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
395{
396 struct hfi1_qp_priv *qpriv = qp->priv;
397 struct rvt_swqe *wqe;
398 u32 i;
399
400 if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
401 for (i = 0; i < qp->s_size; i++) {
402 wqe = rvt_get_swqe_ptr(qp, i);
403 kfree(wqe->priv);
404 wqe->priv = NULL;
405 }
406 for (i = 0; i < rvt_max_atomic(rdi); i++) {
407 struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
408
409 if (priv)
410 hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
411 kfree(priv);
412 qp->s_ack_queue[i].priv = NULL;
413 }
414 cancel_work_sync(&qpriv->opfn.opfn_work);
415 kfree(qpriv->pages);
416 qpriv->pages = NULL;
417 }
418}

/*
 * Each receive context keeps two TID wait queues: rcd->flow_queue for
 * hardware flow indices and rcd->rarr_queue for RcvArray (TID) entries.
 * QPs that cannot get the resource they need are placed on the
 * corresponding queue and woken in FIFO order as resources are
 * released.  Both queues are protected by rcd->exp_lock.
 */

/*
 * first_qp - return the first QP waiting on a TID queue
 * @rcd: the receive context
 * @queue: the flow or RcvArray wait queue
 *
 * Must be called with rcd->exp_lock held.  A reference is taken on the
 * returned QP (if any); the caller is responsible for dropping it with
 * rvt_put_qp().
 */
450static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
451 struct tid_queue *queue)
452 __must_hold(&rcd->exp_lock)
453{
454 struct hfi1_qp_priv *priv;
455
456 lockdep_assert_held(&rcd->exp_lock);
457 priv = list_first_entry_or_null(&queue->queue_head,
458 struct hfi1_qp_priv,
459 tid_wait);
460 if (!priv)
461 return NULL;
462 rvt_get_qp(priv->owner);
463 return priv->owner;
464}

/*
 * kernel_tid_waiters - determine whether this QP must wait its turn
 *
 * Returns false when the wait queue is empty or when @qp itself is at
 * the head of the queue and is flagged with HFI1_S_WAIT_TID_SPACE;
 * otherwise returns true, meaning another QP is ahead and @qp should
 * (re)queue itself.  Called with rcd->exp_lock and qp->s_lock held.
 */
484static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
485 struct tid_queue *queue, struct rvt_qp *qp)
486 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
487{
488 struct rvt_qp *fqp;
489 bool ret = true;
490
491 lockdep_assert_held(&qp->s_lock);
492 lockdep_assert_held(&rcd->exp_lock);
493 fqp = first_qp(rcd, queue);
494 if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
495 ret = false;
496 rvt_put_qp(fqp);
497 return ret;
498}

/*
 * dequeue_tid_waiter - remove @qp from a TID wait queue
 *
 * Clears HFI1_S_WAIT_TID_SPACE, advances the queue's dequeue count and
 * drops the reference taken when the QP was queued.  A no-op if the QP
 * is not on the queue.  Called with rcd->exp_lock and qp->s_lock held.
 */
515static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
516 struct tid_queue *queue, struct rvt_qp *qp)
517 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
518{
519 struct hfi1_qp_priv *priv = qp->priv;
520
521 lockdep_assert_held(&qp->s_lock);
522 lockdep_assert_held(&rcd->exp_lock);
523 if (list_empty(&priv->tid_wait))
524 return;
525 list_del_init(&priv->tid_wait);
526 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
527 queue->dequeue++;
528 rvt_put_qp(qp);
529}

/*
 * queue_qp_for_tid_wait - add @qp to the tail of a TID wait queue
 *
 * Sets HFI1_S_WAIT_TID_SPACE and takes a QP reference; does nothing if
 * the QP is already queued.  Called with rcd->exp_lock and qp->s_lock
 * held.
 */
541static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
542 struct tid_queue *queue, struct rvt_qp *qp)
543 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
544{
545 struct hfi1_qp_priv *priv = qp->priv;
546
547 lockdep_assert_held(&qp->s_lock);
548 lockdep_assert_held(&rcd->exp_lock);
549 if (list_empty(&priv->tid_wait)) {
550 qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
551 list_add_tail(&priv->tid_wait, &queue->queue_head);
552 priv->tid_enqueue = ++queue->enqueue;
553 rcd->dd->verbs_dev.n_tidwait++;
554 trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
555 rvt_get_qp(qp);
556 }
557}

/*
 * __trigger_tid_waiter - schedule the send engine for a QP that was
 * waiting for TID resources.  Called with qp->s_lock held.
 */
566static void __trigger_tid_waiter(struct rvt_qp *qp)
567 __must_hold(&qp->s_lock)
568{
569 lockdep_assert_held(&qp->s_lock);
570 if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
571 return;
572 trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
573 hfi1_schedule_send(qp);
574}

/*
 * tid_rdma_schedule_tid_wakeup - queue trigger_work for a waiting QP
 *
 * The work item runs tid_rdma_trigger_resume() on the QP's SDMA engine
 * CPU (or the device's NUMA node if no engine is assigned).  If the
 * work is already queued, the QP reference taken by first_qp() is
 * dropped here; otherwise the work function drops it.
 */
588static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
589{
590 struct hfi1_qp_priv *priv;
591 struct hfi1_ibport *ibp;
592 struct hfi1_pportdata *ppd;
593 struct hfi1_devdata *dd;
594 bool rval;
595
596 if (!qp)
597 return;
598
599 priv = qp->priv;
600 ibp = to_iport(qp->ibqp.device, qp->port_num);
601 ppd = ppd_from_ibp(ibp);
602 dd = dd_from_ibdev(qp->ibqp.device);
603
604 rval = queue_work_on(priv->s_sde ?
605 priv->s_sde->cpu :
606 cpumask_first(cpumask_of_node(dd->node)),
607 ppd->hfi1_wq,
608 &priv->tid_rdma.trigger_work);
609 if (!rval)
610 rvt_put_qp(qp);
611}

/*
 * tid_rdma_trigger_resume - field the trigger work item
 *
 * If the QP is still waiting for TID space, restart its send engine.
 * The QP reference taken when the work was scheduled is dropped before
 * returning.
 */
620static void tid_rdma_trigger_resume(struct work_struct *work)
621{
622 struct tid_rdma_qp_params *tr;
623 struct hfi1_qp_priv *priv;
624 struct rvt_qp *qp;
625
626 tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
627 priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
628 qp = priv->owner;
629 spin_lock_irq(&qp->s_lock);
630 if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
631 spin_unlock_irq(&qp->s_lock);
632 hfi1_do_send(priv->owner, true);
633 } else {
634 spin_unlock_irq(&qp->s_lock);
635 }
636 rvt_put_qp(qp);
637}

/*
 * _tid_rdma_flush_wait - remove @qp from @queue during QP teardown,
 * clearing HFI1_S_WAIT_TID_SPACE and dropping the queued reference.
 * Called with qp->s_lock held.
 */
646static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
647 __must_hold(&qp->s_lock)
648{
649 struct hfi1_qp_priv *priv;
650
651 if (!qp)
652 return;
653 lockdep_assert_held(&qp->s_lock);
654 priv = qp->priv;
655 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
656 spin_lock(&priv->rcd->exp_lock);
657 if (!list_empty(&priv->tid_wait)) {
658 list_del_init(&priv->tid_wait);
659 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
660 queue->dequeue++;
661 rvt_put_qp(qp);
662 }
663 spin_unlock(&priv->rcd->exp_lock);
664}
665
666void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
667 __must_hold(&qp->s_lock)
668{
669 struct hfi1_qp_priv *priv = qp->priv;
670
671 _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
672 _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
673}

/*
 * kern_reserve_flow - reserve a hardware TID flow index
 * @rcd: the receive context
 * @last: the flow index this QP used last time, or a negative value
 *
 * Try to re-reserve @last first so a QP keeps using the same flow when
 * possible; otherwise take the first free bit in rcd->flow_mask.
 * Returns the reserved flow index, or -EAGAIN if all RXE_NUM_TID_FLOWS
 * flows are in use.  Called with rcd->exp_lock held.
 */
693static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
694 __must_hold(&rcd->exp_lock)
695{
696 int nr;
697
698
699 if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
700 !test_and_set_bit(last, &rcd->flow_mask))
701 return last;
702
703 nr = ffz(rcd->flow_mask);
704 BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
705 (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
706 if (nr > (RXE_NUM_TID_FLOWS - 1))
707 return -EAGAIN;
708 set_bit(nr, &rcd->flow_mask);
709 return nr;
710}
711
712static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
713 u32 flow_idx)
714{
715 u64 reg;
716
717 reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
718 RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
719 RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
720 RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
721 RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
722 RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
723
724 if (generation != KERN_GENERATION_RESERVED)
725 reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
726
727 write_uctxt_csr(rcd->dd, rcd->ctxt,
728 RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
729}
730
731static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
732 __must_hold(&rcd->exp_lock)
733{
734 u32 generation = rcd->flows[flow_idx].generation;
735
736 kern_set_hw_flow(rcd, generation, flow_idx);
737 return generation;
738}
739
740static u32 kern_flow_generation_next(u32 gen)
741{
742 u32 generation = mask_generation(gen + 1);
743
744 if (generation == KERN_GENERATION_RESERVED)
745 generation = mask_generation(generation + 1);
746 return generation;
747}
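
/*
 * Illustrative example (documentation only): generations are 20-bit
 * values (GENERATION_MASK = 0xFFFFF) and 0xFFFFF itself is
 * KERN_GENERATION_RESERVED, so kern_flow_generation_next(0xFFFFE) skips
 * the reserved value and returns 0.
 */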
748
749static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
750 __must_hold(&rcd->exp_lock)
751{
752 rcd->flows[flow_idx].generation =
753 kern_flow_generation_next(rcd->flows[flow_idx].generation);
754 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
755}
756
757int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
758{
759 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
760 struct tid_flow_state *fs = &qpriv->flow_state;
761 struct rvt_qp *fqp;
762 unsigned long flags;
763 int ret = 0;
764
765
766 if (fs->index != RXE_NUM_TID_FLOWS)
767 return ret;
768
769 spin_lock_irqsave(&rcd->exp_lock, flags);
770 if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
771 goto queue;
772
773 ret = kern_reserve_flow(rcd, fs->last_index);
774 if (ret < 0)
775 goto queue;
776 fs->index = ret;
777 fs->last_index = fs->index;
778
779
780 if (fs->generation != KERN_GENERATION_RESERVED)
781 rcd->flows[fs->index].generation = fs->generation;
782 fs->generation = kern_setup_hw_flow(rcd, fs->index);
783 fs->psn = 0;
784 dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
785
786 fqp = first_qp(rcd, &rcd->flow_queue);
787 spin_unlock_irqrestore(&rcd->exp_lock, flags);
788
789 tid_rdma_schedule_tid_wakeup(fqp);
790 return 0;
791queue:
792 queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
793 spin_unlock_irqrestore(&rcd->exp_lock, flags);
794 return -EAGAIN;
795}
796
797void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
798{
799 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
800 struct tid_flow_state *fs = &qpriv->flow_state;
801 struct rvt_qp *fqp;
802 unsigned long flags;
803
804 if (fs->index >= RXE_NUM_TID_FLOWS)
805 return;
806 spin_lock_irqsave(&rcd->exp_lock, flags);
807 kern_clear_hw_flow(rcd, fs->index);
808 clear_bit(fs->index, &rcd->flow_mask);
809 fs->index = RXE_NUM_TID_FLOWS;
810 fs->psn = 0;
811 fs->generation = KERN_GENERATION_RESERVED;
812
813
814 fqp = first_qp(rcd, &rcd->flow_queue);
815 spin_unlock_irqrestore(&rcd->exp_lock, flags);
816
817 if (fqp == qp) {
818 __trigger_tid_waiter(fqp);
819 rvt_put_qp(fqp);
820 } else {
821 tid_rdma_schedule_tid_wakeup(fqp);
822 }
823}
824
825void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
826{
827 int i;
828
829 for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
830 rcd->flows[i].generation = mask_generation(prandom_u32());
831 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
832 }
833}
834
835
836static u8 trdma_pset_order(struct tid_rdma_pageset *s)
837{
838 u8 count = s->count;
839
840 return ilog2(count) + 1;
841}
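
/*
 * Illustrative example (documentation only): pagesets are always
 * power-of-two sized, so a 4-page (16KB with 4KB pages) set yields
 * ilog2(4) + 1 = 3, the buffer-size encoding passed to hfi1_put_tid()
 * when the corresponding RcvArray entry is programmed below.
 */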

/*
 * tid_rdma_find_phys_blocks_4k - scan a 4K-page segment into pagesets
 * @flow: overall info for this flow
 * @pages: pointer to an array of page structs
 * @npages: number of pages in the segment
 * @list: page set array to be filled in
 *
 * Walk the pages looking for physically contiguous runs and split each
 * run into power-of-two sized pagesets no larger than
 * MAX_EXPECTED_BUFFER, so each set can be programmed into a single
 * RcvArray (TID) entry.  Returns the number of sets built, padded to an
 * even count so entries can be programmed in pairs.
 */
857static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
858 struct page **pages,
859 u32 npages,
860 struct tid_rdma_pageset *list)
861{
862 u32 pagecount, pageidx, setcount = 0, i;
863 void *vaddr, *this_vaddr;
864
865 if (!npages)
866 return 0;
867
868
869
870
871
872
873 vaddr = page_address(pages[0]);
874 trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
875 for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
876 this_vaddr = i < npages ? page_address(pages[i]) : NULL;
877 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
878 this_vaddr);
879
880
881
882
883 if (this_vaddr != (vaddr + PAGE_SIZE)) {
884
885
886
887
888
889
890
891
892
893
894
895
896 while (pagecount) {
897 int maxpages = pagecount;
898 u32 bufsize = pagecount * PAGE_SIZE;
899
900 if (bufsize > MAX_EXPECTED_BUFFER)
901 maxpages =
902 MAX_EXPECTED_BUFFER >>
903 PAGE_SHIFT;
904 else if (!is_power_of_2(bufsize))
905 maxpages =
906 rounddown_pow_of_two(bufsize) >>
907 PAGE_SHIFT;
908
909 list[setcount].idx = pageidx;
910 list[setcount].count = maxpages;
911 trace_hfi1_tid_pageset(flow->req->qp, setcount,
912 list[setcount].idx,
913 list[setcount].count);
914 pagecount -= maxpages;
915 pageidx += maxpages;
916 setcount++;
917 }
918 pageidx = i;
919 pagecount = 1;
920 vaddr = this_vaddr;
921 } else {
922 vaddr += PAGE_SIZE;
923 pagecount++;
924 }
925 }
926
927 if (setcount & 1)
928 list[setcount++].count = 0;
929 return setcount;
930}
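
/*
 * Illustrative example (documentation only), assuming 4KB pages and a
 * run well under MAX_EXPECTED_BUFFER: a physically contiguous run of 7
 * pages is emitted by the loop above as pagesets of 4, 2 and 1 pages,
 * since each set must be a power of two to fit a single TID entry.
 */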

/*
 * tid_flush_pages - flush a run of contiguous pages into pagesets
 * @list: page set array to be filled in
 * @idx: pointer to the starting page index of the run
 * @pages: number of pages in the run
 * @sets: number of sets already in @list
 *
 * Split the run into power-of-two pagesets capped at
 * MAX_EXPECTED_PAGES, append them to @list and return the updated set
 * count, padded to an even number as in the 4K case.
 */
952static u32 tid_flush_pages(struct tid_rdma_pageset *list,
953 u32 *idx, u32 pages, u32 sets)
954{
955 while (pages) {
956 u32 maxpages = pages;
957
958 if (maxpages > MAX_EXPECTED_PAGES)
959 maxpages = MAX_EXPECTED_PAGES;
960 else if (!is_power_of_2(maxpages))
961 maxpages = rounddown_pow_of_two(maxpages);
962 list[sets].idx = *idx;
963 list[sets++].count = maxpages;
964 *idx += maxpages;
965 pages -= maxpages;
966 }
967
968 if (sets & 1)
969 list[sets++].count = 0;
970 return sets;
971}

/*
 * tid_rdma_find_phys_blocks_8k - scan an 8K-page segment into pagesets
 * @flow: overall info for this flow
 * @pages: pointer to an array of page structs
 * @npages: number of pages in the segment
 * @list: page set array to be filled in
 *
 * Like the 4K variant, but pages are examined two at a time: a pair
 * that is not physically contiguous is emitted as single-page sets,
 * while contiguous pairs are accumulated and flushed through
 * tid_flush_pages().  Returns the number of sets built.
 */
995static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
996 struct page **pages,
997 u32 npages,
998 struct tid_rdma_pageset *list)
999{
1000 u32 idx, sets = 0, i;
1001 u32 pagecnt = 0;
1002 void *v0, *v1, *vm1;
1003
1004 if (!npages)
1005 return 0;
1006 for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
1007
1008 v0 = page_address(pages[i]);
1009 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
1010 v1 = i + 1 < npages ?
1011 page_address(pages[i + 1]) : NULL;
1012 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
1013
1014 if (v1 != (v0 + PAGE_SIZE)) {
1015
1016 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1017
1018 list[sets].idx = idx++;
1019 list[sets++].count = 1;
1020 if (v1) {
1021 list[sets].count = 1;
1022 list[sets++].idx = idx++;
1023 } else {
1024 list[sets++].count = 0;
1025 }
1026 vm1 = NULL;
1027 pagecnt = 0;
1028 continue;
1029 }
1030
1031 if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
1032
1033 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1034 pagecnt = 0;
1035 }
1036
1037 pagecnt += 2;
1038
1039 vm1 = v1;
1040
1041 }
1042
1043 sets = tid_flush_pages(list, &idx, npages - idx, sets);
1044
1045 WARN_ON(sets & 1);
1046 return sets;
1047}

/*
 * kern_find_pages - collect the kernel pages backing one segment
 * @flow: the flow being built
 * @pages: array to fill with page pointers
 * @ss: the SGE state for the request
 * @last: set to true when SGE entries remain after this segment,
 *	  false otherwise
 *
 * Walk up to flow->req->seg_len bytes of the SGE list one PAGE_SIZE at
 * a time, recording the backing page of each virtual address, and store
 * the actual segment length in flow->length.  Returns the number of
 * pages collected.
 */
1062static u32 kern_find_pages(struct tid_rdma_flow *flow,
1063 struct page **pages,
1064 struct rvt_sge_state *ss, bool *last)
1065{
1066 struct tid_rdma_request *req = flow->req;
1067 struct rvt_sge *sge = &ss->sge;
1068 u32 length = flow->req->seg_len;
1069 u32 len = PAGE_SIZE;
1070 u32 i = 0;
1071
1072 while (length && req->isge < ss->num_sge) {
1073 pages[i++] = virt_to_page(sge->vaddr);
1074
1075 sge->vaddr += len;
1076 sge->length -= len;
1077 sge->sge_length -= len;
1078 if (!sge->sge_length) {
1079 if (++req->isge < ss->num_sge)
1080 *sge = ss->sg_list[req->isge - 1];
1081 } else if (sge->length == 0 && sge->mr->lkey) {
1082 if (++sge->n >= RVT_SEGSZ) {
1083 ++sge->m;
1084 sge->n = 0;
1085 }
1086 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
1087 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
1088 }
1089 length -= len;
1090 }
1091
1092 flow->length = flow->req->seg_len - length;
1093 *last = req->isge == ss->num_sge ? false : true;
1094 return i;
1095}
1096
1097static void dma_unmap_flow(struct tid_rdma_flow *flow)
1098{
1099 struct hfi1_devdata *dd;
1100 int i;
1101 struct tid_rdma_pageset *pset;
1102
1103 dd = flow->req->rcd->dd;
1104 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1105 i++, pset++) {
1106 if (pset->count && pset->addr) {
1107 dma_unmap_page(&dd->pcidev->dev,
1108 pset->addr,
1109 PAGE_SIZE * pset->count,
1110 DMA_FROM_DEVICE);
1111 pset->mapped = 0;
1112 }
1113 }
1114}
1115
1116static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
1117{
1118 int i;
1119 struct hfi1_devdata *dd = flow->req->rcd->dd;
1120 struct tid_rdma_pageset *pset;
1121
1122 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1123 i++, pset++) {
1124 if (pset->count) {
1125 pset->addr = dma_map_page(&dd->pcidev->dev,
1126 pages[pset->idx],
1127 0,
1128 PAGE_SIZE * pset->count,
1129 DMA_FROM_DEVICE);
1130
1131 if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
1132 dma_unmap_flow(flow);
1133 return -ENOMEM;
1134 }
1135 pset->mapped = 1;
1136 }
1137 }
1138 return 0;
1139}
1140
1141static inline bool dma_mapped(struct tid_rdma_flow *flow)
1142{
1143 return !!flow->pagesets[0].mapped;
1144}
1145
1146
1147
1148
1149
1150static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
1151 struct page **pages,
1152 struct rvt_sge_state *ss, bool *last)
1153{
1154 u8 npages;
1155
1156
1157 if (flow->npagesets) {
1158 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
1159 flow);
1160 if (!dma_mapped(flow))
1161 return dma_map_flow(flow, pages);
1162 return 0;
1163 }
1164
1165 npages = kern_find_pages(flow, pages, ss, last);
1166
1167 if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
1168 flow->npagesets =
1169 tid_rdma_find_phys_blocks_4k(flow, pages, npages,
1170 flow->pagesets);
1171 else
1172 flow->npagesets =
1173 tid_rdma_find_phys_blocks_8k(flow, pages, npages,
1174 flow->pagesets);
1175
1176 return dma_map_flow(flow, pages);
1177}
1178
1179static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
1180 struct hfi1_ctxtdata *rcd, char *s,
1181 struct tid_group *grp, u8 cnt)
1182{
1183 struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
1184
1185 WARN_ON_ONCE(flow->tnode_cnt >=
1186 (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
1187 if (WARN_ON_ONCE(cnt & 1))
1188 dd_dev_err(rcd->dd,
1189 "unexpected odd allocation cnt %u map 0x%x used %u",
1190 cnt, grp->map, grp->used);
1191
1192 node->grp = grp;
1193 node->map = grp->map;
1194 node->cnt = cnt;
1195 trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
1196 grp->base, grp->map, grp->used, cnt);
1197}

/*
 * kern_alloc_tids - reserve RcvArray (TID) groups for a flow
 *
 * Build flow->tnode[] by taking whole groups from the context's free
 * group list first, then partially used groups, and finally one more
 * free group if the pagesets are still not covered.  Returns 0 on
 * success or -EAGAIN if the context does not currently have enough free
 * TID entries.  Called with rcd->exp_lock held.
 */
1212static int kern_alloc_tids(struct tid_rdma_flow *flow)
1213{
1214 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1215 struct hfi1_devdata *dd = rcd->dd;
1216 u32 ngroups, pageidx = 0;
1217 struct tid_group *group = NULL, *used;
1218 u8 use;
1219
1220 flow->tnode_cnt = 0;
1221 ngroups = flow->npagesets / dd->rcv_entries.group_size;
1222 if (!ngroups)
1223 goto used_list;
1224
1225
1226 list_for_each_entry(group, &rcd->tid_group_list.list, list) {
1227 kern_add_tid_node(flow, rcd, "complete groups", group,
1228 group->size);
1229
1230 pageidx += group->size;
1231 if (!--ngroups)
1232 break;
1233 }
1234
1235 if (pageidx >= flow->npagesets)
1236 goto ok;
1237
1238used_list:
1239
1240 list_for_each_entry(used, &rcd->tid_used_list.list, list) {
1241 use = min_t(u32, flow->npagesets - pageidx,
1242 used->size - used->used);
1243 kern_add_tid_node(flow, rcd, "used groups", used, use);
1244
1245 pageidx += use;
1246 if (pageidx >= flow->npagesets)
1247 goto ok;
1248 }
1249
1250
1251
1252
1253
1254
1255 if (group && &group->list == &rcd->tid_group_list.list)
1256 goto bail_eagain;
1257 group = list_prepare_entry(group, &rcd->tid_group_list.list,
1258 list);
1259 if (list_is_last(&group->list, &rcd->tid_group_list.list))
1260 goto bail_eagain;
1261 group = list_next_entry(group, list);
1262 use = min_t(u32, flow->npagesets - pageidx, group->size);
1263 kern_add_tid_node(flow, rcd, "complete continue", group, use);
1264 pageidx += use;
1265 if (pageidx >= flow->npagesets)
1266 goto ok;
1267bail_eagain:
1268 trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
1269 (u64)flow->npagesets);
1270 return -EAGAIN;
1271ok:
1272 return 0;
1273}
1274
1275static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
1276 u32 *pset_idx)
1277{
1278 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1279 struct hfi1_devdata *dd = rcd->dd;
1280 struct kern_tid_node *node = &flow->tnode[grp_num];
1281 struct tid_group *grp = node->grp;
1282 struct tid_rdma_pageset *pset;
1283 u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
1284 u32 rcventry, npages = 0, pair = 0, tidctrl;
1285 u8 i, cnt = 0;
1286
1287 for (i = 0; i < grp->size; i++) {
1288 rcventry = grp->base + i;
1289
1290 if (node->map & BIT(i) || cnt >= node->cnt) {
1291 rcv_array_wc_fill(dd, rcventry);
1292 continue;
1293 }
1294 pset = &flow->pagesets[(*pset_idx)++];
1295 if (pset->count) {
1296 hfi1_put_tid(dd, rcventry, PT_EXPECTED,
1297 pset->addr, trdma_pset_order(pset));
1298 } else {
1299 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1300 }
1301 npages += pset->count;
1302
1303 rcventry -= rcd->expected_base;
1304 tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
1305
1306
1307
1308
1309
1310
1311
1312 pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
1313 node->cnt >= cnt + 2;
1314 if (!pair) {
1315 if (!pset->count)
1316 tidctrl = 0x1;
1317 flow->tid_entry[flow->tidcnt++] =
1318 EXP_TID_SET(IDX, rcventry >> 1) |
1319 EXP_TID_SET(CTRL, tidctrl) |
1320 EXP_TID_SET(LEN, npages);
1321 trace_hfi1_tid_entry_alloc(
1322 flow->req->qp, flow->tidcnt - 1,
1323 flow->tid_entry[flow->tidcnt - 1]);
1324
1325
1326 flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
1327 npages = 0;
1328 }
1329
1330 if (grp->used == grp->size - 1)
1331 tid_group_move(grp, &rcd->tid_used_list,
1332 &rcd->tid_full_list);
1333 else if (!grp->used)
1334 tid_group_move(grp, &rcd->tid_group_list,
1335 &rcd->tid_used_list);
1336
1337 grp->used++;
1338 grp->map |= BIT(i);
1339 cnt++;
1340 }
1341}
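
/*
 * Illustrative note (documentation only): in the loop above, tidctrl
 * selects which half of an RcvArray entry pair a TID entry refers to:
 * 0x1 for the even rcventry, 0x2 for the odd one, and 0x3 when two
 * adjacent entries are programmed as a pair and advertised as a single
 * TID with their combined page count.
 */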
1342
1343static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
1344{
1345 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1346 struct hfi1_devdata *dd = rcd->dd;
1347 struct kern_tid_node *node = &flow->tnode[grp_num];
1348 struct tid_group *grp = node->grp;
1349 u32 rcventry;
1350 u8 i, cnt = 0;
1351
1352 for (i = 0; i < grp->size; i++) {
1353 rcventry = grp->base + i;
1354
1355 if (node->map & BIT(i) || cnt >= node->cnt) {
1356 rcv_array_wc_fill(dd, rcventry);
1357 continue;
1358 }
1359
1360 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1361
1362 grp->used--;
1363 grp->map &= ~BIT(i);
1364 cnt++;
1365
1366 if (grp->used == grp->size - 1)
1367 tid_group_move(grp, &rcd->tid_full_list,
1368 &rcd->tid_used_list);
1369 else if (!grp->used)
1370 tid_group_move(grp, &rcd->tid_used_list,
1371 &rcd->tid_group_list);
1372 }
1373 if (WARN_ON_ONCE(cnt & 1)) {
1374 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1375 struct hfi1_devdata *dd = rcd->dd;
1376
1377 dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
1378 cnt, grp->map, grp->used);
1379 }
1380}
1381
1382static void kern_program_rcvarray(struct tid_rdma_flow *flow)
1383{
1384 u32 pset_idx = 0;
1385 int i;
1386
1387 flow->npkts = 0;
1388 flow->tidcnt = 0;
1389 for (i = 0; i < flow->tnode_cnt; i++)
1390 kern_program_rcv_group(flow, i, &pset_idx);
1391 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
1392}

/*
 * hfi1_kern_exp_rcv_setup - set up the next segment of a TID RDMA flow
 * @req: the TID RDMA request
 * @ss: the SGE state to pull pages from
 * @last: output, passed through from kern_find_pages()
 *
 * Program the next segment of @req into expected receive resources:
 *   - map the segment's pages for DMA and split them into pagesets;
 *   - allocate RcvArray groups and program the TID entries;
 *   - stamp the flow with the QP's current hardware flow index,
 *     generation and starting PSN, and advance the flow PSN by the
 *     number of packets in the segment.
 *
 * The flow ring (req->flows[], indexed by setup_head/clear_tail modulo
 * MAX_FLOWS) holds at most req->n_flows outstanding segments.  Returns
 * 0 on success, -EINVAL if the ring is full or the request already has
 * its maximum number of outstanding segments, -ENOMEM on DMA mapping
 * failure, or -EAGAIN if the QP had to be queued to wait for TID space.
 * Called with req->qp->s_lock held.
 */
1436int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
1437 struct rvt_sge_state *ss, bool *last)
1438 __must_hold(&req->qp->s_lock)
1439{
1440 struct tid_rdma_flow *flow = &req->flows[req->setup_head];
1441 struct hfi1_ctxtdata *rcd = req->rcd;
1442 struct hfi1_qp_priv *qpriv = req->qp->priv;
1443 unsigned long flags;
1444 struct rvt_qp *fqp;
1445 u16 clear_tail = req->clear_tail;
1446
1447 lockdep_assert_held(&req->qp->s_lock);
1448
1449
1450
1451
1452
1453
1454 if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
1455 CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
1456 req->n_flows)
1457 return -EINVAL;
1458
1459
1460
1461
1462
1463
1464 if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
1465 hfi1_wait_kmem(flow->req->qp);
1466 return -ENOMEM;
1467 }
1468
1469 spin_lock_irqsave(&rcd->exp_lock, flags);
1470 if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
1471 goto queue;
1472
1473
1474
1475
1476
1477
1478 if (kern_alloc_tids(flow))
1479 goto queue;
1480
1481
1482
1483
1484 kern_program_rcvarray(flow);
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494 memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
1495 flow->idx = qpriv->flow_state.index;
1496 flow->flow_state.generation = qpriv->flow_state.generation;
1497 flow->flow_state.spsn = qpriv->flow_state.psn;
1498 flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
1499 flow->flow_state.r_next_psn =
1500 full_flow_psn(flow, flow->flow_state.spsn);
1501 qpriv->flow_state.psn += flow->npkts;
1502
1503 dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
1504
1505 fqp = first_qp(rcd, &rcd->rarr_queue);
1506 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1507 tid_rdma_schedule_tid_wakeup(fqp);
1508
1509 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1510 return 0;
1511queue:
1512 queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
1513 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1514 return -EAGAIN;
1515}
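
/*
 * Illustrative note (documentation only): each programmed segment
 * occupies flow PSNs [spsn, spsn + npkts - 1] within the QP's current
 * hardware flow generation, and qpriv->flow_state.psn advances by npkts
 * so the next segment starts where this one ends.  When a generation's
 * PSN space (MAX_TID_FLOW_PSN) would be exceeded, the read/write
 * request code switches to TID_REQUEST_SYNC and sets up a fresh
 * hardware flow with a new generation.
 */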
1516
1517static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
1518{
1519 flow->npagesets = 0;
1520}

/*
 * hfi1_kern_exp_rcv_clear - release the oldest programmed segment
 *
 * Unprogram the TID entries, unmap the pages and advance clear_tail,
 * then wake the next QP waiting for RcvArray space.  Returns -EINVAL if
 * there is no outstanding segment.  Called with req->qp->s_lock held.
 */
1528int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
1529 __must_hold(&req->qp->s_lock)
1530{
1531 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
1532 struct hfi1_ctxtdata *rcd = req->rcd;
1533 unsigned long flags;
1534 int i;
1535 struct rvt_qp *fqp;
1536
1537 lockdep_assert_held(&req->qp->s_lock);
1538
1539 if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
1540 return -EINVAL;
1541
1542 spin_lock_irqsave(&rcd->exp_lock, flags);
1543
1544 for (i = 0; i < flow->tnode_cnt; i++)
1545 kern_unprogram_rcv_group(flow, i);
1546
1547 flow->tnode_cnt = 0;
1548
1549 fqp = first_qp(rcd, &rcd->rarr_queue);
1550 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1551
1552 dma_unmap_flow(flow);
1553
1554 hfi1_tid_rdma_reset_flow(flow);
1555 req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
1556
1557 if (fqp == req->qp) {
1558 __trigger_tid_waiter(fqp);
1559 rvt_put_qp(fqp);
1560 } else {
1561 tid_rdma_schedule_tid_wakeup(fqp);
1562 }
1563
1564 return 0;
1565}
1566
1567
1568
1569
1570
1571void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
1572 __must_hold(&req->qp->s_lock)
1573{
1574
1575 while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
1576 if (hfi1_kern_exp_rcv_clear(req))
1577 break;
1578 }
1579}
1580
1581
1582
1583
1584
1585static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
1586{
1587 kfree(req->flows);
1588 req->flows = NULL;
1589}
1590
1591
1592
1593
1594
1595
1596void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
1597{
1598 struct hfi1_swqe_priv *p = wqe->priv;
1599
1600 hfi1_kern_exp_rcv_free_flows(&p->tid_req);
1601}
1602
1603
1604
1605
1606static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1607 gfp_t gfp)
1608{
1609 struct tid_rdma_flow *flows;
1610 int i;
1611
1612 if (likely(req->flows))
1613 return 0;
1614 flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
1615 req->rcd->numa_id);
1616 if (!flows)
1617 return -ENOMEM;
1618
1619 for (i = 0; i < MAX_FLOWS; i++) {
1620 flows[i].req = req;
1621 flows[i].npagesets = 0;
1622 flows[i].pagesets[0].mapped = 0;
1623 }
1624 req->flows = flows;
1625 return 0;
1626}
1627
1628static void hfi1_init_trdma_req(struct rvt_qp *qp,
1629 struct tid_rdma_request *req)
1630{
1631 struct hfi1_qp_priv *qpriv = qp->priv;
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643 req->qp = qp;
1644 req->rcd = qpriv->rcd;
1645}
1646
1647u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
1648 void *context, int vl, int mode, u64 data)
1649{
1650 struct hfi1_devdata *dd = context;
1651
1652 return dd->verbs_dev.n_tidwait;
1653}
1654
1655static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
1656 u32 psn, u16 *fidx)
1657{
1658 u16 head, tail;
1659 struct tid_rdma_flow *flow;
1660
1661 head = req->setup_head;
1662 tail = req->clear_tail;
1663 for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
1664 tail = CIRC_NEXT(tail, MAX_FLOWS)) {
1665 flow = &req->flows[tail];
1666 if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
1667 cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
1668 if (fidx)
1669 *fidx = tail;
1670 return flow;
1671 }
1672 }
1673 return NULL;
1674}
1675
1676static struct tid_rdma_flow *
1677__find_flow_ranged(struct tid_rdma_request *req, u16 head, u16 tail,
1678 u32 psn, u16 *fidx)
1679{
1680 for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
1681 tail = CIRC_NEXT(tail, MAX_FLOWS)) {
1682 struct tid_rdma_flow *flow = &req->flows[tail];
1683 u32 spsn, lpsn;
1684
1685 spsn = full_flow_psn(flow, flow->flow_state.spsn);
1686 lpsn = full_flow_psn(flow, flow->flow_state.lpsn);
1687
1688 if (cmp_psn(psn, spsn) >= 0 && cmp_psn(psn, lpsn) <= 0) {
1689 if (fidx)
1690 *fidx = tail;
1691 return flow;
1692 }
1693 }
1694 return NULL;
1695}
1696
1697static struct tid_rdma_flow *find_flow(struct tid_rdma_request *req,
1698 u32 psn, u16 *fidx)
1699{
1700 return __find_flow_ranged(req, req->setup_head, req->clear_tail, psn,
1701 fidx);
1702}
1703
1704
1705u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
1706 struct ib_other_headers *ohdr, u32 *bth1,
1707 u32 *bth2, u32 *len)
1708{
1709 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1710 struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
1711 struct rvt_qp *qp = req->qp;
1712 struct hfi1_qp_priv *qpriv = qp->priv;
1713 struct hfi1_swqe_priv *wpriv = wqe->priv;
1714 struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
1715 struct tid_rdma_params *remote;
1716 u32 req_len = 0;
1717 void *req_addr = NULL;
1718
1719
1720 *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
1721 trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
1722
1723
1724 req_addr = &flow->tid_entry[flow->tid_idx];
1725 req_len = sizeof(*flow->tid_entry) *
1726 (flow->tidcnt - flow->tid_idx);
1727
1728 memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
1729 wpriv->ss.sge.vaddr = req_addr;
1730 wpriv->ss.sge.sge_length = req_len;
1731 wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
1732
1733
1734
1735
1736 wpriv->ss.sge.mr = NULL;
1737 wpriv->ss.sge.m = 0;
1738 wpriv->ss.sge.n = 0;
1739
1740 wpriv->ss.sg_list = NULL;
1741 wpriv->ss.total_len = wpriv->ss.sge.sge_length;
1742 wpriv->ss.num_sge = 1;
1743
1744
1745 rcu_read_lock();
1746 remote = rcu_dereference(qpriv->tid_rdma.remote);
1747
1748 KDETH_RESET(rreq->kdeth0, KVER, 0x1);
1749 KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
1750 rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
1751 req->cur_seg * req->seg_len + flow->sent);
1752 rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
1753 rreq->reth.length = cpu_to_be32(*len);
1754 rreq->tid_flow_psn =
1755 cpu_to_be32((flow->flow_state.generation <<
1756 HFI1_KDETH_BTH_SEQ_SHIFT) |
1757 ((flow->flow_state.spsn + flow->pkt) &
1758 HFI1_KDETH_BTH_SEQ_MASK));
1759 rreq->tid_flow_qp =
1760 cpu_to_be32(qpriv->tid_rdma.local.qp |
1761 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
1762 TID_RDMA_DESTQP_FLOW_SHIFT) |
1763 qpriv->rcd->ctxt);
1764 rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
1765 *bth1 &= ~RVT_QPN_MASK;
1766 *bth1 |= remote->qp;
1767 *bth2 |= IB_BTH_REQ_ACK;
1768 rcu_read_unlock();
1769
1770
1771 flow->sent += *len;
1772 req->cur_seg++;
1773 qp->s_state = TID_OP(READ_REQ);
1774 req->ack_pending++;
1775 req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
1776 qpriv->pending_tid_r_segs++;
1777 qp->s_num_rd_atomic++;
1778
1779
1780 *len = req_len;
1781
1782 return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
1783}

/*
 * hfi1_build_tid_rdma_read_req - prepare the next TID RDMA READ REQ
 *
 * Set up (or, on a retry, reuse) the expected receive resources for the
 * next segment of @wqe and then build the request packet itself via
 * hfi1_build_tid_rdma_read_packet().  Returns the number of header
 * words added, or 0 if the request had to wait for resources.
 */
1789u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
1790 struct ib_other_headers *ohdr, u32 *bth1,
1791 u32 *bth2, u32 *len)
1792 __must_hold(&qp->s_lock)
1793{
1794 struct hfi1_qp_priv *qpriv = qp->priv;
1795 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1796 struct tid_rdma_flow *flow = NULL;
1797 u32 hdwords = 0;
1798 bool last;
1799 bool retry = true;
1800 u32 npkts = rvt_div_round_up_mtu(qp, *len);
1801
1802 trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
1803 wqe->lpsn, req);
1804
1805
1806
1807
1808sync_check:
1809 if (req->state == TID_REQUEST_SYNC) {
1810 if (qpriv->pending_tid_r_segs)
1811 goto done;
1812
1813 hfi1_kern_clear_hw_flow(req->rcd, qp);
1814 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
1815 req->state = TID_REQUEST_ACTIVE;
1816 }
1817
1818
1819
1820
1821
1822
1823 if (req->flow_idx == req->setup_head) {
1824 retry = false;
1825 if (req->state == TID_REQUEST_RESEND) {
1826
1827
1828
1829
1830
1831 restart_sge(&qp->s_sge, wqe, req->s_next_psn,
1832 qp->pmtu);
1833 req->isge = 0;
1834 req->state = TID_REQUEST_ACTIVE;
1835 }
1836
1837
1838
1839
1840
1841 if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
1842 req->state = TID_REQUEST_SYNC;
1843 goto sync_check;
1844 }
1845
1846
1847 if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
1848 goto done;
1849
1850
1851
1852
1853
1854 if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
1855 req->state = TID_REQUEST_QUEUED;
1856
1857
1858
1859
1860
1861 goto done;
1862 }
1863 }
1864
1865
1866 flow = &req->flows[req->flow_idx];
1867 flow->pkt = 0;
1868 flow->tid_idx = 0;
1869 flow->sent = 0;
1870 if (!retry) {
1871
1872 flow->flow_state.ib_spsn = req->s_next_psn;
1873 flow->flow_state.ib_lpsn =
1874 flow->flow_state.ib_spsn + flow->npkts - 1;
1875 }
1876
1877
1878 req->s_next_psn += flow->npkts;
1879
1880
1881 hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
1882done:
1883 return hdwords;
1884}

/*
 * tid_rdma_rcv_read_request - responder-side validation of a READ REQ
 *
 * Copy the TID entries from the packet into a new flow, verify that
 * they cover @len bytes, and initialize the ACK-queue entry and request
 * state.  Returns 0 on success or 1 if the request is malformed, in
 * which case the caller NAKs it as an invalid request.
 */
1891static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
1892 struct rvt_ack_entry *e,
1893 struct hfi1_packet *packet,
1894 struct ib_other_headers *ohdr,
1895 u32 bth0, u32 psn, u64 vaddr, u32 len)
1896{
1897 struct hfi1_qp_priv *qpriv = qp->priv;
1898 struct tid_rdma_request *req;
1899 struct tid_rdma_flow *flow;
1900 u32 flow_psn, i, tidlen = 0, pktlen, tlen;
1901
1902 req = ack_to_tid_req(e);
1903
1904
1905 flow = &req->flows[req->setup_head];
1906
1907
1908 pktlen = packet->tlen - (packet->hlen + 4);
1909 if (pktlen > sizeof(flow->tid_entry))
1910 return 1;
1911 memcpy(flow->tid_entry, packet->ebuf, pktlen);
1912 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
1913
1914
1915
1916
1917
1918 flow->npkts = rvt_div_round_up_mtu(qp, len);
1919 for (i = 0; i < flow->tidcnt; i++) {
1920 trace_hfi1_tid_entry_rcv_read_req(qp, i,
1921 flow->tid_entry[i]);
1922 tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
1923 if (!tlen)
1924 return 1;
1925
1926
1927
1928
1929
1930
1931
1932 tidlen += tlen;
1933 }
1934 if (tidlen * PAGE_SIZE < len)
1935 return 1;
1936
1937
1938 req->clear_tail = req->setup_head;
1939 flow->pkt = 0;
1940 flow->tid_idx = 0;
1941 flow->tid_offset = 0;
1942 flow->sent = 0;
1943 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
1944 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
1945 TID_RDMA_DESTQP_FLOW_MASK;
1946 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
1947 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
1948 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
1949 flow->length = len;
1950
1951 flow->flow_state.lpsn = flow->flow_state.spsn +
1952 flow->npkts - 1;
1953 flow->flow_state.ib_spsn = psn;
1954 flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
1955
1956 trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
1957
1958 req->flow_idx = req->setup_head;
1959
1960
1961 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1962
1963
1964
1965
1966 e->opcode = (bth0 >> 24) & 0xff;
1967 e->psn = psn;
1968 e->lpsn = psn + flow->npkts - 1;
1969 e->sent = 0;
1970
1971 req->n_flows = qpriv->tid_rdma.local.max_read;
1972 req->state = TID_REQUEST_ACTIVE;
1973 req->cur_seg = 0;
1974 req->comp_seg = 0;
1975 req->ack_seg = 0;
1976 req->isge = 0;
1977 req->seg_len = qpriv->tid_rdma.local.max_len;
1978 req->total_len = len;
1979 req->total_segs = 1;
1980 req->r_flow_psn = e->psn;
1981
1982 trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
1983 req);
1984 return 0;
1985}
1986
1987static int tid_rdma_rcv_error(struct hfi1_packet *packet,
1988 struct ib_other_headers *ohdr,
1989 struct rvt_qp *qp, u32 psn, int diff)
1990{
1991 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1992 struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
1993 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
1994 struct hfi1_qp_priv *qpriv = qp->priv;
1995 struct rvt_ack_entry *e;
1996 struct tid_rdma_request *req;
1997 unsigned long flags;
1998 u8 prev;
1999 bool old_req;
2000
2001 trace_hfi1_rsp_tid_rcv_error(qp, psn);
2002 trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
2003 if (diff > 0) {
2004
2005 if (!qp->r_nak_state) {
2006 ibp->rvp.n_rc_seqnak++;
2007 qp->r_nak_state = IB_NAK_PSN_ERROR;
2008 qp->r_ack_psn = qp->r_psn;
2009 rc_defered_ack(rcd, qp);
2010 }
2011 goto done;
2012 }
2013
2014 ibp->rvp.n_rc_dupreq++;
2015
2016 spin_lock_irqsave(&qp->s_lock, flags);
2017 e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
2018 if (!e || (e->opcode != TID_OP(READ_REQ) &&
2019 e->opcode != TID_OP(WRITE_REQ)))
2020 goto unlock;
2021
2022 req = ack_to_tid_req(e);
2023 req->r_flow_psn = psn;
2024 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
2025 if (e->opcode == TID_OP(READ_REQ)) {
2026 struct ib_reth *reth;
2027 u32 offset;
2028 u32 len;
2029 u32 rkey;
2030 u64 vaddr;
2031 int ok;
2032 u32 bth0;
2033
2034 reth = &ohdr->u.tid_rdma.r_req.reth;
2035
2036
2037
2038
2039 offset = delta_psn(psn, e->psn) * qp->pmtu;
2040 len = be32_to_cpu(reth->length);
2041 if (psn != e->psn || len != req->total_len)
2042 goto unlock;
2043
2044 release_rdma_sge_mr(e);
2045
2046 rkey = be32_to_cpu(reth->rkey);
2047 vaddr = get_ib_reth_vaddr(reth);
2048
2049 qp->r_len = len;
2050 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
2051 IB_ACCESS_REMOTE_READ);
2052 if (unlikely(!ok))
2053 goto unlock;
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065 bth0 = be32_to_cpu(ohdr->bth[0]);
2066 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
2067 vaddr, len))
2068 goto unlock;
2069
2070
2071
2072
2073
2074 if (old_req)
2075 goto unlock;
2076 } else {
2077 struct flow_state *fstate;
2078 bool schedule = false;
2079 u8 i;
2080
2081 if (req->state == TID_REQUEST_RESEND) {
2082 req->state = TID_REQUEST_RESEND_ACTIVE;
2083 } else if (req->state == TID_REQUEST_INIT_RESEND) {
2084 req->state = TID_REQUEST_INIT;
2085 schedule = true;
2086 }
2087
2088
2089
2090
2091
2092
2093
2094
2095 if (old_req || req->state == TID_REQUEST_INIT ||
2096 (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
2097 for (i = prev + 1; ; i++) {
2098 if (i > rvt_size_atomic(&dev->rdi))
2099 i = 0;
2100 if (i == qp->r_head_ack_queue)
2101 break;
2102 e = &qp->s_ack_queue[i];
2103 req = ack_to_tid_req(e);
2104 if (e->opcode == TID_OP(WRITE_REQ) &&
2105 req->state == TID_REQUEST_INIT)
2106 req->state = TID_REQUEST_INIT_RESEND;
2107 }
2108
2109
2110
2111
2112
2113
2114 if (!schedule)
2115 goto unlock;
2116 }
2117
2118
2119
2120
2121
2122 if (req->clear_tail == req->setup_head)
2123 goto schedule;
2124
2125
2126
2127
2128
2129
2130 if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
2131 fstate = &req->flows[req->clear_tail].flow_state;
2132 qpriv->pending_tid_w_segs -=
2133 CIRC_CNT(req->flow_idx, req->clear_tail,
2134 MAX_FLOWS);
2135 req->flow_idx =
2136 CIRC_ADD(req->clear_tail,
2137 delta_psn(psn, fstate->resp_ib_psn),
2138 MAX_FLOWS);
2139 qpriv->pending_tid_w_segs +=
2140 delta_psn(psn, fstate->resp_ib_psn);
2141
2142
2143
2144
2145
2146
2147
2148 if (CIRC_CNT(req->setup_head, req->flow_idx,
2149 MAX_FLOWS)) {
2150 req->cur_seg = delta_psn(psn, e->psn);
2151 req->state = TID_REQUEST_RESEND_ACTIVE;
2152 }
2153 }
2154
2155 for (i = prev + 1; ; i++) {
2156
2157
2158
2159
2160 if (i > rvt_size_atomic(&dev->rdi))
2161 i = 0;
2162 if (i == qp->r_head_ack_queue)
2163 break;
2164 e = &qp->s_ack_queue[i];
2165 req = ack_to_tid_req(e);
2166 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
2167 e->lpsn, req);
2168 if (e->opcode != TID_OP(WRITE_REQ) ||
2169 req->cur_seg == req->comp_seg ||
2170 req->state == TID_REQUEST_INIT ||
2171 req->state == TID_REQUEST_INIT_RESEND) {
2172 if (req->state == TID_REQUEST_INIT)
2173 req->state = TID_REQUEST_INIT_RESEND;
2174 continue;
2175 }
2176 qpriv->pending_tid_w_segs -=
2177 CIRC_CNT(req->flow_idx,
2178 req->clear_tail,
2179 MAX_FLOWS);
2180 req->flow_idx = req->clear_tail;
2181 req->state = TID_REQUEST_RESEND;
2182 req->cur_seg = req->comp_seg;
2183 }
2184 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
2185 }
2186
2187 if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
2188 qp->s_acked_ack_queue = prev;
2189 qp->s_tail_ack_queue = prev;
2190
2191
2192
2193
2194
2195
2196 qp->s_ack_state = OP(ACKNOWLEDGE);
2197schedule:
2198
2199
2200
2201
2202 if (qpriv->rnr_nak_state) {
2203 qp->s_nak_state = 0;
2204 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
2205 qp->r_psn = e->lpsn + 1;
2206 hfi1_tid_write_alloc_resources(qp, true);
2207 }
2208
2209 qp->r_state = e->opcode;
2210 qp->r_nak_state = 0;
2211 qp->s_flags |= RVT_S_RESP_PENDING;
2212 hfi1_schedule_send(qp);
2213unlock:
2214 spin_unlock_irqrestore(&qp->s_lock, flags);
2215done:
2216 return 1;
2217}
2218
2219void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
2220{
	/*
	 * Responder-side handling of a TID RDMA READ REQ: validate the
	 * request, stash it in the ACK queue as a TID_OP(READ_REQ)
	 * entry with its TID entries and flow state, then set
	 * RVT_S_RESP_PENDING and kick the send engine so the TID RDMA
	 * READ RESP packets are generated.
	 */

2234 struct hfi1_ctxtdata *rcd = packet->rcd;
2235 struct rvt_qp *qp = packet->qp;
2236 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
2237 struct ib_other_headers *ohdr = packet->ohdr;
2238 struct rvt_ack_entry *e;
2239 unsigned long flags;
2240 struct ib_reth *reth;
2241 struct hfi1_qp_priv *qpriv = qp->priv;
2242 u32 bth0, psn, len, rkey;
2243 bool fecn;
2244 u8 next;
2245 u64 vaddr;
2246 int diff;
2247 u8 nack_state = IB_NAK_INVALID_REQUEST;
2248
2249 bth0 = be32_to_cpu(ohdr->bth[0]);
2250 if (hfi1_ruc_check_hdr(ibp, packet))
2251 return;
2252
2253 fecn = process_ecn(qp, packet);
2254 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2255 trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
2256
2257 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2258 rvt_comm_est(qp);
2259
2260 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2261 goto nack_inv;
2262
2263 reth = &ohdr->u.tid_rdma.r_req.reth;
2264 vaddr = be64_to_cpu(reth->vaddr);
2265 len = be32_to_cpu(reth->length);
2266
2267 if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
2268 goto nack_inv;
2269
2270 diff = delta_psn(psn, qp->r_psn);
2271 if (unlikely(diff)) {
2272 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
2273 return;
2274 }
2275
2276
2277 next = qp->r_head_ack_queue + 1;
2278 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2279 next = 0;
2280 spin_lock_irqsave(&qp->s_lock, flags);
2281 if (unlikely(next == qp->s_tail_ack_queue)) {
2282 if (!qp->s_ack_queue[next].sent) {
2283 nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2284 goto nack_inv_unlock;
2285 }
2286 update_ack_queue(qp, next);
2287 }
2288 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2289 release_rdma_sge_mr(e);
2290
2291 rkey = be32_to_cpu(reth->rkey);
2292 qp->r_len = len;
2293
2294 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
2295 rkey, IB_ACCESS_REMOTE_READ)))
2296 goto nack_acc;
2297
2298
2299 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
2300 len))
2301 goto nack_inv_unlock;
2302
2303 qp->r_state = e->opcode;
2304 qp->r_nak_state = 0;
2305
2306
2307
2308
2309
2310 qp->r_msn++;
2311 qp->r_psn += e->lpsn - e->psn + 1;
2312
2313 qp->r_head_ack_queue = next;
2314
2315
2316
2317
2318
2319
2320
2321 qpriv->r_tid_alloc = qp->r_head_ack_queue;
2322
2323
2324 qp->s_flags |= RVT_S_RESP_PENDING;
2325 if (fecn)
2326 qp->s_flags |= RVT_S_ECN;
2327 hfi1_schedule_send(qp);
2328
2329 spin_unlock_irqrestore(&qp->s_lock, flags);
2330 return;
2331
2332nack_inv_unlock:
2333 spin_unlock_irqrestore(&qp->s_lock, flags);
2334nack_inv:
2335 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2336 qp->r_nak_state = nack_state;
2337 qp->r_ack_psn = qp->r_psn;
2338
2339 rc_defered_ack(rcd, qp);
2340 return;
2341nack_acc:
2342 spin_unlock_irqrestore(&qp->s_lock, flags);
2343 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2344 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2345 qp->r_ack_psn = qp->r_psn;
2346}
2347
2348u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
2349 struct ib_other_headers *ohdr, u32 *bth0,
2350 u32 *bth1, u32 *bth2, u32 *len, bool *last)
2351{
2352 struct hfi1_ack_priv *epriv = e->priv;
2353 struct tid_rdma_request *req = &epriv->tid_req;
2354 struct hfi1_qp_priv *qpriv = qp->priv;
2355 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
2356 u32 tidentry = flow->tid_entry[flow->tid_idx];
2357 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
2358 struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
2359 u32 next_offset, om = KDETH_OM_LARGE;
2360 bool last_pkt;
2361 u32 hdwords = 0;
2362 struct tid_rdma_params *remote;
2363
2364 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
2365 flow->sent += *len;
2366 next_offset = flow->tid_offset + *len;
2367 last_pkt = (flow->sent >= flow->length);
2368
2369 trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
2370 trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
2371
2372 rcu_read_lock();
2373 remote = rcu_dereference(qpriv->tid_rdma.remote);
2374 if (!remote) {
2375 rcu_read_unlock();
2376 goto done;
2377 }
2378 KDETH_RESET(resp->kdeth0, KVER, 0x1);
2379 KDETH_SET(resp->kdeth0, SH, !last_pkt);
2380 KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
2381 KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
2382 KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
2383 KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
2384 KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
2385 KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
2386 resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
2387 rcu_read_unlock();
2388
2389 resp->aeth = rvt_compute_aeth(qp);
2390 resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
2391 flow->pkt));
2392
2393 *bth0 = TID_OP(READ_RESP) << 24;
2394 *bth1 = flow->tid_qpn;
2395 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
2396 HFI1_KDETH_BTH_SEQ_MASK) |
2397 (flow->flow_state.generation <<
2398 HFI1_KDETH_BTH_SEQ_SHIFT));
2399 *last = last_pkt;
2400 if (last_pkt)
2401
2402 req->clear_tail = (req->clear_tail + 1) &
2403 (MAX_FLOWS - 1);
2404
2405 if (next_offset >= tidlen) {
2406 flow->tid_offset = 0;
2407 flow->tid_idx++;
2408 } else {
2409 flow->tid_offset = next_offset;
2410 }
2411
2412 hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
2413
2414done:
2415 return hdwords;
2416}
2417
2418static inline struct tid_rdma_request *
2419find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
2420 __must_hold(&qp->s_lock)
2421{
2422 struct rvt_swqe *wqe;
2423 struct tid_rdma_request *req = NULL;
2424 u32 i, end;
2425
2426 end = qp->s_cur + 1;
2427 if (end == qp->s_size)
2428 end = 0;
2429 for (i = qp->s_acked; i != end;) {
2430 wqe = rvt_get_swqe_ptr(qp, i);
2431 if (cmp_psn(psn, wqe->psn) >= 0 &&
2432 cmp_psn(psn, wqe->lpsn) <= 0) {
2433 if (wqe->wr.opcode == opcode)
2434 req = wqe_to_tid_req(wqe);
2435 break;
2436 }
2437 if (++i == qp->s_size)
2438 i = 0;
2439 }
2440
2441 return req;
2442}
2443
2444void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
2445{
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455 struct ib_other_headers *ohdr = packet->ohdr;
2456 struct rvt_qp *qp = packet->qp;
2457 struct hfi1_qp_priv *priv = qp->priv;
2458 struct hfi1_ctxtdata *rcd = packet->rcd;
2459 struct tid_rdma_request *req;
2460 struct tid_rdma_flow *flow;
2461 u32 opcode, aeth;
2462 bool fecn;
2463 unsigned long flags;
2464 u32 kpsn, ipsn;
2465
2466 trace_hfi1_sender_rcv_tid_read_resp(qp);
2467 fecn = process_ecn(qp, packet);
2468 kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2469 aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
2470 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2471
2472 spin_lock_irqsave(&qp->s_lock, flags);
2473 ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
2474 req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
2475 if (unlikely(!req))
2476 goto ack_op_err;
2477
2478 flow = &req->flows[req->clear_tail];
2479
2480 if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
2481 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
2482
2483 if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
2484 goto ack_done;
2485 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2486
2487
2488
2489
2490
2491
2492
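		/*
		 * A packet carrying FECN can be delivered through the eager
		 * buffer instead of the expected-receive buffer (presumably
		 * steered there by the FECN handling rules).  In that case
		 * the payload still has to be copied to the destination SGE
		 * here, and software PSN tracking is turned on.
		 */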
2493 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
2494 struct rvt_sge_state ss;
2495 u32 len;
2496 u32 tlen = packet->tlen;
2497 u16 hdrsize = packet->hlen;
2498 u8 pad = packet->pad;
2499 u8 extra_bytes = pad + packet->extra_byte +
2500 (SIZE_OF_CRC << 2);
2501 u32 pmtu = qp->pmtu;
2502
2503 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2504 goto ack_op_err;
2505 len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
2506 if (unlikely(len < pmtu))
2507 goto ack_op_err;
2508 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
2509 false);
2510
2511 priv->s_flags |= HFI1_R_TID_SW_PSN;
2512 }
2513
2514 goto ack_done;
2515 }
2516 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2517 req->ack_pending--;
2518 priv->pending_tid_r_segs--;
2519 qp->s_num_rd_atomic--;
2520 if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
2521 !qp->s_num_rd_atomic) {
2522 qp->s_flags &= ~(RVT_S_WAIT_FENCE |
2523 RVT_S_WAIT_ACK);
2524 hfi1_schedule_send(qp);
2525 }
2526 if (qp->s_flags & RVT_S_WAIT_RDMAR) {
2527 qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
2528 hfi1_schedule_send(qp);
2529 }
2530
2531 trace_hfi1_ack(qp, ipsn);
2532 trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
2533 req->e.swqe->psn, req->e.swqe->lpsn,
2534 req);
2535 trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
2536
2537
2538 hfi1_kern_exp_rcv_clear(req);
2539
2540 if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
2541 goto ack_done;
2542
2543
2544 if (++req->comp_seg >= req->total_segs) {
2545 priv->tid_r_comp++;
2546 req->state = TID_REQUEST_COMPLETE;
2547 }
2548
2549
2550
2551
2552
2553
2554 if ((req->state == TID_REQUEST_SYNC &&
2555 req->comp_seg == req->cur_seg) ||
2556 priv->tid_r_comp == priv->tid_r_reqs) {
2557 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2558 priv->s_flags &= ~HFI1_R_TID_SW_PSN;
2559 if (req->state == TID_REQUEST_SYNC)
2560 req->state = TID_REQUEST_ACTIVE;
2561 }
2562
2563 hfi1_schedule_send(qp);
2564 goto ack_done;
2565
2566ack_op_err:
2567
2568
2569
2570
2571
2572
2573
2574
2575 if (qp->s_last == qp->s_acked)
2576 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
2577
2578ack_done:
2579 spin_unlock_irqrestore(&qp->s_lock, flags);
2580}
2581
2582void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
2583 __must_hold(&qp->s_lock)
2584{
2585 u32 n = qp->s_acked;
2586 struct rvt_swqe *wqe;
2587 struct tid_rdma_request *req;
2588 struct hfi1_qp_priv *priv = qp->priv;
2589
2590 lockdep_assert_held(&qp->s_lock);
2591
2592 while (n != qp->s_tail) {
2593 wqe = rvt_get_swqe_ptr(qp, n);
2594 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2595 req = wqe_to_tid_req(wqe);
2596 hfi1_kern_exp_rcv_clear_all(req);
2597 }
2598
2599 if (++n == qp->s_size)
2600 n = 0;
2601 }
2602
2603 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2604}
2605
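/*
 * Handle a TID receive error for a KDETH packet.  Eager-type errors trigger
 * a retransmit of the affected request; a READ RESPONSE whose verbs PSN is
 * still in flight frees the read flows and moves the QP to error; anything
 * else clears the hardware flow and the expected-receive resources of any
 * queued TID RDMA WRITE requests.  Always returns true, i.e. the packet is
 * consumed here.
 */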
2606static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
2607 struct hfi1_packet *packet, u8 rcv_type,
2608 u8 opcode)
2609{
2610 struct rvt_qp *qp = packet->qp;
2611 struct hfi1_qp_priv *qpriv = qp->priv;
2612 u32 ipsn;
2613 struct ib_other_headers *ohdr = packet->ohdr;
2614 struct rvt_ack_entry *e;
2615 struct tid_rdma_request *req;
2616 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2617 u32 i;
2618
2619 if (rcv_type >= RHF_RCV_TYPE_IB)
2620 goto done;
2621
2622 spin_lock(&qp->s_lock);
2623
2624
2625
2626
2627
2628
2629
2630
2631 if (rcv_type == RHF_RCV_TYPE_EAGER) {
2632 hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
2633 hfi1_schedule_send(qp);
2634 goto done_unlock;
2635 }
2636
2637
2638
2639
2640
2641 if (opcode == TID_OP(READ_RESP)) {
2642 ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
2643 if (cmp_psn(ipsn, qp->s_last_psn) > 0 &&
2644 cmp_psn(ipsn, qp->s_psn) < 0) {
2645 hfi1_kern_read_tid_flow_free(qp);
2646 spin_unlock(&qp->s_lock);
2647 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2648 goto done;
2649 }
2650 goto done_unlock;
2651 }
2652
2653
2654
2655
2656 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
2657 for (i = 0; i < rvt_max_atomic(rdi); i++) {
2658 e = &qp->s_ack_queue[i];
2659 if (e->opcode == TID_OP(WRITE_REQ)) {
2660 req = ack_to_tid_req(e);
2661 hfi1_kern_exp_rcv_clear_all(req);
2662 }
2663 }
2664 spin_unlock(&qp->s_lock);
2665 rvt_rc_error(qp, IB_WC_LOC_LEN_ERR);
2666 goto done;
2667
2668done_unlock:
2669 spin_unlock(&qp->s_lock);
2670done:
2671 return true;
2672}
2673
2674static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
2675 struct rvt_qp *qp, struct rvt_swqe *wqe)
2676{
2677 struct tid_rdma_request *req;
2678 struct tid_rdma_flow *flow;
2679
2680
2681 qp->r_flags |= RVT_R_RDMAR_SEQ;
2682 req = wqe_to_tid_req(wqe);
2683 flow = &req->flows[req->clear_tail];
2684 hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
2685 if (list_empty(&qp->rspwait)) {
2686 qp->r_flags |= RVT_R_RSP_SEND;
2687 rvt_get_qp(qp);
2688 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2689 }
2690}
2691
2692
2693
2694
2695
2696
2697
2698
2699
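/*
 * Handle KDETH eflags for a TID RDMA READ RESPONSE on the requester side.
 * Flow sequence errors either restart the read request or fall back to
 * software PSN tracking, depending on whether HFI1_R_TID_SW_PSN is already
 * set.  Returns false when the packet completes the last PSN of a flow and
 * should still be processed by the normal receive path, true otherwise.
 *
 * Called with packet->qp->r_lock held and under RCU.
 */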
2700static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2701 struct hfi1_packet *packet, u8 rcv_type,
2702 u8 rte, u32 psn, u32 ibpsn)
2703 __must_hold(&packet->qp->r_lock) __must_hold(RCU)
2704{
2705 struct hfi1_pportdata *ppd = rcd->ppd;
2706 struct hfi1_devdata *dd = ppd->dd;
2707 struct hfi1_ibport *ibp;
2708 struct rvt_swqe *wqe;
2709 struct tid_rdma_request *req;
2710 struct tid_rdma_flow *flow;
2711 u32 ack_psn;
2712 struct rvt_qp *qp = packet->qp;
2713 struct hfi1_qp_priv *priv = qp->priv;
2714 bool ret = true;
2715 int diff = 0;
2716 u32 fpsn;
2717
2718 lockdep_assert_held(&qp->r_lock);
2719
2720 if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
2721 cmp_psn(ibpsn, qp->s_psn) > 0)
2722 return ret;
2723
2724 spin_lock(&qp->s_lock);
2725
2726
2727
2728
2729
2730 ack_psn = ibpsn - 1;
2731 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
2732 ibp = to_iport(qp->ibqp.device, qp->port_num);
2733
2734
2735 while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
2736
2737
2738
2739
2740
2741 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
2742 wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
2743 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2744 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2745
2746 if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
2747 qp->r_flags |= RVT_R_RDMAR_SEQ;
2748 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2749 restart_tid_rdma_read_req(rcd, qp,
2750 wqe);
2751 } else {
2752 hfi1_restart_rc(qp, qp->s_last_psn + 1,
2753 0);
2754 if (list_empty(&qp->rspwait)) {
2755 qp->r_flags |= RVT_R_RSP_SEND;
2756 rvt_get_qp(qp);
2757 list_add_tail(
2758 &qp->rspwait,
2759 &rcd->qp_wait_list);
2760 }
2761 }
2762 }
2763
2764
2765
2766
2767 break;
2768 }
2769
2770 wqe = do_rc_completion(qp, wqe, ibp);
2771 if (qp->s_acked == qp->s_tail)
2772 break;
2773 }
2774
2775
2776 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
2777 goto s_unlock;
2778
2779 req = wqe_to_tid_req(wqe);
2780 switch (rcv_type) {
2781 case RHF_RCV_TYPE_EXPECTED:
2782 switch (rte) {
2783 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793 flow = find_flow(req, psn, NULL);
2794 if (!flow) {
2795
2796
2797
2798
2799
2800
2801 hfi1_kern_read_tid_flow_free(qp);
2802 spin_unlock(&qp->s_lock);
2803 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2804 return ret;
2805 }
2806 if (priv->s_flags & HFI1_R_TID_SW_PSN) {
2807 diff = cmp_psn(psn,
2808 flow->flow_state.r_next_psn);
2809 if (diff > 0) {
2810 if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
2811 restart_tid_rdma_read_req(rcd,
2812 qp,
2813 wqe);
2814
2815
2816 goto s_unlock;
2817 } else if (diff < 0) {
2818
2819
2820
2821
2822
2823 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2824 qp->r_flags &=
2825 ~RVT_R_RDMAR_SEQ;
2826
2827
2828 goto s_unlock;
2829 }
2830
2831
2832
2833
2834
2835
2836 fpsn = full_flow_psn(flow,
2837 flow->flow_state.lpsn);
2838 if (cmp_psn(fpsn, psn) == 0) {
2839 ret = false;
2840 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2841 qp->r_flags &=
2842 ~RVT_R_RDMAR_SEQ;
2843 }
2844 flow->flow_state.r_next_psn =
2845 mask_psn(psn + 1);
2846 } else {
2847 u32 last_psn;
2848
2849 last_psn = read_r_next_psn(dd, rcd->ctxt,
2850 flow->idx);
2851 flow->flow_state.r_next_psn = last_psn;
2852 priv->s_flags |= HFI1_R_TID_SW_PSN;
2853
2854
2855
2856
2857 if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
2858 restart_tid_rdma_read_req(rcd, qp,
2859 wqe);
2860 }
2861
2862 break;
2863
2864 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2865
2866
2867
2868
2869 break;
2870
2871 default:
2872 break;
2873 }
2874 break;
2875
2876 case RHF_RCV_TYPE_ERROR:
2877 switch (rte) {
2878 case RHF_RTE_ERROR_OP_CODE_ERR:
2879 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
2880 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
2881 case RHF_RTE_ERROR_KHDR_KVER_ERR:
2882 case RHF_RTE_ERROR_CONTEXT_ERR:
2883 case RHF_RTE_ERROR_KHDR_TID_ERR:
2884 default:
2885 break;
2886 }
2887 default:
2888 break;
2889 }
2890s_unlock:
2891 spin_unlock(&qp->s_lock);
2892 return ret;
2893}
2894
2895bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2896 struct hfi1_pportdata *ppd,
2897 struct hfi1_packet *packet)
2898{
2899 struct hfi1_ibport *ibp = &ppd->ibport_data;
2900 struct hfi1_devdata *dd = ppd->dd;
2901 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
2902 u8 rcv_type = rhf_rcv_type(packet->rhf);
2903 u8 rte = rhf_rcv_type_err(packet->rhf);
2904 struct ib_header *hdr = packet->hdr;
2905 struct ib_other_headers *ohdr = NULL;
2906 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
2907 u16 lid = be16_to_cpu(hdr->lrh[1]);
2908 u8 opcode;
2909 u32 qp_num, psn, ibpsn;
2910 struct rvt_qp *qp;
2911 struct hfi1_qp_priv *qpriv;
2912 unsigned long flags;
2913 bool ret = true;
2914 struct rvt_ack_entry *e;
2915 struct tid_rdma_request *req;
2916 struct tid_rdma_flow *flow;
2917 int diff = 0;
2918
2919 trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
2920 packet->rhf);
2921 if (packet->rhf & RHF_ICRC_ERR)
2922 return ret;
2923
2924 packet->ohdr = &hdr->u.oth;
2925 ohdr = packet->ohdr;
2926 trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
2927
2928
2929 qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
2930 RVT_QPN_MASK;
2931 if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
2932 goto drop;
2933
2934 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2935 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2936
2937 rcu_read_lock();
2938 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
2939 if (!qp)
2940 goto rcu_unlock;
2941
2942 packet->qp = qp;
2943
2944
2945 spin_lock_irqsave(&qp->r_lock, flags);
2946 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2947 ibp->rvp.n_pkt_drops++;
2948 goto r_unlock;
2949 }
2950
2951 if (packet->rhf & RHF_TID_ERR) {
2952
2953 u32 tlen = rhf_pkt_len(packet->rhf);
2954
2955
2956 if (tlen < 24)
2957 goto r_unlock;
2958
2959
2960
2961
2962
2963 if (lnh == HFI1_LRH_GRH)
2964 goto r_unlock;
2965
2966 if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode))
2967 goto r_unlock;
2968 }
2969
2970
2971 if (opcode == TID_OP(READ_RESP)) {
2972 ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
2973 ibpsn = mask_psn(ibpsn);
2974 ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
2975 ibpsn);
2976 goto r_unlock;
2977 }
2978
2979
2980
2981
2982
2983
2984 spin_lock(&qp->s_lock);
2985 qpriv = qp->priv;
2986 e = &qp->s_ack_queue[qpriv->r_tid_tail];
2987 req = ack_to_tid_req(e);
2988 flow = &req->flows[req->clear_tail];
2989 trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
2990 trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
2991 trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
2992 trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
2993 e->lpsn, req);
2994 trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
2995
2996 switch (rcv_type) {
2997 case RHF_RCV_TYPE_EXPECTED:
2998 switch (rte) {
2999 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
3000 if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
3001 qpriv->s_flags |= HFI1_R_TID_SW_PSN;
3002 flow->flow_state.r_next_psn =
3003 read_r_next_psn(dd, rcd->ctxt,
3004 flow->idx);
3005 qpriv->r_next_psn_kdeth =
3006 flow->flow_state.r_next_psn;
3007 goto nak_psn;
3008 } else {
3009
3010
3011
3012
3013
3014
3015
3016
3017 diff = cmp_psn(psn,
3018 flow->flow_state.r_next_psn);
3019 if (diff > 0)
3020 goto nak_psn;
3021 else if (diff < 0)
3022 break;
3023
3024 qpriv->s_nak_state = 0;
3025
3026
3027
3028
3029
3030 if (psn == full_flow_psn(flow,
3031 flow->flow_state.lpsn))
3032 ret = false;
3033 flow->flow_state.r_next_psn =
3034 mask_psn(psn + 1);
3035 qpriv->r_next_psn_kdeth =
3036 flow->flow_state.r_next_psn;
3037 }
3038 break;
3039
3040 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
3041 goto nak_psn;
3042
3043 default:
3044 break;
3045 }
3046 break;
3047
3048 case RHF_RCV_TYPE_ERROR:
3049 switch (rte) {
3050 case RHF_RTE_ERROR_OP_CODE_ERR:
3051 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
3052 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
3053 case RHF_RTE_ERROR_KHDR_KVER_ERR:
3054 case RHF_RTE_ERROR_CONTEXT_ERR:
3055 case RHF_RTE_ERROR_KHDR_TID_ERR:
3056 default:
3057 break;
3058 }
3059 default:
3060 break;
3061 }
3062
3063unlock:
3064 spin_unlock(&qp->s_lock);
3065r_unlock:
3066 spin_unlock_irqrestore(&qp->r_lock, flags);
3067rcu_unlock:
3068 rcu_read_unlock();
3069drop:
3070 return ret;
3071nak_psn:
3072 ibp->rvp.n_rc_seqnak++;
3073 if (!qpriv->s_nak_state) {
3074 qpriv->s_nak_state = IB_NAK_PSN_ERROR;
3075
3076 qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
3077 qpriv->s_flags |= RVT_S_ACK_PENDING;
3078 if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
3079 qpriv->r_tid_ack = qpriv->r_tid_tail;
3080 hfi1_schedule_tid_send(qp);
3081 }
3082 goto unlock;
3083}
3084
3085
3086
3087
3088
3089
3090
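/*
 * "Rewind" a TID RDMA request so it can be retransmitted from *bth2: find
 * the flow that contains the restart PSN, recompute how many packets of
 * that flow have already been delivered, and reset the per-flow transmit
 * state (and, for TID RDMA WRITE, the SGE state and any later flows).
 */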
3091void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3092 u32 *bth2)
3093{
3094 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3095 struct tid_rdma_flow *flow;
3096 struct hfi1_qp_priv *qpriv = qp->priv;
3097 int diff, delta_pkts;
3098 u32 tididx = 0, i;
3099 u16 fidx;
3100
3101 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3102 *bth2 = mask_psn(qp->s_psn);
3103 flow = find_flow_ib(req, *bth2, &fidx);
3104 if (!flow) {
3105 trace_hfi1_msg_tid_restart_req(
3106 qp, "!!!!!! Could not find flow to restart: bth2 ",
3107 (u64)*bth2);
3108 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
3109 wqe->psn, wqe->lpsn,
3110 req);
3111 return;
3112 }
3113 } else {
3114 fidx = req->acked_tail;
3115 flow = &req->flows[fidx];
3116 *bth2 = mask_psn(req->r_ack_psn);
3117 }
3118
3119 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3120 delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
3121 else
3122 delta_pkts = delta_psn(*bth2,
3123 full_flow_psn(flow,
3124 flow->flow_state.spsn));
3125
3126 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3127 diff = delta_pkts + flow->resync_npkts;
3128
3129 flow->sent = 0;
3130 flow->pkt = 0;
3131 flow->tid_idx = 0;
3132 flow->tid_offset = 0;
3133 if (diff) {
3134 for (tididx = 0; tididx < flow->tidcnt; tididx++) {
3135 u32 tidentry = flow->tid_entry[tididx], tidlen,
3136 tidnpkts, npkts;
3137
3138 flow->tid_offset = 0;
3139 tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
3140 tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
3141 npkts = min_t(u32, diff, tidnpkts);
3142 flow->pkt += npkts;
3143 flow->sent += (npkts == tidnpkts ? tidlen :
3144 npkts * qp->pmtu);
3145 flow->tid_offset += npkts * qp->pmtu;
3146 diff -= npkts;
3147 if (!diff)
3148 break;
3149 }
3150 }
3151 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3152 rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
3153 flow->sent, 0);
3154
3155
3156
3157
3158
3159
3160
3161 flow->pkt -= flow->resync_npkts;
3162 }
3163
3164 if (flow->tid_offset ==
3165 EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
3166 tididx++;
3167 flow->tid_offset = 0;
3168 }
3169 flow->tid_idx = tididx;
3170 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3171
3172 req->flow_idx = fidx;
3173 else
3174 req->clear_tail = fidx;
3175
3176 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3177 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
3178 wqe->lpsn, req);
3179 req->state = TID_REQUEST_ACTIVE;
3180 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3181
3182 fidx = CIRC_NEXT(fidx, MAX_FLOWS);
3183 i = qpriv->s_tid_tail;
3184 do {
3185 for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
3186 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
3187 req->flows[fidx].sent = 0;
3188 req->flows[fidx].pkt = 0;
3189 req->flows[fidx].tid_idx = 0;
3190 req->flows[fidx].tid_offset = 0;
3191 req->flows[fidx].resync_npkts = 0;
3192 }
3193 if (i == qpriv->s_tid_cur)
3194 break;
			do {
				if (++i == qp->s_size)
					i = 0;
				wqe = rvt_get_swqe_ptr(qp, i);
			} while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
3199 req = wqe_to_tid_req(wqe);
3200 req->cur_seg = req->ack_seg;
3201 fidx = req->acked_tail;
3202
3203 req->clear_tail = fidx;
3204 } while (1);
3205 }
3206}
3207
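/*
 * Release every TID RDMA resource still held by the QP: the hardware flow,
 * the expected-receive flows of outstanding TID RDMA READ requests on the
 * send queue, and those of TID RDMA WRITE requests in the ack queue.  Only
 * meaningful for RC QPs with the TID RDMA capability enabled.
 */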
3208void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
3209{
3210 int i, ret;
3211 struct hfi1_qp_priv *qpriv = qp->priv;
3212 struct tid_flow_state *fs;
3213
3214 if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
3215 return;
3216
3217
3218
3219
3220
3221 fs = &qpriv->flow_state;
3222 if (fs->index != RXE_NUM_TID_FLOWS)
3223 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3224
3225 for (i = qp->s_acked; i != qp->s_head;) {
3226 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
3227
3228 if (++i == qp->s_size)
3229 i = 0;
3230
3231 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
3232 continue;
3233 do {
3234 struct hfi1_swqe_priv *priv = wqe->priv;
3235
3236 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3237 } while (!ret);
3238 }
3239 for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
3240 struct rvt_ack_entry *e = &qp->s_ack_queue[i];
3241
3242 if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
3243 i = 0;
3244
3245 if (e->opcode != TID_OP(WRITE_REQ))
3246 continue;
3247 do {
3248 struct hfi1_ack_priv *priv = e->priv;
3249
3250 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3251 } while (!ret);
3252 }
3253}
3254
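/*
 * Decide whether @wqe has to wait for the previous WQE to finish first,
 * e.g. a TID RDMA WRITE that is not fully ACKed, or a plain RDMA READ
 * still outstanding ahead of a TID RDMA READ.  If so, set
 * HFI1_S_TID_WAIT_INTERLCK and return true so the caller holds off
 * sending this WQE.
 */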
3255bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
3256{
3257 struct rvt_swqe *prev;
3258 struct hfi1_qp_priv *priv = qp->priv;
3259 u32 s_prev;
3260 struct tid_rdma_request *req;
3261
3262 s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
3263 prev = rvt_get_swqe_ptr(qp, s_prev);
3264
3265 switch (wqe->wr.opcode) {
3266 case IB_WR_SEND:
3267 case IB_WR_SEND_WITH_IMM:
3268 case IB_WR_SEND_WITH_INV:
3269 case IB_WR_ATOMIC_CMP_AND_SWP:
3270 case IB_WR_ATOMIC_FETCH_AND_ADD:
3271 case IB_WR_RDMA_WRITE:
3272 switch (prev->wr.opcode) {
3273 case IB_WR_TID_RDMA_WRITE:
3274 req = wqe_to_tid_req(prev);
3275 if (req->ack_seg != req->total_segs)
3276 goto interlock;
3277 default:
3278 break;
3279 }
3280 break;
3281 case IB_WR_RDMA_READ:
3282 if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
3283 break;
		/* fall through */
3285 case IB_WR_TID_RDMA_READ:
3286 switch (prev->wr.opcode) {
3287 case IB_WR_RDMA_READ:
3288 if (qp->s_acked != qp->s_cur)
3289 goto interlock;
3290 break;
3291 case IB_WR_TID_RDMA_WRITE:
3292 req = wqe_to_tid_req(prev);
3293 if (req->ack_seg != req->total_segs)
3294 goto interlock;
3295 default:
3296 break;
3297 }
3298 default:
3299 break;
3300 }
3301 return false;
3302
3303interlock:
3304 priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
3305 return true;
3306}
3307
3308
3309static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
3310 struct rvt_sge *sge, int num_sge)
3311{
3312 int i;
3313
3314 for (i = 0; i < num_sge; i++, sge++) {
3315 trace_hfi1_sge_check_align(qp, i, sge);
3316 if ((u64)sge->vaddr & ~PAGE_MASK ||
3317 sge->sge_length & ~PAGE_MASK)
3318 return false;
3319 }
3320 return true;
3321}
3322
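/*
 * Convert an RDMA READ or RDMA WRITE work request into its TID RDMA
 * variant when the remote end has negotiated TID RDMA parameters and the
 * buffer meets the page-alignment requirements.  The request is split into
 * segments of at most remote->max_len bytes and the WQE's lpsn is widened
 * to cover all packets (READ) or segments (WRITE) of the transfer.
 */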
3323void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
3324{
3325 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
3326 struct hfi1_swqe_priv *priv = wqe->priv;
3327 struct tid_rdma_params *remote;
3328 enum ib_wr_opcode new_opcode;
3329 bool do_tid_rdma = false;
3330 struct hfi1_pportdata *ppd = qpriv->rcd->ppd;
3331
3332 if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
3333 ppd->lid)
3334 return;
3335 if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
3336 return;
3337
3338 rcu_read_lock();
3339 remote = rcu_dereference(qpriv->tid_rdma.remote);
3340
3341
3342
3343
3344 if (!remote)
3345 goto exit;
3346
3347 if (wqe->wr.opcode == IB_WR_RDMA_READ) {
3348 if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
3349 wqe->wr.num_sge)) {
3350 new_opcode = IB_WR_TID_RDMA_READ;
3351 do_tid_rdma = true;
3352 }
3353 } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
3354
3355
3356
3357
3358
3359
3360 if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
3361 !(wqe->length & ~PAGE_MASK)) {
3362 new_opcode = IB_WR_TID_RDMA_WRITE;
3363 do_tid_rdma = true;
3364 }
3365 }
3366
3367 if (do_tid_rdma) {
3368 if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
3369 goto exit;
3370 wqe->wr.opcode = new_opcode;
3371 priv->tid_req.seg_len =
3372 min_t(u32, remote->max_len, wqe->length);
3373 priv->tid_req.total_segs =
3374 DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
3375
3376 wqe->lpsn = wqe->psn;
3377 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3378 priv->tid_req.n_flows = remote->max_read;
3379 qpriv->tid_r_reqs++;
3380 wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
3381 } else {
3382 wqe->lpsn += priv->tid_req.total_segs - 1;
3383 atomic_inc(&qpriv->n_requests);
3384 }
3385
3386 priv->tid_req.cur_seg = 0;
3387 priv->tid_req.comp_seg = 0;
3388 priv->tid_req.ack_seg = 0;
3389 priv->tid_req.state = TID_REQUEST_INACTIVE;
3390
3391
3392
3393
3394
3395
3396 priv->tid_req.acked_tail = priv->tid_req.setup_head;
3397 trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
3398 wqe->psn, wqe->lpsn,
3399 &priv->tid_req);
3400 }
3401exit:
3402 rcu_read_unlock();
3403}
3404
3405
3406
3407u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3408 struct ib_other_headers *ohdr,
3409 u32 *bth1, u32 *bth2, u32 *len)
3410{
3411 struct hfi1_qp_priv *qpriv = qp->priv;
3412 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3413 struct tid_rdma_params *remote;
3414
3415 rcu_read_lock();
3416 remote = rcu_dereference(qpriv->tid_rdma.remote);
3417
3418
3419
3420
3421 req->n_flows = remote->max_write;
3422 req->state = TID_REQUEST_ACTIVE;
3423
3424 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
3425 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
3426 ohdr->u.tid_rdma.w_req.reth.vaddr =
3427 cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
3428 ohdr->u.tid_rdma.w_req.reth.rkey =
3429 cpu_to_be32(wqe->rdma_wr.rkey);
3430 ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
3431 ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
3432 *bth1 &= ~RVT_QPN_MASK;
3433 *bth1 |= remote->qp;
3434 qp->s_state = TID_OP(WRITE_REQ);
3435 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
3436 *bth2 |= IB_BTH_REQ_ACK;
3437 *len = 0;
3438
3439 rcu_read_unlock();
3440 return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
3441}
3442
3443void hfi1_compute_tid_rdma_flow_wt(void)
3444{
3445
3446
3447
3448
3449
3450
3451
3452
3453 tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
3454 TID_RDMA_MAX_SEGMENT_SIZE;
3455}
3456
3457static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
3458 struct tid_queue *queue)
3459{
3460 return qpriv->tid_enqueue - queue->dequeue;
3461}
3462
3463
3464
3465
3466
3467
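/*
 * Translate a wait expressed in segments (@to_seg) into an RNR timeout
 * encoding: compute the time needed to move that many maximum-size
 * segments at the port's current egress rate, then walk the RNR timer
 * table and return the first encoding (1..31) whose timeout is at least
 * that long, or 0 if none qualifies.
 */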
3468static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
3469{
3470 struct hfi1_qp_priv *qpriv = qp->priv;
3471 u64 timeout;
3472 u32 bytes_per_us;
3473 u8 i;
3474
3475 bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
3476 timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
3477
3478
3479
3480
3481 for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
3482 if (rvt_rnr_tbl_to_usec(i) >= timeout)
3483 return i;
3484 return 0;
3485}
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
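/*
 * Central allocator for TID RDMA WRITE responder resources.  Starting at
 * qpriv->r_tid_alloc, walk the ack queue and, for each pending WRITE
 * request segment, make sure a hardware flow is programmed and
 * expected-receive (TID) entries are set up.  Allocation stops at the
 * negotiated per-QP limit, at a generation sync point, or when flow or
 * RcvArray resources are exhausted; in the latter case, when called from
 * interrupt context (@intr_ctx), a TID RNR NAK is prepared so the
 * requester backs off for a time derived from this QP's position in the
 * wait queues.
 */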
3506static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
3507{
3508 struct tid_rdma_request *req;
3509 struct hfi1_qp_priv *qpriv = qp->priv;
3510 struct hfi1_ctxtdata *rcd = qpriv->rcd;
3511 struct tid_rdma_params *local = &qpriv->tid_rdma.local;
3512 struct rvt_ack_entry *e;
3513 u32 npkts, to_seg;
3514 bool last;
3515 int ret = 0;
3516
3517 lockdep_assert_held(&qp->s_lock);
3518
3519 while (1) {
3520 trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
3521 trace_hfi1_tid_write_rsp_alloc_res(qp);
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534 if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
3535 break;
3536
3537
3538 if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
3539
3540 if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
3541 !qpriv->alloc_w_segs) {
3542 hfi1_kern_clear_hw_flow(rcd, qp);
3543 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3544 }
3545 break;
3546 }
3547
3548 e = &qp->s_ack_queue[qpriv->r_tid_alloc];
3549 if (e->opcode != TID_OP(WRITE_REQ))
3550 goto next_req;
3551 req = ack_to_tid_req(e);
3552 trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
3553 e->lpsn, req);
3554
3555 if (req->alloc_seg >= req->total_segs)
3556 goto next_req;
3557
3558
3559 if (qpriv->alloc_w_segs >= local->max_write)
3560 break;
3561
3562
3563 if (qpriv->sync_pt && qpriv->alloc_w_segs)
3564 break;
3565
3566
3567 if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
3568 hfi1_kern_clear_hw_flow(rcd, qp);
3569 qpriv->sync_pt = false;
3570 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3571 }
3572
3573
3574 if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
3575 ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
3576 if (ret) {
3577 to_seg = tid_rdma_flow_wt *
3578 position_in_queue(qpriv,
3579 &rcd->flow_queue);
3580 break;
3581 }
3582 }
3583
3584 npkts = rvt_div_round_up_mtu(qp, req->seg_len);
3585
3586
3587
3588
3589
3590 if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
3591 qpriv->sync_pt = true;
3592 break;
3593 }
3594
3595
3596
3597
3598
3599
3600
3601
3602 if (!CIRC_SPACE(req->setup_head, req->acked_tail,
3603 MAX_FLOWS)) {
3604 ret = -EAGAIN;
3605 to_seg = MAX_FLOWS >> 1;
3606 qpriv->s_flags |= RVT_S_ACK_PENDING;
3607 hfi1_schedule_tid_send(qp);
3608 break;
3609 }
3610
3611
3612 ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
3613 if (ret == -EAGAIN)
3614 to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
3615 if (ret)
3616 break;
3617
3618 qpriv->alloc_w_segs++;
3619 req->alloc_seg++;
3620 continue;
3621next_req:
3622
3623 if (++qpriv->r_tid_alloc >
3624 rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3625 qpriv->r_tid_alloc = 0;
3626 }
3627
3628
3629
3630
3631
3632
3633 if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
3634 goto send_rnr_nak;
3635
3636 return;
3637
3638send_rnr_nak:
3639 lockdep_assert_held(&qp->r_lock);
3640
3641
3642 qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;
3643
3644
3645 qp->r_psn = e->psn + req->alloc_seg;
3646 qp->r_ack_psn = qp->r_psn;
3647
3648
3649
3650
3651
3652 qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
3653 if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3654 qp->r_head_ack_queue = 0;
3655 qpriv->r_tid_head = qp->r_head_ack_queue;
3656
3657
3658
3659
3660
3661 qp->s_nak_state = qp->r_nak_state;
3662 qp->s_ack_psn = qp->r_ack_psn;
3663
3664
3665
3666
3667 qp->s_flags &= ~(RVT_S_ACK_PENDING);
3668
3669 trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
3670
3671
3672
3673
3674
3675
3676
3677 qpriv->rnr_nak_state = TID_RNR_NAK_SEND;
3678
3679
3680
3681
3682
3683
3684 rc_defered_ack(rcd, qp);
3685}
3686
3687void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
3688{
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702 struct hfi1_ctxtdata *rcd = packet->rcd;
3703 struct rvt_qp *qp = packet->qp;
3704 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
3705 struct ib_other_headers *ohdr = packet->ohdr;
3706 struct rvt_ack_entry *e;
3707 unsigned long flags;
3708 struct ib_reth *reth;
3709 struct hfi1_qp_priv *qpriv = qp->priv;
3710 struct tid_rdma_request *req;
3711 u32 bth0, psn, len, rkey, num_segs;
3712 bool fecn;
3713 u8 next;
3714 u64 vaddr;
3715 int diff;
3716
3717 bth0 = be32_to_cpu(ohdr->bth[0]);
3718 if (hfi1_ruc_check_hdr(ibp, packet))
3719 return;
3720
3721 fecn = process_ecn(qp, packet);
3722 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
3723 trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
3724
3725 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
3726 rvt_comm_est(qp);
3727
3728 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3729 goto nack_inv;
3730
3731 reth = &ohdr->u.tid_rdma.w_req.reth;
3732 vaddr = be64_to_cpu(reth->vaddr);
3733 len = be32_to_cpu(reth->length);
3734
3735 num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
3736 diff = delta_psn(psn, qp->r_psn);
3737 if (unlikely(diff)) {
3738 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
3739 return;
3740 }
3741
3742
3743
3744
3745
3746
3747 if (qpriv->rnr_nak_state)
3748 qp->r_head_ack_queue = qp->r_head_ack_queue ?
3749 qp->r_head_ack_queue - 1 :
3750 rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
3751
3752
3753 next = qp->r_head_ack_queue + 1;
3754 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3755 next = 0;
3756 spin_lock_irqsave(&qp->s_lock, flags);
3757 if (unlikely(next == qp->s_acked_ack_queue)) {
3758 if (!qp->s_ack_queue[next].sent)
3759 goto nack_inv_unlock;
3760 update_ack_queue(qp, next);
3761 }
3762 e = &qp->s_ack_queue[qp->r_head_ack_queue];
3763 req = ack_to_tid_req(e);
3764
3765
3766 if (qpriv->rnr_nak_state) {
3767 qp->r_nak_state = 0;
3768 qp->s_nak_state = 0;
3769 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
3770 qp->r_psn = e->lpsn + 1;
3771 req->state = TID_REQUEST_INIT;
3772 goto update_head;
3773 }
3774
3775 release_rdma_sge_mr(e);
3776
3777
3778 if (!len || len & ~PAGE_MASK)
3779 goto nack_inv_unlock;
3780
3781 rkey = be32_to_cpu(reth->rkey);
3782 qp->r_len = len;
3783
3784 if (e->opcode == TID_OP(WRITE_REQ) &&
3785 (req->setup_head != req->clear_tail ||
3786 req->clear_tail != req->acked_tail))
3787 goto nack_inv_unlock;
3788
3789 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
3790 rkey, IB_ACCESS_REMOTE_WRITE)))
3791 goto nack_acc;
3792
3793 qp->r_psn += num_segs - 1;
3794
3795 e->opcode = (bth0 >> 24) & 0xff;
3796 e->psn = psn;
3797 e->lpsn = qp->r_psn;
3798 e->sent = 0;
3799
3800 req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
3801 req->state = TID_REQUEST_INIT;
3802 req->cur_seg = 0;
3803 req->comp_seg = 0;
3804 req->ack_seg = 0;
3805 req->alloc_seg = 0;
3806 req->isge = 0;
3807 req->seg_len = qpriv->tid_rdma.local.max_len;
3808 req->total_len = len;
3809 req->total_segs = num_segs;
3810 req->r_flow_psn = e->psn;
3811 req->ss.sge = e->rdma_sge;
3812 req->ss.num_sge = 1;
3813
3814 req->flow_idx = req->setup_head;
3815 req->clear_tail = req->setup_head;
3816 req->acked_tail = req->setup_head;
3817
3818 qp->r_state = e->opcode;
3819 qp->r_nak_state = 0;
3820
3821
3822
3823
3824
3825 qp->r_msn++;
3826 qp->r_psn++;
3827
3828 trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
3829 req);
3830
3831 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
3832 qpriv->r_tid_tail = qp->r_head_ack_queue;
3833 } else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
3834 struct tid_rdma_request *ptr;
3835
3836 e = &qp->s_ack_queue[qpriv->r_tid_tail];
3837 ptr = ack_to_tid_req(e);
3838
3839 if (e->opcode != TID_OP(WRITE_REQ) ||
3840 ptr->comp_seg == ptr->total_segs) {
3841 if (qpriv->r_tid_tail == qpriv->r_tid_ack)
3842 qpriv->r_tid_ack = qp->r_head_ack_queue;
3843 qpriv->r_tid_tail = qp->r_head_ack_queue;
3844 }
3845 }
3846update_head:
3847 qp->r_head_ack_queue = next;
3848 qpriv->r_tid_head = qp->r_head_ack_queue;
3849
3850 hfi1_tid_write_alloc_resources(qp, true);
3851 trace_hfi1_tid_write_rsp_rcv_req(qp);
3852
3853
3854 qp->s_flags |= RVT_S_RESP_PENDING;
3855 if (fecn)
3856 qp->s_flags |= RVT_S_ECN;
3857 hfi1_schedule_send(qp);
3858
3859 spin_unlock_irqrestore(&qp->s_lock, flags);
3860 return;
3861
3862nack_inv_unlock:
3863 spin_unlock_irqrestore(&qp->s_lock, flags);
3864nack_inv:
3865 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
3866 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
3867 qp->r_ack_psn = qp->r_psn;
3868
3869 rc_defered_ack(rcd, qp);
3870 return;
3871nack_acc:
3872 spin_unlock_irqrestore(&qp->s_lock, flags);
3873 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
3874 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
3875 qp->r_ack_psn = qp->r_psn;
3876}
3877
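/*
 * Build a TID RDMA WRITE RESPONSE for ack queue entry @e.  The payload of
 * the response is the TID entry array of the flow being advertised
 * (req->flow_idx), exposed through epriv->ss; the KDETH/flow fields tell
 * the requester which generation, starting PSN and receive context to use.
 * Returns the number of 32-bit header words, or 0 if no segment can be
 * advertised yet (no resources allocated or an RNR NAK is pending).
 */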
3878u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
3879 struct ib_other_headers *ohdr, u32 *bth1,
3880 u32 bth2, u32 *len,
3881 struct rvt_sge_state **ss)
3882{
3883 struct hfi1_ack_priv *epriv = e->priv;
3884 struct tid_rdma_request *req = &epriv->tid_req;
3885 struct hfi1_qp_priv *qpriv = qp->priv;
3886 struct tid_rdma_flow *flow = NULL;
3887 u32 resp_len = 0, hdwords = 0;
3888 void *resp_addr = NULL;
3889 struct tid_rdma_params *remote;
3890
3891 trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
3892 req);
3893 trace_hfi1_tid_write_rsp_build_resp(qp);
3894 trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
3895 flow = &req->flows[req->flow_idx];
3896 switch (req->state) {
3897 default:
3898
3899
3900
3901
3902 hfi1_tid_write_alloc_resources(qp, false);
3903
3904
3905 if (req->cur_seg >= req->alloc_seg)
3906 goto done;
3907
3908
3909
3910
3911
3912 if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
3913 goto done;
3914
3915 req->state = TID_REQUEST_ACTIVE;
3916 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3917 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3918 hfi1_add_tid_reap_timer(qp);
3919 break;
3920
3921 case TID_REQUEST_RESEND_ACTIVE:
3922 case TID_REQUEST_RESEND:
3923 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3924 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3925 if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
3926 req->state = TID_REQUEST_ACTIVE;
3927
3928 hfi1_mod_tid_reap_timer(qp);
3929 break;
3930 }
3931 flow->flow_state.resp_ib_psn = bth2;
3932 resp_addr = (void *)flow->tid_entry;
3933 resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
3934 req->cur_seg++;
3935
3936 memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
3937 epriv->ss.sge.vaddr = resp_addr;
3938 epriv->ss.sge.sge_length = resp_len;
3939 epriv->ss.sge.length = epriv->ss.sge.sge_length;
3940
3941
3942
3943
3944 epriv->ss.sge.mr = NULL;
3945 epriv->ss.sge.m = 0;
3946 epriv->ss.sge.n = 0;
3947
3948 epriv->ss.sg_list = NULL;
3949 epriv->ss.total_len = epriv->ss.sge.sge_length;
3950 epriv->ss.num_sge = 1;
3951
3952 *ss = &epriv->ss;
3953 *len = epriv->ss.total_len;
3954
3955
3956 rcu_read_lock();
3957 remote = rcu_dereference(qpriv->tid_rdma.remote);
3958
3959 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
3960 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
3961 ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
3962 ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
3963 cpu_to_be32((flow->flow_state.generation <<
3964 HFI1_KDETH_BTH_SEQ_SHIFT) |
3965 (flow->flow_state.spsn &
3966 HFI1_KDETH_BTH_SEQ_MASK));
3967 ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
3968 cpu_to_be32(qpriv->tid_rdma.local.qp |
3969 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
3970 TID_RDMA_DESTQP_FLOW_SHIFT) |
3971 qpriv->rcd->ctxt);
3972 ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
3973 *bth1 = remote->qp;
3974 rcu_read_unlock();
3975 hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
3976 qpriv->pending_tid_w_segs++;
3977done:
3978 return hdwords;
3979}
3980
3981static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
3982{
3983 struct hfi1_qp_priv *qpriv = qp->priv;
3984
3985 lockdep_assert_held(&qp->s_lock);
3986 if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
3987 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3988 qpriv->s_tid_timer.expires = jiffies +
3989 qpriv->tid_timer_timeout_jiffies;
3990 add_timer(&qpriv->s_tid_timer);
3991 }
3992}
3993
3994static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
3995{
3996 struct hfi1_qp_priv *qpriv = qp->priv;
3997
3998 lockdep_assert_held(&qp->s_lock);
3999 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
4000 mod_timer(&qpriv->s_tid_timer, jiffies +
4001 qpriv->tid_timer_timeout_jiffies);
4002}
4003
4004static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
4005{
4006 struct hfi1_qp_priv *qpriv = qp->priv;
4007 int rval = 0;
4008
4009 lockdep_assert_held(&qp->s_lock);
4010 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
4011 rval = del_timer(&qpriv->s_tid_timer);
4012 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
4013 }
4014 return rval;
4015}
4016
4017void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
4018{
4019 struct hfi1_qp_priv *qpriv = qp->priv;
4020
4021 del_timer_sync(&qpriv->s_tid_timer);
4022 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
4023}
4024
4025static void hfi1_tid_timeout(struct timer_list *t)
4026{
4027 struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
4028 struct rvt_qp *qp = qpriv->owner;
4029 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
4030 unsigned long flags;
4031 u32 i;
4032
4033 spin_lock_irqsave(&qp->r_lock, flags);
4034 spin_lock(&qp->s_lock);
4035 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
4036 dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
4037 qp->ibqp.qp_num, __func__, __LINE__);
4038 trace_hfi1_msg_tid_timeout(
4039 qp, "resource timeout = ",
4040 (u64)qpriv->tid_timer_timeout_jiffies);
4041 hfi1_stop_tid_reap_timer(qp);
4042
4043
4044
4045
4046 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
4047 for (i = 0; i < rvt_max_atomic(rdi); i++) {
4048 struct tid_rdma_request *req =
4049 ack_to_tid_req(&qp->s_ack_queue[i]);
4050
4051 hfi1_kern_exp_rcv_clear_all(req);
4052 }
4053 spin_unlock(&qp->s_lock);
4054 if (qp->ibqp.event_handler) {
4055 struct ib_event ev;
4056
4057 ev.device = qp->ibqp.device;
4058 ev.element.qp = &qp->ibqp;
4059 ev.event = IB_EVENT_QP_FATAL;
4060 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
4061 }
4062 rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
4063 goto unlock_r_lock;
4064 }
4065 spin_unlock(&qp->s_lock);
4066unlock_r_lock:
4067 spin_unlock_irqrestore(&qp->r_lock, flags);
4068}
4069
4070void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
4071{
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083 struct ib_other_headers *ohdr = packet->ohdr;
4084 struct rvt_qp *qp = packet->qp;
4085 struct hfi1_qp_priv *qpriv = qp->priv;
4086 struct hfi1_ctxtdata *rcd = packet->rcd;
4087 struct rvt_swqe *wqe;
4088 struct tid_rdma_request *req;
4089 struct tid_rdma_flow *flow;
4090 enum ib_wc_status status;
4091 u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
4092 bool fecn;
4093 unsigned long flags;
4094
4095 fecn = process_ecn(qp, packet);
4096 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4097 aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
4098 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4099
4100 spin_lock_irqsave(&qp->s_lock, flags);
4101
4102
4103 if (cmp_psn(psn, qp->s_next_psn) >= 0)
4104 goto ack_done;
4105
4106
4107 if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
4108 goto ack_done;
4109
4110 if (unlikely(qp->s_acked == qp->s_tail))
4111 goto ack_done;
4112
4113
4114
4115
4116
4117
4118 if (qp->r_flags & RVT_R_RDMAR_SEQ) {
4119 if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
4120 goto ack_done;
4121 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
4122 }
4123
4124 wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
4125 if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
4126 goto ack_op_err;
4127
4128 req = wqe_to_tid_req(wqe);
4129
4130
4131
4132
4133
4134 if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
4135 goto ack_done;
4136
4137
4138
4139
4140
4141
4142
4143
4144 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
4145 goto ack_done;
4146
4147 trace_hfi1_ack(qp, psn);
4148
4149 flow = &req->flows[req->setup_head];
4150 flow->pkt = 0;
4151 flow->tid_idx = 0;
4152 flow->tid_offset = 0;
4153 flow->sent = 0;
4154 flow->resync_npkts = 0;
4155 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
4156 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
4157 TID_RDMA_DESTQP_FLOW_MASK;
4158 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
4159 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4160 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
4161 flow->flow_state.resp_ib_psn = psn;
4162 flow->length = min_t(u32, req->seg_len,
4163 (wqe->length - (req->comp_seg * req->seg_len)));
4164
4165 flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
4166 flow->flow_state.lpsn = flow->flow_state.spsn +
4167 flow->npkts - 1;
4168
4169 pktlen = packet->tlen - (packet->hlen + 4);
4170 if (pktlen > sizeof(flow->tid_entry)) {
4171 status = IB_WC_LOC_LEN_ERR;
4172 goto ack_err;
4173 }
4174 memcpy(flow->tid_entry, packet->ebuf, pktlen);
4175 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
4176 trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
4177
4178 req->comp_seg++;
4179 trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
4180
4181
4182
4183
4184 for (i = 0; i < flow->tidcnt; i++) {
4185 trace_hfi1_tid_entry_rcv_write_resp(
4186 qp, i, flow->tid_entry[i]);
4187 if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
4188 status = IB_WC_LOC_LEN_ERR;
4189 goto ack_err;
4190 }
4191 tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
4192 }
4193 if (tidlen * PAGE_SIZE < flow->length) {
4194 status = IB_WC_LOC_LEN_ERR;
4195 goto ack_err;
4196 }
4197
4198 trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
4199 wqe->lpsn, req);
4200
4201
4202
4203
4204 if (!cmp_psn(psn, wqe->psn)) {
4205 req->r_last_acked = mask_psn(wqe->psn - 1);
4206
4207 req->acked_tail = req->setup_head;
4208 }
4209
4210
4211 req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
4212 req->state = TID_REQUEST_ACTIVE;
4213
4214
4215
4216
4217
4218
4219
4220
4221 if (qpriv->s_tid_cur != qpriv->s_tid_head &&
4222 req->comp_seg == req->total_segs) {
4223 for (i = qpriv->s_tid_cur + 1; ; i++) {
4224 if (i == qp->s_size)
4225 i = 0;
4226 wqe = rvt_get_swqe_ptr(qp, i);
4227 if (i == qpriv->s_tid_head)
4228 break;
4229 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
4230 break;
4231 }
4232 qpriv->s_tid_cur = i;
4233 }
4234 qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
4235 hfi1_schedule_tid_send(qp);
4236 goto ack_done;
4237
4238ack_op_err:
4239 status = IB_WC_LOC_QP_OP_ERR;
4240ack_err:
4241 rvt_error_qp(qp, status);
4242ack_done:
4243 if (fecn)
4244 qp->s_flags |= RVT_S_ECN;
4245 spin_unlock_irqrestore(&qp->s_lock, flags);
4246}
4247
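/*
 * Build the KDETH header for one TID RDMA WRITE DATA packet from the flow
 * at req->clear_tail, advancing the flow's TID index/offset as data is
 * consumed.  The SH bit is cleared and IB_BTH_REQ_ACK is set on the last
 * packet of the segment; if the next segment would wrap the flow
 * generation, the request is moved to the SYNC state.  Returns true when
 * this was the last packet of the segment.
 */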
4248bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
4249 struct ib_other_headers *ohdr,
4250 u32 *bth1, u32 *bth2, u32 *len)
4251{
4252 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4253 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
4254 struct tid_rdma_params *remote;
4255 struct rvt_qp *qp = req->qp;
4256 struct hfi1_qp_priv *qpriv = qp->priv;
4257 u32 tidentry = flow->tid_entry[flow->tid_idx];
4258 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
4259 struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
4260 u32 next_offset, om = KDETH_OM_LARGE;
4261 bool last_pkt;
4262
4263 if (!tidlen) {
4264 hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
4265 rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
4266 }
4267
4268 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
4269 flow->sent += *len;
4270 next_offset = flow->tid_offset + *len;
4271 last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
4272 next_offset >= tidlen) || (flow->sent >= flow->length);
4273 trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
4274 trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
4275
4276 rcu_read_lock();
4277 remote = rcu_dereference(qpriv->tid_rdma.remote);
4278 KDETH_RESET(wd->kdeth0, KVER, 0x1);
4279 KDETH_SET(wd->kdeth0, SH, !last_pkt);
4280 KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
4281 KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
4282 KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
4283 KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
4284 KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
4285 KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
4286 wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
4287 rcu_read_unlock();
4288
4289 *bth1 = flow->tid_qpn;
4290 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
4291 HFI1_KDETH_BTH_SEQ_MASK) |
4292 (flow->flow_state.generation <<
4293 HFI1_KDETH_BTH_SEQ_SHIFT));
4294 if (last_pkt) {
4295
4296 if (flow->flow_state.lpsn + 1 +
4297 rvt_div_round_up_mtu(qp, req->seg_len) >
4298 MAX_TID_FLOW_PSN)
4299 req->state = TID_REQUEST_SYNC;
4300 *bth2 |= IB_BTH_REQ_ACK;
4301 }
4302
4303 if (next_offset >= tidlen) {
4304 flow->tid_offset = 0;
4305 flow->tid_idx++;
4306 } else {
4307 flow->tid_offset = next_offset;
4308 }
4309 return last_pkt;
4310}
4311
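/*
 * Responder side handling of TID RDMA WRITE DATA packets.  In-sequence
 * data lands directly in the expected receive buffers, so this mostly
 * performs bookkeeping: complete the segment, free its TID resources,
 * advance r_tid_tail on WRITE DATA LAST, replenish resources and schedule
 * the TID ACK.  Out-of-sequence packets generate a NAK (or, with FECN,
 * are copied out of the eager buffer and the flow switches to software
 * PSN tracking).
 */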
4312void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
4313{
4314 struct rvt_qp *qp = packet->qp;
4315 struct hfi1_qp_priv *priv = qp->priv;
4316 struct hfi1_ctxtdata *rcd = priv->rcd;
4317 struct ib_other_headers *ohdr = packet->ohdr;
4318 struct rvt_ack_entry *e;
4319 struct tid_rdma_request *req;
4320 struct tid_rdma_flow *flow;
4321 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4322 unsigned long flags;
4323 u32 psn, next;
4324 u8 opcode;
4325 bool fecn;
4326
4327 fecn = process_ecn(qp, packet);
4328 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4329 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4330
4331
4332
4333
4334
4335 spin_lock_irqsave(&qp->s_lock, flags);
4336 e = &qp->s_ack_queue[priv->r_tid_tail];
4337 req = ack_to_tid_req(e);
4338 flow = &req->flows[req->clear_tail];
4339 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
4340 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
4341
4342 if (cmp_psn(psn, flow->flow_state.r_next_psn))
4343 goto send_nak;
4344
4345 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4346
4347
4348
4349
4350
4351
4352
4353 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
4354 struct rvt_sge_state ss;
4355 u32 len;
4356 u32 tlen = packet->tlen;
4357 u16 hdrsize = packet->hlen;
4358 u8 pad = packet->pad;
4359 u8 extra_bytes = pad + packet->extra_byte +
4360 (SIZE_OF_CRC << 2);
4361 u32 pmtu = qp->pmtu;
4362
4363 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
4364 goto send_nak;
4365 len = req->comp_seg * req->seg_len;
4366 len += delta_psn(psn,
4367 full_flow_psn(flow, flow->flow_state.spsn)) *
4368 pmtu;
4369 if (unlikely(req->total_len - len < pmtu))
4370 goto send_nak;
4371
4372
4373
4374
4375
4376 ss.sge = e->rdma_sge;
4377 ss.sg_list = NULL;
4378 ss.num_sge = 1;
4379 ss.total_len = req->total_len;
4380 rvt_skip_sge(&ss, len, false);
4381 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
4382 false);
4383
4384 priv->r_next_psn_kdeth = mask_psn(psn + 1);
4385 priv->s_flags |= HFI1_R_TID_SW_PSN;
4386 }
4387 goto exit;
4388 }
4389 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4390 hfi1_kern_exp_rcv_clear(req);
4391 priv->alloc_w_segs--;
4392 rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
4393 req->comp_seg++;
4394 priv->s_nak_state = 0;
4395
4396
4397
4398
4399
4400
4401
4402
4403 trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
4404 trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
4405 req);
4406 trace_hfi1_tid_write_rsp_rcv_data(qp);
4407 if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
4408 priv->r_tid_ack = priv->r_tid_tail;
4409
4410 if (opcode == TID_OP(WRITE_DATA_LAST)) {
4411 release_rdma_sge_mr(e);
4412 for (next = priv->r_tid_tail + 1; ; next++) {
4413 if (next > rvt_size_atomic(&dev->rdi))
4414 next = 0;
4415 if (next == priv->r_tid_head)
4416 break;
4417 e = &qp->s_ack_queue[next];
4418 if (e->opcode == TID_OP(WRITE_REQ))
4419 break;
4420 }
4421 priv->r_tid_tail = next;
4422 if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
4423 qp->s_acked_ack_queue = 0;
4424 }
4425
4426 hfi1_tid_write_alloc_resources(qp, true);
4427
4428
4429
4430
4431
4432 if (req->cur_seg < req->total_segs ||
4433 qp->s_tail_ack_queue != qp->r_head_ack_queue) {
4434 qp->s_flags |= RVT_S_RESP_PENDING;
4435 hfi1_schedule_send(qp);
4436 }
4437
4438 priv->pending_tid_w_segs--;
4439 if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
4440 if (priv->pending_tid_w_segs)
4441 hfi1_mod_tid_reap_timer(req->qp);
4442 else
4443 hfi1_stop_tid_reap_timer(req->qp);
4444 }
4445
4446done:
4447 priv->s_flags |= RVT_S_ACK_PENDING;
4448 hfi1_schedule_tid_send(qp);
4449exit:
4450 priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
4451 if (fecn)
4452 qp->s_flags |= RVT_S_ECN;
4453 spin_unlock_irqrestore(&qp->s_lock, flags);
4454 return;
4455
4456send_nak:
4457 if (!priv->s_nak_state) {
4458 priv->s_nak_state = IB_NAK_PSN_ERROR;
4459 priv->s_nak_psn = flow->flow_state.r_next_psn;
4460 priv->s_flags |= RVT_S_ACK_PENDING;
4461 if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
4462 priv->r_tid_ack = priv->r_tid_tail;
4463 hfi1_schedule_tid_send(qp);
4464 }
4465 goto done;
4466}
4467
4468static bool hfi1_tid_rdma_is_resync_psn(u32 psn)
4469{
4470 return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
4471 HFI1_KDETH_BTH_SEQ_MASK);
4472}
4473
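/*
 * Build a TID RDMA ACK packet for flow @iflow of ack queue entry @e.  The
 * BTH2 PSN is chosen from, in priority order: the pending RESYNC response,
 * the NAK PSN, or the last PSN of the flow being acknowledged; for a
 * RESYNC the previously saved KDETH PSN is echoed back in tid_flow_psn.
 */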
4474u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
4475 struct ib_other_headers *ohdr, u16 iflow,
4476 u32 *bth1, u32 *bth2)
4477{
4478 struct hfi1_qp_priv *qpriv = qp->priv;
4479 struct tid_flow_state *fs = &qpriv->flow_state;
4480 struct tid_rdma_request *req = ack_to_tid_req(e);
4481 struct tid_rdma_flow *flow = &req->flows[iflow];
4482 struct tid_rdma_params *remote;
4483
4484 rcu_read_lock();
4485 remote = rcu_dereference(qpriv->tid_rdma.remote);
4486 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4487 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4488 *bth1 = remote->qp;
4489 rcu_read_unlock();
4490
4491 if (qpriv->resync) {
4492 *bth2 = mask_psn((fs->generation <<
4493 HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4494 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4495 } else if (qpriv->s_nak_state) {
4496 *bth2 = mask_psn(qpriv->s_nak_psn);
4497 ohdr->u.tid_rdma.ack.aeth =
4498 cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
4499 (qpriv->s_nak_state <<
4500 IB_AETH_CREDIT_SHIFT));
4501 } else {
4502 *bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
4503 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4504 }
4505 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4506 ohdr->u.tid_rdma.ack.tid_flow_qp =
4507 cpu_to_be32(qpriv->tid_rdma.local.qp |
4508 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
4509 TID_RDMA_DESTQP_FLOW_SHIFT) |
4510 qpriv->rcd->ctxt);
4511
4512 ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
4513 ohdr->u.tid_rdma.ack.verbs_psn =
4514 cpu_to_be32(flow->flow_state.resp_ib_psn);
4515
4516 if (qpriv->resync) {
4517
4518
4519
4520
4521
4522
4523 if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
4524 ohdr->u.tid_rdma.ack.tid_flow_psn =
4525 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4526 } else {
4527
4528
4529
4530
4531
4532
4533 qpriv->r_next_psn_kdeth_save =
4534 qpriv->r_next_psn_kdeth - 1;
4535 ohdr->u.tid_rdma.ack.tid_flow_psn =
4536 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4537 qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
4538 }
4539 qpriv->resync = false;
4540 }
4541
4542 return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
4543}
4544
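/*
 * Requester side handling of a TID RDMA ACK: advance ack_seg/acked_tail
 * over every flow fully covered by the acknowledged KDETH PSN, completing
 * WQEs as whole requests finish.  A NAK (PSN error) rewinds cur_seg to
 * ack_seg and reschedules the TID send engine; an ACK that answers a
 * RESYNC regenerates the flow state (generation/spsn/lpsn) of all
 * unacknowledged flows before resuming.
 */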
4545void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
4546{
4547 struct ib_other_headers *ohdr = packet->ohdr;
4548 struct rvt_qp *qp = packet->qp;
4549 struct hfi1_qp_priv *qpriv = qp->priv;
4550 struct rvt_swqe *wqe;
4551 struct tid_rdma_request *req;
4552 struct tid_rdma_flow *flow;
	u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn;
4554 unsigned long flags;
4555 u16 fidx;
4556
4557 trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
4558 process_ecn(qp, packet);
4559 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4560 aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
4561 req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
4562 resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
4563
4564 spin_lock_irqsave(&qp->s_lock, flags);
4565 trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
4566
4567
4568 if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
4569 cmp_psn(psn, qpriv->s_resync_psn))
4570 goto ack_op_err;
4571
4572 ack_psn = req_psn;
4573 if (hfi1_tid_rdma_is_resync_psn(psn))
4574 ack_kpsn = resync_psn;
4575 else
4576 ack_kpsn = psn;
4577 if (aeth >> 29) {
4578 ack_psn--;
4579 ack_kpsn--;
4580 }
4581
4582 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4583
4584 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4585 goto ack_op_err;
4586
4587 req = wqe_to_tid_req(wqe);
4588 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4589 wqe->lpsn, req);
4590 flow = &req->flows[req->acked_tail];
4591 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4592
4593
4594 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0)
4595 goto ack_op_err;
4596
4597 while (cmp_psn(ack_kpsn,
4598 full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
4599 req->ack_seg < req->cur_seg) {
4600 req->ack_seg++;
4601
4602 req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
4603 req->r_last_acked = flow->flow_state.resp_ib_psn;
4604 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4605 wqe->lpsn, req);
4606 if (req->ack_seg == req->total_segs) {
4607 req->state = TID_REQUEST_COMPLETE;
4608 wqe = do_rc_completion(qp, wqe,
4609 to_iport(qp->ibqp.device,
4610 qp->port_num));
4611 trace_hfi1_sender_rcv_tid_ack(qp);
4612 atomic_dec(&qpriv->n_tid_requests);
4613 if (qp->s_acked == qp->s_tail)
4614 break;
4615 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4616 break;
4617 req = wqe_to_tid_req(wqe);
4618 }
4619 flow = &req->flows[req->acked_tail];
4620 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4621 }
4622
4623 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4624 wqe->lpsn, req);
4625 switch (aeth >> 29) {
4626 case 0:
4627 if (qpriv->s_flags & RVT_S_WAIT_ACK)
4628 qpriv->s_flags &= ~RVT_S_WAIT_ACK;
4629 if (!hfi1_tid_rdma_is_resync_psn(psn)) {
4630
4631 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
4632 req->ack_seg < req->cur_seg)
4633 hfi1_mod_tid_retry_timer(qp);
4634 else
4635 hfi1_stop_tid_retry_timer(qp);
4636 hfi1_schedule_send(qp);
4637 } else {
4638 u32 spsn, fpsn, last_acked, generation;
4639 struct tid_rdma_request *rptr;
4640
4641
4642 hfi1_stop_tid_retry_timer(qp);
4643
4644 qp->s_flags &= ~HFI1_S_WAIT_HALT;
4645
4646
4647
4648
4649
4650
4651 qpriv->s_flags &= ~RVT_S_SEND_ONE;
4652 hfi1_schedule_send(qp);
4653
4654 if ((qp->s_acked == qpriv->s_tid_tail &&
4655 req->ack_seg == req->total_segs) ||
4656 qp->s_acked == qp->s_tail) {
4657 qpriv->s_state = TID_OP(WRITE_DATA_LAST);
4658 goto done;
4659 }
4660
4661 if (req->ack_seg == req->comp_seg) {
4662 qpriv->s_state = TID_OP(WRITE_DATA);
4663 goto done;
4664 }
4665
4666
4667
4668
4669
4670 psn = mask_psn(psn + 1);
4671 generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4672 spsn = 0;
4673
4674
4675
4676
4677
4678 if (delta_psn(ack_psn, wqe->lpsn))
4679 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4680 req = wqe_to_tid_req(wqe);
4681 flow = &req->flows[req->acked_tail];
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691 fpsn = full_flow_psn(flow, flow->flow_state.spsn);
4692 req->r_ack_psn = psn;
4693 flow->resync_npkts +=
4694 delta_psn(mask_psn(resync_psn + 1), fpsn);
4695
4696
4697
4698
4699 last_acked = qp->s_acked;
4700 rptr = req;
4701 while (1) {
4702
4703 for (fidx = rptr->acked_tail;
4704 CIRC_CNT(rptr->setup_head, fidx,
4705 MAX_FLOWS);
4706 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
4707 u32 lpsn;
4708 u32 gen;
4709
4710 flow = &rptr->flows[fidx];
4711 gen = flow->flow_state.generation;
4712 if (WARN_ON(gen == generation &&
4713 flow->flow_state.spsn !=
4714 spsn))
4715 continue;
4716 lpsn = flow->flow_state.lpsn;
4717 lpsn = full_flow_psn(flow, lpsn);
4718 flow->npkts =
4719 delta_psn(lpsn,
4720 mask_psn(resync_psn)
4721 );
4722 flow->flow_state.generation =
4723 generation;
4724 flow->flow_state.spsn = spsn;
4725 flow->flow_state.lpsn =
4726 flow->flow_state.spsn +
4727 flow->npkts - 1;
4728 flow->pkt = 0;
4729 spsn += flow->npkts;
4730 resync_psn += flow->npkts;
4731 trace_hfi1_tid_flow_rcv_tid_ack(qp,
4732 fidx,
4733 flow);
4734 }
4735 if (++last_acked == qpriv->s_tid_cur + 1)
4736 break;
4737 if (last_acked == qp->s_size)
4738 last_acked = 0;
4739 wqe = rvt_get_swqe_ptr(qp, last_acked);
4740 rptr = wqe_to_tid_req(wqe);
4741 }
4742 req->cur_seg = req->ack_seg;
4743 qpriv->s_tid_tail = qp->s_acked;
4744 qpriv->s_state = TID_OP(WRITE_REQ);
4745 hfi1_schedule_tid_send(qp);
4746 }
4747done:
4748 qpriv->s_retry = qp->s_retry_cnt;
4749 break;
4750
4751 case 3: /* NAK */
4752 hfi1_stop_tid_retry_timer(qp);
4753 switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
4754 IB_AETH_CREDIT_MASK) {
4755 case 0: /* PSN sequence error */
4756 flow = &req->flows[req->acked_tail];
4757 fspsn = full_flow_psn(flow, flow->flow_state.spsn);
4758 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
4759 flow);
4760 req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4761 req->cur_seg = req->ack_seg;
4762 qpriv->s_tid_tail = qp->s_acked;
4763 qpriv->s_state = TID_OP(WRITE_REQ);
4764 qpriv->s_retry = qp->s_retry_cnt;
4765 hfi1_schedule_tid_send(qp);
4766 break;
4767
4768 default:
4769 break;
4770 }
4771 break;
4772
4773 default:
4774 break;
4775 }
4776
4777ack_op_err:
4778 spin_unlock_irqrestore(&qp->s_lock, flags);
4779}
4780
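/*
 * TID RDMA retry timer helpers: hfi1_add_tid_retry_timer() and
 * hfi1_mod_tid_retry_timer() arm or re-arm the timer while the requester
 * is waiting for a TID RDMA ACK; hfi1_stop_tid_retry_timer() and
 * hfi1_del_tid_retry_timer() cancel it; hfi1_tid_retry_timeout() handles
 * expiration.
 */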
4781void hfi1_add_tid_retry_timer(struct rvt_qp *qp)
4782{
4783 struct hfi1_qp_priv *priv = qp->priv;
4784 struct ib_qp *ibqp = &qp->ibqp;
4785 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4786
4787 lockdep_assert_held(&qp->s_lock);
4788 if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
4789 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4790 priv->s_tid_retry_timer.expires = jiffies +
4791 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
4792 add_timer(&priv->s_tid_retry_timer);
4793 }
4794}
4795
4796static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
4797{
4798 struct hfi1_qp_priv *priv = qp->priv;
4799 struct ib_qp *ibqp = &qp->ibqp;
4800 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4801
4802 lockdep_assert_held(&qp->s_lock);
4803 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4804 mod_timer(&priv->s_tid_retry_timer, jiffies +
4805 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
4806}
4807
4808static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
4809{
4810 struct hfi1_qp_priv *priv = qp->priv;
4811 int rval = 0;
4812
4813 lockdep_assert_held(&qp->s_lock);
4814 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4815 rval = del_timer(&priv->s_tid_retry_timer);
4816 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4817 }
4818 return rval;
4819}
4820
4821void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
4822{
4823 struct hfi1_qp_priv *priv = qp->priv;
4824
4825 del_timer_sync(&priv->s_tid_retry_timer);
4826 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4827}
4828
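/*
 * Retry timer expiration: if retries remain, switch the TID send engine
 * to RESYNC, restrict it to a single packet via RVT_S_SEND_ONE, and halt
 * new requests with HFI1_S_WAIT_HALT until the RESYNC is acknowledged.
 * Once the retry count is exhausted, complete the WQE with
 * IB_WC_RETRY_EXC_ERR and move the QP to the error state.
 */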
4829static void hfi1_tid_retry_timeout(struct timer_list *t)
4830{
4831 struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
4832 struct rvt_qp *qp = priv->owner;
4833 struct rvt_swqe *wqe;
4834 unsigned long flags;
4835 struct tid_rdma_request *req;
4836
4837 spin_lock_irqsave(&qp->r_lock, flags);
4838 spin_lock(&qp->s_lock);
4839 trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
4840 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4841 hfi1_stop_tid_retry_timer(qp);
4842 if (!priv->s_retry) {
4843 trace_hfi1_msg_tid_retry_timeout(
4844 qp,
4845 "Exhausted retries. Tid retry timeout = ",
4846 (u64)priv->tid_retry_timeout_jiffies);
4847
4848 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4849 hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
4850 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
4851 } else {
4852 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4853 req = wqe_to_tid_req(wqe);
4854 trace_hfi1_tid_req_tid_retry_timeout(
4855 qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);
4856
4857 priv->s_flags &= ~RVT_S_WAIT_ACK;
4858 /* Only send one packet (the RESYNC) */
4859 priv->s_flags |= RVT_S_SEND_ONE;
4860 /*
4861  * No additional request shall be made by this QP until the
4862  * RESYNC has completed.
4863  */
4864 qp->s_flags |= HFI1_S_WAIT_HALT;
4865 priv->s_state = TID_OP(RESYNC);
4866 priv->s_retry--;
4867 hfi1_schedule_tid_send(qp);
4868 }
4869 }
4870 spin_unlock(&qp->s_lock);
4871 spin_unlock_irqrestore(&qp->r_lock, flags);
4872}
4873
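/*
 * Build a TID RDMA RESYNC packet. The advertised PSN encodes the next
 * flow generation in its upper bits with the KDETH sequence bits all
 * set, i.e.
 *
 *	*bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
 *
 * which is what hfi1_tid_rdma_is_resync_psn() tests for on the ACK path.
 */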
4874u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
4875 struct ib_other_headers *ohdr, u32 *bth1,
4876 u32 *bth2, u16 fidx)
4877{
4878 struct hfi1_qp_priv *qpriv = qp->priv;
4879 struct tid_rdma_params *remote;
4880 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4881 struct tid_rdma_flow *flow = &req->flows[fidx];
4882 u32 generation;
4883
4884 rcu_read_lock();
4885 remote = rcu_dereference(qpriv->tid_rdma.remote);
4886 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4887 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4888 *bth1 = remote->qp;
4889 rcu_read_unlock();
4890
4891 generation = kern_flow_generation_next(flow->flow_state.generation);
4892 *bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4893 qpriv->s_resync_psn = *bth2;
4894 *bth2 |= IB_BTH_REQ_ACK;
4895 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4896
4897 return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
4898}
4899
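/*
 * Receive a TID RDMA RESYNC packet (responder side): validate the
 * advertised generation, reprogram (or record) the hardware flow
 * generation, renumber the PSN ranges of all flows still outstanding in
 * the ack queue, and schedule a TID RDMA ACK in response.
 */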
4900void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
4901{
4902 struct ib_other_headers *ohdr = packet->ohdr;
4903 struct rvt_qp *qp = packet->qp;
4904 struct hfi1_qp_priv *qpriv = qp->priv;
4905 struct hfi1_ctxtdata *rcd = qpriv->rcd;
4906 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4907 struct rvt_ack_entry *e;
4908 struct tid_rdma_request *req;
4909 struct tid_rdma_flow *flow;
4910 struct tid_flow_state *fs = &qpriv->flow_state;
4911 u32 psn, generation, idx, gen_next;
4912 bool fecn;
4913 unsigned long flags;
4914
4915 fecn = process_ecn(qp, packet);
4916 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4917
4918 generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
4919 spin_lock_irqsave(&qp->s_lock, flags);
4920
4921 gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
4922 generation : kern_flow_generation_next(fs->generation);
4923 /*
4924  * RESYNC packets advertise the "next" generation, so they can only
4925  * come from the current or the previous generation.
4926  */
4927 if (generation != mask_generation(gen_next - 1) &&
4928 generation != gen_next)
4929 goto bail;
4930
4931 if (qpriv->resync)
4932 goto bail;
4933
4934 spin_lock(&rcd->exp_lock);
4935 if (fs->index >= RXE_NUM_TID_FLOWS) {
4936 /*
4937  * If we don't have a flow, save the generation so it can be
4938  * applied when a new flow is allocated.
4939  */
4940 fs->generation = generation;
4941 } else {
4942 /* Reprogram the QP flow with the new generation */
4943 rcd->flows[fs->index].generation = generation;
4944 fs->generation = kern_setup_hw_flow(rcd, fs->index);
4945 }
4946 fs->psn = 0;
4947 /*
4948  * Disable SW PSN checking since a RESYNC is equivalent to a
4949  * sync point and the flow has/will be reprogrammed.
4950  */
4951 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
4952 trace_hfi1_tid_write_rsp_rcv_resync(qp);
4953
4954 /*
4955  * Reset all TID flow information for the rest of the requests
4956  * in the ack queue (i.e. req->clear_tail to req->setup_head).
4957  * PSNs shall start from the next generation.
4958  */
4959 for (idx = qpriv->r_tid_tail; ; idx++) {
4960 u16 flow_idx;
4961
4962 if (idx > rvt_size_atomic(&dev->rdi))
4963 idx = 0;
4964 e = &qp->s_ack_queue[idx];
4965 if (e->opcode == TID_OP(WRITE_REQ)) {
4966 req = ack_to_tid_req(e);
4967 trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
4968 e->lpsn, req);
4969
4970 /* start from the last unacked segment */
4971 for (flow_idx = req->clear_tail;
4972 CIRC_CNT(req->setup_head, flow_idx,
4973 MAX_FLOWS);
4974 flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
4975 u32 lpsn;
4976 u32 next;
4977
4978 flow = &req->flows[flow_idx];
4979 lpsn = full_flow_psn(flow,
4980 flow->flow_state.lpsn);
4981 next = flow->flow_state.r_next_psn;
4982 flow->npkts = delta_psn(lpsn, next - 1);
4983 flow->flow_state.generation = fs->generation;
4984 flow->flow_state.spsn = fs->psn;
4985 flow->flow_state.lpsn =
4986 flow->flow_state.spsn + flow->npkts - 1;
4987 flow->flow_state.r_next_psn =
4988 full_flow_psn(flow,
4989 flow->flow_state.spsn);
4990 fs->psn += flow->npkts;
4991 trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
4992 flow);
4993 }
4994 }
4995 if (idx == qp->s_tail_ack_queue)
4996 break;
4997 }
4998
4999 spin_unlock(&rcd->exp_lock);
5000 qpriv->resync = true;
5001 /* A RESYNC request always gets a TID RDMA ACK. */
5002 qpriv->s_nak_state = 0;
5003 qpriv->s_flags |= RVT_S_ACK_PENDING;
5004 hfi1_schedule_tid_send(qp);
5005bail:
5006 if (fecn)
5007 qp->s_flags |= RVT_S_ECN;
5008 spin_unlock_irqrestore(&qp->s_lock, flags);
5009}
5010
5011/*
5012 * Call this function when the last TID RDMA WRITE DATA packet for a
5013 * request is built.
5014 */
5015static void update_tid_tail(struct rvt_qp *qp)
5016 __must_hold(&qp->s_lock)
5017{
5018 struct hfi1_qp_priv *priv = qp->priv;
5019 u32 i;
5020 struct rvt_swqe *wqe;
5021
5022 lockdep_assert_held(&qp->s_lock);
5023
5024 if (priv->s_tid_tail == priv->s_tid_cur)
5025 return;
5026 for (i = priv->s_tid_tail + 1; ; i++) {
5027 if (i == qp->s_size)
5028 i = 0;
5029
5030 if (i == priv->s_tid_cur)
5031 break;
5032 wqe = rvt_get_swqe_ptr(qp, i);
5033 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
5034 break;
5035 }
5036 priv->s_tid_tail = i;
5037 priv->s_state = TID_OP(WRITE_RESP);
5038}
5039
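/*
 * Build one packet for the TID RDMA send engine (the "second leg"):
 * pending TID RDMA ACKs are sent first, then WRITE DATA / WRITE DATA LAST
 * for the segment at s_tid_tail, and RESYNC when the retry timeout has
 * forced a resynchronization. Returns 1 if a packet was built, 0
 * otherwise.
 */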
5040int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
5041 __must_hold(&qp->s_lock)
5042{
5043 struct hfi1_qp_priv *priv = qp->priv;
5044 struct rvt_swqe *wqe;
5045 u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
5046 struct ib_other_headers *ohdr;
5047 struct rvt_sge_state *ss = &qp->s_sge;
5048 struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
5049 struct tid_rdma_request *req = ack_to_tid_req(e);
5050 bool last = false;
5051 u8 opcode = TID_OP(WRITE_DATA);
5052
5053 lockdep_assert_held(&qp->s_lock);
5054 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5055 /*
5056  * Prioritize the sending of new requests and responses over the
5057  * sending of TID RDMA data packets.
5058  */
5059 if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
5060 atomic_read(&priv->n_requests) &&
5061 !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
5062 HFI1_S_ANY_WAIT_IO))) ||
5063 (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
5064 !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
5065 struct iowait_work *iowork;
5066
5067 iowork = iowait_get_ib_work(&priv->s_iowait);
5068 ps->s_txreq = get_waiting_verbs_txreq(iowork);
5069 if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
5070 priv->s_flags |= HFI1_S_TID_BUSY_SET;
5071 return 1;
5072 }
5073 }
5074
5075 ps->s_txreq = get_txreq(ps->dev, qp);
5076 if (!ps->s_txreq)
5077 goto bail_no_tx;
5078
5079 ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
5080
5081 if ((priv->s_flags & RVT_S_ACK_PENDING) &&
5082 make_tid_rdma_ack(qp, ohdr, ps))
5083 return 1;
5084
5085
5086 /*
5087  * Bail out if we cannot send data. This must be checked after the call
5088  * to make_tid_rdma_ack() because the responding QP could be in RTR
5089  * state, where it can send a TID RDMA ACK but not TID RDMA WRITE DATA.
5090  */
5091 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
5092 goto bail;
5093
5094 if (priv->s_flags & RVT_S_WAIT_ACK)
5095 goto bail;
5096
5097 /* Check whether there is anything to do. */
5098 if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
5099 goto bail;
5100 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5101 req = wqe_to_tid_req(wqe);
5102 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
5103 wqe->lpsn, req);
5104 switch (priv->s_state) {
5105 case TID_OP(WRITE_REQ):
5106 case TID_OP(WRITE_RESP):
5107 priv->tid_ss.sge = wqe->sg_list[0];
5108 priv->tid_ss.sg_list = wqe->sg_list + 1;
5109 priv->tid_ss.num_sge = wqe->wr.num_sge;
5110 priv->tid_ss.total_len = wqe->length;
5111
5112 if (priv->s_state == TID_OP(WRITE_REQ))
5113 hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
5114 priv->s_state = TID_OP(WRITE_DATA);
5115 /* fall through */
5116
5117 case TID_OP(WRITE_DATA):
5118 /*
5119  * 1. Check whether a TID RDMA WRITE RESP is available.
5120  * 2. If not:
5121  *    2.1 If there are more segments and no TID RDMA WRITE RESP,
5122  *        set HFI1_S_WAIT_TID_RESP.
5123  *    2.2 Return indicating no progress was made.
5124  * 3. If yes:
5125  *    3.1 Build a TID RDMA WRITE DATA packet.
5126  *    3.2 If it is the last packet in the segment:
5127  *        3.2.1 Change the KDETH header bits.
5128  *        3.2.2 Advance the RESP pointers.
5129  *    3.3 Return indicating progress was made.
5130  */
5131 trace_hfi1_sender_make_tid_pkt(qp);
5132 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5133 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5134 req = wqe_to_tid_req(wqe);
5135 len = wqe->length;
5136
5137 if (!req->comp_seg || req->cur_seg == req->comp_seg)
5138 goto bail;
5139
5140 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
5141 wqe->psn, wqe->lpsn, req);
5142 last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
5143 &len);
5144
5145 if (last) {
5146 /* move pointer to next flow */
5147 req->clear_tail = CIRC_NEXT(req->clear_tail,
5148 MAX_FLOWS);
5149 if (++req->cur_seg < req->total_segs) {
5150 if (!CIRC_CNT(req->setup_head, req->clear_tail,
5151 MAX_FLOWS))
5152 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
5153 } else {
5154 priv->s_state = TID_OP(WRITE_DATA_LAST);
5155 opcode = TID_OP(WRITE_DATA_LAST);
5156
5157 /* Advance the s_tid_tail now */
5158 update_tid_tail(qp);
5159 }
5160 }
5161 hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
5162 ss = &priv->tid_ss;
5163 break;
5164
5165 case TID_OP(RESYNC):
5166 trace_hfi1_sender_make_tid_pkt(qp);
5167 /* Use generation from the most recently received response */
5168 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
5169 req = wqe_to_tid_req(wqe);
5170 /* If no responses for this WQE look at the previous one */
5171 if (!req->comp_seg) {
5172 wqe = rvt_get_swqe_ptr(qp,
5173 (!priv->s_tid_cur ? qp->s_size :
5174 priv->s_tid_cur) - 1);
5175 req = wqe_to_tid_req(wqe);
5176 }
5177 hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
5178 &bth2,
5179 CIRC_PREV(req->setup_head,
5180 MAX_FLOWS));
5181 ss = NULL;
5182 len = 0;
5183 opcode = TID_OP(RESYNC);
5184 break;
5185
5186 default:
5187 goto bail;
5188 }
5189 if (priv->s_flags & RVT_S_SEND_ONE) {
5190 priv->s_flags &= ~RVT_S_SEND_ONE;
5191 priv->s_flags |= RVT_S_WAIT_ACK;
5192 bth2 |= IB_BTH_REQ_ACK;
5193 }
5194 qp->s_len -= len;
5195 ps->s_txreq->hdr_dwords = hwords;
5196 ps->s_txreq->sde = priv->s_sde;
5197 ps->s_txreq->ss = ss;
5198 ps->s_txreq->s_cur_size = len;
5199 hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
5200 middle, ps);
5201 return 1;
5202bail:
5203 hfi1_put_txreq(ps->s_txreq);
5204bail_no_tx:
5205 ps->s_txreq = NULL;
5206 priv->s_flags &= ~RVT_S_BUSY;
5207 /*
5208  * If we didn't get a txreq, the QP will be woken up later to
5209  * try again. Set the flags to indicate which work item to
5210  * wake up.
5211  * (A better algorithm should be found to do this and
5212  * generalize the sleep/wakeup flags.)
5213  */
5214 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5215 return 0;
5216}
5217
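/*
 * Build a TID RDMA ACK for the ack queue entry at r_tid_ack. ACKs are
 * coalesced: every flow between acked_tail and clear_tail is acknowledged
 * at once, and fully acked requests advance r_tid_ack to the next
 * WRITE_REQ entry. Returns 1 if an ACK was built, 0 otherwise.
 */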
5218static int make_tid_rdma_ack(struct rvt_qp *qp,
5219 struct ib_other_headers *ohdr,
5220 struct hfi1_pkt_state *ps)
5221{
5222 struct rvt_ack_entry *e;
5223 struct hfi1_qp_priv *qpriv = qp->priv;
5224 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5225 u32 hwords, next;
5226 u32 len = 0;
5227 u32 bth1 = 0, bth2 = 0;
5228 int middle = 0;
5229 u16 flow;
5230 struct tid_rdma_request *req, *nreq;
5231
5232 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
5233
5234 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
5235 goto bail;
5236
5237 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
5238 hwords = 5;
5239
5240 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5241 req = ack_to_tid_req(e);
5242
5243 /*
5244  * In the RESYNC case, we are exactly one segment past the
5245  * previously sent ack or at the previously sent NAK. So to send
5246  * the RESYNC ack, go back one segment (which might be part of
5247  * the previous request) and let the do-while loop below execute
5248  * again. The advantage of re-running the loop is that any data
5249  * received after the previous ack is automatically acked in the
5250  * RESYNC ack. For that, only the previous ack/nak state needs to
5251  * be pulled back; if the previous request was fully acked (or
5252  * nothing was acked yet), r_tid_ack also steps back one entry.
5253  */
5254 if (qpriv->resync) {
5255 if (!req->ack_seg || req->ack_seg == req->total_segs)
5256 qpriv->r_tid_ack = !qpriv->r_tid_ack ?
5257 rvt_size_atomic(&dev->rdi) :
5258 qpriv->r_tid_ack - 1;
5259 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5260 req = ack_to_tid_req(e);
5261 }
5262
5263 trace_hfi1_rsp_make_tid_ack(qp, e->psn);
5264 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5265 req);
5266 /*
5267  * If we've sent all the ACKs that we can, we are done until we get
5268  * more segments to ACK.
5269  */
5270 if (!qpriv->s_nak_state && !qpriv->resync &&
5271 req->ack_seg == req->comp_seg)
5272 goto bail;
5273
5274 do {
5275 /*
5276  * To deal with coalesced ACKs, the acked_tail pointer
5277  * into the flow array is used. The distance between it
5278  * and the clear_tail is the number of flows that are
5279  * being ACK'ed.
5280  */
5281 req->ack_seg +=
5282 /* Get up-to-date value */
5283 CIRC_CNT(req->clear_tail, req->acked_tail,
5284 MAX_FLOWS);
5285 /* Advance acked index */
5286 req->acked_tail = req->clear_tail;
5287
5288 /*
5289  * req->clear_tail points to the segment currently being
5290  * received. So, when sending an ACK, the previous
5291  * segment is being ACK'ed.
5292  */
5293 flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
5294 if (req->ack_seg != req->total_segs)
5295 break;
5296 req->state = TID_REQUEST_COMPLETE;
5297
5298 next = qpriv->r_tid_ack + 1;
5299 if (next > rvt_size_atomic(&dev->rdi))
5300 next = 0;
5301 qpriv->r_tid_ack = next;
5302 if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
5303 break;
5304 nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
5305 if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
5306 break;
5307
5308 /* Move to the next ack entry now */
5309 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5310 req = ack_to_tid_req(e);
5311 } while (1);
5312
5313 /*
5314  * At this point qpriv->r_tid_ack == qpriv->r_tid_tail, but e and
5315  * req could be pointing at the previous ack queue entry.
5316  */
5317 if (qpriv->s_nak_state ||
5318 (qpriv->resync &&
5319 !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
5320 (cmp_psn(qpriv->r_next_psn_kdeth - 1,
5321 full_flow_psn(&req->flows[flow],
5322 req->flows[flow].flow_state.lpsn)) > 0))) {
5323 /*
5324  * When sending a NAK, or when a RESYNC ack must report a
5325  * r_next_psn_kdeth that lies beyond the last acked flow of the
5326  * current entry, point e/req/flow back at the current ack queue
5327  * entry and its first unacked flow so that the packet carries
5328  * the PSN from which the requester should resume.
5329  */
5330 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5331 req = ack_to_tid_req(e);
5332 flow = req->acked_tail;
5333 } else if (req->ack_seg == req->total_segs &&
5334 qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
5335 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
5336
5337 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
5338 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5339 req);
5340 hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
5341 &bth2);
5342 len = 0;
5343 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5344 ps->s_txreq->hdr_dwords = hwords;
5345 ps->s_txreq->sde = qpriv->s_sde;
5346 ps->s_txreq->s_cur_size = len;
5347 ps->s_txreq->ss = NULL;
5348 hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
5349 ps);
5350 ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
5351 return 1;
5352bail:
5353 /*
5354  * Ensure prior state changes are committed before resetting
5355  * RVT_S_ACK_PENDING.
5356  */
5357 smp_wmb();
5358 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5359 return 0;
5360}
5361
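/*
 * The TID RDMA send engine may make progress only when this leg is not
 * already busy and the QP is not waiting for I/O resources, and there is
 * either a queued TID txreq, a pending response, or nothing blocking TID
 * sends.
 */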
5362static int hfi1_send_tid_ok(struct rvt_qp *qp)
5363{
5364 struct hfi1_qp_priv *priv = qp->priv;
5365
5366 return !(priv->s_flags & RVT_S_BUSY ||
5367 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
5368 (verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
5369 (priv->s_flags & RVT_S_RESP_PENDING) ||
5370 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
5371}
5372
5373void _hfi1_do_tid_send(struct work_struct *work)
5374{
5375 struct iowait_work *w = container_of(work, struct iowait_work, iowork);
5376 struct rvt_qp *qp = iowait_to_qp(w->iow);
5377
5378 hfi1_do_tid_send(qp);
5379}
5380
5381static void hfi1_do_tid_send(struct rvt_qp *qp)
5382{
5383 struct hfi1_pkt_state ps;
5384 struct hfi1_qp_priv *priv = qp->priv;
5385
5386 ps.dev = to_idev(qp->ibqp.device);
5387 ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
5388 ps.ppd = ppd_from_ibp(ps.ibp);
5389 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5390 ps.in_thread = false;
5391 ps.timeout_int = qp->timeout_jiffies / 8;
5392
5393 trace_hfi1_rc_do_tid_send(qp, false);
5394 spin_lock_irqsave(&qp->s_lock, ps.flags);
5395
5396 /* Return if we are already busy processing a work request. */
5397 if (!hfi1_send_tid_ok(qp)) {
5398 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5399 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5400 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5401 return;
5402 }
5403
5404 priv->s_flags |= RVT_S_BUSY;
5405
5406 ps.timeout = jiffies + ps.timeout_int;
5407 ps.cpu = priv->s_sde ? priv->s_sde->cpu :
5408 cpumask_first(cpumask_of_node(ps.ppd->dd->node));
5409 ps.pkts_sent = false;
5410
5411 /* Ensure a pre-built packet is handled first */
5412 ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
5413 do {
5414 /* Check for a constructed packet to be sent. */
5415 if (ps.s_txreq) {
5416 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5417 qp->s_flags |= RVT_S_BUSY;
5418 ps.wait = iowait_get_ib_work(&priv->s_iowait);
5419 }
5420 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5421
5422 /*
5423  * If the packet cannot be sent now, return and the send
5424  * engine will be woken up later.
5425  */
5426 if (hfi1_verbs_send(qp, &ps))
5427 return;
5428
5429 /* allow other tasks to run */
5430 if (hfi1_schedule_send_yield(qp, &ps, true))
5431 return;
5432
5433 spin_lock_irqsave(&qp->s_lock, ps.flags);
5434 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5435 qp->s_flags &= ~RVT_S_BUSY;
5436 priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
5437 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5438 if (iowait_flag_set(&priv->s_iowait,
5439 IOWAIT_PENDING_IB))
5440 hfi1_schedule_send(qp);
5441 }
5442 }
5443 } while (hfi1_make_tid_rdma_pkt(qp, &ps));
5444 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
5445 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5446}
5447
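/*
 * Queue the TID RDMA work item, preferring the CPU of the QP's SDMA
 * engine (or the first CPU of the device's NUMA node when no SDMA engine
 * has been selected).
 */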
5448static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
5449{
5450 struct hfi1_qp_priv *priv = qp->priv;
5451 struct hfi1_ibport *ibp =
5452 to_iport(qp->ibqp.device, qp->port_num);
5453 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
5454 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
5455
5456 return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
5457 priv->s_sde ?
5458 priv->s_sde->cpu :
5459 cpumask_first(cpumask_of_node(dd->node)));
5460}
5461
5462/**
5463 * hfi1_schedule_tid_send - schedule progress on the TID RDMA state machine
5464 * @qp: the QP
5465 *
5466 * This schedules QP progress on the TID RDMA ("second leg") state machine.
5467 * Unlike hfi1_schedule_send(), it cannot use hfi1_send_ok() because the two
5468 * state machines can step on each other with respect to the RVT_S_BUSY
5469 * flag; the modified test in hfi1_send_tid_ok() is used instead.
5470 * The caller must hold the s_lock.
5471 *
5472 * Return: true if the second leg is scheduled;
5473 * false if the second leg is not scheduled.
5474 */
5475bool hfi1_schedule_tid_send(struct rvt_qp *qp)
5476{
5477 lockdep_assert_held(&qp->s_lock);
5478 if (hfi1_send_tid_ok(qp)) {
5479 /*
5480  * The following call returns true if the qp is not on the
5481  * queue and false if the qp is already on the queue before
5482  * this call. Either way, the qp will be on the queue when the
5483  * call returns.
5484  */
5485 _hfi1_schedule_tid_send(qp);
5486 return true;
5487 }
5488 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5489 iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
5490 IOWAIT_PENDING_TID);
5491 return false;
5492}
5493
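/*
 * Interlock between RDMA READ and TID RDMA WRITE on the responder: if a
 * (TID) RDMA READ request is queued right behind a TID RDMA WRITE request
 * that has not been fully acked, hold off the READ by setting
 * HFI1_R_TID_WAIT_INTERLCK and report true to the caller.
 */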
5494bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
5495{
5496 struct rvt_ack_entry *prev;
5497 struct tid_rdma_request *req;
5498 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5499 struct hfi1_qp_priv *priv = qp->priv;
5500 u32 s_prev;
5501
5502 s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
5503 (qp->s_tail_ack_queue - 1);
5504 prev = &qp->s_ack_queue[s_prev];
5505
5506 if ((e->opcode == TID_OP(READ_REQ) ||
5507 e->opcode == OP(RDMA_READ_REQUEST)) &&
5508 prev->opcode == TID_OP(WRITE_REQ)) {
5509 req = ack_to_tid_req(prev);
5510 if (req->ack_seg != req->total_segs) {
5511 priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
5512 return true;
5513 }
5514 }
5515 return false;
5516}
5517
5518static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
5519{
5520 u64 reg;
5521
5522 /*
5523  * The hardware flow state holds the only reliable record of how
5524  * far the flow has progressed, so read r_next_psn from the flow
5525  * table CSR.
5526  */
5526 reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
5527 return mask_psn(reg);
5528}
5529
5530static void tid_rdma_rcv_err(struct hfi1_packet *packet,
5531 struct ib_other_headers *ohdr,
5532 struct rvt_qp *qp, u32 psn, int diff, bool fecn)
5533{
5534 unsigned long flags;
5535
5536 tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
5537 if (fecn) {
5538 spin_lock_irqsave(&qp->s_lock, flags);
5539 qp->s_flags |= RVT_S_ECN;
5540 spin_unlock_irqrestore(&qp->s_lock, flags);
5541 }
5542}
5543
5544static void update_r_next_psn_fecn(struct hfi1_packet *packet,
5545 struct hfi1_qp_priv *priv,
5546 struct hfi1_ctxtdata *rcd,
5547 struct tid_rdma_flow *flow,
5548 bool fecn)
5549{
5550 /*
5551  * If a start/middle packet is delivered here due to
5552  * RSM rule and FECN, we need to update the r_next_psn.
5553  */
5554 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
5555 !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
5556 struct hfi1_devdata *dd = rcd->dd;
5557
5558 flow->flow_state.r_next_psn =
5559 read_r_next_psn(dd, rcd->ctxt, flow->idx);
5560 }
5561}
5562