// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 */

#include "hfi.h"
#include "qp.h"
#include "rc.h"
#include "verbs.h"
#include "tid_rdma.h"
#include "exp_rcv.h"
#include "trace.h"

/* Control and status bits of a RcvTidFlowTable entry. */
#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)

/* Maximum number of packets within a flow generation. */
#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)

#define GENERATION_MASK 0xFFFFF

static u32 mask_generation(u32 a)
{
	return a & GENERATION_MASK;
}

/* Reserved generation value for flows that are not in use. */
#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)

/*
 * J_KEY used by kernel receive contexts for TID RDMA; it must fall in
 * the kernel (administrative) J_KEY range.
 */
#define TID_RDMA_JKEY 32
#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)

/*
 * Maximum number of segments that may be outstanding per request, and
 * the size of the per-request flow ring derived from it.
 */
#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
			TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)

/* Largest number of pages a single TID (expected receive) entry can cover. */
#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)

/* Placement of the hardware flow index inside the tid_flow_qp word. */
#define TID_RDMA_DESTQP_FLOW_SHIFT 11
#define TID_RDMA_DESTQP_FLOW_MASK 0x1f

/* Masks and shifts for packing TID RDMA parameters into the OPFN payload. */
#define TID_OPFN_QP_CTXT_MASK 0xff
#define TID_OPFN_QP_CTXT_SHIFT 56
#define TID_OPFN_QP_KDETH_MASK 0xff
#define TID_OPFN_QP_KDETH_SHIFT 48
#define TID_OPFN_MAX_LEN_MASK 0x7ff
#define TID_OPFN_MAX_LEN_SHIFT 37
#define TID_OPFN_TIMEOUT_MASK 0x1f
#define TID_OPFN_TIMEOUT_SHIFT 32
#define TID_OPFN_RESERVED_MASK 0x3f
#define TID_OPFN_RESERVED_SHIFT 26
#define TID_OPFN_URG_MASK 0x1
#define TID_OPFN_URG_SHIFT 25
#define TID_OPFN_VER_MASK 0x7
#define TID_OPFN_VER_SHIFT 22
#define TID_OPFN_JKEY_MASK 0x3f
#define TID_OPFN_JKEY_SHIFT 16
#define TID_OPFN_MAX_READ_MASK 0x3f
#define TID_OPFN_MAX_READ_SHIFT 10
#define TID_OPFN_MAX_WRITE_MASK 0x3f
#define TID_OPFN_MAX_WRITE_SHIFT 4
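
/*
 * Layout of the 64-bit OPFN payload implied by the masks and shifts
 * above (illustrative summary; bits 3..0 are not parameter bits and are
 * masked off before decoding):
 *
 *   63..56  KDETH QP context number
 *   55..48  KDETH QP prefix (bits 23..16 of the KDETH QP)
 *   47..37  max_len, in pages, minus one
 *   36..32  timeout
 *   31..26  reserved
 *       25  urg
 *   24..22  version
 *   21..16  jkey
 *   15..10  max_read
 *    9..4   max_write
 */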

static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
					 gfp_t gfp);
static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req);
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
static void hfi1_tid_timeout(struct timer_list *t);
static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
static void hfi1_tid_retry_timeout(struct timer_list *t);
static int make_tid_rdma_ack(struct rvt_qp *qp,
			     struct ib_other_headers *ohdr,
			     struct hfi1_pkt_state *ps);
static void hfi1_do_tid_send(struct rvt_qp *qp);
static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
static void tid_rdma_rcv_err(struct hfi1_packet *packet,
			     struct ib_other_headers *ohdr,
			     struct rvt_qp *qp, u32 psn, int diff, bool fecn);
static void update_r_next_psn_fecn(struct hfi1_packet *packet,
				   struct hfi1_qp_priv *priv,
				   struct hfi1_ctxtdata *rcd,
				   struct tid_rdma_flow *flow,
				   bool fecn);
136
137static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
138{
139 if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
140 priv->r_tid_ack = priv->r_tid_tail;
141}
142
143static void tid_rdma_schedule_ack(struct rvt_qp *qp)
144{
145 struct hfi1_qp_priv *priv = qp->priv;
146
147 priv->s_flags |= RVT_S_ACK_PENDING;
148 hfi1_schedule_tid_send(qp);
149}
150
151static void tid_rdma_trigger_ack(struct rvt_qp *qp)
152{
153 validate_r_tid_ack(qp->priv);
154 tid_rdma_schedule_ack(qp);
155}
156
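/*
 * tid_rdma_opfn_encode()/tid_rdma_opfn_decode() pack and unpack the TID
 * RDMA parameters exchanged with the remote node into/from the 64-bit
 * OPFN payload, using the TID_OPFN_* masks and shifts above.
 */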
157static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
158{
159 return
160 (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
161 TID_OPFN_QP_CTXT_SHIFT) |
162 ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
163 TID_OPFN_QP_KDETH_SHIFT) |
164 (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
165 TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
166 (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
167 TID_OPFN_TIMEOUT_SHIFT) |
168 (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
169 (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
170 (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
171 TID_OPFN_MAX_READ_SHIFT) |
172 (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
173 TID_OPFN_MAX_WRITE_SHIFT);
174}
175
176static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
177{
178 p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
179 TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
180 p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
181 p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
182 TID_OPFN_MAX_WRITE_MASK;
183 p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
184 TID_OPFN_MAX_READ_MASK;
185 p->qp =
186 ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
187 << 16) |
188 ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
189 p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
190 p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
191}
192
193void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
194{
195 struct hfi1_qp_priv *priv = qp->priv;
196
197 p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
198 p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
199 p->jkey = priv->rcd->jkey;
200 p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
201 p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
202 p->timeout = qp->timeout;
203 p->urg = is_urg_masked(priv->rcd);
204}
205
206bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
207{
208 struct hfi1_qp_priv *priv = qp->priv;
209
210 *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
211 return true;
212}
213
214bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
215{
216 struct hfi1_qp_priv *priv = qp->priv;
217 struct tid_rdma_params *remote, *old;
218 bool ret = true;
219
220 old = rcu_dereference_protected(priv->tid_rdma.remote,
221 lockdep_is_held(&priv->opfn.lock));
222 data &= ~0xfULL;
223
224
225
226
227 if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
228 goto null;
229
230
231
232
233
234
235
236 remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
237 if (!remote) {
238 ret = false;
239 goto null;
240 }
241
242 tid_rdma_opfn_decode(remote, data);
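	/*
	 * Timer value derived from the remote timeout: 7 * 8 times the IB
	 * ACK timeout period it encodes (4.096 us * 2^timeout).
	 */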
243 priv->tid_timer_timeout_jiffies =
244 usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
245 1000UL) << 3) * 7);
246 trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
247 trace_hfi1_opfn_param(qp, 1, remote);
248 rcu_assign_pointer(priv->tid_rdma.remote, remote);

	/*
	 * Precompute the number of packets needed to cover one remote
	 * max_len segment and the ceiling of its base-2 logarithm.
	 */
257 priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
258 priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
259 goto free;
260null:
261 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
262 priv->timeout_shift = 0;
263free:
264 if (old)
265 kfree_rcu(old, rcu_head);
266 return ret;
267}
268
269bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
270{
271 bool ret;
272
273 ret = tid_rdma_conn_reply(qp, *data);
274 *data = 0;

	/*
	 * If the incoming request was processed successfully, return our
	 * own local parameters in the response payload.
	 */
280 if (ret)
281 (void)tid_rdma_conn_req(qp, data);
282 return ret;
283}
284
285void tid_rdma_conn_error(struct rvt_qp *qp)
286{
287 struct hfi1_qp_priv *priv = qp->priv;
288 struct tid_rdma_params *old;
289
290 old = rcu_dereference_protected(priv->tid_rdma.remote,
291 lockdep_is_held(&priv->opfn.lock));
292 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
293 if (old)
294 kfree_rcu(old, rcu_head);
295}

/*
 * Set the receive context's J_KEY for kernel expected receives and
 * allocate its RcvArray groups.  Nothing to do on re-initialization.
 */
298int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
299{
300 if (reinit)
301 return 0;
302
303 BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
304 BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
305 rcd->jkey = TID_RDMA_JKEY;
306 hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
307 return hfi1_alloc_ctxt_rcv_groups(rcd);
308}

/**
 * qp_to_rcd - determine the receive context used by a QP
 * @rdi: the rdmavt device info
 * @qp: the QP
 *
 * QP 0 always uses context 0; other QPs are mapped to a kernel receive
 * context via the device's QP-to-context map.
 *
 * Return: the receive context data for the QP
 */
320static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
321 struct rvt_qp *qp)
322{
323 struct hfi1_ibdev *verbs_dev = container_of(rdi,
324 struct hfi1_ibdev,
325 rdi);
326 struct hfi1_devdata *dd = container_of(verbs_dev,
327 struct hfi1_devdata,
328 verbs_dev);
329 unsigned int ctxt;
330
331 if (qp->ibqp.qp_num == 0)
332 ctxt = 0;
333 else
334 ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
335 return dd->rcd[ctxt];
336}
337
338int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
339 struct ib_qp_init_attr *init_attr)
340{
341 struct hfi1_qp_priv *qpriv = qp->priv;
342 int i, ret;
343
344 qpriv->rcd = qp_to_rcd(rdi, qp);
345
346 spin_lock_init(&qpriv->opfn.lock);
347 INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
348 INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
349 qpriv->flow_state.psn = 0;
350 qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
351 qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
352 qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
353 qpriv->s_state = TID_OP(WRITE_RESP);
354 qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
355 qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
356 qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
357 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
358 qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
359 qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
360 qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
361 qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
362 atomic_set(&qpriv->n_requests, 0);
363 atomic_set(&qpriv->n_tid_requests, 0);
364 timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
365 timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
366 INIT_LIST_HEAD(&qpriv->tid_wait);
367
368 if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
369 struct hfi1_devdata *dd = qpriv->rcd->dd;
370
371 qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
372 sizeof(*qpriv->pages),
373 GFP_KERNEL, dd->node);
374 if (!qpriv->pages)
375 return -ENOMEM;
376 for (i = 0; i < qp->s_size; i++) {
377 struct hfi1_swqe_priv *priv;
378 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
379
380 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
381 dd->node);
382 if (!priv)
383 return -ENOMEM;
384
385 hfi1_init_trdma_req(qp, &priv->tid_req);
386 priv->tid_req.e.swqe = wqe;
387 wqe->priv = priv;
388 }
389 for (i = 0; i < rvt_max_atomic(rdi); i++) {
390 struct hfi1_ack_priv *priv;
391
392 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
393 dd->node);
394 if (!priv)
395 return -ENOMEM;
396
397 hfi1_init_trdma_req(qp, &priv->tid_req);
398 priv->tid_req.e.ack = &qp->s_ack_queue[i];
399
400 ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
401 GFP_KERNEL);
402 if (ret) {
403 kfree(priv);
404 return ret;
405 }
406 qp->s_ack_queue[i].priv = priv;
407 }
408 }
409
410 return 0;
411}
412
413void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
414{
415 struct hfi1_qp_priv *qpriv = qp->priv;
416 struct rvt_swqe *wqe;
417 u32 i;
418
419 if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
420 for (i = 0; i < qp->s_size; i++) {
421 wqe = rvt_get_swqe_ptr(qp, i);
422 kfree(wqe->priv);
423 wqe->priv = NULL;
424 }
425 for (i = 0; i < rvt_max_atomic(rdi); i++) {
426 struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
427
428 if (priv)
429 hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
430 kfree(priv);
431 qp->s_ack_queue[i].priv = NULL;
432 }
433 cancel_work_sync(&qpriv->opfn.opfn_work);
434 kfree(qpriv->pages);
435 qpriv->pages = NULL;
436 }
437}

/**
 * first_qp - return the head of the TID wait queue
 * @rcd: the receive context
 * @queue: the flow or RcvArray wait queue
 *
 * Return the first QP waiting for TID resources on @queue with a
 * reference held (the caller must rvt_put_qp() it), or NULL if the
 * queue is empty.  Must be called with the exp_lock held.
 */
469static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
470 struct tid_queue *queue)
471 __must_hold(&rcd->exp_lock)
472{
473 struct hfi1_qp_priv *priv;
474
475 lockdep_assert_held(&rcd->exp_lock);
476 priv = list_first_entry_or_null(&queue->queue_head,
477 struct hfi1_qp_priv,
478 tid_wait);
479 if (!priv)
480 return NULL;
481 rvt_get_qp(priv->owner);
482 return priv->owner;
483}

/**
 * kernel_tid_waiters - determine if @qp must wait behind other QPs
 * @rcd: the receive context
 * @queue: the flow or RcvArray wait queue
 * @qp: the QP being checked
 *
 * Return false (no need to wait) when the queue is empty or when @qp
 * itself is at the head and is flagged as waiting for TID space;
 * otherwise return true.  Called with both the QP s_lock and the
 * exp_lock held.
 */
504static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
505 struct tid_queue *queue, struct rvt_qp *qp)
506 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
507{
508 struct rvt_qp *fqp;
509 bool ret = true;
510
511 lockdep_assert_held(&qp->s_lock);
512 lockdep_assert_held(&rcd->exp_lock);
513 fqp = first_qp(rcd, queue);
514 if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
515 ret = false;
516 rvt_put_qp(fqp);
517 return ret;
518}

/*
 * Remove @qp from the TID wait queue if it is queued: clear
 * HFI1_S_WAIT_TID_SPACE, bump the queue's dequeue count and drop the
 * reference the queue held on the QP.  Called with both the QP s_lock
 * and the exp_lock held.
 */
537static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
538 struct tid_queue *queue, struct rvt_qp *qp)
539 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
540{
541 struct hfi1_qp_priv *priv = qp->priv;
542
543 lockdep_assert_held(&qp->s_lock);
544 lockdep_assert_held(&rcd->exp_lock);
545 if (list_empty(&priv->tid_wait))
546 return;
547 list_del_init(&priv->tid_wait);
548 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
549 queue->dequeue++;
550 rvt_put_qp(qp);
551}

/*
 * Queue @qp on the TID wait queue if it is not already queued: set
 * HFI1_S_WAIT_TID_SPACE, record the enqueue number and take a reference
 * on the QP.  Called with both the QP s_lock and the exp_lock held.
 */
564static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
565 struct tid_queue *queue, struct rvt_qp *qp)
566 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
567{
568 struct hfi1_qp_priv *priv = qp->priv;
569
570 lockdep_assert_held(&qp->s_lock);
571 lockdep_assert_held(&rcd->exp_lock);
572 if (list_empty(&priv->tid_wait)) {
573 qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
574 list_add_tail(&priv->tid_wait, &queue->queue_head);
575 priv->tid_enqueue = ++queue->enqueue;
576 rcd->dd->verbs_dev.n_tidwait++;
577 trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
578 rvt_get_qp(qp);
579 }
580}

/*
 * Reschedule the send engine for a QP that was waiting for TID space.
 * Called with the QP s_lock held.
 */
589static void __trigger_tid_waiter(struct rvt_qp *qp)
590 __must_hold(&qp->s_lock)
591{
592 lockdep_assert_held(&qp->s_lock);
593 if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
594 return;
595 trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
596 hfi1_schedule_send(qp);
597}

/*
 * Schedule the QP's TID trigger work on the port workqueue so the wakeup
 * runs outside the current lock context.  The work item consumes the QP
 * reference taken by first_qp(); if the work was already queued, drop
 * that reference here.
 */
611static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
612{
613 struct hfi1_qp_priv *priv;
614 struct hfi1_ibport *ibp;
615 struct hfi1_pportdata *ppd;
616 struct hfi1_devdata *dd;
617 bool rval;
618
619 if (!qp)
620 return;
621
622 priv = qp->priv;
623 ibp = to_iport(qp->ibqp.device, qp->port_num);
624 ppd = ppd_from_ibp(ibp);
625 dd = dd_from_ibdev(qp->ibqp.device);
626
627 rval = queue_work_on(priv->s_sde ?
628 priv->s_sde->cpu :
629 cpumask_first(cpumask_of_node(dd->node)),
630 ppd->hfi1_wq,
631 &priv->tid_rdma.trigger_work);
632 if (!rval)
633 rvt_put_qp(qp);
634}

/*
 * Workqueue handler that resumes a QP blocked on TID space: if the QP is
 * still waiting, run the send engine directly, then drop the reference
 * taken when the work was scheduled.
 */
643static void tid_rdma_trigger_resume(struct work_struct *work)
644{
645 struct tid_rdma_qp_params *tr;
646 struct hfi1_qp_priv *priv;
647 struct rvt_qp *qp;
648
649 tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
650 priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
651 qp = priv->owner;
652 spin_lock_irq(&qp->s_lock);
653 if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
654 spin_unlock_irq(&qp->s_lock);
655 hfi1_do_send(priv->owner, true);
656 } else {
657 spin_unlock_irq(&qp->s_lock);
658 }
659 rvt_put_qp(qp);
660}

/*
 * Remove @qp from @queue (if queued) and clear its TID-space wait flag.
 * Used when the QP's TID waits are being flushed.  Called with the QP
 * s_lock held; takes the exp_lock internally.
 */
669static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
670 __must_hold(&qp->s_lock)
671{
672 struct hfi1_qp_priv *priv;
673
674 if (!qp)
675 return;
676 lockdep_assert_held(&qp->s_lock);
677 priv = qp->priv;
678 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
679 spin_lock(&priv->rcd->exp_lock);
680 if (!list_empty(&priv->tid_wait)) {
681 list_del_init(&priv->tid_wait);
682 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
683 queue->dequeue++;
684 rvt_put_qp(qp);
685 }
686 spin_unlock(&priv->rcd->exp_lock);
687}
688
689void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
690 __must_hold(&qp->s_lock)
691{
692 struct hfi1_qp_priv *priv = qp->priv;
693
694 _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
695 _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
696}

/**
 * kern_reserve_flow - reserve a hardware flow index
 * @rcd: the receive context
 * @last: the flow index last used by this QP (ignored if out of range)
 *
 * Prefer re-using @last if it is still free; otherwise take the first
 * free flow.  Return the reserved flow index, or -EAGAIN if all
 * RXE_NUM_TID_FLOWS flows are in use.  Called with the exp_lock held.
 */
716static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
717 __must_hold(&rcd->exp_lock)
718{
719 int nr;
720
721
722 if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
723 !test_and_set_bit(last, &rcd->flow_mask))
724 return last;
725
726 nr = ffz(rcd->flow_mask);
727 BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
728 (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
729 if (nr > (RXE_NUM_TID_FLOWS - 1))
730 return -EAGAIN;
731 set_bit(nr, &rcd->flow_mask);
732 return nr;
733}
734
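/*
 * Program the context's RcvTidFlowTable entry @flow_idx with @generation
 * and the standard control/status bits.  Header suppression is enabled
 * only for real generations, not for KERN_GENERATION_RESERVED.
 */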
735static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
736 u32 flow_idx)
737{
738 u64 reg;
739
740 reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
741 RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
742 RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
743 RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
744 RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
745 RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
746
747 if (generation != KERN_GENERATION_RESERVED)
748 reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
749
750 write_uctxt_csr(rcd->dd, rcd->ctxt,
751 RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
752}
753
754static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
755 __must_hold(&rcd->exp_lock)
756{
757 u32 generation = rcd->flows[flow_idx].generation;
758
759 kern_set_hw_flow(rcd, generation, flow_idx);
760 return generation;
761}
762
763static u32 kern_flow_generation_next(u32 gen)
764{
765 u32 generation = mask_generation(gen + 1);
766
767 if (generation == KERN_GENERATION_RESERVED)
768 generation = mask_generation(generation + 1);
769 return generation;
770}
771
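/*
 * Retire a hardware flow: advance its generation and reprogram the table
 * entry with the reserved generation.
 */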
772static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
773 __must_hold(&rcd->exp_lock)
774{
775 rcd->flows[flow_idx].generation =
776 kern_flow_generation_next(rcd->flows[flow_idx].generation);
777 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
778}
779
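/*
 * Allocate a hardware flow for the QP, re-using its previous flow index
 * when possible, and capture the flow's generation and starting PSN in
 * the QP's flow_state.  Returns 0 immediately if the QP already owns a
 * flow.  If no flow is free, or other QPs are already waiting, the QP is
 * queued on the context's flow wait queue and -EAGAIN is returned.
 */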
780int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
781{
782 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
783 struct tid_flow_state *fs = &qpriv->flow_state;
784 struct rvt_qp *fqp;
785 unsigned long flags;
786 int ret = 0;
787
788
789 if (fs->index != RXE_NUM_TID_FLOWS)
790 return ret;
791
792 spin_lock_irqsave(&rcd->exp_lock, flags);
793 if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
794 goto queue;
795
796 ret = kern_reserve_flow(rcd, fs->last_index);
797 if (ret < 0)
798 goto queue;
799 fs->index = ret;
800 fs->last_index = fs->index;
801
802
803 if (fs->generation != KERN_GENERATION_RESERVED)
804 rcd->flows[fs->index].generation = fs->generation;
805 fs->generation = kern_setup_hw_flow(rcd, fs->index);
806 fs->psn = 0;
807 dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
808
809 fqp = first_qp(rcd, &rcd->flow_queue);
810 spin_unlock_irqrestore(&rcd->exp_lock, flags);
811
812 tid_rdma_schedule_tid_wakeup(fqp);
813 return 0;
814queue:
815 queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
816 spin_unlock_irqrestore(&rcd->exp_lock, flags);
817 return -EAGAIN;
818}
819
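/*
 * Release the QP's hardware flow (if any), reset the QP's flow_state and
 * wake up the next QP waiting for a flow.
 */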
820void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
821{
822 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
823 struct tid_flow_state *fs = &qpriv->flow_state;
824 struct rvt_qp *fqp;
825 unsigned long flags;
826
827 if (fs->index >= RXE_NUM_TID_FLOWS)
828 return;
829 spin_lock_irqsave(&rcd->exp_lock, flags);
830 kern_clear_hw_flow(rcd, fs->index);
831 clear_bit(fs->index, &rcd->flow_mask);
832 fs->index = RXE_NUM_TID_FLOWS;
833 fs->psn = 0;
834 fs->generation = KERN_GENERATION_RESERVED;
835
836
837 fqp = first_qp(rcd, &rcd->flow_queue);
838 spin_unlock_irqrestore(&rcd->exp_lock, flags);
839
840 if (fqp == qp) {
841 __trigger_tid_waiter(fqp);
842 rvt_put_qp(fqp);
843 } else {
844 tid_rdma_schedule_tid_wakeup(fqp);
845 }
846}
847
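/*
 * Give each of the context's flows a random initial generation and park
 * the hardware entries on the reserved generation until they are claimed.
 */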
848void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
849{
850 int i;
851
852 for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
853 rcd->flows[i].generation = mask_generation(prandom_u32());
854 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
855 }
856}

/* Size code ("order") used to program an RcvArray entry for a page set. */
859static u8 trdma_pset_order(struct tid_rdma_pageset *s)
860{
861 u8 count = s->count;
862
863 return ilog2(count) + 1;
864}

/**
 * tid_rdma_find_phys_blocks_4k - split a segment into page sets (4K MTU)
 * @flow: overall info for the TID RDMA segment
 * @pages: pointer to an array of page structs
 * @npages: number of pages in the segment
 * @list: page set array to return
 *
 * Group physically contiguous pages into page sets whose sizes are
 * powers of two no larger than MAX_EXPECTED_BUFFER, so that each set can
 * be described by a single RcvArray (TID) entry.  Returns the number of
 * page sets, padded to an even count with a zero-length set if needed.
 */
881static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
882 struct page **pages,
883 u32 npages,
884 struct tid_rdma_pageset *list)
885{
886 u32 pagecount, pageidx, setcount = 0, i;
887 void *vaddr, *this_vaddr;
888
889 if (!npages)
890 return 0;

	/*
	 * Look for runs of physically contiguous pages by comparing each
	 * page's kernel virtual address against the previous one; close
	 * out the current run whenever adjacency breaks.
	 */
897 vaddr = page_address(pages[0]);
898 trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
899 for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
900 this_vaddr = i < npages ? page_address(pages[i]) : NULL;
901 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
902 this_vaddr);
903
904
905
906
907 if (this_vaddr != (vaddr + PAGE_SIZE)) {
			/*
			 * The run of contiguous pages has ended: carve it
			 * into page sets, each capped at
			 * MAX_EXPECTED_BUFFER and rounded down to a power
			 * of two so it fits a single TID entry.
			 */
920 while (pagecount) {
921 int maxpages = pagecount;
922 u32 bufsize = pagecount * PAGE_SIZE;
923
924 if (bufsize > MAX_EXPECTED_BUFFER)
925 maxpages =
926 MAX_EXPECTED_BUFFER >>
927 PAGE_SHIFT;
928 else if (!is_power_of_2(bufsize))
929 maxpages =
930 rounddown_pow_of_two(bufsize) >>
931 PAGE_SHIFT;
932
933 list[setcount].idx = pageidx;
934 list[setcount].count = maxpages;
935 trace_hfi1_tid_pageset(flow->req->qp, setcount,
936 list[setcount].idx,
937 list[setcount].count);
938 pagecount -= maxpages;
939 pageidx += maxpages;
940 setcount++;
941 }
942 pageidx = i;
943 pagecount = 1;
944 vaddr = this_vaddr;
945 } else {
946 vaddr += PAGE_SIZE;
947 pagecount++;
948 }
949 }
950
951 if (setcount & 1)
952 list[setcount++].count = 0;
953 return setcount;
954}

/**
 * tid_flush_pages - flush a run of contiguous pages into page sets
 * @list: page set array to fill
 * @idx: pointer to the first page index of the run
 * @pages: number of pages in the run
 * @sets: current number of page sets
 *
 * Break the run into page sets no larger than MAX_EXPECTED_PAGES and
 * rounded down to powers of two, appending them to @list.  Advances
 * *@idx past the run and returns the updated set count, padded to an
 * even count with a zero-length set if needed.
 */
976static u32 tid_flush_pages(struct tid_rdma_pageset *list,
977 u32 *idx, u32 pages, u32 sets)
978{
979 while (pages) {
980 u32 maxpages = pages;
981
982 if (maxpages > MAX_EXPECTED_PAGES)
983 maxpages = MAX_EXPECTED_PAGES;
984 else if (!is_power_of_2(maxpages))
985 maxpages = rounddown_pow_of_two(maxpages);
986 list[sets].idx = *idx;
987 list[sets++].count = maxpages;
988 *idx += maxpages;
989 pages -= maxpages;
990 }
991
992 if (sets & 1)
993 list[sets++].count = 0;
994 return sets;
995}

/**
 * tid_rdma_find_phys_blocks_8k - split a segment into page sets (8K MTU)
 * @flow: overall info for the TID RDMA segment
 * @pages: pointer to an array of page structs
 * @npages: number of pages in the segment
 * @list: page set array to return
 *
 * Like the 4K variant, but pages are examined two at a time: pairs whose
 * pages are contiguous are accumulated into larger runs and flushed via
 * tid_flush_pages() when contiguity breaks, while a non-contiguous pair
 * is emitted as individual single-page sets.  The returned set count is
 * always even.
 */
1020static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
1021 struct page **pages,
1022 u32 npages,
1023 struct tid_rdma_pageset *list)
1024{
1025 u32 idx, sets = 0, i;
1026 u32 pagecnt = 0;
1027 void *v0, *v1, *vm1;
1028
1029 if (!npages)
1030 return 0;
1031 for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
1032
1033 v0 = page_address(pages[i]);
1034 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
1035 v1 = i + 1 < npages ?
1036 page_address(pages[i + 1]) : NULL;
1037 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
1038
1039 if (v1 != (v0 + PAGE_SIZE)) {
1040
1041 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1042
1043 list[sets].idx = idx++;
1044 list[sets++].count = 1;
1045 if (v1) {
1046 list[sets].count = 1;
1047 list[sets++].idx = idx++;
1048 } else {
1049 list[sets++].count = 0;
1050 }
1051 vm1 = NULL;
1052 pagecnt = 0;
1053 continue;
1054 }
1055
1056 if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
1057
1058 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1059 pagecnt = 0;
1060 }
1061
1062 pagecnt += 2;
1063
1064 vm1 = v1;
1065
1066 }
1067
1068 sets = tid_flush_pages(list, &idx, npages - idx, sets);
1069
1070 WARN_ON(sets & 1);
1071 return sets;
1072}

/**
 * kern_find_pages - collect the pages backing the next segment
 * @flow: the flow being set up
 * @pages: array to fill with page pointers
 * @ss: the SGE state to consume
 * @last: set to true if the SGE list has not been fully consumed
 *
 * Walk @ss one PAGE_SIZE chunk at a time for up to one segment
 * (flow->req->seg_len bytes), recording the backing pages.  Sets
 * flow->length to the number of bytes consumed and returns the number
 * of pages collected.
 */
1087static u32 kern_find_pages(struct tid_rdma_flow *flow,
1088 struct page **pages,
1089 struct rvt_sge_state *ss, bool *last)
1090{
1091 struct tid_rdma_request *req = flow->req;
1092 struct rvt_sge *sge = &ss->sge;
1093 u32 length = flow->req->seg_len;
1094 u32 len = PAGE_SIZE;
1095 u32 i = 0;
1096
1097 while (length && req->isge < ss->num_sge) {
1098 pages[i++] = virt_to_page(sge->vaddr);
1099
1100 sge->vaddr += len;
1101 sge->length -= len;
1102 sge->sge_length -= len;
1103 if (!sge->sge_length) {
1104 if (++req->isge < ss->num_sge)
1105 *sge = ss->sg_list[req->isge - 1];
1106 } else if (sge->length == 0 && sge->mr->lkey) {
1107 if (++sge->n >= RVT_SEGSZ) {
1108 ++sge->m;
1109 sge->n = 0;
1110 }
1111 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
1112 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
1113 }
1114 length -= len;
1115 }
1116
1117 flow->length = flow->req->seg_len - length;
1118 *last = req->isge != ss->num_sge;
1119 return i;
1120}
1121
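/* Unmap the page sets previously DMA-mapped for this flow. */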
1122static void dma_unmap_flow(struct tid_rdma_flow *flow)
1123{
1124 struct hfi1_devdata *dd;
1125 int i;
1126 struct tid_rdma_pageset *pset;
1127
1128 dd = flow->req->rcd->dd;
1129 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1130 i++, pset++) {
1131 if (pset->count && pset->addr) {
1132 dma_unmap_page(&dd->pcidev->dev,
1133 pset->addr,
1134 PAGE_SIZE * pset->count,
1135 DMA_FROM_DEVICE);
1136 pset->mapped = 0;
1137 }
1138 }
1139}
1140
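/*
 * DMA-map every page set of the flow for receive DMA, unwinding the
 * mappings on failure.
 */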
1141static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
1142{
1143 int i;
1144 struct hfi1_devdata *dd = flow->req->rcd->dd;
1145 struct tid_rdma_pageset *pset;
1146
1147 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1148 i++, pset++) {
1149 if (pset->count) {
1150 pset->addr = dma_map_page(&dd->pcidev->dev,
1151 pages[pset->idx],
1152 0,
1153 PAGE_SIZE * pset->count,
1154 DMA_FROM_DEVICE);
1155
1156 if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
1157 dma_unmap_flow(flow);
1158 return -ENOMEM;
1159 }
1160 pset->mapped = 1;
1161 }
1162 }
1163 return 0;
1164}
1165
1166static inline bool dma_mapped(struct tid_rdma_flow *flow)
1167{
1168 return !!flow->pagesets[0].mapped;
1169}

/*
 * Build (or re-use) the flow's page sets for the current segment and
 * DMA-map them.  If the page sets already exist (e.g. on a retry), only
 * the missing DMA mapping is performed.
 */
1175static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
1176 struct page **pages,
1177 struct rvt_sge_state *ss, bool *last)
1178{
1179 u8 npages;
1180
1181
1182 if (flow->npagesets) {
1183 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
1184 flow);
1185 if (!dma_mapped(flow))
1186 return dma_map_flow(flow, pages);
1187 return 0;
1188 }
1189
1190 npages = kern_find_pages(flow, pages, ss, last);
1191
1192 if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
1193 flow->npagesets =
1194 tid_rdma_find_phys_blocks_4k(flow, pages, npages,
1195 flow->pagesets);
1196 else
1197 flow->npagesets =
1198 tid_rdma_find_phys_blocks_8k(flow, pages, npages,
1199 flow->pagesets);
1200
1201 return dma_map_flow(flow, pages);
1202}
1203
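/*
 * Record a TID group and the number of RcvArray entries to use from it
 * in the flow's node list for later programming; an odd entry count is
 * unexpected and only warned about.
 */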
1204static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
1205 struct hfi1_ctxtdata *rcd, char *s,
1206 struct tid_group *grp, u8 cnt)
1207{
1208 struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
1209
1210 WARN_ON_ONCE(flow->tnode_cnt >=
1211 (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
1212 if (WARN_ON_ONCE(cnt & 1))
1213 dd_dev_err(rcd->dd,
1214 "unexpected odd allocation cnt %u map 0x%x used %u",
1215 cnt, grp->map, grp->used);
1216
1217 node->grp = grp;
1218 node->map = grp->map;
1219 node->cnt = cnt;
1220 trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
1221 grp->base, grp->map, grp->used, cnt);
1222}

/*
 * Reserve enough RcvArray (TID) entries to cover the flow's page sets:
 * whole free groups are claimed first, then partially used groups, and
 * finally at most one more free group for any remainder.  The chosen
 * groups are recorded in flow->tnode[]; nothing is programmed into the
 * hardware here.  Returns 0 on success or -EAGAIN if the context does
 * not currently have enough free entries.
 */
1237static int kern_alloc_tids(struct tid_rdma_flow *flow)
1238{
1239 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1240 struct hfi1_devdata *dd = rcd->dd;
1241 u32 ngroups, pageidx = 0;
1242 struct tid_group *group = NULL, *used;
1243 u8 use;
1244
1245 flow->tnode_cnt = 0;
1246 ngroups = flow->npagesets / dd->rcv_entries.group_size;
1247 if (!ngroups)
1248 goto used_list;
1249
1250
1251 list_for_each_entry(group, &rcd->tid_group_list.list, list) {
1252 kern_add_tid_node(flow, rcd, "complete groups", group,
1253 group->size);
1254
1255 pageidx += group->size;
1256 if (!--ngroups)
1257 break;
1258 }
1259
1260 if (pageidx >= flow->npagesets)
1261 goto ok;
1262
1263used_list:
1264
1265 list_for_each_entry(used, &rcd->tid_used_list.list, list) {
1266 use = min_t(u32, flow->npagesets - pageidx,
1267 used->size - used->used);
1268 kern_add_tid_node(flow, rcd, "used groups", used, use);
1269
1270 pageidx += use;
1271 if (pageidx >= flow->npagesets)
1272 goto ok;
1273 }
1274
1275
1276
1277
1278
1279
1280 if (group && &group->list == &rcd->tid_group_list.list)
1281 goto bail_eagain;
1282 group = list_prepare_entry(group, &rcd->tid_group_list.list,
1283 list);
1284 if (list_is_last(&group->list, &rcd->tid_group_list.list))
1285 goto bail_eagain;
1286 group = list_next_entry(group, list);
1287 use = min_t(u32, flow->npagesets - pageidx, group->size);
1288 kern_add_tid_node(flow, rcd, "complete continue", group, use);
1289 pageidx += use;
1290 if (pageidx >= flow->npagesets)
1291 goto ok;
1292bail_eagain:
1293 trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
1294 (u64)flow->npagesets);
1295 return -EAGAIN;
1296ok:
1297 return 0;
1298}
1299
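/*
 * Program the RcvArray entries reserved from one TID group: write each
 * page set's DMA address, build the on-the-wire TID entries (pairing
 * adjacent RcvArray entries where possible), accumulate the expected
 * packet count and update the group's free/used accounting.
 */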
1300static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
1301 u32 *pset_idx)
1302{
1303 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1304 struct hfi1_devdata *dd = rcd->dd;
1305 struct kern_tid_node *node = &flow->tnode[grp_num];
1306 struct tid_group *grp = node->grp;
1307 struct tid_rdma_pageset *pset;
1308 u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
1309 u32 rcventry, npages = 0, pair = 0, tidctrl;
1310 u8 i, cnt = 0;
1311
1312 for (i = 0; i < grp->size; i++) {
1313 rcventry = grp->base + i;
1314
1315 if (node->map & BIT(i) || cnt >= node->cnt) {
1316 rcv_array_wc_fill(dd, rcventry);
1317 continue;
1318 }
1319 pset = &flow->pagesets[(*pset_idx)++];
1320 if (pset->count) {
1321 hfi1_put_tid(dd, rcventry, PT_EXPECTED,
1322 pset->addr, trdma_pset_order(pset));
1323 } else {
1324 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1325 }
1326 npages += pset->count;
1327
1328 rcventry -= rcd->expected_base;
1329 tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;

		/*
		 * Two consecutive RcvArray entries (an even/odd pair) can be
		 * advertised as one TID entry with tidctrl 0x3.  Start a
		 * pair only on an even entry, when both entries of the pair
		 * are free in the group map and at least two more entries
		 * of this node remain to be programmed.
		 */
1337 pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
1338 node->cnt >= cnt + 2;
1339 if (!pair) {
1340 if (!pset->count)
1341 tidctrl = 0x1;
1342 flow->tid_entry[flow->tidcnt++] =
1343 EXP_TID_SET(IDX, rcventry >> 1) |
1344 EXP_TID_SET(CTRL, tidctrl) |
1345 EXP_TID_SET(LEN, npages);
1346 trace_hfi1_tid_entry_alloc(
1347 flow->req->qp, flow->tidcnt - 1,
1348 flow->tid_entry[flow->tidcnt - 1]);
1349
1350
1351 flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
1352 npages = 0;
1353 }
1354
1355 if (grp->used == grp->size - 1)
1356 tid_group_move(grp, &rcd->tid_used_list,
1357 &rcd->tid_full_list);
1358 else if (!grp->used)
1359 tid_group_move(grp, &rcd->tid_group_list,
1360 &rcd->tid_used_list);
1361
1362 grp->used++;
1363 grp->map |= BIT(i);
1364 cnt++;
1365 }
1366}
1367
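/*
 * Invalidate the RcvArray entries that were programmed from this TID
 * group and move the group back toward the context's free list.
 */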
1368static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
1369{
1370 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1371 struct hfi1_devdata *dd = rcd->dd;
1372 struct kern_tid_node *node = &flow->tnode[grp_num];
1373 struct tid_group *grp = node->grp;
1374 u32 rcventry;
1375 u8 i, cnt = 0;
1376
1377 for (i = 0; i < grp->size; i++) {
1378 rcventry = grp->base + i;
1379
1380 if (node->map & BIT(i) || cnt >= node->cnt) {
1381 rcv_array_wc_fill(dd, rcventry);
1382 continue;
1383 }
1384
1385 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1386
1387 grp->used--;
1388 grp->map &= ~BIT(i);
1389 cnt++;
1390
1391 if (grp->used == grp->size - 1)
1392 tid_group_move(grp, &rcd->tid_full_list,
1393 &rcd->tid_used_list);
1394 else if (!grp->used)
1395 tid_group_move(grp, &rcd->tid_used_list,
1396 &rcd->tid_group_list);
1397 }
1398 if (WARN_ON_ONCE(cnt & 1)) {
1399 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1400 struct hfi1_devdata *dd = rcd->dd;
1401
1402 dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
1403 cnt, grp->map, grp->used);
1404 }
1405}
1406
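/* Program all RcvArray entries reserved for the flow. */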
1407static void kern_program_rcvarray(struct tid_rdma_flow *flow)
1408{
1409 u32 pset_idx = 0;
1410 int i;
1411
1412 flow->npkts = 0;
1413 flow->tidcnt = 0;
1414 for (i = 0; i < flow->tnode_cnt; i++)
1415 kern_program_rcv_group(flow, i, &pset_idx);
1416 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
1417}

/**
 * hfi1_kern_exp_rcv_setup - set up expected receives for a TID RDMA segment
 * @req: the TID RDMA request
 * @ss: the SGE state describing the data to receive
 * @last: set to true if the SGE list still has data after this segment
 *
 * Build the page sets for the segment at req->setup_head, allocate TID
 * entries for them, program the RcvArray and initialize the flow's
 * PSN/generation state from the QP's hardware flow.  On success the
 * segment is ready to receive data and req->setup_head is advanced.
 *
 * Returns -EINVAL if the flow ring is full or the request already has its
 * maximum number of outstanding segments, -ENOMEM if the page sets could
 * not be DMA-mapped (the QP is queued on the kmem wait list), or -EAGAIN
 * if TID resources are exhausted (the QP is queued on the RcvArray wait
 * queue).  Called with the QP s_lock held.
 */
1461int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
1462 struct rvt_sge_state *ss, bool *last)
1463 __must_hold(&req->qp->s_lock)
1464{
1465 struct tid_rdma_flow *flow = &req->flows[req->setup_head];
1466 struct hfi1_ctxtdata *rcd = req->rcd;
1467 struct hfi1_qp_priv *qpriv = req->qp->priv;
1468 unsigned long flags;
1469 struct rvt_qp *fqp;
1470 u16 clear_tail = req->clear_tail;
1471
1472 lockdep_assert_held(&req->qp->s_lock);
1473
1474
1475
1476
1477
1478
1479 if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
1480 CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
1481 req->n_flows)
1482 return -EINVAL;
1483
1484
1485
1486
1487
1488
1489 if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
1490 hfi1_wait_kmem(flow->req->qp);
1491 return -ENOMEM;
1492 }
1493
1494 spin_lock_irqsave(&rcd->exp_lock, flags);
1495 if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
1496 goto queue;
1497
1498
1499
1500
1501
1502
1503 if (kern_alloc_tids(flow))
1504 goto queue;
1505
1506
1507
1508
1509 kern_program_rcvarray(flow);
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519 memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
1520 flow->idx = qpriv->flow_state.index;
1521 flow->flow_state.generation = qpriv->flow_state.generation;
1522 flow->flow_state.spsn = qpriv->flow_state.psn;
1523 flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
1524 flow->flow_state.r_next_psn =
1525 full_flow_psn(flow, flow->flow_state.spsn);
1526 qpriv->flow_state.psn += flow->npkts;
1527
1528 dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
1529
1530 fqp = first_qp(rcd, &rcd->rarr_queue);
1531 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1532 tid_rdma_schedule_tid_wakeup(fqp);
1533
1534 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1535 return 0;
1536queue:
1537 queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
1538 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1539 return -EAGAIN;
1540}
1541
1542static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
1543{
1544 flow->npagesets = 0;
1545}

/*
 * Release the expected-receive resources of the segment at
 * req->clear_tail: unprogram its RcvArray entries, return the TID groups,
 * unmap the page sets and advance clear_tail.  The next QP waiting for
 * RcvArray space is woken up.  Called with the QP s_lock held.
 */
1553int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
1554 __must_hold(&req->qp->s_lock)
1555{
1556 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
1557 struct hfi1_ctxtdata *rcd = req->rcd;
1558 unsigned long flags;
1559 int i;
1560 struct rvt_qp *fqp;
1561
1562 lockdep_assert_held(&req->qp->s_lock);
1563
1564 if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
1565 return -EINVAL;
1566
1567 spin_lock_irqsave(&rcd->exp_lock, flags);
1568
1569 for (i = 0; i < flow->tnode_cnt; i++)
1570 kern_unprogram_rcv_group(flow, i);
1571
1572 flow->tnode_cnt = 0;
1573
1574 fqp = first_qp(rcd, &rcd->rarr_queue);
1575 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1576
1577 dma_unmap_flow(flow);
1578
1579 hfi1_tid_rdma_reset_flow(flow);
1580 req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
1581
1582 if (fqp == req->qp) {
1583 __trigger_tid_waiter(fqp);
1584 rvt_put_qp(fqp);
1585 } else {
1586 tid_rdma_schedule_tid_wakeup(fqp);
1587 }
1588
1589 return 0;
1590}

/*
 * Release the expected-receive resources of all outstanding segments of
 * the request.  Called with the QP s_lock held.
 */
1596void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
1597 __must_hold(&req->qp->s_lock)
1598{
1599
1600 while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
1601 if (hfi1_kern_exp_rcv_clear(req))
1602 break;
1603 }
1604}

/* Free the flow array allocated for a TID RDMA request. */
1610static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
1611{
1612 kfree(req->flows);
1613 req->flows = NULL;
1614}

/*
 * Clean up the TID RDMA state of a send WQE by freeing its request's
 * flow array.
 */
1621void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
1622{
1623 struct hfi1_swqe_priv *p = wqe->priv;
1624
1625 hfi1_kern_exp_rcv_free_flows(&p->tid_req);
1626}

/* Allocate (once) the per-request flow array used to track segments. */
1631static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1632 gfp_t gfp)
1633{
1634 struct tid_rdma_flow *flows;
1635 int i;
1636
1637 if (likely(req->flows))
1638 return 0;
1639 flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
1640 req->rcd->numa_id);
1641 if (!flows)
1642 return -ENOMEM;
1643
1644 for (i = 0; i < MAX_FLOWS; i++) {
1645 flows[i].req = req;
1646 flows[i].npagesets = 0;
1647 flows[i].pagesets[0].mapped = 0;
1648 flows[i].resync_npkts = 0;
1649 }
1650 req->flows = flows;
1651 return 0;
1652}
1653
1654static void hfi1_init_trdma_req(struct rvt_qp *qp,
1655 struct tid_rdma_request *req)
1656{
1657 struct hfi1_qp_priv *qpriv = qp->priv;

	/*
	 * Initialize only the fields that stay constant for the life of
	 * the QP; the per-request counters and indices are (re)initialized
	 * when the request itself is set up.
	 */
1669 req->qp = qp;
1670 req->rcd = qpriv->rcd;
1671}
1672
1673u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
1674 void *context, int vl, int mode, u64 data)
1675{
1676 struct hfi1_devdata *dd = context;
1677
1678 return dd->verbs_dev.n_tidwait;
1679}
1680
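/*
 * Find the flow whose IB PSN range contains @psn among the outstanding
 * flows between clear_tail and setup_head; if found and @fidx is
 * non-NULL, return the flow index through it.
 */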
1681static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
1682 u32 psn, u16 *fidx)
1683{
1684 u16 head, tail;
1685 struct tid_rdma_flow *flow;
1686
1687 head = req->setup_head;
1688 tail = req->clear_tail;
1689 for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
1690 tail = CIRC_NEXT(tail, MAX_FLOWS)) {
1691 flow = &req->flows[tail];
1692 if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
1693 cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
1694 if (fidx)
1695 *fidx = tail;
1696 return flow;
1697 }
1698 }
1699 return NULL;
1700}
1701
1702
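/*
 * Build a TID RDMA READ REQUEST packet for the flow at req->flow_idx:
 * point the WQE's software SGE at the flow's TID list (sent as the
 * packet payload), fill the KDETH/RETH fields from the negotiated remote
 * parameters and advance the request's segment accounting.
 */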
1703u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
1704 struct ib_other_headers *ohdr, u32 *bth1,
1705 u32 *bth2, u32 *len)
1706{
1707 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1708 struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
1709 struct rvt_qp *qp = req->qp;
1710 struct hfi1_qp_priv *qpriv = qp->priv;
1711 struct hfi1_swqe_priv *wpriv = wqe->priv;
1712 struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
1713 struct tid_rdma_params *remote;
1714 u32 req_len = 0;
1715 void *req_addr = NULL;
1716
1717
1718 *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
1719 trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
1720
1721
1722 req_addr = &flow->tid_entry[flow->tid_idx];
1723 req_len = sizeof(*flow->tid_entry) *
1724 (flow->tidcnt - flow->tid_idx);
1725
1726 memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
1727 wpriv->ss.sge.vaddr = req_addr;
1728 wpriv->ss.sge.sge_length = req_len;
1729 wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
1730
1731
1732
1733
1734 wpriv->ss.sge.mr = NULL;
1735 wpriv->ss.sge.m = 0;
1736 wpriv->ss.sge.n = 0;
1737
1738 wpriv->ss.sg_list = NULL;
1739 wpriv->ss.total_len = wpriv->ss.sge.sge_length;
1740 wpriv->ss.num_sge = 1;
1741
1742
1743 rcu_read_lock();
1744 remote = rcu_dereference(qpriv->tid_rdma.remote);
1745
1746 KDETH_RESET(rreq->kdeth0, KVER, 0x1);
1747 KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
1748 rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
1749 req->cur_seg * req->seg_len + flow->sent);
1750 rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
1751 rreq->reth.length = cpu_to_be32(*len);
1752 rreq->tid_flow_psn =
1753 cpu_to_be32((flow->flow_state.generation <<
1754 HFI1_KDETH_BTH_SEQ_SHIFT) |
1755 ((flow->flow_state.spsn + flow->pkt) &
1756 HFI1_KDETH_BTH_SEQ_MASK));
1757 rreq->tid_flow_qp =
1758 cpu_to_be32(qpriv->tid_rdma.local.qp |
1759 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
1760 TID_RDMA_DESTQP_FLOW_SHIFT) |
1761 qpriv->rcd->ctxt);
1762 rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
1763 *bth1 &= ~RVT_QPN_MASK;
1764 *bth1 |= remote->qp;
1765 *bth2 |= IB_BTH_REQ_ACK;
1766 rcu_read_unlock();
1767
1768
1769 flow->sent += *len;
1770 req->cur_seg++;
1771 qp->s_state = TID_OP(READ_REQ);
1772 req->ack_pending++;
1773 req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
1774 qpriv->pending_tid_r_segs++;
1775 qp->s_num_rd_atomic++;
1776
1777
1778 *len = req_len;
1779
1780 return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
1781}

/*
 * Prepare the next TID RDMA READ request packet for @wqe: set up a
 * hardware flow and expected-receive resources for a new segment when
 * needed (handling the SYNC and RESEND states), then build the request
 * header via hfi1_build_tid_rdma_read_packet().  Returns the number of
 * 32-bit header words added, or 0 if the request cannot make progress
 * yet.
 */
1787u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
1788 struct ib_other_headers *ohdr, u32 *bth1,
1789 u32 *bth2, u32 *len)
1790 __must_hold(&qp->s_lock)
1791{
1792 struct hfi1_qp_priv *qpriv = qp->priv;
1793 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1794 struct tid_rdma_flow *flow = NULL;
1795 u32 hdwords = 0;
1796 bool last;
1797 bool retry = true;
1798 u32 npkts = rvt_div_round_up_mtu(qp, *len);
1799
1800 trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
1801 wqe->lpsn, req);
1802
1803
1804
1805
1806sync_check:
1807 if (req->state == TID_REQUEST_SYNC) {
1808 if (qpriv->pending_tid_r_segs)
1809 goto done;
1810
1811 hfi1_kern_clear_hw_flow(req->rcd, qp);
1812 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
1813 req->state = TID_REQUEST_ACTIVE;
1814 }
1815
1816
1817
1818
1819
1820
1821 if (req->flow_idx == req->setup_head) {
1822 retry = false;
1823 if (req->state == TID_REQUEST_RESEND) {
1824
1825
1826
1827
1828
1829 restart_sge(&qp->s_sge, wqe, req->s_next_psn,
1830 qp->pmtu);
1831 req->isge = 0;
1832 req->state = TID_REQUEST_ACTIVE;
1833 }
1834
1835
1836
1837
1838
1839 if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
1840 req->state = TID_REQUEST_SYNC;
1841 goto sync_check;
1842 }
1843
1844
1845 if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
1846 goto done;
1847
1848
1849
1850
1851
1852 if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
1853 req->state = TID_REQUEST_QUEUED;
1854
1855
1856
1857
1858
1859 goto done;
1860 }
1861 }
1862
1863
1864 flow = &req->flows[req->flow_idx];
1865 flow->pkt = 0;
1866 flow->tid_idx = 0;
1867 flow->sent = 0;
1868 if (!retry) {
1869
1870 flow->flow_state.ib_spsn = req->s_next_psn;
1871 flow->flow_state.ib_lpsn =
1872 flow->flow_state.ib_spsn + flow->npkts - 1;
1873 }
1874
1875
1876 req->s_next_psn += flow->npkts;
1877
1878
1879 hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
1880done:
1881 return hdwords;
1882}

/*
 * Validate and unpack an incoming TID RDMA READ request into the ack
 * queue entry's tid_rdma_request: copy the TID list from the packet
 * payload and initialize the flow used to generate the responses.
 * Returns 0 on success or 1 if the request is malformed.
 */
1889static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
1890 struct rvt_ack_entry *e,
1891 struct hfi1_packet *packet,
1892 struct ib_other_headers *ohdr,
1893 u32 bth0, u32 psn, u64 vaddr, u32 len)
1894{
1895 struct hfi1_qp_priv *qpriv = qp->priv;
1896 struct tid_rdma_request *req;
1897 struct tid_rdma_flow *flow;
1898 u32 flow_psn, i, tidlen = 0, pktlen, tlen;
1899
1900 req = ack_to_tid_req(e);
1901
1902
1903 flow = &req->flows[req->setup_head];
1904
1905
1906 pktlen = packet->tlen - (packet->hlen + 4);
1907 if (pktlen > sizeof(flow->tid_entry))
1908 return 1;
1909 memcpy(flow->tid_entry, packet->ebuf, pktlen);
1910 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
1911
1912
1913
1914
1915
1916 flow->npkts = rvt_div_round_up_mtu(qp, len);
1917 for (i = 0; i < flow->tidcnt; i++) {
1918 trace_hfi1_tid_entry_rcv_read_req(qp, i,
1919 flow->tid_entry[i]);
1920 tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
1921 if (!tlen)
1922 return 1;
1923
1924
1925
1926
1927
1928
1929
1930 tidlen += tlen;
1931 }
1932 if (tidlen * PAGE_SIZE < len)
1933 return 1;
1934
1935
1936 req->clear_tail = req->setup_head;
1937 flow->pkt = 0;
1938 flow->tid_idx = 0;
1939 flow->tid_offset = 0;
1940 flow->sent = 0;
1941 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
1942 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
1943 TID_RDMA_DESTQP_FLOW_MASK;
1944 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
1945 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
1946 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
1947 flow->length = len;
1948
1949 flow->flow_state.lpsn = flow->flow_state.spsn +
1950 flow->npkts - 1;
1951 flow->flow_state.ib_spsn = psn;
1952 flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
1953
1954 trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
1955
1956 req->flow_idx = req->setup_head;
1957
1958
1959 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1960
1961
1962
1963
1964 e->opcode = (bth0 >> 24) & 0xff;
1965 e->psn = psn;
1966 e->lpsn = psn + flow->npkts - 1;
1967 e->sent = 0;
1968
1969 req->n_flows = qpriv->tid_rdma.local.max_read;
1970 req->state = TID_REQUEST_ACTIVE;
1971 req->cur_seg = 0;
1972 req->comp_seg = 0;
1973 req->ack_seg = 0;
1974 req->isge = 0;
1975 req->seg_len = qpriv->tid_rdma.local.max_len;
1976 req->total_len = len;
1977 req->total_segs = 1;
1978 req->r_flow_psn = e->psn;
1979
1980 trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
1981 req);
1982 return 0;
1983}
1984
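/*
 * Handle a TID RDMA READ/WRITE request received with an unexpected PSN:
 * a PSN ahead of r_psn generates a NAK, while a duplicate request rewinds
 * the matching ack queue entry (and any later entries it affects) so the
 * responder can resend from the requested point.
 */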
1985static int tid_rdma_rcv_error(struct hfi1_packet *packet,
1986 struct ib_other_headers *ohdr,
1987 struct rvt_qp *qp, u32 psn, int diff)
1988{
1989 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1990 struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
1991 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
1992 struct hfi1_qp_priv *qpriv = qp->priv;
1993 struct rvt_ack_entry *e;
1994 struct tid_rdma_request *req;
1995 unsigned long flags;
1996 u8 prev;
1997 bool old_req;
1998
1999 trace_hfi1_rsp_tid_rcv_error(qp, psn);
2000 trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
2001 if (diff > 0) {
2002
2003 if (!qp->r_nak_state) {
2004 ibp->rvp.n_rc_seqnak++;
2005 qp->r_nak_state = IB_NAK_PSN_ERROR;
2006 qp->r_ack_psn = qp->r_psn;
2007 rc_defered_ack(rcd, qp);
2008 }
2009 goto done;
2010 }
2011
2012 ibp->rvp.n_rc_dupreq++;
2013
2014 spin_lock_irqsave(&qp->s_lock, flags);
2015 e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
2016 if (!e || (e->opcode != TID_OP(READ_REQ) &&
2017 e->opcode != TID_OP(WRITE_REQ)))
2018 goto unlock;
2019
2020 req = ack_to_tid_req(e);
2021 req->r_flow_psn = psn;
2022 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
2023 if (e->opcode == TID_OP(READ_REQ)) {
2024 struct ib_reth *reth;
2025 u32 len;
2026 u32 rkey;
2027 u64 vaddr;
2028 int ok;
2029 u32 bth0;
2030
2031 reth = &ohdr->u.tid_rdma.r_req.reth;
2032
2033
2034
2035
2036 len = be32_to_cpu(reth->length);
2037 if (psn != e->psn || len != req->total_len)
2038 goto unlock;
2039
2040 release_rdma_sge_mr(e);
2041
2042 rkey = be32_to_cpu(reth->rkey);
2043 vaddr = get_ib_reth_vaddr(reth);
2044
2045 qp->r_len = len;
2046 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
2047 IB_ACCESS_REMOTE_READ);
2048 if (unlikely(!ok))
2049 goto unlock;
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061 bth0 = be32_to_cpu(ohdr->bth[0]);
2062 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
2063 vaddr, len))
2064 goto unlock;
2065
2066
2067
2068
2069
2070 if (old_req)
2071 goto unlock;
2072 } else {
2073 struct flow_state *fstate;
2074 bool schedule = false;
2075 u8 i;
2076
2077 if (req->state == TID_REQUEST_RESEND) {
2078 req->state = TID_REQUEST_RESEND_ACTIVE;
2079 } else if (req->state == TID_REQUEST_INIT_RESEND) {
2080 req->state = TID_REQUEST_INIT;
2081 schedule = true;
2082 }
2083
2084
2085
2086
2087
2088
2089
2090
2091 if (old_req || req->state == TID_REQUEST_INIT ||
2092 (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
2093 for (i = prev + 1; ; i++) {
2094 if (i > rvt_size_atomic(&dev->rdi))
2095 i = 0;
2096 if (i == qp->r_head_ack_queue)
2097 break;
2098 e = &qp->s_ack_queue[i];
2099 req = ack_to_tid_req(e);
2100 if (e->opcode == TID_OP(WRITE_REQ) &&
2101 req->state == TID_REQUEST_INIT)
2102 req->state = TID_REQUEST_INIT_RESEND;
2103 }
2104
2105
2106
2107
2108
2109
2110 if (!schedule)
2111 goto unlock;
2112 }
2113
2114
2115
2116
2117
2118 if (req->clear_tail == req->setup_head)
2119 goto schedule;
2120
2121
2122
2123
2124
2125
2126 if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
2127 fstate = &req->flows[req->clear_tail].flow_state;
2128 qpriv->pending_tid_w_segs -=
2129 CIRC_CNT(req->flow_idx, req->clear_tail,
2130 MAX_FLOWS);
2131 req->flow_idx =
2132 CIRC_ADD(req->clear_tail,
2133 delta_psn(psn, fstate->resp_ib_psn),
2134 MAX_FLOWS);
2135 qpriv->pending_tid_w_segs +=
2136 delta_psn(psn, fstate->resp_ib_psn);
2137
2138
2139
2140
2141
2142
2143
2144 if (CIRC_CNT(req->setup_head, req->flow_idx,
2145 MAX_FLOWS)) {
2146 req->cur_seg = delta_psn(psn, e->psn);
2147 req->state = TID_REQUEST_RESEND_ACTIVE;
2148 }
2149 }
2150
2151 for (i = prev + 1; ; i++) {
2152
2153
2154
2155
2156 if (i > rvt_size_atomic(&dev->rdi))
2157 i = 0;
2158 if (i == qp->r_head_ack_queue)
2159 break;
2160 e = &qp->s_ack_queue[i];
2161 req = ack_to_tid_req(e);
2162 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
2163 e->lpsn, req);
2164 if (e->opcode != TID_OP(WRITE_REQ) ||
2165 req->cur_seg == req->comp_seg ||
2166 req->state == TID_REQUEST_INIT ||
2167 req->state == TID_REQUEST_INIT_RESEND) {
2168 if (req->state == TID_REQUEST_INIT)
2169 req->state = TID_REQUEST_INIT_RESEND;
2170 continue;
2171 }
2172 qpriv->pending_tid_w_segs -=
2173 CIRC_CNT(req->flow_idx,
2174 req->clear_tail,
2175 MAX_FLOWS);
2176 req->flow_idx = req->clear_tail;
2177 req->state = TID_REQUEST_RESEND;
2178 req->cur_seg = req->comp_seg;
2179 }
2180 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
2181 }
2182
2183 if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
2184 qp->s_acked_ack_queue = prev;
2185 qp->s_tail_ack_queue = prev;
2186
2187
2188
2189
2190
2191
2192 qp->s_ack_state = OP(ACKNOWLEDGE);
2193schedule:
2194
2195
2196
2197
2198 if (qpriv->rnr_nak_state) {
2199 qp->s_nak_state = 0;
2200 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
2201 qp->r_psn = e->lpsn + 1;
2202 hfi1_tid_write_alloc_resources(qp, true);
2203 }
2204
2205 qp->r_state = e->opcode;
2206 qp->r_nak_state = 0;
2207 qp->s_flags |= RVT_S_RESP_PENDING;
2208 hfi1_schedule_send(qp);
2209unlock:
2210 spin_unlock_irqrestore(&qp->s_lock, flags);
2211done:
2212 return 1;
2213}
2214
2215void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
{
	/*
	 * Responder-side handling of a TID RDMA READ REQUEST:
	 * 1. Verify the request (PSN sequence, RETH and rkey).
	 * 2. Unpack it into an s_ack_queue entry via
	 *    tid_rdma_rcv_read_request().
	 * 3. Set RVT_S_RESP_PENDING and kick the send engine to generate
	 *    the TID RDMA READ RESPONSE packets.
	 * Out-of-sequence or duplicate requests are diverted to
	 * tid_rdma_rcv_err().
	 */
2230 struct hfi1_ctxtdata *rcd = packet->rcd;
2231 struct rvt_qp *qp = packet->qp;
2232 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
2233 struct ib_other_headers *ohdr = packet->ohdr;
2234 struct rvt_ack_entry *e;
2235 unsigned long flags;
2236 struct ib_reth *reth;
2237 struct hfi1_qp_priv *qpriv = qp->priv;
2238 u32 bth0, psn, len, rkey;
2239 bool fecn;
2240 u8 next;
2241 u64 vaddr;
2242 int diff;
2243 u8 nack_state = IB_NAK_INVALID_REQUEST;
2244
2245 bth0 = be32_to_cpu(ohdr->bth[0]);
2246 if (hfi1_ruc_check_hdr(ibp, packet))
2247 return;
2248
2249 fecn = process_ecn(qp, packet);
2250 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2251 trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
2252
2253 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2254 rvt_comm_est(qp);
2255
2256 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2257 goto nack_inv;
2258
2259 reth = &ohdr->u.tid_rdma.r_req.reth;
2260 vaddr = be64_to_cpu(reth->vaddr);
2261 len = be32_to_cpu(reth->length);
2262
2263 if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
2264 goto nack_inv;
2265
2266 diff = delta_psn(psn, qp->r_psn);
2267 if (unlikely(diff)) {
2268 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
2269 return;
2270 }
2271
2272
2273 next = qp->r_head_ack_queue + 1;
2274 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2275 next = 0;
2276 spin_lock_irqsave(&qp->s_lock, flags);
2277 if (unlikely(next == qp->s_tail_ack_queue)) {
2278 if (!qp->s_ack_queue[next].sent) {
2279 nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2280 goto nack_inv_unlock;
2281 }
2282 update_ack_queue(qp, next);
2283 }
2284 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2285 release_rdma_sge_mr(e);
2286
2287 rkey = be32_to_cpu(reth->rkey);
2288 qp->r_len = len;
2289
2290 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
2291 rkey, IB_ACCESS_REMOTE_READ)))
2292 goto nack_acc;
2293
2294
2295 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
2296 len))
2297 goto nack_inv_unlock;
2298
2299 qp->r_state = e->opcode;
2300 qp->r_nak_state = 0;
2301
2302
2303
2304
2305
2306 qp->r_msn++;
2307 qp->r_psn += e->lpsn - e->psn + 1;
2308
2309 qp->r_head_ack_queue = next;
2310
2311
2312
2313
2314
2315
2316
2317 qpriv->r_tid_alloc = qp->r_head_ack_queue;
2318
2319
2320 qp->s_flags |= RVT_S_RESP_PENDING;
2321 if (fecn)
2322 qp->s_flags |= RVT_S_ECN;
2323 hfi1_schedule_send(qp);
2324
2325 spin_unlock_irqrestore(&qp->s_lock, flags);
2326 return;
2327
2328nack_inv_unlock:
2329 spin_unlock_irqrestore(&qp->s_lock, flags);
2330nack_inv:
2331 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2332 qp->r_nak_state = nack_state;
2333 qp->r_ack_psn = qp->r_psn;
2334
2335 rc_defered_ack(rcd, qp);
2336 return;
2337nack_acc:
2338 spin_unlock_irqrestore(&qp->s_lock, flags);
2339 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2340 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2341 qp->r_ack_psn = qp->r_psn;
2342}
2343
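/*
 * Build the next TID RDMA READ RESPONSE packet for the segment at
 * req->clear_tail: fill the KDETH fields from the current TID entry,
 * derive the KDETH PSN from the flow's generation and sequence, and
 * advance the flow's TID offset.  *last is set on the final packet of
 * the segment.
 */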
2344u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
2345 struct ib_other_headers *ohdr, u32 *bth0,
2346 u32 *bth1, u32 *bth2, u32 *len, bool *last)
2347{
2348 struct hfi1_ack_priv *epriv = e->priv;
2349 struct tid_rdma_request *req = &epriv->tid_req;
2350 struct hfi1_qp_priv *qpriv = qp->priv;
2351 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
2352 u32 tidentry = flow->tid_entry[flow->tid_idx];
2353 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
2354 struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
2355 u32 next_offset, om = KDETH_OM_LARGE;
2356 bool last_pkt;
2357 u32 hdwords = 0;
2358 struct tid_rdma_params *remote;
2359
2360 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
2361 flow->sent += *len;
2362 next_offset = flow->tid_offset + *len;
2363 last_pkt = (flow->sent >= flow->length);
2364
2365 trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
2366 trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
2367
2368 rcu_read_lock();
2369 remote = rcu_dereference(qpriv->tid_rdma.remote);
2370 if (!remote) {
2371 rcu_read_unlock();
2372 goto done;
2373 }
2374 KDETH_RESET(resp->kdeth0, KVER, 0x1);
2375 KDETH_SET(resp->kdeth0, SH, !last_pkt);
2376 KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
2377 KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
2378 KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
2379 KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
2380 KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
2381 KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
2382 resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
2383 rcu_read_unlock();
2384
2385 resp->aeth = rvt_compute_aeth(qp);
2386 resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
2387 flow->pkt));
2388
2389 *bth0 = TID_OP(READ_RESP) << 24;
2390 *bth1 = flow->tid_qpn;
2391 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
2392 HFI1_KDETH_BTH_SEQ_MASK) |
2393 (flow->flow_state.generation <<
2394 HFI1_KDETH_BTH_SEQ_SHIFT));
2395 *last = last_pkt;
2396 if (last_pkt)
2397
2398 req->clear_tail = (req->clear_tail + 1) &
2399 (MAX_FLOWS - 1);
2400
2401 if (next_offset >= tidlen) {
2402 flow->tid_offset = 0;
2403 flow->tid_idx++;
2404 } else {
2405 flow->tid_offset = next_offset;
2406 }
2407
2408 hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
2409
2410done:
2411 return hdwords;
2412}
2413
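/*
 * Find the TID RDMA request of the given opcode whose PSN range contains
 * @psn, scanning the send queue from s_acked through s_cur.
 */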
2414static inline struct tid_rdma_request *
2415find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
2416 __must_hold(&qp->s_lock)
2417{
2418 struct rvt_swqe *wqe;
2419 struct tid_rdma_request *req = NULL;
2420 u32 i, end;
2421
2422 end = qp->s_cur + 1;
2423 if (end == qp->s_size)
2424 end = 0;
2425 for (i = qp->s_acked; i != end;) {
2426 wqe = rvt_get_swqe_ptr(qp, i);
2427 if (cmp_psn(psn, wqe->psn) >= 0 &&
2428 cmp_psn(psn, wqe->lpsn) <= 0) {
2429 if (wqe->wr.opcode == opcode)
2430 req = wqe_to_tid_req(wqe);
2431 break;
2432 }
2433 if (++i == qp->s_size)
2434 i = 0;
2435 }
2436
2437 return req;
2438}
2439
2440void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
2441{
	/*
	 * Requester-side handling of a TID RDMA READ RESPONSE: locate the
	 * matching request/flow from the verbs PSN, handle any KDETH PSN
	 * sequence error, release the segment's TID resources, feed the
	 * AETH to do_rc_ack() and, once all segments of the request have
	 * completed, release the hardware flow.
	 */
2451 struct ib_other_headers *ohdr = packet->ohdr;
2452 struct rvt_qp *qp = packet->qp;
2453 struct hfi1_qp_priv *priv = qp->priv;
2454 struct hfi1_ctxtdata *rcd = packet->rcd;
2455 struct tid_rdma_request *req;
2456 struct tid_rdma_flow *flow;
2457 u32 opcode, aeth;
2458 bool fecn;
2459 unsigned long flags;
2460 u32 kpsn, ipsn;
2461
2462 trace_hfi1_sender_rcv_tid_read_resp(qp);
2463 fecn = process_ecn(qp, packet);
2464 kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2465 aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
2466 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2467
2468 spin_lock_irqsave(&qp->s_lock, flags);
2469 ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
2470 req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
2471 if (unlikely(!req))
2472 goto ack_op_err;
2473
2474 flow = &req->flows[req->clear_tail];
2475
2476 if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
2477 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
2478
2479 if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
2480 goto ack_done;
2481 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
		/*
		 * If this packet was delivered in the eager buffer (e.g. due
		 * to FECN), the payload was not placed by the expected
		 * receive engine and has to be copied to the destination
		 * buffer here.
		 */
2489 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
2490 struct rvt_sge_state ss;
2491 u32 len;
2492 u32 tlen = packet->tlen;
2493 u16 hdrsize = packet->hlen;
2494 u8 pad = packet->pad;
2495 u8 extra_bytes = pad + packet->extra_byte +
2496 (SIZE_OF_CRC << 2);
2497 u32 pmtu = qp->pmtu;
2498
2499 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2500 goto ack_op_err;
2501 len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
2502 if (unlikely(len < pmtu))
2503 goto ack_op_err;
2504 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
2505 false);
			/* Switch to software PSN tracking from now on */
2507 priv->s_flags |= HFI1_R_TID_SW_PSN;
2508 }
2509
2510 goto ack_done;
2511 }
2512 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2513 req->ack_pending--;
2514 priv->pending_tid_r_segs--;
2515 qp->s_num_rd_atomic--;
2516 if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
2517 !qp->s_num_rd_atomic) {
2518 qp->s_flags &= ~(RVT_S_WAIT_FENCE |
2519 RVT_S_WAIT_ACK);
2520 hfi1_schedule_send(qp);
2521 }
2522 if (qp->s_flags & RVT_S_WAIT_RDMAR) {
2523 qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
2524 hfi1_schedule_send(qp);
2525 }
2526
2527 trace_hfi1_ack(qp, ipsn);
2528 trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
2529 req->e.swqe->psn, req->e.swqe->lpsn,
2530 req);
2531 trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
2532
2533
2534 hfi1_kern_exp_rcv_clear(req);
2535
2536 if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
2537 goto ack_done;
2538
2539
2540 if (++req->comp_seg >= req->total_segs) {
2541 priv->tid_r_comp++;
2542 req->state = TID_REQUEST_COMPLETE;
2543 }
2544
	/*
	 * Release the hardware flow if this is a sync point and all of its
	 * outstanding segments have completed, or if all TID RDMA READ
	 * requests for this QP have completed.
	 */
2550 if ((req->state == TID_REQUEST_SYNC &&
2551 req->comp_seg == req->cur_seg) ||
2552 priv->tid_r_comp == priv->tid_r_reqs) {
2553 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2554 priv->s_flags &= ~HFI1_R_TID_SW_PSN;
2555 if (req->state == TID_REQUEST_SYNC)
2556 req->state = TID_REQUEST_ACTIVE;
2557 }
2558
2559 hfi1_schedule_send(qp);
2560 goto ack_done;
2561
2562ack_op_err:
	/*
	 * The test below indicates that the send engine has finished its
	 * cleanup after sending the request and it is now safe to put the
	 * QP into the error state. If qp->s_last has not caught up with
	 * qp->s_acked, the send engine may still be referencing the WQE,
	 * so the error transition is deferred.
	 */
2571 if (qp->s_last == qp->s_acked)
2572 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
2573
2574ack_done:
2575 spin_unlock_irqrestore(&qp->s_lock, flags);
2576}
2577
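/*
 * Release the TID/RcvArray resources of every outstanding TID RDMA READ
 * request between qp->s_acked and qp->s_tail, then clear the QP's hardware
 * flow. Called with qp->s_lock held.
 */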
2578void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
2579 __must_hold(&qp->s_lock)
2580{
2581 u32 n = qp->s_acked;
2582 struct rvt_swqe *wqe;
2583 struct tid_rdma_request *req;
2584 struct hfi1_qp_priv *priv = qp->priv;
2585
2586 lockdep_assert_held(&qp->s_lock);
2587
2588 while (n != qp->s_tail) {
2589 wqe = rvt_get_swqe_ptr(qp, n);
2590 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2591 req = wqe_to_tid_req(wqe);
2592 hfi1_kern_exp_rcv_clear_all(req);
2593 }
2594
2595 if (++n == qp->s_size)
2596 n = 0;
2597 }
2598
2599 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2600}
2601
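/*
 * Handle a TID error (RHF_TID_ERR) for a KDETH packet. For eagerly received
 * packets, the RC request is restarted from the last acknowledged PSN.
 * Returns true to indicate that the packet has been consumed here.
 */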
2602static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
2603{
2604 struct rvt_qp *qp = packet->qp;
2605
2606 if (rcv_type >= RHF_RCV_TYPE_IB)
2607 goto done;
2608
2609 spin_lock(&qp->s_lock);
	/*
	 * Eagerly received KDETH packets that carry a payload (such as TID
	 * RDMA WRITE RESP packets) need space in the eager buffer. If that
	 * space ran out, restart the request from the last acknowledged PSN
	 * so the response is re-sent.
	 */
2618 if (rcv_type == RHF_RCV_TYPE_EAGER) {
2619 hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
2620 hfi1_schedule_send(qp);
2621 }
2622
2623
2624 spin_unlock(&qp->s_lock);
2625done:
2626 return true;
2627}
2628
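/*
 * Restart a TID RDMA READ request from the first IB PSN of the flow
 * currently being received and queue the QP so the request is re-sent.
 */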
2629static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
2630 struct rvt_qp *qp, struct rvt_swqe *wqe)
2631{
2632 struct tid_rdma_request *req;
2633 struct tid_rdma_flow *flow;
2634
2635
2636 qp->r_flags |= RVT_R_RDMAR_SEQ;
2637 req = wqe_to_tid_req(wqe);
2638 flow = &req->flows[req->clear_tail];
2639 hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
2640 if (list_empty(&qp->rspwait)) {
2641 qp->r_flags |= RVT_R_RSP_SEND;
2642 rvt_get_qp(qp);
2643 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2644 }
2645}
2646
/*
 * Handle KDETH eflags for TID RDMA READ response packets.
 *
 * Called with packet->qp->r_lock held and the rcu_read_lock.
 *
 * Returns false if the last packet of a segment has been received and the
 * packet should be processed as a normal TID RDMA READ RESP; returns true
 * if the packet has been handled (or dropped) here.
 */
2655static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2656 struct hfi1_packet *packet, u8 rcv_type,
2657 u8 rte, u32 psn, u32 ibpsn)
2658 __must_hold(&packet->qp->r_lock) __must_hold(RCU)
2659{
2660 struct hfi1_pportdata *ppd = rcd->ppd;
2661 struct hfi1_devdata *dd = ppd->dd;
2662 struct hfi1_ibport *ibp;
2663 struct rvt_swqe *wqe;
2664 struct tid_rdma_request *req;
2665 struct tid_rdma_flow *flow;
2666 u32 ack_psn;
2667 struct rvt_qp *qp = packet->qp;
2668 struct hfi1_qp_priv *priv = qp->priv;
2669 bool ret = true;
2670 int diff = 0;
2671 u32 fpsn;
2672
2673 lockdep_assert_held(&qp->r_lock);
2674 trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
2675 trace_hfi1_sender_read_kdeth_eflags(qp);
2676 trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
2677 spin_lock(&qp->s_lock);
2678
2679 if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
2680 cmp_psn(ibpsn, qp->s_psn) > 0)
2681 goto s_unlock;
2682
	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA WRITE
	 * requests and implicitly NAK RDMA READ and atomic requests issued
	 * before the NAK'ed request.
	 */
2688 ack_psn = ibpsn - 1;
2689 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
2690 ibp = to_iport(qp->ibqp.device, qp->port_num);
2691
2692
2693 while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
		/*
		 * If this request is an RDMA read or atomic, and the NAK is
		 * for a later operation, this NAK also NAKs the RDMA read or
		 * atomic operation, which therefore has to be restarted.
		 */
2699 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
2700 wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
2701 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2702 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2703
2704 if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
2705 qp->r_flags |= RVT_R_RDMAR_SEQ;
2706 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2707 restart_tid_rdma_read_req(rcd, qp,
2708 wqe);
2709 } else {
2710 hfi1_restart_rc(qp, qp->s_last_psn + 1,
2711 0);
2712 if (list_empty(&qp->rspwait)) {
2713 qp->r_flags |= RVT_R_RSP_SEND;
2714 rvt_get_qp(qp);
2715 list_add_tail(
2716 &qp->rspwait,
2717 &rcd->qp_wait_list);
2718 }
2719 }
2720 }
			/*
			 * No need to process the NAK since we are
			 * restarting an earlier request.
			 */
2725 break;
2726 }
2727
2728 wqe = do_rc_completion(qp, wqe, ibp);
2729 if (qp->s_acked == qp->s_tail)
2730 goto s_unlock;
2731 }
2732
2733 if (qp->s_acked == qp->s_tail)
2734 goto s_unlock;
2735
2736
2737 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
2738 goto s_unlock;
2739
2740 req = wqe_to_tid_req(wqe);
2741 trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
2742 wqe->lpsn, req);
2743 switch (rcv_type) {
2744 case RHF_RCV_TYPE_EXPECTED:
2745 switch (rte) {
2746 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
			/*
			 * On the first occurrence of a Flow Sequence error,
			 * the HFI1_R_TID_SW_PSN flag is set and the KDETH PSN
			 * sequence is tracked in software from then on. The
			 * expected PSN is read back from hardware once and
			 * the request is restarted if necessary; subsequent
			 * errors are handled entirely in software.
			 */
2756 flow = &req->flows[req->clear_tail];
2757 trace_hfi1_tid_flow_read_kdeth_eflags(qp,
2758 req->clear_tail,
2759 flow);
2760 if (priv->s_flags & HFI1_R_TID_SW_PSN) {
2761 diff = cmp_psn(psn,
2762 flow->flow_state.r_next_psn);
2763 if (diff > 0) {
					/* Drop the packet */
2765 goto s_unlock;
2766 } else if (diff < 0) {
					/*
					 * If a response for a restarted
					 * request has come back, reset the
					 * restart flag; the older packet is
					 * a duplicate and is dropped.
					 */
2772 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2773 qp->r_flags &=
2774 ~RVT_R_RDMAR_SEQ;
2775
2776
2777 goto s_unlock;
2778 }
2779
				/*
				 * If the packet is the last of the segment
				 * (its PSN matches the flow's last PSN), let
				 * the caller process it as a normal packet
				 * by returning false.
				 */
2785 fpsn = full_flow_psn(flow,
2786 flow->flow_state.lpsn);
2787 if (cmp_psn(fpsn, psn) == 0) {
2788 ret = false;
2789 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2790 qp->r_flags &=
2791 ~RVT_R_RDMAR_SEQ;
2792 }
2793 flow->flow_state.r_next_psn =
2794 mask_psn(psn + 1);
2795 } else {
2796 u32 last_psn;
2797
2798 last_psn = read_r_next_psn(dd, rcd->ctxt,
2799 flow->idx);
2800 flow->flow_state.r_next_psn = last_psn;
2801 priv->s_flags |= HFI1_R_TID_SW_PSN;
				/*
				 * If no request has been restarted yet,
				 * restart the current one.
				 */
2806 if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
2807 restart_tid_rdma_read_req(rcd, qp,
2808 wqe);
2809 }
2810
2811 break;
2812
2813 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
			/*
			 * Since the TID flow is able to ride through
			 * generation mismatches, drop this stale packet.
			 */
2818 break;
2819
2820 default:
2821 break;
2822 }
2823 break;
2824
2825 case RHF_RCV_TYPE_ERROR:
2826 switch (rte) {
2827 case RHF_RTE_ERROR_OP_CODE_ERR:
2828 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
2829 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
2830 case RHF_RTE_ERROR_KHDR_KVER_ERR:
2831 case RHF_RTE_ERROR_CONTEXT_ERR:
2832 case RHF_RTE_ERROR_KHDR_TID_ERR:
2833 default:
2834 break;
2835 }
2836 break;
2837 default:
2838 break;
2839 }
2840s_unlock:
2841 spin_unlock(&qp->s_lock);
2842 return ret;
2843}
2844
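/*
 * Top-level handler for KDETH eflags (TID errors) reported on the receive
 * path. Looks up the QP from the packet, dispatches TID RDMA READ RESP
 * errors to handle_read_kdeth_eflags() and handles TID RDMA WRITE DATA
 * errors here. Returns true if the packet has been consumed (or should be
 * dropped) and false if it should still be processed as a normal packet.
 */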
2845bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2846 struct hfi1_pportdata *ppd,
2847 struct hfi1_packet *packet)
2848{
2849 struct hfi1_ibport *ibp = &ppd->ibport_data;
2850 struct hfi1_devdata *dd = ppd->dd;
2851 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
2852 u8 rcv_type = rhf_rcv_type(packet->rhf);
2853 u8 rte = rhf_rcv_type_err(packet->rhf);
2854 struct ib_header *hdr = packet->hdr;
2855 struct ib_other_headers *ohdr = NULL;
2856 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
2857 u16 lid = be16_to_cpu(hdr->lrh[1]);
2858 u8 opcode;
2859 u32 qp_num, psn, ibpsn;
2860 struct rvt_qp *qp;
2861 struct hfi1_qp_priv *qpriv;
2862 unsigned long flags;
2863 bool ret = true;
2864 struct rvt_ack_entry *e;
2865 struct tid_rdma_request *req;
2866 struct tid_rdma_flow *flow;
2867 int diff = 0;
2868
2869 trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
2870 packet->rhf);
2871 if (packet->rhf & RHF_ICRC_ERR)
2872 return ret;
2873
2874 packet->ohdr = &hdr->u.oth;
2875 ohdr = packet->ohdr;
2876 trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
2877
2878
2879 qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
2880 RVT_QPN_MASK;
2881 if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
2882 goto drop;
2883
2884 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2885 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2886
2887 rcu_read_lock();
2888 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
2889 if (!qp)
2890 goto rcu_unlock;
2891
2892 packet->qp = qp;
2893
2894
2895 spin_lock_irqsave(&qp->r_lock, flags);
2896 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2897 ibp->rvp.n_pkt_drops++;
2898 goto r_unlock;
2899 }
2900
2901 if (packet->rhf & RHF_TID_ERR) {
2902
2903 u32 tlen = rhf_pkt_len(packet->rhf);
2904
2905
2906 if (tlen < 24)
2907 goto r_unlock;
2908
		/*
		 * Check for GRH. We should never get packets with a GRH in
		 * this path.
		 */
2913 if (lnh == HFI1_LRH_GRH)
2914 goto r_unlock;
2915
2916 if (tid_rdma_tid_err(packet, rcv_type))
2917 goto r_unlock;
2918 }
2919
2920
2921 if (opcode == TID_OP(READ_RESP)) {
2922 ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
2923 ibpsn = mask_psn(ibpsn);
2924 ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
2925 ibpsn);
2926 goto r_unlock;
2927 }
2928
	/*
	 * The remaining eflags belong to TID RDMA WRITE DATA packets.
	 * Locate the ack entry (and flow) the responder is currently
	 * receiving data for and check the sequence/generation errors
	 * against it.
	 */
2934 spin_lock(&qp->s_lock);
2935 qpriv = qp->priv;
2936 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
2937 qpriv->r_tid_tail == qpriv->r_tid_head)
2938 goto unlock;
2939 e = &qp->s_ack_queue[qpriv->r_tid_tail];
2940 if (e->opcode != TID_OP(WRITE_REQ))
2941 goto unlock;
2942 req = ack_to_tid_req(e);
2943 if (req->comp_seg == req->cur_seg)
2944 goto unlock;
2945 flow = &req->flows[req->clear_tail];
2946 trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
2947 trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
2948 trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
2949 trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
2950 e->lpsn, req);
2951 trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
2952
2953 switch (rcv_type) {
2954 case RHF_RCV_TYPE_EXPECTED:
2955 switch (rte) {
2956 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2957 if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
2958 qpriv->s_flags |= HFI1_R_TID_SW_PSN;
2959 flow->flow_state.r_next_psn =
2960 read_r_next_psn(dd, rcd->ctxt,
2961 flow->idx);
2962 qpriv->r_next_psn_kdeth =
2963 flow->flow_state.r_next_psn;
2964 goto nak_psn;
2965 } else {
				/*
				 * If the received PSN does not match the
				 * next expected PSN, NAK the packet, but only
				 * if it is ahead of the expected PSN; an
				 * older PSN may simply be a packet that was
				 * already in flight.
				 */
2974 diff = cmp_psn(psn,
2975 flow->flow_state.r_next_psn);
2976 if (diff > 0)
2977 goto nak_psn;
2978 else if (diff < 0)
2979 break;
2980
2981 qpriv->s_nak_state = 0;
				/*
				 * If the packet is the last one of the
				 * segment, tell the caller to process it as
				 * a normal packet by returning false.
				 */
2987 if (psn == full_flow_psn(flow,
2988 flow->flow_state.lpsn))
2989 ret = false;
2990 flow->flow_state.r_next_psn =
2991 mask_psn(psn + 1);
2992 qpriv->r_next_psn_kdeth =
2993 flow->flow_state.r_next_psn;
2994 }
2995 break;
2996
2997 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2998 goto nak_psn;
2999
3000 default:
3001 break;
3002 }
3003 break;
3004
3005 case RHF_RCV_TYPE_ERROR:
3006 switch (rte) {
3007 case RHF_RTE_ERROR_OP_CODE_ERR:
3008 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
3009 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
3010 case RHF_RTE_ERROR_KHDR_KVER_ERR:
3011 case RHF_RTE_ERROR_CONTEXT_ERR:
3012 case RHF_RTE_ERROR_KHDR_TID_ERR:
3013 default:
3014 break;
3015 }
3016 break;
3017 default:
3018 break;
3019 }
3020
3021unlock:
3022 spin_unlock(&qp->s_lock);
3023r_unlock:
3024 spin_unlock_irqrestore(&qp->r_lock, flags);
3025rcu_unlock:
3026 rcu_read_unlock();
3027drop:
3028 return ret;
3029nak_psn:
3030 ibp->rvp.n_rc_seqnak++;
3031 if (!qpriv->s_nak_state) {
3032 qpriv->s_nak_state = IB_NAK_PSN_ERROR;
3033
3034 qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
3035 tid_rdma_trigger_ack(qp);
3036 }
3037 goto unlock;
3038}
3039
/*
 * "Rewind" the TID request information.
 * This resets the state and counters of the flow (and, for TID RDMA WRITE,
 * the SGE) back to the point from which the request has to be resent, based
 * on the PSN being retransmitted.
 */
3046void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3047 u32 *bth2)
3048{
3049 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3050 struct tid_rdma_flow *flow;
3051 struct hfi1_qp_priv *qpriv = qp->priv;
3052 int diff, delta_pkts;
3053 u32 tididx = 0, i;
3054 u16 fidx;
3055
3056 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3057 *bth2 = mask_psn(qp->s_psn);
3058 flow = find_flow_ib(req, *bth2, &fidx);
3059 if (!flow) {
3060 trace_hfi1_msg_tid_restart_req(
3061 qp, "!!!!!! Could not find flow to restart: bth2 ",
3062 (u64)*bth2);
3063 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
3064 wqe->psn, wqe->lpsn,
3065 req);
3066 return;
3067 }
3068 } else {
3069 fidx = req->acked_tail;
3070 flow = &req->flows[fidx];
3071 *bth2 = mask_psn(req->r_ack_psn);
3072 }
3073
3074 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3075 delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
3076 else
3077 delta_pkts = delta_psn(*bth2,
3078 full_flow_psn(flow,
3079 flow->flow_state.spsn));
3080
3081 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3082 diff = delta_pkts + flow->resync_npkts;
3083
3084 flow->sent = 0;
3085 flow->pkt = 0;
3086 flow->tid_idx = 0;
3087 flow->tid_offset = 0;
3088 if (diff) {
3089 for (tididx = 0; tididx < flow->tidcnt; tididx++) {
3090 u32 tidentry = flow->tid_entry[tididx], tidlen,
3091 tidnpkts, npkts;
3092
3093 flow->tid_offset = 0;
3094 tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
3095 tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
3096 npkts = min_t(u32, diff, tidnpkts);
3097 flow->pkt += npkts;
3098 flow->sent += (npkts == tidnpkts ? tidlen :
3099 npkts * qp->pmtu);
3100 flow->tid_offset += npkts * qp->pmtu;
3101 diff -= npkts;
3102 if (!diff)
3103 break;
3104 }
3105 }
3106 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3107 rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
3108 flow->sent, 0);
		/*
		 * Packet PSNs are based on flow_state.spsn + flow->pkt.
		 * However, after a RESYNC the generation is incremented and
		 * the sequence restarts from 0. Since npkts has been adjusted
		 * for the resync'ed packets and the SGE has already been
		 * advanced, flow->pkt has to be pulled back so the correct
		 * PSN is generated.
		 */
3116 flow->pkt -= flow->resync_npkts;
3117 }
3118
3119 if (flow->tid_offset ==
3120 EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
3121 tididx++;
3122 flow->tid_offset = 0;
3123 }
3124 flow->tid_idx = tididx;
3125 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
		/* Point the request back at the restarted flow */
3127 req->flow_idx = fidx;
3128 else
3129 req->clear_tail = fidx;
3130
3131 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3132 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
3133 wqe->lpsn, req);
3134 req->state = TID_REQUEST_ACTIVE;
3135 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3136
3137 fidx = CIRC_NEXT(fidx, MAX_FLOWS);
3138 i = qpriv->s_tid_tail;
3139 do {
3140 for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
3141 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
3142 req->flows[fidx].sent = 0;
3143 req->flows[fidx].pkt = 0;
3144 req->flows[fidx].tid_idx = 0;
3145 req->flows[fidx].tid_offset = 0;
3146 req->flows[fidx].resync_npkts = 0;
3147 }
3148 if (i == qpriv->s_tid_cur)
3149 break;
3150 do {
				if (++i == qp->s_size)
					i = 0;
				wqe = rvt_get_swqe_ptr(qp, i);
3153 } while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
3154 req = wqe_to_tid_req(wqe);
3155 req->cur_seg = req->ack_seg;
3156 fidx = req->acked_tail;
3157
3158 req->clear_tail = fidx;
3159 } while (1);
3160 }
3161}
3162
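/*
 * Release all TID RDMA resources held by a QP: the hardware flow (if one is
 * programmed) and the RcvArray entries of every outstanding TID RDMA READ
 * request on the send queue and TID RDMA WRITE request in the ack queue.
 */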
3163void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
3164{
3165 int i, ret;
3166 struct hfi1_qp_priv *qpriv = qp->priv;
3167 struct tid_flow_state *fs;
3168
3169 if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
3170 return;
3171
	/*
	 * First, clear the hardware flow to help prevent any delayed
	 * packets from being delivered.
	 */
3176 fs = &qpriv->flow_state;
3177 if (fs->index != RXE_NUM_TID_FLOWS)
3178 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3179
3180 for (i = qp->s_acked; i != qp->s_head;) {
3181 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
3182
3183 if (++i == qp->s_size)
3184 i = 0;
3185
3186 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
3187 continue;
3188 do {
3189 struct hfi1_swqe_priv *priv = wqe->priv;
3190
3191 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3192 } while (!ret);
3193 }
3194 for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
3195 struct rvt_ack_entry *e = &qp->s_ack_queue[i];
3196
3197 if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
3198 i = 0;
3199
3200 if (e->opcode != TID_OP(WRITE_REQ))
3201 continue;
3202 do {
3203 struct hfi1_ack_priv *priv = e->priv;
3204
3205 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3206 } while (!ret);
3207 }
3208}
3209
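/*
 * Determine whether @wqe has to wait for the previous WQE on the send queue.
 * Returns true and sets HFI1_S_TID_WAIT_INTERLCK if the preceding TID RDMA
 * WRITE (or, for TID RDMA READ, a preceding RDMA READ) has not been fully
 * acknowledged yet.
 */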
3210bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
3211{
3212 struct rvt_swqe *prev;
3213 struct hfi1_qp_priv *priv = qp->priv;
3214 u32 s_prev;
3215 struct tid_rdma_request *req;
3216
3217 s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
3218 prev = rvt_get_swqe_ptr(qp, s_prev);
3219
3220 switch (wqe->wr.opcode) {
3221 case IB_WR_SEND:
3222 case IB_WR_SEND_WITH_IMM:
3223 case IB_WR_SEND_WITH_INV:
3224 case IB_WR_ATOMIC_CMP_AND_SWP:
3225 case IB_WR_ATOMIC_FETCH_AND_ADD:
3226 case IB_WR_RDMA_WRITE:
3227 case IB_WR_RDMA_WRITE_WITH_IMM:
3228 switch (prev->wr.opcode) {
3229 case IB_WR_TID_RDMA_WRITE:
3230 req = wqe_to_tid_req(prev);
3231 if (req->ack_seg != req->total_segs)
3232 goto interlock;
3233 break;
3234 default:
3235 break;
3236 }
3237 break;
3238 case IB_WR_RDMA_READ:
3239 if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
3240 break;
3241 fallthrough;
3242 case IB_WR_TID_RDMA_READ:
3243 switch (prev->wr.opcode) {
3244 case IB_WR_RDMA_READ:
3245 if (qp->s_acked != qp->s_cur)
3246 goto interlock;
3247 break;
3248 case IB_WR_TID_RDMA_WRITE:
3249 req = wqe_to_tid_req(prev);
3250 if (req->ack_seg != req->total_segs)
3251 goto interlock;
3252 break;
3253 default:
3254 break;
3255 }
3256 break;
3257 default:
3258 break;
3259 }
3260 return false;
3261
3262interlock:
3263 priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
3264 return true;
3265}
3266
/* Does @sge meet the alignment requirements for TID RDMA? */
3268static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
3269 struct rvt_sge *sge, int num_sge)
3270{
3271 int i;
3272
3273 for (i = 0; i < num_sge; i++, sge++) {
3274 trace_hfi1_sge_check_align(qp, i, sge);
3275 if ((u64)sge->vaddr & ~PAGE_MASK ||
3276 sge->sge_length & ~PAGE_MASK)
3277 return false;
3278 }
3279 return true;
3280}
3281
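/*
 * Called when a verbs RDMA READ or RDMA WRITE work request is set up.
 * If TID RDMA has been negotiated with the remote node and the buffers meet
 * the alignment requirements, convert the WQE to its TID RDMA variant and
 * initialize the per-WQE TID request state (segment length, segment count,
 * flow bookkeeping and the adjusted last PSN).
 */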
3282void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
3283{
3284 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
3285 struct hfi1_swqe_priv *priv = wqe->priv;
3286 struct tid_rdma_params *remote;
3287 enum ib_wr_opcode new_opcode;
3288 bool do_tid_rdma = false;
3289 struct hfi1_pportdata *ppd = qpriv->rcd->ppd;
3290
3291 if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
3292 ppd->lid)
3293 return;
3294 if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
3295 return;
3296
3297 rcu_read_lock();
3298 remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * If TID RDMA has not been negotiated with the remote side (no
	 * remote parameters), leave the opcode unchanged.
	 */
3303 if (!remote)
3304 goto exit;
3305
3306 if (wqe->wr.opcode == IB_WR_RDMA_READ) {
3307 if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
3308 wqe->wr.num_sge)) {
3309 new_opcode = IB_WR_TID_RDMA_READ;
3310 do_tid_rdma = true;
3311 }
3312 } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
		/*
		 * TID RDMA is used for this WRITE request only if the remote
		 * address is page-aligned and the total length is a multiple
		 * of the page size.
		 */
3319 if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
3320 !(wqe->length & ~PAGE_MASK)) {
3321 new_opcode = IB_WR_TID_RDMA_WRITE;
3322 do_tid_rdma = true;
3323 }
3324 }
3325
3326 if (do_tid_rdma) {
3327 if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
3328 goto exit;
3329 wqe->wr.opcode = new_opcode;
3330 priv->tid_req.seg_len =
3331 min_t(u32, remote->max_len, wqe->length);
3332 priv->tid_req.total_segs =
3333 DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
3334
3335 wqe->lpsn = wqe->psn;
3336 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3337 priv->tid_req.n_flows = remote->max_read;
3338 qpriv->tid_r_reqs++;
3339 wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
3340 } else {
3341 wqe->lpsn += priv->tid_req.total_segs - 1;
3342 atomic_inc(&qpriv->n_requests);
3343 }
3344
3345 priv->tid_req.cur_seg = 0;
3346 priv->tid_req.comp_seg = 0;
3347 priv->tid_req.ack_seg = 0;
3348 priv->tid_req.state = TID_REQUEST_INACTIVE;
		/*
		 * Reset acked_tail.
		 * TID RDMA READ does not have ACKs so it does not update the
		 * pointer; it has to be reset here so TID RDMA WRITE does
		 * not get confused.
		 */
3355 priv->tid_req.acked_tail = priv->tid_req.setup_head;
3356 trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
3357 wqe->psn, wqe->lpsn,
3358 &priv->tid_req);
3359 }
3360exit:
3361 rcu_read_unlock();
3362}
3363
/* TID RDMA WRITE functions */
3365
3366u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3367 struct ib_other_headers *ohdr,
3368 u32 *bth1, u32 *bth2, u32 *len)
3369{
3370 struct hfi1_qp_priv *qpriv = qp->priv;
3371 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3372 struct tid_rdma_params *remote;
3373
3374 rcu_read_lock();
3375 remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * Set the number of flows to be used based on the negotiated
	 * remote parameters.
	 */
3380 req->n_flows = remote->max_write;
3381 req->state = TID_REQUEST_ACTIVE;
3382
3383 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
3384 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
3385 ohdr->u.tid_rdma.w_req.reth.vaddr =
3386 cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
3387 ohdr->u.tid_rdma.w_req.reth.rkey =
3388 cpu_to_be32(wqe->rdma_wr.rkey);
3389 ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
3390 ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
3391 *bth1 &= ~RVT_QPN_MASK;
3392 *bth1 |= remote->qp;
3393 qp->s_state = TID_OP(WRITE_REQ);
3394 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
3395 *bth2 |= IB_BTH_REQ_ACK;
3396 *len = 0;
3397
3398 rcu_read_unlock();
3399 return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
3400}
3401
3402static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
3403{
	/*
	 * Heuristic for computing the RNR timeout while waiting on the flow
	 * queue. Rather than an exact estimate of when a flow will become
	 * available, assume that a QP at position N in the flow queue waits
	 * roughly N times the number of segments between two sync points,
	 * since flows are released and recycled at each sync point.
	 */
3412 return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
3413}
3414
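/*
 * Return how far this QP is from the head of @queue, i.e. the number of
 * dequeue operations that have to happen before it is scheduled.
 */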
3415static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
3416 struct tid_queue *queue)
3417{
3418 return qpriv->tid_enqueue - queue->dequeue;
3419}
3420
/*
 * @qp: points to rvt_qp context.
 * @to_seg: desired RNR timeout in segments.
 * Return: index of the next highest entry in the RNR timeout table.
 */
3426static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
3427{
3428 struct hfi1_qp_priv *qpriv = qp->priv;
3429 u64 timeout;
3430 u32 bytes_per_us;
3431 u8 i;
3432
3433 bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
3434 timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
3435
	/*
	 * Find the next highest value in the RNR table above the required
	 * timeout. This gives the responder some padding.
	 */
3439 for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
3440 if (rvt_rnr_tbl_to_usec(i) >= timeout)
3441 return i;
3442 return 0;
3443}
3444
3445
/*
 * Central place for resource allocation at the TID RDMA WRITE responder.
 * It is called from the WRITE REQ and WRITE DATA receive handlers as well as
 * from the send engine when a queued QP is scheduled for resource allocation.
 *
 * It iterates over (a) segments of a request and then (b) queued requests
 * themselves to allocate resources for up to local->max_write segments
 * across multiple requests. Allocation stops at a sync point and resumes
 * once the data packets up to the sync point have been received.
 */
3464static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
3465{
3466 struct tid_rdma_request *req;
3467 struct hfi1_qp_priv *qpriv = qp->priv;
3468 struct hfi1_ctxtdata *rcd = qpriv->rcd;
3469 struct tid_rdma_params *local = &qpriv->tid_rdma.local;
3470 struct rvt_ack_entry *e;
3471 u32 npkts, to_seg;
3472 bool last;
3473 int ret = 0;
3474
3475 lockdep_assert_held(&qp->s_lock);
3476
3477 while (1) {
3478 trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
3479 trace_hfi1_tid_write_rsp_alloc_res(qp);
		/*
		 * Don't allocate more segments while an RNR NAK is scheduled
		 * but has not yet gone out. Once the NAK is sent and the
		 * requester resends the request, allocation resumes from the
		 * NAK'ed segment.
		 */
3492 if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
3493 break;
3494
3495
3496 if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
3497
3498 if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
3499 !qpriv->alloc_w_segs) {
3500 hfi1_kern_clear_hw_flow(rcd, qp);
3501 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3502 }
3503 break;
3504 }
3505
3506 e = &qp->s_ack_queue[qpriv->r_tid_alloc];
3507 if (e->opcode != TID_OP(WRITE_REQ))
3508 goto next_req;
3509 req = ack_to_tid_req(e);
3510 trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
3511 e->lpsn, req);
3512
3513 if (req->alloc_seg >= req->total_segs)
3514 goto next_req;
3515
3516
3517 if (qpriv->alloc_w_segs >= local->max_write)
3518 break;
3519
3520
3521 if (qpriv->sync_pt && qpriv->alloc_w_segs)
3522 break;
3523
3524
3525 if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
3526 hfi1_kern_clear_hw_flow(rcd, qp);
3527 qpriv->sync_pt = false;
3528 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3529 }
3530
3531
3532 if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
3533 ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
3534 if (ret) {
3535 to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
3536 position_in_queue(qpriv,
3537 &rcd->flow_queue);
3538 break;
3539 }
3540 }
3541
3542 npkts = rvt_div_round_up_mtu(qp, req->seg_len);
3543
		/*
		 * We are at a sync point if we run out of KDETH PSN space.
		 * The last PSN of every generation is reserved for RESYNC.
		 */
3548 if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
3549 qpriv->sync_pt = true;
3550 break;
3551 }
3552
		/*
		 * If allocation would overtake req->acked_tail, send an RNR
		 * NAK. The QP is not queued in this case, and since the delay
		 * is caused by second-leg scheduling (which cannot be
		 * estimated), a rather arbitrary RNR timeout of
		 * (MAX_FLOWS / 2) segments is used.
		 */
3560 if (!CIRC_SPACE(req->setup_head, req->acked_tail,
3561 MAX_FLOWS)) {
3562 ret = -EAGAIN;
3563 to_seg = MAX_FLOWS >> 1;
3564 tid_rdma_trigger_ack(qp);
3565 break;
3566 }
3567
3568
3569 ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
3570 if (ret == -EAGAIN)
3571 to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
3572 if (ret)
3573 break;
3574
3575 qpriv->alloc_w_segs++;
3576 req->alloc_seg++;
3577 continue;
3578next_req:
3579
3580 if (++qpriv->r_tid_alloc >
3581 rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3582 qpriv->r_tid_alloc = 0;
3583 }
3584
	/*
	 * Schedule an RNR NAK if (a) flow or RcvArray allocation has failed,
	 * (b) we are called from the receive interrupt context, and (c) an
	 * RNR NAK has not already been scheduled.
	 */
3590 if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
3591 goto send_rnr_nak;
3592
3593 return;
3594
3595send_rnr_nak:
3596 lockdep_assert_held(&qp->r_lock);
3597
3598
3599 qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;
3600
3601
3602 qp->r_psn = e->psn + req->alloc_seg;
3603 qp->r_ack_psn = qp->r_psn;
	/*
	 * Pull back qp->r_head_ack_queue to the entry after the request
	 * being RNR NAK'd so that, when the request is resent, it lands
	 * back on its original ack queue entry.
	 */
3609 qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
3610 if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3611 qp->r_head_ack_queue = 0;
3612 qpriv->r_tid_head = qp->r_head_ack_queue;
	/*
	 * These send-side fields are used by make_rc_ack(). They are
	 * normally set in hfi1_send_rc_ack() but must be set here before
	 * qp->s_lock is dropped, for consistency.
	 */
3618 qp->s_nak_state = qp->r_nak_state;
3619 qp->s_ack_psn = qp->r_ack_psn;
	/*
	 * Clear the ACK PENDING flag to prevent an unwanted ACK now that
	 * qp->s_ack_psn has been modified.
	 */
3624 qp->s_flags &= ~(RVT_S_ACK_PENDING);
3625
3626 trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
	/*
	 * qpriv->rnr_nak_state is used to determine when the scheduled RNR
	 * NAK has actually been sent. RVT_S_ACK_PENDING cannot be used for
	 * this because qp->s_lock is dropped before calling rc_defered_ack()
	 * below, which could cause the RNR NAK to go out before this
	 * function returns.
	 */
3634 qpriv->rnr_nak_state = TID_RNR_NAK_SEND;
	/*
	 * Defer the RNR NAK so that it is sent from the receive path once
	 * the locks taken here have been released.
	 */
3641 rc_defered_ack(rcd, qp);
3642}
3643
3644void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
3645{
	/*
	 * TID RDMA WRITE REQ handling (responder side):
	 *  1. Verify the request much like an RC RDMA WRITE first packet.
	 *  2. Insert the request into the ack queue (qp->s_ack_queue) and
	 *     initialize its struct tid_rdma_request.
	 *  3. Allocate TID resources for as many segments as possible.
	 *  4. Set RVT_S_RESP_PENDING and kick the send engine so that TID
	 *     RDMA WRITE RESP packets are generated.
	 */
3659 struct hfi1_ctxtdata *rcd = packet->rcd;
3660 struct rvt_qp *qp = packet->qp;
3661 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
3662 struct ib_other_headers *ohdr = packet->ohdr;
3663 struct rvt_ack_entry *e;
3664 unsigned long flags;
3665 struct ib_reth *reth;
3666 struct hfi1_qp_priv *qpriv = qp->priv;
3667 struct tid_rdma_request *req;
3668 u32 bth0, psn, len, rkey, num_segs;
3669 bool fecn;
3670 u8 next;
3671 u64 vaddr;
3672 int diff;
3673
3674 bth0 = be32_to_cpu(ohdr->bth[0]);
3675 if (hfi1_ruc_check_hdr(ibp, packet))
3676 return;
3677
3678 fecn = process_ecn(qp, packet);
3679 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
3680 trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
3681
3682 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
3683 rvt_comm_est(qp);
3684
3685 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3686 goto nack_inv;
3687
3688 reth = &ohdr->u.tid_rdma.w_req.reth;
3689 vaddr = be64_to_cpu(reth->vaddr);
3690 len = be32_to_cpu(reth->length);
3691
3692 num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
3693 diff = delta_psn(psn, qp->r_psn);
3694 if (unlikely(diff)) {
3695 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
3696 return;
3697 }
3698
	/*
	 * A resent request that was previously RNR NAK'd is inserted at the
	 * location of the original request, which is one entry behind
	 * r_head_ack_queue.
	 */
3704 if (qpriv->rnr_nak_state)
3705 qp->r_head_ack_queue = qp->r_head_ack_queue ?
3706 qp->r_head_ack_queue - 1 :
3707 rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
3708
3709
3710 next = qp->r_head_ack_queue + 1;
3711 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3712 next = 0;
3713 spin_lock_irqsave(&qp->s_lock, flags);
3714 if (unlikely(next == qp->s_acked_ack_queue)) {
3715 if (!qp->s_ack_queue[next].sent)
3716 goto nack_inv_unlock;
3717 update_ack_queue(qp, next);
3718 }
3719 e = &qp->s_ack_queue[qp->r_head_ack_queue];
3720 req = ack_to_tid_req(e);
3721
3722
3723 if (qpriv->rnr_nak_state) {
3724 qp->r_nak_state = 0;
3725 qp->s_nak_state = 0;
3726 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
3727 qp->r_psn = e->lpsn + 1;
3728 req->state = TID_REQUEST_INIT;
3729 goto update_head;
3730 }
3731
3732 release_rdma_sge_mr(e);
3733
3734
3735 if (!len || len & ~PAGE_MASK)
3736 goto nack_inv_unlock;
3737
3738 rkey = be32_to_cpu(reth->rkey);
3739 qp->r_len = len;
3740
3741 if (e->opcode == TID_OP(WRITE_REQ) &&
3742 (req->setup_head != req->clear_tail ||
3743 req->clear_tail != req->acked_tail))
3744 goto nack_inv_unlock;
3745
3746 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
3747 rkey, IB_ACCESS_REMOTE_WRITE)))
3748 goto nack_acc;
3749
3750 qp->r_psn += num_segs - 1;
3751
3752 e->opcode = (bth0 >> 24) & 0xff;
3753 e->psn = psn;
3754 e->lpsn = qp->r_psn;
3755 e->sent = 0;
3756
3757 req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
3758 req->state = TID_REQUEST_INIT;
3759 req->cur_seg = 0;
3760 req->comp_seg = 0;
3761 req->ack_seg = 0;
3762 req->alloc_seg = 0;
3763 req->isge = 0;
3764 req->seg_len = qpriv->tid_rdma.local.max_len;
3765 req->total_len = len;
3766 req->total_segs = num_segs;
3767 req->r_flow_psn = e->psn;
3768 req->ss.sge = e->rdma_sge;
3769 req->ss.num_sge = 1;
3770
3771 req->flow_idx = req->setup_head;
3772 req->clear_tail = req->setup_head;
3773 req->acked_tail = req->setup_head;
3774
3775 qp->r_state = e->opcode;
3776 qp->r_nak_state = 0;
	/*
	 * Increment the MSN here instead of when the response is finished,
	 * since a duplicate request would otherwise increment it more than
	 * once.
	 */
3782 qp->r_msn++;
3783 qp->r_psn++;
3784
3785 trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
3786 req);
3787
3788 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
3789 qpriv->r_tid_tail = qp->r_head_ack_queue;
3790 } else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
3791 struct tid_rdma_request *ptr;
3792
3793 e = &qp->s_ack_queue[qpriv->r_tid_tail];
3794 ptr = ack_to_tid_req(e);
3795
3796 if (e->opcode != TID_OP(WRITE_REQ) ||
3797 ptr->comp_seg == ptr->total_segs) {
3798 if (qpriv->r_tid_tail == qpriv->r_tid_ack)
3799 qpriv->r_tid_ack = qp->r_head_ack_queue;
3800 qpriv->r_tid_tail = qp->r_head_ack_queue;
3801 }
3802 }
3803update_head:
3804 qp->r_head_ack_queue = next;
3805 qpriv->r_tid_head = qp->r_head_ack_queue;
3806
3807 hfi1_tid_write_alloc_resources(qp, true);
3808 trace_hfi1_tid_write_rsp_rcv_req(qp);
3809
3810
3811 qp->s_flags |= RVT_S_RESP_PENDING;
3812 if (fecn)
3813 qp->s_flags |= RVT_S_ECN;
3814 hfi1_schedule_send(qp);
3815
3816 spin_unlock_irqrestore(&qp->s_lock, flags);
3817 return;
3818
3819nack_inv_unlock:
3820 spin_unlock_irqrestore(&qp->s_lock, flags);
3821nack_inv:
3822 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
3823 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
3824 qp->r_ack_psn = qp->r_psn;
3825
3826 rc_defered_ack(rcd, qp);
3827 return;
3828nack_acc:
3829 spin_unlock_irqrestore(&qp->s_lock, flags);
3830 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
3831 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
3832 qp->r_ack_psn = qp->r_psn;
3833}
3834
3835u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
3836 struct ib_other_headers *ohdr, u32 *bth1,
3837 u32 bth2, u32 *len,
3838 struct rvt_sge_state **ss)
3839{
3840 struct hfi1_ack_priv *epriv = e->priv;
3841 struct tid_rdma_request *req = &epriv->tid_req;
3842 struct hfi1_qp_priv *qpriv = qp->priv;
3843 struct tid_rdma_flow *flow = NULL;
3844 u32 resp_len = 0, hdwords = 0;
3845 void *resp_addr = NULL;
3846 struct tid_rdma_params *remote;
3847
3848 trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
3849 req);
3850 trace_hfi1_tid_write_rsp_build_resp(qp);
3851 trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
3852 flow = &req->flows[req->flow_idx];
3853 switch (req->state) {
3854 default:
		/*
		 * Try to allocate resources here in case the QP was queued
		 * and got scheduled later, when resources became available.
		 */
3859 hfi1_tid_write_alloc_resources(qp, false);
3860
		/* We've already sent everything which is ready */
3862 if (req->cur_seg >= req->alloc_seg)
3863 goto done;
3864
		/*
		 * If an RNR NAK has been sent for this request and has not
		 * yet been acted upon by the requester, don't build a
		 * response for the remaining segments yet.
		 */
3869 if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
3870 goto done;
3871
3872 req->state = TID_REQUEST_ACTIVE;
3873 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3874 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3875 hfi1_add_tid_reap_timer(qp);
3876 break;
3877
3878 case TID_REQUEST_RESEND_ACTIVE:
3879 case TID_REQUEST_RESEND:
3880 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3881 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3882 if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
3883 req->state = TID_REQUEST_ACTIVE;
3884
3885 hfi1_mod_tid_reap_timer(qp);
3886 break;
3887 }
3888 flow->flow_state.resp_ib_psn = bth2;
3889 resp_addr = (void *)flow->tid_entry;
3890 resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
3891 req->cur_seg++;
3892
3893 memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
3894 epriv->ss.sge.vaddr = resp_addr;
3895 epriv->ss.sge.sge_length = resp_len;
3896 epriv->ss.sge.length = epriv->ss.sge.sge_length;
	/*
	 * We can safely zero these out. Since the first SGE covers the
	 * entire packet, nothing else should even look at the MR.
	 */
3901 epriv->ss.sge.mr = NULL;
3902 epriv->ss.sge.m = 0;
3903 epriv->ss.sge.n = 0;
3904
3905 epriv->ss.sg_list = NULL;
3906 epriv->ss.total_len = epriv->ss.sge.sge_length;
3907 epriv->ss.num_sge = 1;
3908
3909 *ss = &epriv->ss;
3910 *len = epriv->ss.total_len;
3911
3912
3913 rcu_read_lock();
3914 remote = rcu_dereference(qpriv->tid_rdma.remote);
3915
3916 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
3917 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
3918 ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
3919 ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
3920 cpu_to_be32((flow->flow_state.generation <<
3921 HFI1_KDETH_BTH_SEQ_SHIFT) |
3922 (flow->flow_state.spsn &
3923 HFI1_KDETH_BTH_SEQ_MASK));
3924 ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
3925 cpu_to_be32(qpriv->tid_rdma.local.qp |
3926 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
3927 TID_RDMA_DESTQP_FLOW_SHIFT) |
3928 qpriv->rcd->ctxt);
3929 ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
3930 *bth1 = remote->qp;
3931 rcu_read_unlock();
3932 hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
3933 qpriv->pending_tid_w_segs++;
3934done:
3935 return hdwords;
3936}
3937
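/*
 * The TID "reap" (resource) timer runs on the TID RDMA WRITE responder while
 * segment resources are allocated. If the requester stops sending WRITE DATA
 * packets, hfi1_tid_timeout() reclaims the hardware flow and RcvArray entries
 * and moves the QP to the error state.
 */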
3938static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
3939{
3940 struct hfi1_qp_priv *qpriv = qp->priv;
3941
3942 lockdep_assert_held(&qp->s_lock);
3943 if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
3944 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3945 qpriv->s_tid_timer.expires = jiffies +
3946 qpriv->tid_timer_timeout_jiffies;
3947 add_timer(&qpriv->s_tid_timer);
3948 }
3949}
3950
3951static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
3952{
3953 struct hfi1_qp_priv *qpriv = qp->priv;
3954
3955 lockdep_assert_held(&qp->s_lock);
3956 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3957 mod_timer(&qpriv->s_tid_timer, jiffies +
3958 qpriv->tid_timer_timeout_jiffies);
3959}
3960
3961static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
3962{
3963 struct hfi1_qp_priv *qpriv = qp->priv;
3964 int rval = 0;
3965
3966 lockdep_assert_held(&qp->s_lock);
3967 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3968 rval = del_timer(&qpriv->s_tid_timer);
3969 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3970 }
3971 return rval;
3972}
3973
3974void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
3975{
3976 struct hfi1_qp_priv *qpriv = qp->priv;
3977
3978 del_timer_sync(&qpriv->s_tid_timer);
3979 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3980}
3981
3982static void hfi1_tid_timeout(struct timer_list *t)
3983{
3984 struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
3985 struct rvt_qp *qp = qpriv->owner;
3986 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
3987 unsigned long flags;
3988 u32 i;
3989
3990 spin_lock_irqsave(&qp->r_lock, flags);
3991 spin_lock(&qp->s_lock);
3992 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3993 dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
3994 qp->ibqp.qp_num, __func__, __LINE__);
3995 trace_hfi1_msg_tid_timeout(
3996 qp, "resource timeout = ",
3997 (u64)qpriv->tid_timer_timeout_jiffies);
3998 hfi1_stop_tid_reap_timer(qp);
		/*
		 * Go through the entire ack queue and clear any outstanding
		 * HW flow and RcvArray resources.
		 */
4003 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
4004 for (i = 0; i < rvt_max_atomic(rdi); i++) {
4005 struct tid_rdma_request *req =
4006 ack_to_tid_req(&qp->s_ack_queue[i]);
4007
4008 hfi1_kern_exp_rcv_clear_all(req);
4009 }
4010 spin_unlock(&qp->s_lock);
4011 if (qp->ibqp.event_handler) {
4012 struct ib_event ev;
4013
4014 ev.device = qp->ibqp.device;
4015 ev.element.qp = &qp->ibqp;
4016 ev.event = IB_EVENT_QP_FATAL;
4017 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
4018 }
4019 rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
4020 goto unlock_r_lock;
4021 }
4022 spin_unlock(&qp->s_lock);
4023unlock_r_lock:
4024 spin_unlock_irqrestore(&qp->r_lock, flags);
4025}
4026
4027void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
4028{
	/*
	 * TID RDMA WRITE RESP handling (requester side):
	 *  1. Find the matching send WQE and validate the response PSN.
	 *  2. Check that the TID entry array carried in the response is
	 *     large enough for a complete segment; otherwise move the QP to
	 *     the error state.
	 *  3. Save the response data in struct tid_rdma_request and
	 *     struct tid_rdma_flow.
	 *  4. Clear HFI1_S_WAIT_TID_RESP and kick the TID send engine.
	 */
4040 struct ib_other_headers *ohdr = packet->ohdr;
4041 struct rvt_qp *qp = packet->qp;
4042 struct hfi1_qp_priv *qpriv = qp->priv;
4043 struct hfi1_ctxtdata *rcd = packet->rcd;
4044 struct rvt_swqe *wqe;
4045 struct tid_rdma_request *req;
4046 struct tid_rdma_flow *flow;
4047 enum ib_wc_status status;
4048 u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
4049 bool fecn;
4050 unsigned long flags;
4051
4052 fecn = process_ecn(qp, packet);
4053 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4054 aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
4055 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4056
4057 spin_lock_irqsave(&qp->s_lock, flags);

	/* Ignore invalid responses */
4060 if (cmp_psn(psn, qp->s_next_psn) >= 0)
4061 goto ack_done;

	/* Ignore duplicate responses */
4064 if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
4065 goto ack_done;
4066
4067 if (unlikely(qp->s_acked == qp->s_tail))
4068 goto ack_done;
4069
	/*
	 * If we are waiting for a particular packet sequence number
	 * due to a request being resent, check for it. Otherwise,
	 * ensure that we haven't missed anything.
	 */
4075 if (qp->r_flags & RVT_R_RDMAR_SEQ) {
4076 if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
4077 goto ack_done;
4078 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
4079 }
4080
4081 wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
4082 if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
4083 goto ack_op_err;
4084
4085 req = wqe_to_tid_req(wqe);
	/*
	 * If we've lost ACKs and our acked_tail pointer is too far behind,
	 * don't overwrite segments. Just drop the packet and let the
	 * reliability protocol take care of it.
	 */
4091 if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
4092 goto ack_done;
4093
	/*
	 * The call to do_rc_ack() should be last in the chain of packet
	 * checks because it ends up updating the QP state. Anything that
	 * could prevent the packet from being accepted as a successful
	 * response must come before it.
	 */
4101 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
4102 goto ack_done;
4103
4104 trace_hfi1_ack(qp, psn);
4105
4106 flow = &req->flows[req->setup_head];
4107 flow->pkt = 0;
4108 flow->tid_idx = 0;
4109 flow->tid_offset = 0;
4110 flow->sent = 0;
4111 flow->resync_npkts = 0;
4112 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
4113 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
4114 TID_RDMA_DESTQP_FLOW_MASK;
4115 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
4116 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4117 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
4118 flow->flow_state.resp_ib_psn = psn;
4119 flow->length = min_t(u32, req->seg_len,
4120 (wqe->length - (req->comp_seg * req->seg_len)));
4121
4122 flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
4123 flow->flow_state.lpsn = flow->flow_state.spsn +
4124 flow->npkts - 1;
	/* payload length = packet length - (header length + ICRC length) */
4126 pktlen = packet->tlen - (packet->hlen + 4);
4127 if (pktlen > sizeof(flow->tid_entry)) {
4128 status = IB_WC_LOC_LEN_ERR;
4129 goto ack_err;
4130 }
4131 memcpy(flow->tid_entry, packet->ebuf, pktlen);
4132 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
4133 trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
4134
4135 req->comp_seg++;
4136 trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
	/*
	 * Walk the TID entry list to make sure we have enough space for a
	 * complete segment. If not, put the QP in the error state.
	 */
4141 for (i = 0; i < flow->tidcnt; i++) {
4142 trace_hfi1_tid_entry_rcv_write_resp(
4143 qp, i, flow->tid_entry[i]);
4144 if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
4145 status = IB_WC_LOC_LEN_ERR;
4146 goto ack_err;
4147 }
4148 tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
4149 }
4150 if (tidlen * PAGE_SIZE < flow->length) {
4151 status = IB_WC_LOC_LEN_ERR;
4152 goto ack_err;
4153 }
4154
4155 trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
4156 wqe->lpsn, req);
	/*
	 * If this is the first response for this request, set the initial
	 * flow index to the current flow.
	 */
4161 if (!cmp_psn(psn, wqe->psn)) {
4162 req->r_last_acked = mask_psn(wqe->psn - 1);
4163
4164 req->acked_tail = req->setup_head;
4165 }
4166
4167
4168 req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
4169 req->state = TID_REQUEST_ACTIVE;
4170
	/*
	 * If all responses for this TID RDMA WRITE request have been
	 * received and the request is complete, advance s_tid_cur to the
	 * next TID RDMA WRITE WQE (if any) so its responses can be
	 * processed.
	 */
4178 if (qpriv->s_tid_cur != qpriv->s_tid_head &&
4179 req->comp_seg == req->total_segs) {
4180 for (i = qpriv->s_tid_cur + 1; ; i++) {
4181 if (i == qp->s_size)
4182 i = 0;
4183 wqe = rvt_get_swqe_ptr(qp, i);
4184 if (i == qpriv->s_tid_head)
4185 break;
4186 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
4187 break;
4188 }
4189 qpriv->s_tid_cur = i;
4190 }
4191 qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
4192 hfi1_schedule_tid_send(qp);
4193 goto ack_done;
4194
4195ack_op_err:
4196 status = IB_WC_LOC_QP_OP_ERR;
4197ack_err:
4198 rvt_error_qp(qp, status);
4199ack_done:
4200 if (fecn)
4201 qp->s_flags |= RVT_S_ECN;
4202 spin_unlock_irqrestore(&qp->s_lock, flags);
4203}
4204
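/*
 * Build one TID RDMA WRITE DATA packet from the current flow's TID entries.
 * Fills in the KDETH fields and the BTH PSN for the packet and returns true
 * if this is the last packet of the segment.
 */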
4205bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
4206 struct ib_other_headers *ohdr,
4207 u32 *bth1, u32 *bth2, u32 *len)
4208{
4209 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4210 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
4211 struct tid_rdma_params *remote;
4212 struct rvt_qp *qp = req->qp;
4213 struct hfi1_qp_priv *qpriv = qp->priv;
4214 u32 tidentry = flow->tid_entry[flow->tid_idx];
4215 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
4216 struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
4217 u32 next_offset, om = KDETH_OM_LARGE;
4218 bool last_pkt;
4219
4220 if (!tidlen) {
4221 hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
4222 rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
4223 }
4224
4225 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
4226 flow->sent += *len;
4227 next_offset = flow->tid_offset + *len;
4228 last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
4229 next_offset >= tidlen) || (flow->sent >= flow->length);
4230 trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
4231 trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
4232
4233 rcu_read_lock();
4234 remote = rcu_dereference(qpriv->tid_rdma.remote);
4235 KDETH_RESET(wd->kdeth0, KVER, 0x1);
4236 KDETH_SET(wd->kdeth0, SH, !last_pkt);
4237 KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
4238 KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
4239 KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
4240 KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
4241 KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
4242 KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
4243 wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
4244 rcu_read_unlock();
4245
4246 *bth1 = flow->tid_qpn;
4247 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
4248 HFI1_KDETH_BTH_SEQ_MASK) |
4249 (flow->flow_state.generation <<
4250 HFI1_KDETH_BTH_SEQ_SHIFT));
4251 if (last_pkt) {
		/* PSNs are zero-based; lpsn + 1 is the PSN after this segment */
4253 if (flow->flow_state.lpsn + 1 +
4254 rvt_div_round_up_mtu(qp, req->seg_len) >
4255 MAX_TID_FLOW_PSN)
4256 req->state = TID_REQUEST_SYNC;
4257 *bth2 |= IB_BTH_REQ_ACK;
4258 }
4259
4260 if (next_offset >= tidlen) {
4261 flow->tid_offset = 0;
4262 flow->tid_idx++;
4263 } else {
4264 flow->tid_offset = next_offset;
4265 }
4266 return last_pkt;
4267}
4268
4269void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
4270{
4271 struct rvt_qp *qp = packet->qp;
4272 struct hfi1_qp_priv *priv = qp->priv;
4273 struct hfi1_ctxtdata *rcd = priv->rcd;
4274 struct ib_other_headers *ohdr = packet->ohdr;
4275 struct rvt_ack_entry *e;
4276 struct tid_rdma_request *req;
4277 struct tid_rdma_flow *flow;
4278 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4279 unsigned long flags;
4280 u32 psn, next;
4281 u8 opcode;
4282 bool fecn;
4283
4284 fecn = process_ecn(qp, packet);
4285 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4286 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4287
	/*
	 * All error handling should be done by now. If we are here, the
	 * packet is either good or has been accepted by the error handler.
	 */
4292 spin_lock_irqsave(&qp->s_lock, flags);
4293 e = &qp->s_ack_queue[priv->r_tid_tail];
4294 req = ack_to_tid_req(e);
4295 flow = &req->flows[req->clear_tail];
4296 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
4297 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
4298
4299 if (cmp_psn(psn, flow->flow_state.r_next_psn))
4300 goto send_nak;
4301
4302 flow->flow_state.r_next_psn = mask_psn(psn + 1);
		/*
		 * Copy the payload to the destination buffer if this packet
		 * was delivered in the eager buffer due to FECN; in that
		 * case the expected-receive engine has not placed the data.
		 */
4310 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
4311 struct rvt_sge_state ss;
4312 u32 len;
4313 u32 tlen = packet->tlen;
4314 u16 hdrsize = packet->hlen;
4315 u8 pad = packet->pad;
4316 u8 extra_bytes = pad + packet->extra_byte +
4317 (SIZE_OF_CRC << 2);
4318 u32 pmtu = qp->pmtu;
4319
4320 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
4321 goto send_nak;
4322 len = req->comp_seg * req->seg_len;
4323 len += delta_psn(psn,
4324 full_flow_psn(flow, flow->flow_state.spsn)) *
4325 pmtu;
4326 if (unlikely(req->total_len - len < pmtu))
4327 goto send_nak;
4328
			/*
			 * The e->rdma_sge field is set when the TID RDMA
			 * WRITE REQ is first received and is never modified
			 * thereafter.
			 */
4333 ss.sge = e->rdma_sge;
4334 ss.sg_list = NULL;
4335 ss.num_sge = 1;
4336 ss.total_len = req->total_len;
4337 rvt_skip_sge(&ss, len, false);
4338 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
4339 false);
4340
4341 priv->r_next_psn_kdeth = mask_psn(psn + 1);
4342 priv->s_flags |= HFI1_R_TID_SW_PSN;
4343 }
4344 goto exit;
4345 }
4346 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4347 hfi1_kern_exp_rcv_clear(req);
4348 priv->alloc_w_segs--;
4349 rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
4350 req->comp_seg++;
4351 priv->s_nak_state = 0;
	/*
	 * Release the flow if one of the following conditions has been met:
	 *  - the request has reached a sync point AND all outstanding
	 *    segments have been completed, or
	 *  - the entire request is complete and there are no more requests
	 *    (of any kind) in the queue.
	 */
4360 trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
4361 trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
4362 req);
4363 trace_hfi1_tid_write_rsp_rcv_data(qp);
4364 validate_r_tid_ack(priv);
4365
4366 if (opcode == TID_OP(WRITE_DATA_LAST)) {
4367 release_rdma_sge_mr(e);
4368 for (next = priv->r_tid_tail + 1; ; next++) {
4369 if (next > rvt_size_atomic(&dev->rdi))
4370 next = 0;
4371 if (next == priv->r_tid_head)
4372 break;
4373 e = &qp->s_ack_queue[next];
4374 if (e->opcode == TID_OP(WRITE_REQ))
4375 break;
4376 }
4377 priv->r_tid_tail = next;
4378 if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
4379 qp->s_acked_ack_queue = 0;
4380 }
4381
4382 hfi1_tid_write_alloc_resources(qp, true);
4383
	/*
	 * If we need to generate more responses, schedule the send engine.
	 */
4388 if (req->cur_seg < req->total_segs ||
4389 qp->s_tail_ack_queue != qp->r_head_ack_queue) {
4390 qp->s_flags |= RVT_S_RESP_PENDING;
4391 hfi1_schedule_send(qp);
4392 }
4393
4394 priv->pending_tid_w_segs--;
4395 if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
4396 if (priv->pending_tid_w_segs)
4397 hfi1_mod_tid_reap_timer(req->qp);
4398 else
4399 hfi1_stop_tid_reap_timer(req->qp);
4400 }
4401
4402done:
4403 tid_rdma_schedule_ack(qp);
4404exit:
4405 priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
4406 if (fecn)
4407 qp->s_flags |= RVT_S_ECN;
4408 spin_unlock_irqrestore(&qp->s_lock, flags);
4409 return;
4410
4411send_nak:
4412 if (!priv->s_nak_state) {
4413 priv->s_nak_state = IB_NAK_PSN_ERROR;
4414 priv->s_nak_psn = flow->flow_state.r_next_psn;
4415 tid_rdma_trigger_ack(qp);
4416 }
4417 goto done;
4418}
4419
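/*
 * A RESYNC PSN is one whose low-order sequence bits
 * (HFI1_KDETH_BTH_SEQ_MASK) are all ones, i.e. the reserved last sequence
 * number of a generation.
 */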
4420static bool hfi1_tid_rdma_is_resync_psn(u32 psn)
4421{
4422 return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
4423 HFI1_KDETH_BTH_SEQ_MASK);
4424}
4425
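/*
 * Build a TID RDMA ACK packet. The BTH PSN carries either the RESYNC point
 * derived from the flow generation, the PSN being NAK'd, or the last KDETH
 * PSN of the flow being acknowledged, depending on the QP state.
 */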
4426u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
4427 struct ib_other_headers *ohdr, u16 iflow,
4428 u32 *bth1, u32 *bth2)
4429{
4430 struct hfi1_qp_priv *qpriv = qp->priv;
4431 struct tid_flow_state *fs = &qpriv->flow_state;
4432 struct tid_rdma_request *req = ack_to_tid_req(e);
4433 struct tid_rdma_flow *flow = &req->flows[iflow];
4434 struct tid_rdma_params *remote;
4435
4436 rcu_read_lock();
4437 remote = rcu_dereference(qpriv->tid_rdma.remote);
4438 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4439 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4440 *bth1 = remote->qp;
4441 rcu_read_unlock();
4442
4443 if (qpriv->resync) {
4444 *bth2 = mask_psn((fs->generation <<
4445 HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4446 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4447 } else if (qpriv->s_nak_state) {
4448 *bth2 = mask_psn(qpriv->s_nak_psn);
4449 ohdr->u.tid_rdma.ack.aeth =
4450 cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
4451 (qpriv->s_nak_state <<
4452 IB_AETH_CREDIT_SHIFT));
4453 } else {
4454 *bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
4455 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4456 }
4457 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4458 ohdr->u.tid_rdma.ack.tid_flow_qp =
4459 cpu_to_be32(qpriv->tid_rdma.local.qp |
4460 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
4461 TID_RDMA_DESTQP_FLOW_SHIFT) |
4462 qpriv->rcd->ctxt);
4463
4464 ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
4465 ohdr->u.tid_rdma.ack.verbs_psn =
4466 cpu_to_be32(flow->flow_state.resp_ib_psn);
4467
4468 if (qpriv->resync) {
		/*
		 * If the PSN before the currently expected KDETH PSN is the
		 * RESYNC PSN, then we never received a good TID RDMA WRITE
		 * DATA packet after a new generation was created.
		 */
4475 if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
4476 ohdr->u.tid_rdma.ack.tid_flow_psn =
4477 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4478 } else {
			/*
			 * Otherwise, save the expected KDETH PSN (so that a
			 * duplicate RESYNC reports the same value) and
			 * advance the expected PSN past the RESYNC point.
			 */
4485 qpriv->r_next_psn_kdeth_save =
4486 qpriv->r_next_psn_kdeth - 1;
4487 ohdr->u.tid_rdma.ack.tid_flow_psn =
4488 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4489 qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
4490 }
4491 qpriv->resync = false;
4492 }
4493
4494 return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
4495}
4496
4497void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
4498{
4499 struct ib_other_headers *ohdr = packet->ohdr;
4500 struct rvt_qp *qp = packet->qp;
4501 struct hfi1_qp_priv *qpriv = qp->priv;
4502 struct rvt_swqe *wqe;
4503 struct tid_rdma_request *req;
4504 struct tid_rdma_flow *flow;
4505 u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
4506 unsigned long flags;
4507 u16 fidx;
4508
4509 trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
4510 process_ecn(qp, packet);
4511 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4512 aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
4513 req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
4514 resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
4515
4516 spin_lock_irqsave(&qp->s_lock, flags);
4517 trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
4518
4519
4520 if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
4521 cmp_psn(psn, qpriv->s_resync_psn))
4522 goto ack_op_err;
4523
4524 ack_psn = req_psn;
4525 if (hfi1_tid_rdma_is_resync_psn(psn))
4526 ack_kpsn = resync_psn;
4527 else
4528 ack_kpsn = psn;
4529 if (aeth >> 29) {
4530 ack_psn--;
4531 ack_kpsn--;
4532 }
4533
4534 if (unlikely(qp->s_acked == qp->s_tail))
4535 goto ack_op_err;
4536
4537 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4538
4539 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4540 goto ack_op_err;
4541
4542 req = wqe_to_tid_req(wqe);
4543 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4544 wqe->lpsn, req);
4545 flow = &req->flows[req->acked_tail];
4546 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4547
4548
4549 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
4550 cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
4551 goto ack_op_err;
4552
4553 while (cmp_psn(ack_kpsn,
4554 full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
4555 req->ack_seg < req->cur_seg) {
4556 req->ack_seg++;
4557
4558 req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
4559 req->r_last_acked = flow->flow_state.resp_ib_psn;
4560 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4561 wqe->lpsn, req);
4562 if (req->ack_seg == req->total_segs) {
4563 req->state = TID_REQUEST_COMPLETE;
4564 wqe = do_rc_completion(qp, wqe,
4565 to_iport(qp->ibqp.device,
4566 qp->port_num));
4567 trace_hfi1_sender_rcv_tid_ack(qp);
4568 atomic_dec(&qpriv->n_tid_requests);
4569 if (qp->s_acked == qp->s_tail)
4570 break;
4571 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4572 break;
4573 req = wqe_to_tid_req(wqe);
4574 }
4575 flow = &req->flows[req->acked_tail];
4576 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4577 }
4578
4579 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4580 wqe->lpsn, req);
4581 switch (aeth >> 29) {
4582 case 0:
4583 if (qpriv->s_flags & RVT_S_WAIT_ACK)
4584 qpriv->s_flags &= ~RVT_S_WAIT_ACK;
4585 if (!hfi1_tid_rdma_is_resync_psn(psn)) {
4586
4587 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
4588 req->ack_seg < req->cur_seg)
4589 hfi1_mod_tid_retry_timer(qp);
4590 else
4591 hfi1_stop_tid_retry_timer(qp);
4592 hfi1_schedule_send(qp);
4593 } else {
4594 u32 spsn, fpsn, last_acked, generation;
4595 struct tid_rdma_request *rptr;
4596
4597
4598 hfi1_stop_tid_retry_timer(qp);
4599
4600 qp->s_flags &= ~HFI1_S_WAIT_HALT;
			/*
			 * Clear RVT_S_SEND_ONE in case the TID RDMA ACK
			 * arrives after the TID retry timer has fired again;
			 * in that case no further TID RESYNC request should
			 * be sent and no more TID ACK packets are expected.
			 */
4607 qpriv->s_flags &= ~RVT_S_SEND_ONE;
4608 hfi1_schedule_send(qp);
4609
4610 if ((qp->s_acked == qpriv->s_tid_tail &&
4611 req->ack_seg == req->total_segs) ||
4612 qp->s_acked == qp->s_tail) {
4613 qpriv->s_state = TID_OP(WRITE_DATA_LAST);
4614 goto done;
4615 }
4616
4617 if (req->ack_seg == req->comp_seg) {
4618 qpriv->s_state = TID_OP(WRITE_DATA);
4619 goto done;
4620 }
4621
			/*
			 * The PSN to start with is the one right after the
			 * RESYNC PSN.
			 */
4626 psn = mask_psn(psn + 1);
4627 generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4628 spsn = 0;
4629
			/*
			 * Move to the correct WQE when the ACK(RESYNC)
			 * arrives in the middle of a request.
			 */
4634 if (delta_psn(ack_psn, wqe->lpsn))
4635 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4636 req = wqe_to_tid_req(wqe);
4637 flow = &req->flows[req->acked_tail];
			/*
			 * RESYNC re-numbers the PSN ranges of all remaining
			 * segments. Also, PSNs restart from 0 in the middle
			 * of a segment, so the first segment after the
			 * RESYNC is shorter than the default number of
			 * packets. flow->resync_npkts tracks the packets
			 * from the start of the real segment to the 0 PSN
			 * after the RESYNC so the SGE can later be rewound
			 * correctly.
			 */
4647 fpsn = full_flow_psn(flow, flow->flow_state.spsn);
4648 req->r_ack_psn = psn;
			/*
			 * If the generation carried in resync_psn does not
			 * match this flow's generation, rebase resync_psn to
			 * just before the flow's first PSN.
			 */
4655 if (flow->flow_state.generation !=
4656 (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
4657 resync_psn = mask_psn(fpsn - 1);
4658 flow->resync_npkts +=
4659 delta_psn(mask_psn(resync_psn + 1), fpsn);
			/*
			 * Renumber all packet sequence number ranges based
			 * on the new generation.
			 */
4664 last_acked = qp->s_acked;
4665 rptr = req;
4666 while (1) {
4667
4668 for (fidx = rptr->acked_tail;
4669 CIRC_CNT(rptr->setup_head, fidx,
4670 MAX_FLOWS);
4671 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
4672 u32 lpsn;
4673 u32 gen;
4674
4675 flow = &rptr->flows[fidx];
4676 gen = flow->flow_state.generation;
4677 if (WARN_ON(gen == generation &&
4678 flow->flow_state.spsn !=
4679 spsn))
4680 continue;
4681 lpsn = flow->flow_state.lpsn;
4682 lpsn = full_flow_psn(flow, lpsn);
4683 flow->npkts =
4684 delta_psn(lpsn,
4685 mask_psn(resync_psn)
4686 );
4687 flow->flow_state.generation =
4688 generation;
4689 flow->flow_state.spsn = spsn;
4690 flow->flow_state.lpsn =
4691 flow->flow_state.spsn +
4692 flow->npkts - 1;
4693 flow->pkt = 0;
4694 spsn += flow->npkts;
4695 resync_psn += flow->npkts;
4696 trace_hfi1_tid_flow_rcv_tid_ack(qp,
4697 fidx,
4698 flow);
4699 }
4700 if (++last_acked == qpriv->s_tid_cur + 1)
4701 break;
4702 if (last_acked == qp->s_size)
4703 last_acked = 0;
4704 wqe = rvt_get_swqe_ptr(qp, last_acked);
4705 rptr = wqe_to_tid_req(wqe);
4706 }
4707 req->cur_seg = req->ack_seg;
4708 qpriv->s_tid_tail = qp->s_acked;
4709 qpriv->s_state = TID_OP(WRITE_REQ);
4710 hfi1_schedule_tid_send(qp);
4711 }
4712done:
4713 qpriv->s_retry = qp->s_retry_cnt;
4714 break;
4715
4716 case 3:
4717 hfi1_stop_tid_retry_timer(qp);
4718 switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
4719 IB_AETH_CREDIT_MASK) {
4720 case 0:
4721 if (!req->flows)
4722 break;
4723 flow = &req->flows[req->acked_tail];
4724 flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
4725 if (cmp_psn(psn, flpsn) > 0)
4726 break;
4727 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
4728 flow);
4729 req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4730 req->cur_seg = req->ack_seg;
4731 qpriv->s_tid_tail = qp->s_acked;
4732 qpriv->s_state = TID_OP(WRITE_REQ);
4733 qpriv->s_retry = qp->s_retry_cnt;
4734 hfi1_schedule_tid_send(qp);
4735 break;
4736
4737 default:
4738 break;
4739 }
4740 break;
4741
4742 default:
4743 break;
4744 }
4745
4746ack_op_err:
4747 spin_unlock_irqrestore(&qp->s_lock, flags);
4748}
4749
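/*
 * Arm the TID RDMA retry timer if it is not already pending.
 * Called with the QP s_lock held.
 */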
4750void hfi1_add_tid_retry_timer(struct rvt_qp *qp)
4751{
4752 struct hfi1_qp_priv *priv = qp->priv;
4753 struct ib_qp *ibqp = &qp->ibqp;
4754 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4755
4756 lockdep_assert_held(&qp->s_lock);
4757 if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
4758 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4759 priv->s_tid_retry_timer.expires = jiffies +
4760 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
4761 add_timer(&priv->s_tid_retry_timer);
4762 }
4763}
4764
4765static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
4766{
4767 struct hfi1_qp_priv *priv = qp->priv;
4768 struct ib_qp *ibqp = &qp->ibqp;
4769 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4770
4771 lockdep_assert_held(&qp->s_lock);
4772 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4773 mod_timer(&priv->s_tid_retry_timer, jiffies +
4774 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
4775}
4776
4777static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
4778{
4779 struct hfi1_qp_priv *priv = qp->priv;
4780 int rval = 0;
4781
4782 lockdep_assert_held(&qp->s_lock);
4783 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4784 rval = del_timer(&priv->s_tid_retry_timer);
4785 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4786 }
4787 return rval;
4788}
4789
4790void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
4791{
4792 struct hfi1_qp_priv *priv = qp->priv;
4793
4794 del_timer_sync(&priv->s_tid_retry_timer);
4795 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4796}
4797
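/*
 * TID RDMA retry timer callback: if the retry count is exhausted, complete
 * the WQE with IB_WC_RETRY_EXC_ERR and move the QP into the error state;
 * otherwise halt new requests and send a single RESYNC to the responder.
 */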
4798static void hfi1_tid_retry_timeout(struct timer_list *t)
4799{
4800 struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
4801 struct rvt_qp *qp = priv->owner;
4802 struct rvt_swqe *wqe;
4803 unsigned long flags;
4804 struct tid_rdma_request *req;
4805
4806 spin_lock_irqsave(&qp->r_lock, flags);
4807 spin_lock(&qp->s_lock);
4808 trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
4809 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4810 hfi1_stop_tid_retry_timer(qp);
4811 if (!priv->s_retry) {
4812 trace_hfi1_msg_tid_retry_timeout(
4813 qp,
4814 "Exhausted retries. Tid retry timeout = ",
4815 (u64)priv->tid_retry_timeout_jiffies);
4816
4817 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4818 hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
4819 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
4820 } else {
4821 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4822 req = wqe_to_tid_req(wqe);
4823 trace_hfi1_tid_req_tid_retry_timeout(
4824 qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);
4825
4826 priv->s_flags &= ~RVT_S_WAIT_ACK;
			/* Only send one packet (the RESYNC) */
4828 priv->s_flags |= RVT_S_SEND_ONE;
			/*
			 * No additional request shall be made by this QP
			 * until the RESYNC has completed.
			 */
4833 qp->s_flags |= HFI1_S_WAIT_HALT;
4834 priv->s_state = TID_OP(RESYNC);
4835 priv->s_retry--;
4836 hfi1_schedule_tid_send(qp);
4837 }
4838 }
4839 spin_unlock(&qp->s_lock);
4840 spin_unlock_irqrestore(&qp->r_lock, flags);
4841}
4842
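/*
 * Build the header for a TID RDMA RESYNC packet. The BTH PSN is chosen so
 * that PSN + 1 falls in the next flow generation, which is how the
 * responder (hfi1_rc_rcv_tid_rdma_resync()) recovers the requested
 * generation. Returns the header size in 32-bit words.
 */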
4843u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
4844 struct ib_other_headers *ohdr, u32 *bth1,
4845 u32 *bth2, u16 fidx)
4846{
4847 struct hfi1_qp_priv *qpriv = qp->priv;
4848 struct tid_rdma_params *remote;
4849 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4850 struct tid_rdma_flow *flow = &req->flows[fidx];
4851 u32 generation;
4852
4853 rcu_read_lock();
4854 remote = rcu_dereference(qpriv->tid_rdma.remote);
4855 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4856 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4857 *bth1 = remote->qp;
4858 rcu_read_unlock();
4859
4860 generation = kern_flow_generation_next(flow->flow_state.generation);
4861 *bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4862 qpriv->s_resync_psn = *bth2;
4863 *bth2 |= IB_BTH_REQ_ACK;
4864 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4865
4866 return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
4867}
4868
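/*
 * Responder-side handler for a TID RDMA RESYNC packet: reprogram the
 * hardware flow with the generation requested by the sender and renumber
 * the flow PSN ranges of all outstanding TID RDMA WRITE segments on the
 * ack queue. An ACK for the RESYNC is triggered at the end.
 */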
4869void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
4870{
4871 struct ib_other_headers *ohdr = packet->ohdr;
4872 struct rvt_qp *qp = packet->qp;
4873 struct hfi1_qp_priv *qpriv = qp->priv;
4874 struct hfi1_ctxtdata *rcd = qpriv->rcd;
4875 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4876 struct rvt_ack_entry *e;
4877 struct tid_rdma_request *req;
4878 struct tid_rdma_flow *flow;
4879 struct tid_flow_state *fs = &qpriv->flow_state;
4880 u32 psn, generation, idx, gen_next;
4881 bool fecn;
4882 unsigned long flags;
4883
4884 fecn = process_ecn(qp, packet);
4885 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4886
4887 generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
4888 spin_lock_irqsave(&qp->s_lock, flags);
4889
4890 gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
4891 generation : kern_flow_generation_next(fs->generation);
	/*
	 * The RESYNC packet carries the "next" generation, so it can only
	 * come from the current or the previous generation.
	 */
4896 if (generation != mask_generation(gen_next - 1) &&
4897 generation != gen_next)
4898 goto bail;
	/* Already processing a RESYNC */
4900 if (qpriv->resync)
4901 goto bail;
4902
4903 spin_lock(&rcd->exp_lock);
4904 if (fs->index >= RXE_NUM_TID_FLOWS) {
		/*
		 * If we don't have a flow, save the generation so it can be
		 * applied when a new flow is allocated.
		 */
4909 fs->generation = generation;
4910 } else {
		/* Reprogram the QP flow with the new generation */
4912 rcd->flows[fs->index].generation = generation;
4913 fs->generation = kern_setup_hw_flow(rcd, fs->index);
4914 }
4915 fs->psn = 0;
	/*
	 * Disable SW PSN checking since a RESYNC is equivalent to a
	 * start-over for the receiving side.
	 */
4920 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
4921 trace_hfi1_tid_write_rsp_rcv_resync(qp);
4922
	/*
	 * Reset the TID flow information of every request and segment
	 * after the last received segment so they pick up the new
	 * generation.
	 */
4928 for (idx = qpriv->r_tid_tail; ; idx++) {
4929 u16 flow_idx;
4930
4931 if (idx > rvt_size_atomic(&dev->rdi))
4932 idx = 0;
4933 e = &qp->s_ack_queue[idx];
4934 if (e->opcode == TID_OP(WRITE_REQ)) {
4935 req = ack_to_tid_req(e);
4936 trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
4937 e->lpsn, req);
4938
			/* walk the flows starting at the segment being received */
4940 for (flow_idx = req->clear_tail;
4941 CIRC_CNT(req->setup_head, flow_idx,
4942 MAX_FLOWS);
4943 flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
4944 u32 lpsn;
4945 u32 next;
4946
4947 flow = &req->flows[flow_idx];
4948 lpsn = full_flow_psn(flow,
4949 flow->flow_state.lpsn);
4950 next = flow->flow_state.r_next_psn;
4951 flow->npkts = delta_psn(lpsn, next - 1);
4952 flow->flow_state.generation = fs->generation;
4953 flow->flow_state.spsn = fs->psn;
4954 flow->flow_state.lpsn =
4955 flow->flow_state.spsn + flow->npkts - 1;
4956 flow->flow_state.r_next_psn =
4957 full_flow_psn(flow,
4958 flow->flow_state.spsn);
4959 fs->psn += flow->npkts;
4960 trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
4961 flow);
4962 }
4963 }
4964 if (idx == qp->s_tail_ack_queue)
4965 break;
4966 }
4967
4968 spin_unlock(&rcd->exp_lock);
4969 qpriv->resync = true;
	/* Reset the flags so that a new RESYNC can be started */
4971 qpriv->s_nak_state = 0;
4972 tid_rdma_trigger_ack(qp);
4973bail:
4974 if (fecn)
4975 qp->s_flags |= RVT_S_ECN;
4976 spin_unlock_irqrestore(&qp->s_lock, flags);
4977}
4978
/*
 * Call this function when the last TID RDMA WRITE DATA packet for a
 * request has been built.
 */
4983static void update_tid_tail(struct rvt_qp *qp)
4984 __must_hold(&qp->s_lock)
4985{
4986 struct hfi1_qp_priv *priv = qp->priv;
4987 u32 i;
4988 struct rvt_swqe *wqe;
4989
4990 lockdep_assert_held(&qp->s_lock);
	/* Can't move beyond s_tid_cur */
4992 if (priv->s_tid_tail == priv->s_tid_cur)
4993 return;
4994 for (i = priv->s_tid_tail + 1; ; i++) {
4995 if (i == qp->s_size)
4996 i = 0;
4997
4998 if (i == priv->s_tid_cur)
4999 break;
5000 wqe = rvt_get_swqe_ptr(qp, i);
5001 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
5002 break;
5003 }
5004 priv->s_tid_tail = i;
5005 priv->s_state = TID_OP(WRITE_RESP);
5006}
5007
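/*
 * "Second leg" packet builder for TID RDMA: emits TID RDMA WRITE DATA,
 * RESYNC, or (via make_tid_rdma_ack()) TID RDMA ACK packets. Returns 1 if
 * a packet was built and 0 if there is nothing to send.
 */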
5008int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
5009 __must_hold(&qp->s_lock)
5010{
5011 struct hfi1_qp_priv *priv = qp->priv;
5012 struct rvt_swqe *wqe;
5013 u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
5014 struct ib_other_headers *ohdr;
5015 struct rvt_sge_state *ss = &qp->s_sge;
5016 struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
5017 struct tid_rdma_request *req = ack_to_tid_req(e);
5018 bool last = false;
5019 u8 opcode = TID_OP(WRITE_DATA);
5020
5021 lockdep_assert_held(&qp->s_lock);
5022 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5023
	/*
	 * Prioritize the sending of requests and responses over the
	 * sending of TID RDMA WRITE DATA packets.
	 */
5027 if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
5028 atomic_read(&priv->n_requests) &&
5029 !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
5030 HFI1_S_ANY_WAIT_IO))) ||
5031 (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
5032 !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
5033 struct iowait_work *iowork;
5034
5035 iowork = iowait_get_ib_work(&priv->s_iowait);
5036 ps->s_txreq = get_waiting_verbs_txreq(iowork);
5037 if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
5038 priv->s_flags |= HFI1_S_TID_BUSY_SET;
5039 return 1;
5040 }
5041 }
5042
5043 ps->s_txreq = get_txreq(ps->dev, qp);
5044 if (!ps->s_txreq)
5045 goto bail_no_tx;
5046
5047 ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
5048
5049 if ((priv->s_flags & RVT_S_ACK_PENDING) &&
5050 make_tid_rdma_ack(qp, ohdr, ps))
5051 return 1;
5052
	/*
	 * Bail out if we can't send data.
	 * This check must be done after the call to make_tid_rdma_ack()
	 * because the responding QP could be in a state (e.g. RTR) where
	 * it can send the TID RDMA ACK but not TID RDMA WRITE DATA.
	 */
5059 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
5060 goto bail;
5061
5062 if (priv->s_flags & RVT_S_WAIT_ACK)
5063 goto bail;
5064
	/* Check whether there is anything to do. */
5066 if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
5067 goto bail;
5068 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5069 req = wqe_to_tid_req(wqe);
5070 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
5071 wqe->lpsn, req);
5072 switch (priv->s_state) {
5073 case TID_OP(WRITE_REQ):
5074 case TID_OP(WRITE_RESP):
5075 priv->tid_ss.sge = wqe->sg_list[0];
5076 priv->tid_ss.sg_list = wqe->sg_list + 1;
5077 priv->tid_ss.num_sge = wqe->wr.num_sge;
5078 priv->tid_ss.total_len = wqe->length;
5079
5080 if (priv->s_state == TID_OP(WRITE_REQ))
5081 hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
5082 priv->s_state = TID_OP(WRITE_DATA);
5083 fallthrough;
5084
5085 case TID_OP(WRITE_DATA):
		/*
		 * 1. Check whether a TID RDMA WRITE RESP is available.
		 * 2. If not:
		 *    2.1 If there are more segments and no response yet,
		 *        set HFI1_S_WAIT_TID_RESP.
		 *    2.2 Return without making progress.
		 * 3. If yes:
		 *    3.1 Build the TID RDMA WRITE DATA packet.
		 *    3.2 If it is the last packet of the segment, advance
		 *        the flow/segment pointers.
		 *    3.3 Return indicating progress was made.
		 */
5099 trace_hfi1_sender_make_tid_pkt(qp);
5100 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5101 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5102 req = wqe_to_tid_req(wqe);
5103 len = wqe->length;
5104
5105 if (!req->comp_seg || req->cur_seg == req->comp_seg)
5106 goto bail;
5107
5108 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
5109 wqe->psn, wqe->lpsn, req);
5110 last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
5111 &len);
5112
5113 if (last) {
			/* move pointer to next flow */
5115 req->clear_tail = CIRC_NEXT(req->clear_tail,
5116 MAX_FLOWS);
5117 if (++req->cur_seg < req->total_segs) {
5118 if (!CIRC_CNT(req->setup_head, req->clear_tail,
5119 MAX_FLOWS))
5120 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
5121 } else {
5122 priv->s_state = TID_OP(WRITE_DATA_LAST);
5123 opcode = TID_OP(WRITE_DATA_LAST);
5124
				/* Advance s_tid_tail now */
5126 update_tid_tail(qp);
5127 }
5128 }
5129 hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
5130 ss = &priv->tid_ss;
5131 break;
5132
5133 case TID_OP(RESYNC):
5134 trace_hfi1_sender_make_tid_pkt(qp);
		/* Use the generation from the most recently received response */
5136 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
5137 req = wqe_to_tid_req(wqe);
		/* If there are no responses for this WQE, look at the previous one */
5139 if (!req->comp_seg) {
5140 wqe = rvt_get_swqe_ptr(qp,
5141 (!priv->s_tid_cur ? qp->s_size :
5142 priv->s_tid_cur) - 1);
5143 req = wqe_to_tid_req(wqe);
5144 }
5145 hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
5146 &bth2,
5147 CIRC_PREV(req->setup_head,
5148 MAX_FLOWS));
5149 ss = NULL;
5150 len = 0;
5151 opcode = TID_OP(RESYNC);
5152 break;
5153
5154 default:
5155 goto bail;
5156 }
5157 if (priv->s_flags & RVT_S_SEND_ONE) {
5158 priv->s_flags &= ~RVT_S_SEND_ONE;
5159 priv->s_flags |= RVT_S_WAIT_ACK;
5160 bth2 |= IB_BTH_REQ_ACK;
5161 }
5162 qp->s_len -= len;
5163 ps->s_txreq->hdr_dwords = hwords;
5164 ps->s_txreq->sde = priv->s_sde;
5165 ps->s_txreq->ss = ss;
5166 ps->s_txreq->s_cur_size = len;
5167 hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
5168 middle, ps);
5169 return 1;
5170bail:
5171 hfi1_put_txreq(ps->s_txreq);
5172bail_no_tx:
5173 ps->s_txreq = NULL;
5174 priv->s_flags &= ~RVT_S_BUSY;
	/*
	 * If we didn't get a txreq, the QP will be woken up later to try
	 * again; set the iowait flag so the wakeup knows which work item
	 * (the TID leg) to schedule.
	 */
5182 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5183 return 0;
5184}
5185
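/*
 * Build a TID RDMA ACK that coalesces all segments received since the last
 * ACK was sent. Returns 1 if an ACK was built, 0 otherwise.
 */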
5186static int make_tid_rdma_ack(struct rvt_qp *qp,
5187 struct ib_other_headers *ohdr,
5188 struct hfi1_pkt_state *ps)
5189{
5190 struct rvt_ack_entry *e;
5191 struct hfi1_qp_priv *qpriv = qp->priv;
5192 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5193 u32 hwords, next;
5194 u32 len = 0;
5195 u32 bth1 = 0, bth2 = 0;
5196 int middle = 0;
5197 u16 flow;
5198 struct tid_rdma_request *req, *nreq;
5199
5200 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
	/* Don't send an ACK if we aren't supposed to. */
5202 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
5203 goto bail;
5204
	/* header size in 32-bit words: LRH + BTH = (8 + 12) / 4 */
5206 hwords = 5;
5207
5208 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5209 req = ack_to_tid_req(e);
5210
	/*
	 * In the RESYNC case, we are exactly one segment past the
	 * previously sent ack or at the previously sent NAK. So to send
	 * the RESYNC ack, we go back one segment (which might be part of
	 * the previous request) and let the do-while loop below execute
	 * again. The benefit is that any data received after the previous
	 * ack is automatically acked in the RESYNC ack. Only
	 * qpriv->r_tid_ack needs to be pulled back, not the segment
	 * indices/counters, and the scheme works even if the previous
	 * request was not a TID WRITE request.
	 */
5222 if (qpriv->resync) {
5223 if (!req->ack_seg || req->ack_seg == req->total_segs)
5224 qpriv->r_tid_ack = !qpriv->r_tid_ack ?
5225 rvt_size_atomic(&dev->rdi) :
5226 qpriv->r_tid_ack - 1;
5227 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5228 req = ack_to_tid_req(e);
5229 }
5230
5231 trace_hfi1_rsp_make_tid_ack(qp, e->psn);
5232 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5233 req);
	/*
	 * If we've sent all the ACKs that we can, we are done until more
	 * segments are received.
	 */
5238 if (!qpriv->s_nak_state && !qpriv->resync &&
5239 req->ack_seg == req->comp_seg)
5240 goto bail;
5241
5242 do {
		/*
		 * To handle coalesced ACKs, the acked_tail pointer into
		 * the flow array is used. The distance between it and
		 * clear_tail is the number of flows being ACK'ed here.
		 */
5249 req->ack_seg +=
			/* Get up-to-date value */
5251 CIRC_CNT(req->clear_tail, req->acked_tail,
5252 MAX_FLOWS);
		/* Advance acked index */
5254 req->acked_tail = req->clear_tail;
5255
		/*
		 * req->clear_tail points to the segment currently being
		 * received. So, when sending an ACK, the previous segment
		 * is being ACK'ed.
		 */
5261 flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
5262 if (req->ack_seg != req->total_segs)
5263 break;
5264 req->state = TID_REQUEST_COMPLETE;
5265
5266 next = qpriv->r_tid_ack + 1;
5267 if (next > rvt_size_atomic(&dev->rdi))
5268 next = 0;
5269 qpriv->r_tid_ack = next;
5270 if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
5271 break;
5272 nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
5273 if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
5274 break;
5275
		/* Move to the next ack entry now */
5277 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5278 req = ack_to_tid_req(e);
5279 } while (1);
5280
	/*
	 * At this point qpriv->r_tid_ack has caught up, but e and req
	 * could still be pointing at the previous ack queue entry.
	 */
5285 if (qpriv->s_nak_state ||
5286 (qpriv->resync &&
5287 !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
5288 (cmp_psn(qpriv->r_next_psn_kdeth - 1,
5289 full_flow_psn(&req->flows[flow],
5290 req->flows[flow].flow_state.lpsn)) > 0))) {
		/*
		 * A NAK implicitly acknowledges all previous TID RDMA
		 * requests. Therefore, NAK with the req->acked_tail
		 * segment of the request at qpriv->r_tid_ack (the same,
		 * at this point, as the req->clear_tail segment of the
		 * qpriv->r_tid_tail request).
		 */
5298 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5299 req = ack_to_tid_req(e);
5300 flow = req->acked_tail;
5301 } else if (req->ack_seg == req->total_segs &&
5302 qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
5303 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
5304
5305 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
5306 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5307 req);
5308 hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
5309 &bth2);
5310 len = 0;
5311 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5312 ps->s_txreq->hdr_dwords = hwords;
5313 ps->s_txreq->sde = qpriv->s_sde;
5314 ps->s_txreq->s_cur_size = len;
5315 ps->s_txreq->ss = NULL;
5316 hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
5317 ps);
5318 ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
5319 return 1;
5320bail:
	/*
	 * Make sure the preceding state updates are committed before
	 * RVT_S_ACK_PENDING is cleared.
	 */
5325 smp_wmb();
5326 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5327 return 0;
5328}
5329
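/*
 * Modified progress test for the TID RDMA send engine: RVT_S_BUSY is
 * checked on the QP-private s_flags so that the IB and TID send engines
 * do not trip over each other's busy flag.
 */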
5330static int hfi1_send_tid_ok(struct rvt_qp *qp)
5331{
5332 struct hfi1_qp_priv *priv = qp->priv;
5333
5334 return !(priv->s_flags & RVT_S_BUSY ||
5335 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
5336 (verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
5337 (priv->s_flags & RVT_S_RESP_PENDING) ||
5338 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
5339}
5340
5341void _hfi1_do_tid_send(struct work_struct *work)
5342{
5343 struct iowait_work *w = container_of(work, struct iowait_work, iowork);
5344 struct rvt_qp *qp = iowait_to_qp(w->iow);
5345
5346 hfi1_do_tid_send(qp);
5347}
5348
5349static void hfi1_do_tid_send(struct rvt_qp *qp)
5350{
5351 struct hfi1_pkt_state ps;
5352 struct hfi1_qp_priv *priv = qp->priv;
5353
5354 ps.dev = to_idev(qp->ibqp.device);
5355 ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
5356 ps.ppd = ppd_from_ibp(ps.ibp);
5357 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5358 ps.in_thread = false;
5359 ps.timeout_int = qp->timeout_jiffies / 8;
5360
5361 trace_hfi1_rc_do_tid_send(qp, false);
5362 spin_lock_irqsave(&qp->s_lock, ps.flags);
5363
	/* Return if we are already busy processing a work request. */
5365 if (!hfi1_send_tid_ok(qp)) {
5366 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5367 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5368 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5369 return;
5370 }
5371
5372 priv->s_flags |= RVT_S_BUSY;
5373
5374 ps.timeout = jiffies + ps.timeout_int;
5375 ps.cpu = priv->s_sde ? priv->s_sde->cpu :
5376 cpumask_first(cpumask_of_node(ps.ppd->dd->node));
5377 ps.pkts_sent = false;
5378
	/* Pick up any pre-built packet so it is handled first. */
5380 ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
5381 do {
		/* Check for a constructed packet to be sent. */
5383 if (ps.s_txreq) {
5384 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5385 qp->s_flags |= RVT_S_BUSY;
5386 ps.wait = iowait_get_ib_work(&priv->s_iowait);
5387 }
5388 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5389
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
5394 if (hfi1_verbs_send(qp, &ps))
5395 return;
5396
			/* allow other tasks to run */
5398 if (hfi1_schedule_send_yield(qp, &ps, true))
5399 return;
5400
5401 spin_lock_irqsave(&qp->s_lock, ps.flags);
5402 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5403 qp->s_flags &= ~RVT_S_BUSY;
5404 priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
5405 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5406 if (iowait_flag_set(&priv->s_iowait,
5407 IOWAIT_PENDING_IB))
5408 hfi1_schedule_send(qp);
5409 }
5410 }
5411 } while (hfi1_make_tid_rdma_pkt(qp, &ps));
5412 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
5413 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5414}
5415
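/*
 * Queue the TID RDMA send engine work on the selected CPU. When the
 * device is shutting down, no work is queued and true is returned.
 */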
5416static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
5417{
5418 struct hfi1_qp_priv *priv = qp->priv;
5419 struct hfi1_ibport *ibp =
5420 to_iport(qp->ibqp.device, qp->port_num);
5421 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
5422 struct hfi1_devdata *dd = ppd->dd;
5423
5424 if ((dd->flags & HFI1_SHUTDOWN))
5425 return true;
5426
5427 return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
5428 priv->s_sde ?
5429 priv->s_sde->cpu :
5430 cpumask_first(cpumask_of_node(dd->node)));
5431}
5432
/*
 * hfi1_schedule_tid_send - schedule progress on the TID RDMA state machine
 * @qp: the QP
 *
 * This schedules QP progress on the TID RDMA state machine. The caller
 * must hold the s_lock.
 * Unlike hfi1_schedule_send(), this cannot use hfi1_send_ok() because the
 * two state machines can step on each other with respect to the
 * RVT_S_BUSY flag; a modified test (hfi1_send_tid_ok()) is used instead.
 *
 * Return: true if the second leg is scheduled, false otherwise.
 */
5446bool hfi1_schedule_tid_send(struct rvt_qp *qp)
5447{
5448 lockdep_assert_held(&qp->s_lock);
5449 if (hfi1_send_tid_ok(qp)) {
		/*
		 * The following call returns true if the qp is not on the
		 * queue and false if the qp is already on the queue before
		 * this call. Either way, the qp will be on the queue when
		 * the call returns.
		 */
5456 _hfi1_schedule_tid_send(qp);
5457 return true;
5458 }
5459 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5460 iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
5461 IOWAIT_PENDING_TID);
5462 return false;
5463}
5464
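/*
 * Interlock a (TID) RDMA READ request behind a preceding TID RDMA WRITE
 * request on the ack queue that has not been fully ACK'ed yet. Returns
 * true and sets HFI1_R_TID_WAIT_INTERLCK if the new request must wait.
 */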
5465bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
5466{
5467 struct rvt_ack_entry *prev;
5468 struct tid_rdma_request *req;
5469 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5470 struct hfi1_qp_priv *priv = qp->priv;
5471 u32 s_prev;
5472
5473 s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
5474 (qp->s_tail_ack_queue - 1);
5475 prev = &qp->s_ack_queue[s_prev];
5476
5477 if ((e->opcode == TID_OP(READ_REQ) ||
5478 e->opcode == OP(RDMA_READ_REQUEST)) &&
5479 prev->opcode == TID_OP(WRITE_REQ)) {
5480 req = ack_to_tid_req(prev);
5481 if (req->ack_seg != req->total_segs) {
5482 priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
5483 return true;
5484 }
5485 }
5486 return false;
5487}
5488
5489static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
5490{
5491 u64 reg;
5492
	/*
	 * The only sane way to get the amount of progress is to read the
	 * hardware flow state.
	 */
5497 reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
5498 return mask_psn(reg);
5499}
5500
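/*
 * Wrapper around tid_rdma_rcv_error() that also latches RVT_S_ECN when
 * the packet arrived with FECN set.
 */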
5501static void tid_rdma_rcv_err(struct hfi1_packet *packet,
5502 struct ib_other_headers *ohdr,
5503 struct rvt_qp *qp, u32 psn, int diff, bool fecn)
5504{
5505 unsigned long flags;
5506
5507 tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
5508 if (fecn) {
5509 spin_lock_irqsave(&qp->s_lock, flags);
5510 qp->s_flags |= RVT_S_ECN;
5511 spin_unlock_irqrestore(&qp->s_lock, flags);
5512 }
5513}
5514
5515static void update_r_next_psn_fecn(struct hfi1_packet *packet,
5516 struct hfi1_qp_priv *priv,
5517 struct hfi1_ctxtdata *rcd,
5518 struct tid_rdma_flow *flow,
5519 bool fecn)
5520{
	/*
	 * When FECN forces a packet onto the eager receive path and SW PSN
	 * tracking is not in use, refresh r_next_psn from the hardware
	 * flow state.
	 */
5525 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
5526 !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
5527 struct hfi1_devdata *dd = rcd->dd;
5528
5529 flow->flow_state.r_next_psn =
5530 read_r_next_psn(dd, rcd->ctxt, flow->idx);
5531 }
5532}
5533