#include "hfi.h"
#include "qp.h"
#include "rc.h"
#include "verbs.h"
#include "tid_rdma.h"
#include "exp_rcv.h"
#include "trace.h"

#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)

#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)

#define GENERATION_MASK 0xFFFFF

static u32 mask_generation(u32 a)
{
	return a & GENERATION_MASK;
}

#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)

#define TID_RDMA_JKEY 32
#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)

#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
		      TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)

#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)

#define TID_RDMA_DESTQP_FLOW_SHIFT 11
#define TID_RDMA_DESTQP_FLOW_MASK 0x1f
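
/*
 * Layout of the 64-bit TID RDMA OPFN parameter word built by
 * tid_rdma_opfn_encode() below (bit ranges follow the *_MASK/_SHIFT
 * macros): [63:56] KDETH QP context, [55:48] KDETH QP prefix,
 * [47:37] max segment length in pages minus one, [36:32] timeout,
 * [31:26] reserved, [25] urg, [24:22] version, [21:16] J_KEY,
 * [15:10] max read segments per request, [9:4] max write segments
 * per request.
 */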

#define TID_OPFN_QP_CTXT_MASK 0xff
#define TID_OPFN_QP_CTXT_SHIFT 56
#define TID_OPFN_QP_KDETH_MASK 0xff
#define TID_OPFN_QP_KDETH_SHIFT 48
#define TID_OPFN_MAX_LEN_MASK 0x7ff
#define TID_OPFN_MAX_LEN_SHIFT 37
#define TID_OPFN_TIMEOUT_MASK 0x1f
#define TID_OPFN_TIMEOUT_SHIFT 32
#define TID_OPFN_RESERVED_MASK 0x3f
#define TID_OPFN_RESERVED_SHIFT 26
#define TID_OPFN_URG_MASK 0x1
#define TID_OPFN_URG_SHIFT 25
#define TID_OPFN_VER_MASK 0x7
#define TID_OPFN_VER_SHIFT 22
#define TID_OPFN_JKEY_MASK 0x3f
#define TID_OPFN_JKEY_SHIFT 16
#define TID_OPFN_MAX_READ_MASK 0x3f
#define TID_OPFN_MAX_READ_SHIFT 10
#define TID_OPFN_MAX_WRITE_MASK 0x3f
#define TID_OPFN_MAX_WRITE_SHIFT 4

static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
					 gfp_t gfp);
static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req);
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
static void hfi1_tid_timeout(struct timer_list *t);
static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
static void hfi1_tid_retry_timeout(struct timer_list *t);
static int make_tid_rdma_ack(struct rvt_qp *qp,
			     struct ib_other_headers *ohdr,
			     struct hfi1_pkt_state *ps);
static void hfi1_do_tid_send(struct rvt_qp *qp);
static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
static void tid_rdma_rcv_err(struct hfi1_packet *packet,
			     struct ib_other_headers *ohdr,
			     struct rvt_qp *qp, u32 psn, int diff, bool fecn);
static void update_r_next_psn_fecn(struct hfi1_packet *packet,
				   struct hfi1_qp_priv *priv,
				   struct hfi1_ctxtdata *rcd,
				   struct tid_rdma_flow *flow,
				   bool fecn);

static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
{
	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
		priv->r_tid_ack = priv->r_tid_tail;
}

static void tid_rdma_schedule_ack(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	priv->s_flags |= RVT_S_ACK_PENDING;
	hfi1_schedule_tid_send(qp);
}

static void tid_rdma_trigger_ack(struct rvt_qp *qp)
{
	validate_r_tid_ack(qp->priv);
	tid_rdma_schedule_ack(qp);
}
156
157static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
158{
159 return
160 (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
161 TID_OPFN_QP_CTXT_SHIFT) |
162 ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
163 TID_OPFN_QP_KDETH_SHIFT) |
164 (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
165 TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
166 (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
167 TID_OPFN_TIMEOUT_SHIFT) |
168 (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
169 (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
170 (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
171 TID_OPFN_MAX_READ_SHIFT) |
172 (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
173 TID_OPFN_MAX_WRITE_SHIFT);
174}
175
176static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
177{
178 p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
179 TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
180 p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
181 p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
182 TID_OPFN_MAX_WRITE_MASK;
183 p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
184 TID_OPFN_MAX_READ_MASK;
185 p->qp =
186 ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
187 << 16) |
188 ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
189 p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
190 p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
191}
192
193void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
194{
195 struct hfi1_qp_priv *priv = qp->priv;
196
197 p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
198 p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
199 p->jkey = priv->rcd->jkey;
200 p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
201 p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
202 p->timeout = qp->timeout;
203 p->urg = is_urg_masked(priv->rcd);
204}
205
206bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
207{
208 struct hfi1_qp_priv *priv = qp->priv;
209
210 *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
211 return true;
212}
213
214bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
215{
216 struct hfi1_qp_priv *priv = qp->priv;
217 struct tid_rdma_params *remote, *old;
218 bool ret = true;
219
220 old = rcu_dereference_protected(priv->tid_rdma.remote,
221 lockdep_is_held(&priv->opfn.lock));
222 data &= ~0xfULL;
223
224
225
226
227 if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
228 goto null;
229
230
231
232
233
234
235
236 remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
237 if (!remote) {
238 ret = false;
239 goto null;
240 }
241
242 tid_rdma_opfn_decode(remote, data);
243 priv->tid_timer_timeout_jiffies =
244 usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
245 1000UL) << 3) * 7);
246 trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
247 trace_hfi1_opfn_param(qp, 1, remote);
248 rcu_assign_pointer(priv->tid_rdma.remote, remote);
249
250
251
252
253
254
255
256
257 priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
258 priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
259 goto free;
260null:
261 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
262 priv->timeout_shift = 0;
263free:
264 if (old)
265 kfree_rcu(old, rcu_head);
266 return ret;
267}
268
269bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
270{
271 bool ret;
272
273 ret = tid_rdma_conn_reply(qp, *data);
274 *data = 0;
275
276
277
278
279
280 if (ret)
281 (void)tid_rdma_conn_req(qp, data);
282 return ret;
283}
284
285void tid_rdma_conn_error(struct rvt_qp *qp)
286{
287 struct hfi1_qp_priv *priv = qp->priv;
288 struct tid_rdma_params *old;
289
290 old = rcu_dereference_protected(priv->tid_rdma.remote,
291 lockdep_is_held(&priv->opfn.lock));
292 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
293 if (old)
294 kfree_rcu(old, rcu_head);
295}
296
297
298int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
299{
300 if (reinit)
301 return 0;
302
303 BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
304 BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
305 rcd->jkey = TID_RDMA_JKEY;
306 hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
307 return hfi1_alloc_ctxt_rcv_groups(rcd);
308}
309
310
311
312
313
314
315
316
317
318
319static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
320 struct rvt_qp *qp)
321{
322 struct hfi1_ibdev *verbs_dev = container_of(rdi,
323 struct hfi1_ibdev,
324 rdi);
325 struct hfi1_devdata *dd = container_of(verbs_dev,
326 struct hfi1_devdata,
327 verbs_dev);
328 unsigned int ctxt;
329
330 if (qp->ibqp.qp_num == 0)
331 ctxt = 0;
332 else
333 ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
334 return dd->rcd[ctxt];
335}
336
337int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
338 struct ib_qp_init_attr *init_attr)
339{
340 struct hfi1_qp_priv *qpriv = qp->priv;
341 int i, ret;
342
343 qpriv->rcd = qp_to_rcd(rdi, qp);
344
345 spin_lock_init(&qpriv->opfn.lock);
346 INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
347 INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
348 qpriv->flow_state.psn = 0;
349 qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
350 qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
351 qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
352 qpriv->s_state = TID_OP(WRITE_RESP);
353 qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
354 qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
355 qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
356 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
357 qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
358 qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
359 qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
360 qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
361 atomic_set(&qpriv->n_requests, 0);
362 atomic_set(&qpriv->n_tid_requests, 0);
363 timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
364 timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
365 INIT_LIST_HEAD(&qpriv->tid_wait);
366
367 if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
368 struct hfi1_devdata *dd = qpriv->rcd->dd;
369
370 qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
371 sizeof(*qpriv->pages),
372 GFP_KERNEL, dd->node);
373 if (!qpriv->pages)
374 return -ENOMEM;
375 for (i = 0; i < qp->s_size; i++) {
376 struct hfi1_swqe_priv *priv;
377 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
378
379 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
380 dd->node);
381 if (!priv)
382 return -ENOMEM;
383
384 hfi1_init_trdma_req(qp, &priv->tid_req);
385 priv->tid_req.e.swqe = wqe;
386 wqe->priv = priv;
387 }
388 for (i = 0; i < rvt_max_atomic(rdi); i++) {
389 struct hfi1_ack_priv *priv;
390
391 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
392 dd->node);
393 if (!priv)
394 return -ENOMEM;
395
396 hfi1_init_trdma_req(qp, &priv->tid_req);
397 priv->tid_req.e.ack = &qp->s_ack_queue[i];
398
399 ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
400 GFP_KERNEL);
401 if (ret) {
402 kfree(priv);
403 return ret;
404 }
405 qp->s_ack_queue[i].priv = priv;
406 }
407 }
408
409 return 0;
410}
411
412void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
413{
414 struct hfi1_qp_priv *qpriv = qp->priv;
415 struct rvt_swqe *wqe;
416 u32 i;
417
418 if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
419 for (i = 0; i < qp->s_size; i++) {
420 wqe = rvt_get_swqe_ptr(qp, i);
421 kfree(wqe->priv);
422 wqe->priv = NULL;
423 }
424 for (i = 0; i < rvt_max_atomic(rdi); i++) {
425 struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
426
427 if (priv)
428 hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
429 kfree(priv);
430 qp->s_ack_queue[i].priv = NULL;
431 }
432 cancel_work_sync(&qpriv->opfn.opfn_work);
433 kfree(qpriv->pages);
434 qpriv->pages = NULL;
435 }
436}
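
/*
 * first_qp - return the first QP on a TID wait queue, or NULL if the
 * queue is empty.  A reference is taken on the returned QP; the caller
 * is responsible for dropping it with rvt_put_qp().  Must be called
 * with rcd->exp_lock held.
 */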
468static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
469 struct tid_queue *queue)
470 __must_hold(&rcd->exp_lock)
471{
472 struct hfi1_qp_priv *priv;
473
474 lockdep_assert_held(&rcd->exp_lock);
475 priv = list_first_entry_or_null(&queue->queue_head,
476 struct hfi1_qp_priv,
477 tid_wait);
478 if (!priv)
479 return NULL;
480 rvt_get_qp(priv->owner);
481 return priv->owner;
482}
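
/*
 * kernel_tid_waiters - determine whether @qp must wait its turn for
 * TID resources.  Returns false when the wait queue is empty or when
 * @qp itself is at the head of the queue waiting for TID space;
 * returns true when another QP is queued ahead of it.  Called with
 * both qp->s_lock and rcd->exp_lock held.
 */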
502static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
503 struct tid_queue *queue, struct rvt_qp *qp)
504 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
505{
506 struct rvt_qp *fqp;
507 bool ret = true;
508
509 lockdep_assert_held(&qp->s_lock);
510 lockdep_assert_held(&rcd->exp_lock);
511 fqp = first_qp(rcd, queue);
512 if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
513 ret = false;
514 rvt_put_qp(fqp);
515 return ret;
516}
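
/*
 * dequeue_tid_waiter - remove @qp from the TID wait queue if it is
 * queued, clear HFI1_S_WAIT_TID_SPACE and drop the reference taken
 * when the QP was queued.  Called with qp->s_lock and rcd->exp_lock
 * held.
 */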
533static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
534 struct tid_queue *queue, struct rvt_qp *qp)
535 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
536{
537 struct hfi1_qp_priv *priv = qp->priv;
538
539 lockdep_assert_held(&qp->s_lock);
540 lockdep_assert_held(&rcd->exp_lock);
541 if (list_empty(&priv->tid_wait))
542 return;
543 list_del_init(&priv->tid_wait);
544 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
545 queue->dequeue++;
546 rvt_put_qp(qp);
547}
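
/*
 * queue_qp_for_tid_wait - add @qp to the tail of the TID wait queue if
 * it is not already queued.  Sets HFI1_S_WAIT_TID_SPACE, takes a QP
 * reference and bumps the n_tidwait counter.  Called with qp->s_lock
 * and rcd->exp_lock held.
 */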
559static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
560 struct tid_queue *queue, struct rvt_qp *qp)
561 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
562{
563 struct hfi1_qp_priv *priv = qp->priv;
564
565 lockdep_assert_held(&qp->s_lock);
566 lockdep_assert_held(&rcd->exp_lock);
567 if (list_empty(&priv->tid_wait)) {
568 qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
569 list_add_tail(&priv->tid_wait, &queue->queue_head);
570 priv->tid_enqueue = ++queue->enqueue;
571 rcd->dd->verbs_dev.n_tidwait++;
572 trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
573 rvt_get_qp(qp);
574 }
575}
576
577
578
579
580
581
582
583
584static void __trigger_tid_waiter(struct rvt_qp *qp)
585 __must_hold(&qp->s_lock)
586{
587 lockdep_assert_held(&qp->s_lock);
588 if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
589 return;
590 trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
591 hfi1_schedule_send(qp);
592}
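
/*
 * tid_rdma_schedule_tid_wakeup - queue the QP's TID RDMA trigger work
 * on the CPU of its SDMA engine (or the first CPU of the device's
 * NUMA node when no SDMA engine is assigned).  If the work is already
 * pending, the QP reference held for it is dropped here.
 */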
606static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
607{
608 struct hfi1_qp_priv *priv;
609 struct hfi1_ibport *ibp;
610 struct hfi1_pportdata *ppd;
611 struct hfi1_devdata *dd;
612 bool rval;
613
614 if (!qp)
615 return;
616
617 priv = qp->priv;
618 ibp = to_iport(qp->ibqp.device, qp->port_num);
619 ppd = ppd_from_ibp(ibp);
620 dd = dd_from_ibdev(qp->ibqp.device);
621
622 rval = queue_work_on(priv->s_sde ?
623 priv->s_sde->cpu :
624 cpumask_first(cpumask_of_node(dd->node)),
625 ppd->hfi1_wq,
626 &priv->tid_rdma.trigger_work);
627 if (!rval)
628 rvt_put_qp(qp);
629}
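
/*
 * tid_rdma_trigger_resume - work handler that resumes a QP which was
 * waiting for TID resources: if the QP is still marked as waiting for
 * TID space, the send engine is run directly; the QP reference taken
 * when the work was queued is dropped on exit.
 */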
638static void tid_rdma_trigger_resume(struct work_struct *work)
639{
640 struct tid_rdma_qp_params *tr;
641 struct hfi1_qp_priv *priv;
642 struct rvt_qp *qp;
643
644 tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
645 priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
646 qp = priv->owner;
647 spin_lock_irq(&qp->s_lock);
648 if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
649 spin_unlock_irq(&qp->s_lock);
650 hfi1_do_send(priv->owner, true);
651 } else {
652 spin_unlock_irq(&qp->s_lock);
653 }
654 rvt_put_qp(qp);
655}
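
/*
 * _tid_rdma_flush_wait - remove @qp from @queue (if queued) and clear
 * its TID-space wait state.  Used when the QP is being flushed or torn
 * down.  Called with qp->s_lock held; takes rcd->exp_lock internally.
 */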
664static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
665 __must_hold(&qp->s_lock)
666{
667 struct hfi1_qp_priv *priv;
668
669 if (!qp)
670 return;
671 lockdep_assert_held(&qp->s_lock);
672 priv = qp->priv;
673 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
674 spin_lock(&priv->rcd->exp_lock);
675 if (!list_empty(&priv->tid_wait)) {
676 list_del_init(&priv->tid_wait);
677 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
678 queue->dequeue++;
679 rvt_put_qp(qp);
680 }
681 spin_unlock(&priv->rcd->exp_lock);
682}
683
684void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
685 __must_hold(&qp->s_lock)
686{
687 struct hfi1_qp_priv *priv = qp->priv;
688
689 _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
690 _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
691}
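
/*
 * kern_reserve_flow - reserve a hardware flow index for a kernel
 * context.  The previously used index @last is tried first so a QP
 * tends to reuse the same flow; otherwise the first free bit in
 * rcd->flow_mask is claimed.  Returns the flow index, or -EAGAIN if
 * all RXE_NUM_TID_FLOWS flows are in use.  Called with rcd->exp_lock
 * held.
 */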
711static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
712 __must_hold(&rcd->exp_lock)
713{
714 int nr;
715
716
717 if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
718 !test_and_set_bit(last, &rcd->flow_mask))
719 return last;
720
721 nr = ffz(rcd->flow_mask);
722 BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
723 (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
724 if (nr > (RXE_NUM_TID_FLOWS - 1))
725 return -EAGAIN;
726 set_bit(nr, &rcd->flow_mask);
727 return nr;
728}
729
730static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
731 u32 flow_idx)
732{
733 u64 reg;
734
735 reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
736 RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
737 RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
738 RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
739 RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
740 RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
741
742 if (generation != KERN_GENERATION_RESERVED)
743 reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
744
745 write_uctxt_csr(rcd->dd, rcd->ctxt,
746 RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
747}
748
749static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
750 __must_hold(&rcd->exp_lock)
751{
752 u32 generation = rcd->flows[flow_idx].generation;
753
754 kern_set_hw_flow(rcd, generation, flow_idx);
755 return generation;
756}
757
758static u32 kern_flow_generation_next(u32 gen)
759{
760 u32 generation = mask_generation(gen + 1);
761
762 if (generation == KERN_GENERATION_RESERVED)
763 generation = mask_generation(generation + 1);
764 return generation;
765}
766
767static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
768 __must_hold(&rcd->exp_lock)
769{
770 rcd->flows[flow_idx].generation =
771 kern_flow_generation_next(rcd->flows[flow_idx].generation);
772 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
773}
774
775int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
776{
777 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
778 struct tid_flow_state *fs = &qpriv->flow_state;
779 struct rvt_qp *fqp;
780 unsigned long flags;
781 int ret = 0;
782
783
784 if (fs->index != RXE_NUM_TID_FLOWS)
785 return ret;
786
787 spin_lock_irqsave(&rcd->exp_lock, flags);
788 if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
789 goto queue;
790
791 ret = kern_reserve_flow(rcd, fs->last_index);
792 if (ret < 0)
793 goto queue;
794 fs->index = ret;
795 fs->last_index = fs->index;
796
797
798 if (fs->generation != KERN_GENERATION_RESERVED)
799 rcd->flows[fs->index].generation = fs->generation;
800 fs->generation = kern_setup_hw_flow(rcd, fs->index);
801 fs->psn = 0;
802 dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
803
804 fqp = first_qp(rcd, &rcd->flow_queue);
805 spin_unlock_irqrestore(&rcd->exp_lock, flags);
806
807 tid_rdma_schedule_tid_wakeup(fqp);
808 return 0;
809queue:
810 queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
811 spin_unlock_irqrestore(&rcd->exp_lock, flags);
812 return -EAGAIN;
813}
814
815void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
816{
817 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
818 struct tid_flow_state *fs = &qpriv->flow_state;
819 struct rvt_qp *fqp;
820 unsigned long flags;
821
822 if (fs->index >= RXE_NUM_TID_FLOWS)
823 return;
824 spin_lock_irqsave(&rcd->exp_lock, flags);
825 kern_clear_hw_flow(rcd, fs->index);
826 clear_bit(fs->index, &rcd->flow_mask);
827 fs->index = RXE_NUM_TID_FLOWS;
828 fs->psn = 0;
829 fs->generation = KERN_GENERATION_RESERVED;
830
831
832 fqp = first_qp(rcd, &rcd->flow_queue);
833 spin_unlock_irqrestore(&rcd->exp_lock, flags);
834
835 if (fqp == qp) {
836 __trigger_tid_waiter(fqp);
837 rvt_put_qp(fqp);
838 } else {
839 tid_rdma_schedule_tid_wakeup(fqp);
840 }
841}
842
843void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
844{
845 int i;
846
847 for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
848 rcd->flows[i].generation = mask_generation(prandom_u32());
849 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
850 }
851}
852
853
854static u8 trdma_pset_order(struct tid_rdma_pageset *s)
855{
856 u8 count = s->count;
857
858 return ilog2(count) + 1;
859}
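
/*
 * tid_rdma_find_phys_blocks_4k - scan @pages for runs of contiguous
 * pages (compared via page_address()) and split each run into pagesets
 * whose sizes are powers of two no larger than MAX_EXPECTED_BUFFER, as
 * required for RcvArray expected-receive entries.  Returns the number
 * of pagesets written to @list, padded with an empty set so the count
 * is always even.
 */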
875static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
876 struct page **pages,
877 u32 npages,
878 struct tid_rdma_pageset *list)
879{
880 u32 pagecount, pageidx, setcount = 0, i;
881 void *vaddr, *this_vaddr;
882
883 if (!npages)
884 return 0;
885
886
887
888
889
890
891 vaddr = page_address(pages[0]);
892 trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
893 for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
894 this_vaddr = i < npages ? page_address(pages[i]) : NULL;
895 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
896 this_vaddr);
897
898
899
900
901 if (this_vaddr != (vaddr + PAGE_SIZE)) {
902
903
904
905
906
907
908
909
910
911
912
913
914 while (pagecount) {
915 int maxpages = pagecount;
916 u32 bufsize = pagecount * PAGE_SIZE;
917
918 if (bufsize > MAX_EXPECTED_BUFFER)
919 maxpages =
920 MAX_EXPECTED_BUFFER >>
921 PAGE_SHIFT;
922 else if (!is_power_of_2(bufsize))
923 maxpages =
924 rounddown_pow_of_two(bufsize) >>
925 PAGE_SHIFT;
926
927 list[setcount].idx = pageidx;
928 list[setcount].count = maxpages;
929 trace_hfi1_tid_pageset(flow->req->qp, setcount,
930 list[setcount].idx,
931 list[setcount].count);
932 pagecount -= maxpages;
933 pageidx += maxpages;
934 setcount++;
935 }
936 pageidx = i;
937 pagecount = 1;
938 vaddr = this_vaddr;
939 } else {
940 vaddr += PAGE_SIZE;
941 pagecount++;
942 }
943 }
944
945 if (setcount & 1)
946 list[setcount++].count = 0;
947 return setcount;
948}
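
/*
 * tid_flush_pages - emit pagesets for @pages contiguous pages starting
 * at *@idx, splitting them so each set is a power of two of at most
 * MAX_EXPECTED_PAGES pages.  If the resulting number of sets is odd,
 * an empty set is appended to keep the count even.  Returns the
 * updated set count.
 */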
970static u32 tid_flush_pages(struct tid_rdma_pageset *list,
971 u32 *idx, u32 pages, u32 sets)
972{
973 while (pages) {
974 u32 maxpages = pages;
975
976 if (maxpages > MAX_EXPECTED_PAGES)
977 maxpages = MAX_EXPECTED_PAGES;
978 else if (!is_power_of_2(maxpages))
979 maxpages = rounddown_pow_of_two(maxpages);
980 list[sets].idx = *idx;
981 list[sets++].count = maxpages;
982 *idx += maxpages;
983 pages -= maxpages;
984 }
985
986 if (sets & 1)
987 list[sets++].count = 0;
988 return sets;
989}
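
/*
 * tid_rdma_find_phys_blocks_8k - variant of the 4K routine used for
 * larger MTUs: pages are examined in aligned pairs.  A pair whose two
 * halves are not contiguous ends the current run and is emitted as
 * individual single-page sets; a pair that does not continue the
 * previous pair flushes the pending run first.  Returns the number of
 * pagesets in @list, which is expected to be even (WARN_ON otherwise).
 */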
1013static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
1014 struct page **pages,
1015 u32 npages,
1016 struct tid_rdma_pageset *list)
1017{
1018 u32 idx, sets = 0, i;
1019 u32 pagecnt = 0;
1020 void *v0, *v1, *vm1;
1021
1022 if (!npages)
1023 return 0;
1024 for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
1025
1026 v0 = page_address(pages[i]);
1027 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
1028 v1 = i + 1 < npages ?
1029 page_address(pages[i + 1]) : NULL;
1030 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
1031
1032 if (v1 != (v0 + PAGE_SIZE)) {
1033
1034 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1035
1036 list[sets].idx = idx++;
1037 list[sets++].count = 1;
1038 if (v1) {
1039 list[sets].count = 1;
1040 list[sets++].idx = idx++;
1041 } else {
1042 list[sets++].count = 0;
1043 }
1044 vm1 = NULL;
1045 pagecnt = 0;
1046 continue;
1047 }
1048
1049 if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
1050
1051 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1052 pagecnt = 0;
1053 }
1054
1055 pagecnt += 2;
1056
1057 vm1 = v1;
1058
1059 }
1060
1061 sets = tid_flush_pages(list, &idx, npages - idx, sets);
1062
1063 WARN_ON(sets & 1);
1064 return sets;
1065}
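
/*
 * kern_find_pages - walk the request's SGE state one PAGE_SIZE step at
 * a time and collect the pages backing the next segment (up to
 * req->seg_len bytes) into @pages.  flow->length is set to the amount
 * actually covered; *last is set to false once every SGE has been
 * consumed and true otherwise.  Returns the number of pages collected.
 */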
1080static u32 kern_find_pages(struct tid_rdma_flow *flow,
1081 struct page **pages,
1082 struct rvt_sge_state *ss, bool *last)
1083{
1084 struct tid_rdma_request *req = flow->req;
1085 struct rvt_sge *sge = &ss->sge;
1086 u32 length = flow->req->seg_len;
1087 u32 len = PAGE_SIZE;
1088 u32 i = 0;
1089
1090 while (length && req->isge < ss->num_sge) {
1091 pages[i++] = virt_to_page(sge->vaddr);
1092
1093 sge->vaddr += len;
1094 sge->length -= len;
1095 sge->sge_length -= len;
1096 if (!sge->sge_length) {
1097 if (++req->isge < ss->num_sge)
1098 *sge = ss->sg_list[req->isge - 1];
1099 } else if (sge->length == 0 && sge->mr->lkey) {
1100 if (++sge->n >= RVT_SEGSZ) {
1101 ++sge->m;
1102 sge->n = 0;
1103 }
1104 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
1105 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
1106 }
1107 length -= len;
1108 }
1109
1110 flow->length = flow->req->seg_len - length;
1111 *last = req->isge == ss->num_sge ? false : true;
1112 return i;
1113}
1114
1115static void dma_unmap_flow(struct tid_rdma_flow *flow)
1116{
1117 struct hfi1_devdata *dd;
1118 int i;
1119 struct tid_rdma_pageset *pset;
1120
1121 dd = flow->req->rcd->dd;
1122 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1123 i++, pset++) {
1124 if (pset->count && pset->addr) {
1125 dma_unmap_page(&dd->pcidev->dev,
1126 pset->addr,
1127 PAGE_SIZE * pset->count,
1128 DMA_FROM_DEVICE);
1129 pset->mapped = 0;
1130 }
1131 }
1132}
1133
1134static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
1135{
1136 int i;
1137 struct hfi1_devdata *dd = flow->req->rcd->dd;
1138 struct tid_rdma_pageset *pset;
1139
1140 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1141 i++, pset++) {
1142 if (pset->count) {
1143 pset->addr = dma_map_page(&dd->pcidev->dev,
1144 pages[pset->idx],
1145 0,
1146 PAGE_SIZE * pset->count,
1147 DMA_FROM_DEVICE);
1148
1149 if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
1150 dma_unmap_flow(flow);
1151 return -ENOMEM;
1152 }
1153 pset->mapped = 1;
1154 }
1155 }
1156 return 0;
1157}
1158
1159static inline bool dma_mapped(struct tid_rdma_flow *flow)
1160{
1161 return !!flow->pagesets[0].mapped;
1162}
1163
1164
1165
1166
1167
1168static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
1169 struct page **pages,
1170 struct rvt_sge_state *ss, bool *last)
1171{
1172 u8 npages;
1173
1174
1175 if (flow->npagesets) {
1176 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
1177 flow);
1178 if (!dma_mapped(flow))
1179 return dma_map_flow(flow, pages);
1180 return 0;
1181 }
1182
1183 npages = kern_find_pages(flow, pages, ss, last);
1184
1185 if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
1186 flow->npagesets =
1187 tid_rdma_find_phys_blocks_4k(flow, pages, npages,
1188 flow->pagesets);
1189 else
1190 flow->npagesets =
1191 tid_rdma_find_phys_blocks_8k(flow, pages, npages,
1192 flow->pagesets);
1193
1194 return dma_map_flow(flow, pages);
1195}
1196
1197static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
1198 struct hfi1_ctxtdata *rcd, char *s,
1199 struct tid_group *grp, u8 cnt)
1200{
1201 struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
1202
1203 WARN_ON_ONCE(flow->tnode_cnt >=
1204 (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
1205 if (WARN_ON_ONCE(cnt & 1))
1206 dd_dev_err(rcd->dd,
1207 "unexpected odd allocation cnt %u map 0x%x used %u",
1208 cnt, grp->map, grp->used);
1209
1210 node->grp = grp;
1211 node->map = grp->map;
1212 node->cnt = cnt;
1213 trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
1214 grp->base, grp->map, grp->used, cnt);
1215}
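
/*
 * kern_alloc_tids - select RcvArray (TID) groups to cover the flow's
 * pagesets: completely free groups are taken first, then partially
 * used groups, and finally one more free group if a remainder is
 * left.  Each chosen group is recorded as a kern_tid_node on the flow.
 * Returns 0 on success or -EAGAIN if not enough TID entries are
 * available.  Called with rcd->exp_lock held.
 */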
1230static int kern_alloc_tids(struct tid_rdma_flow *flow)
1231{
1232 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1233 struct hfi1_devdata *dd = rcd->dd;
1234 u32 ngroups, pageidx = 0;
1235 struct tid_group *group = NULL, *used;
1236 u8 use;
1237
1238 flow->tnode_cnt = 0;
1239 ngroups = flow->npagesets / dd->rcv_entries.group_size;
1240 if (!ngroups)
1241 goto used_list;
1242
1243
1244 list_for_each_entry(group, &rcd->tid_group_list.list, list) {
1245 kern_add_tid_node(flow, rcd, "complete groups", group,
1246 group->size);
1247
1248 pageidx += group->size;
1249 if (!--ngroups)
1250 break;
1251 }
1252
1253 if (pageidx >= flow->npagesets)
1254 goto ok;
1255
1256used_list:
1257
1258 list_for_each_entry(used, &rcd->tid_used_list.list, list) {
1259 use = min_t(u32, flow->npagesets - pageidx,
1260 used->size - used->used);
1261 kern_add_tid_node(flow, rcd, "used groups", used, use);
1262
1263 pageidx += use;
1264 if (pageidx >= flow->npagesets)
1265 goto ok;
1266 }
1267
1268
1269
1270
1271
1272
1273 if (group && &group->list == &rcd->tid_group_list.list)
1274 goto bail_eagain;
1275 group = list_prepare_entry(group, &rcd->tid_group_list.list,
1276 list);
1277 if (list_is_last(&group->list, &rcd->tid_group_list.list))
1278 goto bail_eagain;
1279 group = list_next_entry(group, list);
1280 use = min_t(u32, flow->npagesets - pageidx, group->size);
1281 kern_add_tid_node(flow, rcd, "complete continue", group, use);
1282 pageidx += use;
1283 if (pageidx >= flow->npagesets)
1284 goto ok;
1285bail_eagain:
1286 trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
1287 (u64)flow->npagesets);
1288 return -EAGAIN;
1289ok:
1290 return 0;
1291}
1292
1293static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
1294 u32 *pset_idx)
1295{
1296 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1297 struct hfi1_devdata *dd = rcd->dd;
1298 struct kern_tid_node *node = &flow->tnode[grp_num];
1299 struct tid_group *grp = node->grp;
1300 struct tid_rdma_pageset *pset;
1301 u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
1302 u32 rcventry, npages = 0, pair = 0, tidctrl;
1303 u8 i, cnt = 0;
1304
1305 for (i = 0; i < grp->size; i++) {
1306 rcventry = grp->base + i;
1307
1308 if (node->map & BIT(i) || cnt >= node->cnt) {
1309 rcv_array_wc_fill(dd, rcventry);
1310 continue;
1311 }
1312 pset = &flow->pagesets[(*pset_idx)++];
1313 if (pset->count) {
1314 hfi1_put_tid(dd, rcventry, PT_EXPECTED,
1315 pset->addr, trdma_pset_order(pset));
1316 } else {
1317 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1318 }
1319 npages += pset->count;
1320
1321 rcventry -= rcd->expected_base;
1322 tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
1323
1324
1325
1326
1327
1328
1329
1330 pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
1331 node->cnt >= cnt + 2;
1332 if (!pair) {
1333 if (!pset->count)
1334 tidctrl = 0x1;
1335 flow->tid_entry[flow->tidcnt++] =
1336 EXP_TID_SET(IDX, rcventry >> 1) |
1337 EXP_TID_SET(CTRL, tidctrl) |
1338 EXP_TID_SET(LEN, npages);
1339 trace_hfi1_tid_entry_alloc(
1340 flow->req->qp, flow->tidcnt - 1,
1341 flow->tid_entry[flow->tidcnt - 1]);
1342
1343
1344 flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
1345 npages = 0;
1346 }
1347
1348 if (grp->used == grp->size - 1)
1349 tid_group_move(grp, &rcd->tid_used_list,
1350 &rcd->tid_full_list);
1351 else if (!grp->used)
1352 tid_group_move(grp, &rcd->tid_group_list,
1353 &rcd->tid_used_list);
1354
1355 grp->used++;
1356 grp->map |= BIT(i);
1357 cnt++;
1358 }
1359}
1360
1361static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
1362{
1363 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1364 struct hfi1_devdata *dd = rcd->dd;
1365 struct kern_tid_node *node = &flow->tnode[grp_num];
1366 struct tid_group *grp = node->grp;
1367 u32 rcventry;
1368 u8 i, cnt = 0;
1369
1370 for (i = 0; i < grp->size; i++) {
1371 rcventry = grp->base + i;
1372
1373 if (node->map & BIT(i) || cnt >= node->cnt) {
1374 rcv_array_wc_fill(dd, rcventry);
1375 continue;
1376 }
1377
1378 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1379
1380 grp->used--;
1381 grp->map &= ~BIT(i);
1382 cnt++;
1383
1384 if (grp->used == grp->size - 1)
1385 tid_group_move(grp, &rcd->tid_full_list,
1386 &rcd->tid_used_list);
1387 else if (!grp->used)
1388 tid_group_move(grp, &rcd->tid_used_list,
1389 &rcd->tid_group_list);
1390 }
1391 if (WARN_ON_ONCE(cnt & 1)) {
1392 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1393 struct hfi1_devdata *dd = rcd->dd;
1394
1395 dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
1396 cnt, grp->map, grp->used);
1397 }
1398}
1399
1400static void kern_program_rcvarray(struct tid_rdma_flow *flow)
1401{
1402 u32 pset_idx = 0;
1403 int i;
1404
1405 flow->npkts = 0;
1406 flow->tidcnt = 0;
1407 for (i = 0; i < flow->tnode_cnt; i++)
1408 kern_program_rcv_group(flow, i, &pset_idx);
1409 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
1410}
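
/*
 * hfi1_kern_exp_rcv_setup - set up the flow at req->setup_head for
 * expected receive: gather the segment's pages into pagesets, DMA-map
 * them, reserve TID groups and program the RcvArray, then initialize
 * the flow's TID-flow PSN window (generation, spsn, lpsn, r_next_psn)
 * from the QP's flow state and advance it.  On success setup_head is
 * advanced.  Returns 0 on success, -EINVAL if no flow slot is
 * available, -ENOMEM if the pages could not be mapped (the QP is put
 * on the kmem wait list), or -EAGAIN if the QP must wait for TID
 * resources.  Called with req->qp->s_lock held.
 */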
1454int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
1455 struct rvt_sge_state *ss, bool *last)
1456 __must_hold(&req->qp->s_lock)
1457{
1458 struct tid_rdma_flow *flow = &req->flows[req->setup_head];
1459 struct hfi1_ctxtdata *rcd = req->rcd;
1460 struct hfi1_qp_priv *qpriv = req->qp->priv;
1461 unsigned long flags;
1462 struct rvt_qp *fqp;
1463 u16 clear_tail = req->clear_tail;
1464
1465 lockdep_assert_held(&req->qp->s_lock);
1466
1467
1468
1469
1470
1471
1472 if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
1473 CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
1474 req->n_flows)
1475 return -EINVAL;
1476
1477
1478
1479
1480
1481
1482 if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
1483 hfi1_wait_kmem(flow->req->qp);
1484 return -ENOMEM;
1485 }
1486
1487 spin_lock_irqsave(&rcd->exp_lock, flags);
1488 if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
1489 goto queue;
1490
1491
1492
1493
1494
1495
1496 if (kern_alloc_tids(flow))
1497 goto queue;
1498
1499
1500
1501
1502 kern_program_rcvarray(flow);
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
1513 flow->idx = qpriv->flow_state.index;
1514 flow->flow_state.generation = qpriv->flow_state.generation;
1515 flow->flow_state.spsn = qpriv->flow_state.psn;
1516 flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
1517 flow->flow_state.r_next_psn =
1518 full_flow_psn(flow, flow->flow_state.spsn);
1519 qpriv->flow_state.psn += flow->npkts;
1520
1521 dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
1522
1523 fqp = first_qp(rcd, &rcd->rarr_queue);
1524 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1525 tid_rdma_schedule_tid_wakeup(fqp);
1526
1527 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1528 return 0;
1529queue:
1530 queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
1531 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1532 return -EAGAIN;
1533}
1534
1535static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
1536{
1537 flow->npagesets = 0;
1538}
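
/*
 * hfi1_kern_exp_rcv_clear - release the flow at req->clear_tail:
 * unprogram its RcvArray entries, return its TID groups, drop the DMA
 * mappings and advance clear_tail.  The next QP waiting for TID
 * resources is woken up.  Called with req->qp->s_lock held.
 */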
1546int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
1547 __must_hold(&req->qp->s_lock)
1548{
1549 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
1550 struct hfi1_ctxtdata *rcd = req->rcd;
1551 unsigned long flags;
1552 int i;
1553 struct rvt_qp *fqp;
1554
1555 lockdep_assert_held(&req->qp->s_lock);
1556
1557 if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
1558 return -EINVAL;
1559
1560 spin_lock_irqsave(&rcd->exp_lock, flags);
1561
1562 for (i = 0; i < flow->tnode_cnt; i++)
1563 kern_unprogram_rcv_group(flow, i);
1564
1565 flow->tnode_cnt = 0;
1566
1567 fqp = first_qp(rcd, &rcd->rarr_queue);
1568 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1569
1570 dma_unmap_flow(flow);
1571
1572 hfi1_tid_rdma_reset_flow(flow);
1573 req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
1574
1575 if (fqp == req->qp) {
1576 __trigger_tid_waiter(fqp);
1577 rvt_put_qp(fqp);
1578 } else {
1579 tid_rdma_schedule_tid_wakeup(fqp);
1580 }
1581
1582 return 0;
1583}
1584
1585
1586
1587
1588
1589void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
1590 __must_hold(&req->qp->s_lock)
1591{
1592
1593 while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
1594 if (hfi1_kern_exp_rcv_clear(req))
1595 break;
1596 }
1597}
1598
1599
1600
1601
1602
1603static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
1604{
1605 kfree(req->flows);
1606 req->flows = NULL;
1607}
1608
1609
1610
1611
1612
1613
1614void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
1615{
1616 struct hfi1_swqe_priv *p = wqe->priv;
1617
1618 hfi1_kern_exp_rcv_free_flows(&p->tid_req);
1619}
1620
1621
1622
1623
1624static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1625 gfp_t gfp)
1626{
1627 struct tid_rdma_flow *flows;
1628 int i;
1629
1630 if (likely(req->flows))
1631 return 0;
1632 flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
1633 req->rcd->numa_id);
1634 if (!flows)
1635 return -ENOMEM;
1636
1637 for (i = 0; i < MAX_FLOWS; i++) {
1638 flows[i].req = req;
1639 flows[i].npagesets = 0;
1640 flows[i].pagesets[0].mapped = 0;
1641 flows[i].resync_npkts = 0;
1642 }
1643 req->flows = flows;
1644 return 0;
1645}
1646
1647static void hfi1_init_trdma_req(struct rvt_qp *qp,
1648 struct tid_rdma_request *req)
1649{
1650 struct hfi1_qp_priv *qpriv = qp->priv;
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662 req->qp = qp;
1663 req->rcd = qpriv->rcd;
1664}
1665
1666u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
1667 void *context, int vl, int mode, u64 data)
1668{
1669 struct hfi1_devdata *dd = context;
1670
1671 return dd->verbs_dev.n_tidwait;
1672}
1673
1674static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
1675 u32 psn, u16 *fidx)
1676{
1677 u16 head, tail;
1678 struct tid_rdma_flow *flow;
1679
1680 head = req->setup_head;
1681 tail = req->clear_tail;
1682 for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
1683 tail = CIRC_NEXT(tail, MAX_FLOWS)) {
1684 flow = &req->flows[tail];
1685 if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
1686 cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
1687 if (fidx)
1688 *fidx = tail;
1689 return flow;
1690 }
1691 }
1692 return NULL;
1693}
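
/*
 * hfi1_build_tid_rdma_read_packet - build a TID RDMA READ REQUEST
 * header for the flow at req->flow_idx: the KDETH/RETH fields and the
 * TID-flow PSN/QP words are filled from the flow and the negotiated
 * remote parameters, and the wqe's private SGE is pointed at the TID
 * entry list so it goes out as the request payload.  Advances the
 * request/QP bookkeeping (cur_seg, ack_pending, flow_idx,
 * pending_tid_r_segs, s_num_rd_atomic).  Returns the number of 32-bit
 * header words added.
 */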
1696u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
1697 struct ib_other_headers *ohdr, u32 *bth1,
1698 u32 *bth2, u32 *len)
1699{
1700 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1701 struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
1702 struct rvt_qp *qp = req->qp;
1703 struct hfi1_qp_priv *qpriv = qp->priv;
1704 struct hfi1_swqe_priv *wpriv = wqe->priv;
1705 struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
1706 struct tid_rdma_params *remote;
1707 u32 req_len = 0;
1708 void *req_addr = NULL;
1709
1710
1711 *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
1712 trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
1713
1714
1715 req_addr = &flow->tid_entry[flow->tid_idx];
1716 req_len = sizeof(*flow->tid_entry) *
1717 (flow->tidcnt - flow->tid_idx);
1718
1719 memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
1720 wpriv->ss.sge.vaddr = req_addr;
1721 wpriv->ss.sge.sge_length = req_len;
1722 wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
1723
1724
1725
1726
1727 wpriv->ss.sge.mr = NULL;
1728 wpriv->ss.sge.m = 0;
1729 wpriv->ss.sge.n = 0;
1730
1731 wpriv->ss.sg_list = NULL;
1732 wpriv->ss.total_len = wpriv->ss.sge.sge_length;
1733 wpriv->ss.num_sge = 1;
1734
1735
1736 rcu_read_lock();
1737 remote = rcu_dereference(qpriv->tid_rdma.remote);
1738
1739 KDETH_RESET(rreq->kdeth0, KVER, 0x1);
1740 KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
1741 rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
1742 req->cur_seg * req->seg_len + flow->sent);
1743 rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
1744 rreq->reth.length = cpu_to_be32(*len);
1745 rreq->tid_flow_psn =
1746 cpu_to_be32((flow->flow_state.generation <<
1747 HFI1_KDETH_BTH_SEQ_SHIFT) |
1748 ((flow->flow_state.spsn + flow->pkt) &
1749 HFI1_KDETH_BTH_SEQ_MASK));
1750 rreq->tid_flow_qp =
1751 cpu_to_be32(qpriv->tid_rdma.local.qp |
1752 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
1753 TID_RDMA_DESTQP_FLOW_SHIFT) |
1754 qpriv->rcd->ctxt);
1755 rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
1756 *bth1 &= ~RVT_QPN_MASK;
1757 *bth1 |= remote->qp;
1758 *bth2 |= IB_BTH_REQ_ACK;
1759 rcu_read_unlock();
1760
1761
1762 flow->sent += *len;
1763 req->cur_seg++;
1764 qp->s_state = TID_OP(READ_REQ);
1765 req->ack_pending++;
1766 req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
1767 qpriv->pending_tid_r_segs++;
1768 qp->s_num_rd_atomic++;
1769
1770
1771 *len = req_len;
1772
1773 return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
1774}
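
/*
 * hfi1_build_tid_rdma_read_req - top-level builder for one TID RDMA
 * READ request segment: handles the SYNC and RESEND states, allocates
 * a hardware flow and expected-receive resources when a new segment is
 * started, assigns the segment's IB PSN range, and then calls
 * hfi1_build_tid_rdma_read_packet() to build the header.  Returns the
 * number of 32-bit header words, or 0 if the QP must wait.  Called
 * with qp->s_lock held.
 */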
1780u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
1781 struct ib_other_headers *ohdr, u32 *bth1,
1782 u32 *bth2, u32 *len)
1783 __must_hold(&qp->s_lock)
1784{
1785 struct hfi1_qp_priv *qpriv = qp->priv;
1786 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1787 struct tid_rdma_flow *flow = NULL;
1788 u32 hdwords = 0;
1789 bool last;
1790 bool retry = true;
1791 u32 npkts = rvt_div_round_up_mtu(qp, *len);
1792
1793 trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
1794 wqe->lpsn, req);
1795
1796
1797
1798
1799sync_check:
1800 if (req->state == TID_REQUEST_SYNC) {
1801 if (qpriv->pending_tid_r_segs)
1802 goto done;
1803
1804 hfi1_kern_clear_hw_flow(req->rcd, qp);
1805 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
1806 req->state = TID_REQUEST_ACTIVE;
1807 }
1808
1809
1810
1811
1812
1813
1814 if (req->flow_idx == req->setup_head) {
1815 retry = false;
1816 if (req->state == TID_REQUEST_RESEND) {
1817
1818
1819
1820
1821
1822 restart_sge(&qp->s_sge, wqe, req->s_next_psn,
1823 qp->pmtu);
1824 req->isge = 0;
1825 req->state = TID_REQUEST_ACTIVE;
1826 }
1827
1828
1829
1830
1831
1832 if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
1833 req->state = TID_REQUEST_SYNC;
1834 goto sync_check;
1835 }
1836
1837
1838 if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
1839 goto done;
1840
1841
1842
1843
1844
1845 if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
1846 req->state = TID_REQUEST_QUEUED;
1847
1848
1849
1850
1851
1852 goto done;
1853 }
1854 }
1855
1856
1857 flow = &req->flows[req->flow_idx];
1858 flow->pkt = 0;
1859 flow->tid_idx = 0;
1860 flow->sent = 0;
1861 if (!retry) {
1862
1863 flow->flow_state.ib_spsn = req->s_next_psn;
1864 flow->flow_state.ib_lpsn =
1865 flow->flow_state.ib_spsn + flow->npkts - 1;
1866 }
1867
1868
1869 req->s_next_psn += flow->npkts;
1870
1871
1872 hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
1873done:
1874 return hdwords;
1875}
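
/*
 * tid_rdma_rcv_read_request - responder-side setup for an incoming TID
 * RDMA READ REQUEST: copy the TID entries from the packet payload into
 * the flow at req->setup_head, validate their total length against the
 * requested length, initialize the flow's KDETH PSN/generation state
 * from the request, and initialize the ack entry and request tracking
 * fields.  Returns 0 on success or 1 if the request is malformed.
 */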
1882static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
1883 struct rvt_ack_entry *e,
1884 struct hfi1_packet *packet,
1885 struct ib_other_headers *ohdr,
1886 u32 bth0, u32 psn, u64 vaddr, u32 len)
1887{
1888 struct hfi1_qp_priv *qpriv = qp->priv;
1889 struct tid_rdma_request *req;
1890 struct tid_rdma_flow *flow;
1891 u32 flow_psn, i, tidlen = 0, pktlen, tlen;
1892
1893 req = ack_to_tid_req(e);
1894
1895
1896 flow = &req->flows[req->setup_head];
1897
1898
1899 pktlen = packet->tlen - (packet->hlen + 4);
1900 if (pktlen > sizeof(flow->tid_entry))
1901 return 1;
1902 memcpy(flow->tid_entry, packet->ebuf, pktlen);
1903 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
1904
1905
1906
1907
1908
1909 flow->npkts = rvt_div_round_up_mtu(qp, len);
1910 for (i = 0; i < flow->tidcnt; i++) {
1911 trace_hfi1_tid_entry_rcv_read_req(qp, i,
1912 flow->tid_entry[i]);
1913 tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
1914 if (!tlen)
1915 return 1;
1916
1917
1918
1919
1920
1921
1922
1923 tidlen += tlen;
1924 }
1925 if (tidlen * PAGE_SIZE < len)
1926 return 1;
1927
1928
1929 req->clear_tail = req->setup_head;
1930 flow->pkt = 0;
1931 flow->tid_idx = 0;
1932 flow->tid_offset = 0;
1933 flow->sent = 0;
1934 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
1935 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
1936 TID_RDMA_DESTQP_FLOW_MASK;
1937 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
1938 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
1939 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
1940 flow->length = len;
1941
1942 flow->flow_state.lpsn = flow->flow_state.spsn +
1943 flow->npkts - 1;
1944 flow->flow_state.ib_spsn = psn;
1945 flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
1946
1947 trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
1948
1949 req->flow_idx = req->setup_head;
1950
1951
1952 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1953
1954
1955
1956
1957 e->opcode = (bth0 >> 24) & 0xff;
1958 e->psn = psn;
1959 e->lpsn = psn + flow->npkts - 1;
1960 e->sent = 0;
1961
1962 req->n_flows = qpriv->tid_rdma.local.max_read;
1963 req->state = TID_REQUEST_ACTIVE;
1964 req->cur_seg = 0;
1965 req->comp_seg = 0;
1966 req->ack_seg = 0;
1967 req->isge = 0;
1968 req->seg_len = qpriv->tid_rdma.local.max_len;
1969 req->total_len = len;
1970 req->total_segs = 1;
1971 req->r_flow_psn = e->psn;
1972
1973 trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
1974 req);
1975 return 0;
1976}
1977
1978static int tid_rdma_rcv_error(struct hfi1_packet *packet,
1979 struct ib_other_headers *ohdr,
1980 struct rvt_qp *qp, u32 psn, int diff)
1981{
1982 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1983 struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
1984 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
1985 struct hfi1_qp_priv *qpriv = qp->priv;
1986 struct rvt_ack_entry *e;
1987 struct tid_rdma_request *req;
1988 unsigned long flags;
1989 u8 prev;
1990 bool old_req;
1991
1992 trace_hfi1_rsp_tid_rcv_error(qp, psn);
1993 trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
1994 if (diff > 0) {
1995
1996 if (!qp->r_nak_state) {
1997 ibp->rvp.n_rc_seqnak++;
1998 qp->r_nak_state = IB_NAK_PSN_ERROR;
1999 qp->r_ack_psn = qp->r_psn;
2000 rc_defered_ack(rcd, qp);
2001 }
2002 goto done;
2003 }
2004
2005 ibp->rvp.n_rc_dupreq++;
2006
2007 spin_lock_irqsave(&qp->s_lock, flags);
2008 e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
2009 if (!e || (e->opcode != TID_OP(READ_REQ) &&
2010 e->opcode != TID_OP(WRITE_REQ)))
2011 goto unlock;
2012
2013 req = ack_to_tid_req(e);
2014 req->r_flow_psn = psn;
2015 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
2016 if (e->opcode == TID_OP(READ_REQ)) {
2017 struct ib_reth *reth;
2018 u32 len;
2019 u32 rkey;
2020 u64 vaddr;
2021 int ok;
2022 u32 bth0;
2023
2024 reth = &ohdr->u.tid_rdma.r_req.reth;
2025
2026
2027
2028
2029 len = be32_to_cpu(reth->length);
2030 if (psn != e->psn || len != req->total_len)
2031 goto unlock;
2032
2033 release_rdma_sge_mr(e);
2034
2035 rkey = be32_to_cpu(reth->rkey);
2036 vaddr = get_ib_reth_vaddr(reth);
2037
2038 qp->r_len = len;
2039 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
2040 IB_ACCESS_REMOTE_READ);
2041 if (unlikely(!ok))
2042 goto unlock;
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054 bth0 = be32_to_cpu(ohdr->bth[0]);
2055 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
2056 vaddr, len))
2057 goto unlock;
2058
2059
2060
2061
2062
2063 if (old_req)
2064 goto unlock;
2065 } else {
2066 struct flow_state *fstate;
2067 bool schedule = false;
2068 u8 i;
2069
2070 if (req->state == TID_REQUEST_RESEND) {
2071 req->state = TID_REQUEST_RESEND_ACTIVE;
2072 } else if (req->state == TID_REQUEST_INIT_RESEND) {
2073 req->state = TID_REQUEST_INIT;
2074 schedule = true;
2075 }
2076
2077
2078
2079
2080
2081
2082
2083
2084 if (old_req || req->state == TID_REQUEST_INIT ||
2085 (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
2086 for (i = prev + 1; ; i++) {
2087 if (i > rvt_size_atomic(&dev->rdi))
2088 i = 0;
2089 if (i == qp->r_head_ack_queue)
2090 break;
2091 e = &qp->s_ack_queue[i];
2092 req = ack_to_tid_req(e);
2093 if (e->opcode == TID_OP(WRITE_REQ) &&
2094 req->state == TID_REQUEST_INIT)
2095 req->state = TID_REQUEST_INIT_RESEND;
2096 }
2097
2098
2099
2100
2101
2102
2103 if (!schedule)
2104 goto unlock;
2105 }
2106
2107
2108
2109
2110
2111 if (req->clear_tail == req->setup_head)
2112 goto schedule;
2113
2114
2115
2116
2117
2118
2119 if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
2120 fstate = &req->flows[req->clear_tail].flow_state;
2121 qpriv->pending_tid_w_segs -=
2122 CIRC_CNT(req->flow_idx, req->clear_tail,
2123 MAX_FLOWS);
2124 req->flow_idx =
2125 CIRC_ADD(req->clear_tail,
2126 delta_psn(psn, fstate->resp_ib_psn),
2127 MAX_FLOWS);
2128 qpriv->pending_tid_w_segs +=
2129 delta_psn(psn, fstate->resp_ib_psn);
2130
2131
2132
2133
2134
2135
2136
2137 if (CIRC_CNT(req->setup_head, req->flow_idx,
2138 MAX_FLOWS)) {
2139 req->cur_seg = delta_psn(psn, e->psn);
2140 req->state = TID_REQUEST_RESEND_ACTIVE;
2141 }
2142 }
2143
2144 for (i = prev + 1; ; i++) {
2145
2146
2147
2148
2149 if (i > rvt_size_atomic(&dev->rdi))
2150 i = 0;
2151 if (i == qp->r_head_ack_queue)
2152 break;
2153 e = &qp->s_ack_queue[i];
2154 req = ack_to_tid_req(e);
2155 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
2156 e->lpsn, req);
2157 if (e->opcode != TID_OP(WRITE_REQ) ||
2158 req->cur_seg == req->comp_seg ||
2159 req->state == TID_REQUEST_INIT ||
2160 req->state == TID_REQUEST_INIT_RESEND) {
2161 if (req->state == TID_REQUEST_INIT)
2162 req->state = TID_REQUEST_INIT_RESEND;
2163 continue;
2164 }
2165 qpriv->pending_tid_w_segs -=
2166 CIRC_CNT(req->flow_idx,
2167 req->clear_tail,
2168 MAX_FLOWS);
2169 req->flow_idx = req->clear_tail;
2170 req->state = TID_REQUEST_RESEND;
2171 req->cur_seg = req->comp_seg;
2172 }
2173 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
2174 }
2175
2176 if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
2177 qp->s_acked_ack_queue = prev;
2178 qp->s_tail_ack_queue = prev;
2179
2180
2181
2182
2183
2184
2185 qp->s_ack_state = OP(ACKNOWLEDGE);
2186schedule:
2187
2188
2189
2190
2191 if (qpriv->rnr_nak_state) {
2192 qp->s_nak_state = 0;
2193 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
2194 qp->r_psn = e->lpsn + 1;
2195 hfi1_tid_write_alloc_resources(qp, true);
2196 }
2197
2198 qp->r_state = e->opcode;
2199 qp->r_nak_state = 0;
2200 qp->s_flags |= RVT_S_RESP_PENDING;
2201 hfi1_schedule_send(qp);
2202unlock:
2203 spin_unlock_irqrestore(&qp->s_lock, flags);
2204done:
2205 return 1;
2206}
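
/*
 * hfi1_rc_rcv_tid_rdma_read_req - responder entry point for a TID RDMA
 * READ REQUEST packet: validate the header and RETH, handle duplicate
 * or out-of-sequence PSNs via tid_rdma_rcv_err(), claim an s_ack_queue
 * entry, build the TID request state with tid_rdma_rcv_read_request(),
 * and schedule the send engine to generate the READ RESPONSE packets.
 */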
2208void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
{
2223 struct hfi1_ctxtdata *rcd = packet->rcd;
2224 struct rvt_qp *qp = packet->qp;
2225 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
2226 struct ib_other_headers *ohdr = packet->ohdr;
2227 struct rvt_ack_entry *e;
2228 unsigned long flags;
2229 struct ib_reth *reth;
2230 struct hfi1_qp_priv *qpriv = qp->priv;
2231 u32 bth0, psn, len, rkey;
2232 bool fecn;
2233 u8 next;
2234 u64 vaddr;
2235 int diff;
2236 u8 nack_state = IB_NAK_INVALID_REQUEST;
2237
2238 bth0 = be32_to_cpu(ohdr->bth[0]);
2239 if (hfi1_ruc_check_hdr(ibp, packet))
2240 return;
2241
2242 fecn = process_ecn(qp, packet);
2243 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2244 trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
2245
2246 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2247 rvt_comm_est(qp);
2248
2249 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2250 goto nack_inv;
2251
2252 reth = &ohdr->u.tid_rdma.r_req.reth;
2253 vaddr = be64_to_cpu(reth->vaddr);
2254 len = be32_to_cpu(reth->length);
2255
2256 if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
2257 goto nack_inv;
2258
2259 diff = delta_psn(psn, qp->r_psn);
2260 if (unlikely(diff)) {
2261 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
2262 return;
2263 }
2264
2265
2266 next = qp->r_head_ack_queue + 1;
2267 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2268 next = 0;
2269 spin_lock_irqsave(&qp->s_lock, flags);
2270 if (unlikely(next == qp->s_tail_ack_queue)) {
2271 if (!qp->s_ack_queue[next].sent) {
2272 nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2273 goto nack_inv_unlock;
2274 }
2275 update_ack_queue(qp, next);
2276 }
2277 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2278 release_rdma_sge_mr(e);
2279
2280 rkey = be32_to_cpu(reth->rkey);
2281 qp->r_len = len;
2282
2283 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
2284 rkey, IB_ACCESS_REMOTE_READ)))
2285 goto nack_acc;
2286
2287
2288 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
2289 len))
2290 goto nack_inv_unlock;
2291
2292 qp->r_state = e->opcode;
2293 qp->r_nak_state = 0;
2294
2295
2296
2297
2298
2299 qp->r_msn++;
2300 qp->r_psn += e->lpsn - e->psn + 1;
2301
2302 qp->r_head_ack_queue = next;
2303
2304
2305
2306
2307
2308
2309
2310 qpriv->r_tid_alloc = qp->r_head_ack_queue;
2311
2312
2313 qp->s_flags |= RVT_S_RESP_PENDING;
2314 if (fecn)
2315 qp->s_flags |= RVT_S_ECN;
2316 hfi1_schedule_send(qp);
2317
2318 spin_unlock_irqrestore(&qp->s_lock, flags);
2319 return;
2320
2321nack_inv_unlock:
2322 spin_unlock_irqrestore(&qp->s_lock, flags);
2323nack_inv:
2324 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2325 qp->r_nak_state = nack_state;
2326 qp->r_ack_psn = qp->r_psn;
2327
2328 rc_defered_ack(rcd, qp);
2329 return;
2330nack_acc:
2331 spin_unlock_irqrestore(&qp->s_lock, flags);
2332 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2333 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2334 qp->r_ack_psn = qp->r_psn;
2335}
2336
2337u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
2338 struct ib_other_headers *ohdr, u32 *bth0,
2339 u32 *bth1, u32 *bth2, u32 *len, bool *last)
2340{
2341 struct hfi1_ack_priv *epriv = e->priv;
2342 struct tid_rdma_request *req = &epriv->tid_req;
2343 struct hfi1_qp_priv *qpriv = qp->priv;
2344 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
2345 u32 tidentry = flow->tid_entry[flow->tid_idx];
2346 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
2347 struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
2348 u32 next_offset, om = KDETH_OM_LARGE;
2349 bool last_pkt;
2350 u32 hdwords = 0;
2351 struct tid_rdma_params *remote;
2352
2353 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
2354 flow->sent += *len;
2355 next_offset = flow->tid_offset + *len;
2356 last_pkt = (flow->sent >= flow->length);
2357
2358 trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
2359 trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
2360
2361 rcu_read_lock();
2362 remote = rcu_dereference(qpriv->tid_rdma.remote);
2363 if (!remote) {
2364 rcu_read_unlock();
2365 goto done;
2366 }
2367 KDETH_RESET(resp->kdeth0, KVER, 0x1);
2368 KDETH_SET(resp->kdeth0, SH, !last_pkt);
2369 KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
2370 KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
2371 KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
2372 KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
2373 KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
2374 KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
2375 resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
2376 rcu_read_unlock();
2377
2378 resp->aeth = rvt_compute_aeth(qp);
2379 resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
2380 flow->pkt));
2381
2382 *bth0 = TID_OP(READ_RESP) << 24;
2383 *bth1 = flow->tid_qpn;
2384 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
2385 HFI1_KDETH_BTH_SEQ_MASK) |
2386 (flow->flow_state.generation <<
2387 HFI1_KDETH_BTH_SEQ_SHIFT));
2388 *last = last_pkt;
2389 if (last_pkt)
2390
2391 req->clear_tail = (req->clear_tail + 1) &
2392 (MAX_FLOWS - 1);
2393
2394 if (next_offset >= tidlen) {
2395 flow->tid_offset = 0;
2396 flow->tid_idx++;
2397 } else {
2398 flow->tid_offset = next_offset;
2399 }
2400
2401 hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
2402
2403done:
2404 return hdwords;
2405}
2406
2407static inline struct tid_rdma_request *
2408find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
2409 __must_hold(&qp->s_lock)
2410{
2411 struct rvt_swqe *wqe;
2412 struct tid_rdma_request *req = NULL;
2413 u32 i, end;
2414
2415 end = qp->s_cur + 1;
2416 if (end == qp->s_size)
2417 end = 0;
2418 for (i = qp->s_acked; i != end;) {
2419 wqe = rvt_get_swqe_ptr(qp, i);
2420 if (cmp_psn(psn, wqe->psn) >= 0 &&
2421 cmp_psn(psn, wqe->lpsn) <= 0) {
2422 if (wqe->wr.opcode == opcode)
2423 req = wqe_to_tid_req(wqe);
2424 break;
2425 }
2426 if (++i == qp->s_size)
2427 i = 0;
2428 }
2429
2430 return req;
2431}
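
/*
 * hfi1_rc_rcv_tid_rdma_read_resp - requester entry point for a TID
 * RDMA READ RESPONSE packet: locate the matching request by IB PSN,
 * handle out-of-sequence KDETH PSNs (including the FECN/eager-receive
 * recovery path), and on the last packet of a flow release the flow's
 * TID resources, run the normal RC ACK processing and advance the
 * request/segment accounting.
 */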
2433void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
2444 struct ib_other_headers *ohdr = packet->ohdr;
2445 struct rvt_qp *qp = packet->qp;
2446 struct hfi1_qp_priv *priv = qp->priv;
2447 struct hfi1_ctxtdata *rcd = packet->rcd;
2448 struct tid_rdma_request *req;
2449 struct tid_rdma_flow *flow;
2450 u32 opcode, aeth;
2451 bool fecn;
2452 unsigned long flags;
2453 u32 kpsn, ipsn;
2454
2455 trace_hfi1_sender_rcv_tid_read_resp(qp);
2456 fecn = process_ecn(qp, packet);
2457 kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2458 aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
2459 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2460
2461 spin_lock_irqsave(&qp->s_lock, flags);
2462 ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
2463 req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
2464 if (unlikely(!req))
2465 goto ack_op_err;
2466
2467 flow = &req->flows[req->clear_tail];
2468
2469 if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
2470 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
2471
2472 if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
2473 goto ack_done;
2474 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2475
2476
2477
2478
2479
2480
2481
2482 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
2483 struct rvt_sge_state ss;
2484 u32 len;
2485 u32 tlen = packet->tlen;
2486 u16 hdrsize = packet->hlen;
2487 u8 pad = packet->pad;
2488 u8 extra_bytes = pad + packet->extra_byte +
2489 (SIZE_OF_CRC << 2);
2490 u32 pmtu = qp->pmtu;
2491
2492 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2493 goto ack_op_err;
2494 len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
2495 if (unlikely(len < pmtu))
2496 goto ack_op_err;
2497 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
2498 false);
2499
2500 priv->s_flags |= HFI1_R_TID_SW_PSN;
2501 }
2502
2503 goto ack_done;
2504 }
2505 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2506 req->ack_pending--;
2507 priv->pending_tid_r_segs--;
2508 qp->s_num_rd_atomic--;
2509 if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
2510 !qp->s_num_rd_atomic) {
2511 qp->s_flags &= ~(RVT_S_WAIT_FENCE |
2512 RVT_S_WAIT_ACK);
2513 hfi1_schedule_send(qp);
2514 }
2515 if (qp->s_flags & RVT_S_WAIT_RDMAR) {
2516 qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
2517 hfi1_schedule_send(qp);
2518 }
2519
2520 trace_hfi1_ack(qp, ipsn);
2521 trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
2522 req->e.swqe->psn, req->e.swqe->lpsn,
2523 req);
2524 trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
2525
2526
2527 hfi1_kern_exp_rcv_clear(req);
2528
2529 if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
2530 goto ack_done;
2531
2532
2533 if (++req->comp_seg >= req->total_segs) {
2534 priv->tid_r_comp++;
2535 req->state = TID_REQUEST_COMPLETE;
2536 }
2537
2538
2539
2540
2541
2542
2543 if ((req->state == TID_REQUEST_SYNC &&
2544 req->comp_seg == req->cur_seg) ||
2545 priv->tid_r_comp == priv->tid_r_reqs) {
2546 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2547 priv->s_flags &= ~HFI1_R_TID_SW_PSN;
2548 if (req->state == TID_REQUEST_SYNC)
2549 req->state = TID_REQUEST_ACTIVE;
2550 }
2551
2552 hfi1_schedule_send(qp);
2553 goto ack_done;
2554
2555ack_op_err:
2556
2557
2558
2559
2560
2561
2562
2563
2564 if (qp->s_last == qp->s_acked)
2565 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
2566
2567ack_done:
2568 spin_unlock_irqrestore(&qp->s_lock, flags);
2569}
2570
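/*
 * Release all TID RDMA READ receive resources held by this QP: the
 * expected-receive (TID) entries of every outstanding TID READ WQE
 * between s_acked and s_tail, followed by the QP's hardware flow.
 */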
2571void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
2572 __must_hold(&qp->s_lock)
2573{
2574 u32 n = qp->s_acked;
2575 struct rvt_swqe *wqe;
2576 struct tid_rdma_request *req;
2577 struct hfi1_qp_priv *priv = qp->priv;
2578
2579 lockdep_assert_held(&qp->s_lock);
2580
2581 while (n != qp->s_tail) {
2582 wqe = rvt_get_swqe_ptr(qp, n);
2583 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2584 req = wqe_to_tid_req(wqe);
2585 hfi1_kern_exp_rcv_clear_all(req);
2586 }
2587
2588 if (++n == qp->s_size)
2589 n = 0;
2590 }
2591
2592 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2593}
2594
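/*
 * Handle an RHF TID error for a KDETH packet on the requester side.
 * Receive types of IB and above are ignored.  For an eager receive
 * (a TID READ RESPONSE that could not be delivered through the
 * expected-receive path) the RC request is restarted from the PSN
 * after the last acknowledged one and the send engine is kicked.
 * Always returns true so the caller drops the packet.
 */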
2595static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
2596{
2597 struct rvt_qp *qp = packet->qp;
2598
2599 if (rcv_type >= RHF_RCV_TYPE_IB)
2600 goto done;
2601
2602 spin_lock(&qp->s_lock);
2603
2604
2605
2606
2607
2608
2609
2610
2611 if (rcv_type == RHF_RCV_TYPE_EAGER) {
2612 hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
2613 hfi1_schedule_send(qp);
2614 }
2615
2616
2617 spin_unlock(&qp->s_lock);
2618done:
2619 return true;
2620}
2621
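/*
 * Restart a TID RDMA READ request after a sequence error: flag the
 * QP to ignore further out-of-sequence responses (RVT_R_RDMAR_SEQ),
 * rewind the request to the IB start PSN of the flow currently being
 * received, and put the QP on the receive context's wait list so the
 * send engine runs once the current receive processing is done.
 */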
2622static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
2623 struct rvt_qp *qp, struct rvt_swqe *wqe)
2624{
2625 struct tid_rdma_request *req;
2626 struct tid_rdma_flow *flow;
2627
2628
2629 qp->r_flags |= RVT_R_RDMAR_SEQ;
2630 req = wqe_to_tid_req(wqe);
2631 flow = &req->flows[req->clear_tail];
2632 hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
2633 if (list_empty(&qp->rspwait)) {
2634 qp->r_flags |= RVT_R_RSP_SEND;
2635 rvt_get_qp(qp);
2636 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2637 }
2638}
2639
2640
2641
2642
2643
2644
2645
2646
2647
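/*
 * Handle KDETH EFLAGS errors for TID RDMA READ RESPONSE packets on
 * the requester side.  WQEs fully acknowledged by ibpsn - 1 are
 * completed first; if the errored response belongs to a pending
 * read/atomic, that request is restarted.  For expected-flow
 * sequence errors the flow's r_next_psn is consulted (and tracked in
 * software once HFI1_R_TID_SW_PSN is set) to classify the packet as
 * a duplicate, a loss, or the final packet of the flow.  The return
 * value is false only in that last case, so the caller lets the
 * packet go through normal processing instead of dropping it.
 */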
2648static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2649 struct hfi1_packet *packet, u8 rcv_type,
2650 u8 rte, u32 psn, u32 ibpsn)
2651 __must_hold(&packet->qp->r_lock) __must_hold(RCU)
2652{
2653 struct hfi1_pportdata *ppd = rcd->ppd;
2654 struct hfi1_devdata *dd = ppd->dd;
2655 struct hfi1_ibport *ibp;
2656 struct rvt_swqe *wqe;
2657 struct tid_rdma_request *req;
2658 struct tid_rdma_flow *flow;
2659 u32 ack_psn;
2660 struct rvt_qp *qp = packet->qp;
2661 struct hfi1_qp_priv *priv = qp->priv;
2662 bool ret = true;
2663 int diff = 0;
2664 u32 fpsn;
2665
2666 lockdep_assert_held(&qp->r_lock);
2667 trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
2668 trace_hfi1_sender_read_kdeth_eflags(qp);
2669 trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
2670 spin_lock(&qp->s_lock);
2671
2672 if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
2673 cmp_psn(ibpsn, qp->s_psn) > 0)
2674 goto s_unlock;
2675
2676
2677
2678
2679
2680
2681 ack_psn = ibpsn - 1;
2682 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
2683 ibp = to_iport(qp->ibqp.device, qp->port_num);
2684
2685
2686 while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
2687
2688
2689
2690
2691
2692 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
2693 wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
2694 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2695 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2696
2697 if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
2698 qp->r_flags |= RVT_R_RDMAR_SEQ;
2699 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2700 restart_tid_rdma_read_req(rcd, qp,
2701 wqe);
2702 } else {
2703 hfi1_restart_rc(qp, qp->s_last_psn + 1,
2704 0);
2705 if (list_empty(&qp->rspwait)) {
2706 qp->r_flags |= RVT_R_RSP_SEND;
2707 rvt_get_qp(qp);
2708 list_add_tail(
2709 &qp->rspwait,
2710 &rcd->qp_wait_list);
2711 }
2712 }
2713 }
2714
2715
2716
2717
2718 break;
2719 }
2720
2721 wqe = do_rc_completion(qp, wqe, ibp);
2722 if (qp->s_acked == qp->s_tail)
2723 goto s_unlock;
2724 }
2725
2726 if (qp->s_acked == qp->s_tail)
2727 goto s_unlock;
2728
2729
2730 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
2731 goto s_unlock;
2732
2733 req = wqe_to_tid_req(wqe);
2734 trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
2735 wqe->lpsn, req);
2736 switch (rcv_type) {
2737 case RHF_RCV_TYPE_EXPECTED:
2738 switch (rte) {
2739 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749 flow = &req->flows[req->clear_tail];
2750 trace_hfi1_tid_flow_read_kdeth_eflags(qp,
2751 req->clear_tail,
2752 flow);
2753 if (priv->s_flags & HFI1_R_TID_SW_PSN) {
2754 diff = cmp_psn(psn,
2755 flow->flow_state.r_next_psn);
2756 if (diff > 0) {
2757
2758 goto s_unlock;
2759 } else if (diff < 0) {
2760
2761
2762
2763
2764
2765 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2766 qp->r_flags &=
2767 ~RVT_R_RDMAR_SEQ;
2768
2769
2770 goto s_unlock;
2771 }
2772
2773
2774
2775
2776
2777
2778 fpsn = full_flow_psn(flow,
2779 flow->flow_state.lpsn);
2780 if (cmp_psn(fpsn, psn) == 0) {
2781 ret = false;
2782 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2783 qp->r_flags &=
2784 ~RVT_R_RDMAR_SEQ;
2785 }
2786 flow->flow_state.r_next_psn =
2787 mask_psn(psn + 1);
2788 } else {
2789 u32 last_psn;
2790
2791 last_psn = read_r_next_psn(dd, rcd->ctxt,
2792 flow->idx);
2793 flow->flow_state.r_next_psn = last_psn;
2794 priv->s_flags |= HFI1_R_TID_SW_PSN;
2795
2796
2797
2798
2799 if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
2800 restart_tid_rdma_read_req(rcd, qp,
2801 wqe);
2802 }
2803
2804 break;
2805
2806 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2807
2808
2809
2810
2811 break;
2812
2813 default:
2814 break;
2815 }
2816 break;
2817
2818 case RHF_RCV_TYPE_ERROR:
2819 switch (rte) {
2820 case RHF_RTE_ERROR_OP_CODE_ERR:
2821 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
2822 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
2823 case RHF_RTE_ERROR_KHDR_KVER_ERR:
2824 case RHF_RTE_ERROR_CONTEXT_ERR:
2825 case RHF_RTE_ERROR_KHDR_TID_ERR:
2826 default:
2827 break;
2828 }
2829 default:
2830 break;
2831 }
2832s_unlock:
2833 spin_unlock(&qp->s_lock);
2834 return ret;
2835}
2836
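/*
 * Top-level handler for KDETH packets flagged with EFLAGS errors by
 * the receive engine.  The QP is looked up from the KDETH verbs_qp
 * field; RcvTIDErr packets go to tid_rdma_tid_err(), TID READ
 * RESPONSE packets to handle_read_kdeth_eflags(), and everything
 * else is treated as TID WRITE DATA on the responder, where flow
 * sequence/generation errors either switch the flow to software PSN
 * tracking or NAK the expected PSN.  All error paths funnel to the
 * "drop" label and return true; a false return (only possible via
 * handle_read_kdeth_eflags()) means the packet still needs normal
 * receive processing.
 */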
2837bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2838 struct hfi1_pportdata *ppd,
2839 struct hfi1_packet *packet)
2840{
2841 struct hfi1_ibport *ibp = &ppd->ibport_data;
2842 struct hfi1_devdata *dd = ppd->dd;
2843 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
2844 u8 rcv_type = rhf_rcv_type(packet->rhf);
2845 u8 rte = rhf_rcv_type_err(packet->rhf);
2846 struct ib_header *hdr = packet->hdr;
2847 struct ib_other_headers *ohdr = NULL;
2848 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
2849 u16 lid = be16_to_cpu(hdr->lrh[1]);
2850 u8 opcode;
2851 u32 qp_num, psn, ibpsn;
2852 struct rvt_qp *qp;
2853 struct hfi1_qp_priv *qpriv;
2854 unsigned long flags;
2855 bool ret = true;
2856 struct rvt_ack_entry *e;
2857 struct tid_rdma_request *req;
2858 struct tid_rdma_flow *flow;
2859 int diff = 0;
2860
2861 trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
2862 packet->rhf);
2863 if (packet->rhf & RHF_ICRC_ERR)
2864 return ret;
2865
2866 packet->ohdr = &hdr->u.oth;
2867 ohdr = packet->ohdr;
2868 trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
2869
2870
2871 qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
2872 RVT_QPN_MASK;
2873 if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
2874 goto drop;
2875
2876 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2877 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2878
2879 rcu_read_lock();
2880 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
2881 if (!qp)
2882 goto rcu_unlock;
2883
2884 packet->qp = qp;
2885
2886
2887 spin_lock_irqsave(&qp->r_lock, flags);
2888 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2889 ibp->rvp.n_pkt_drops++;
2890 goto r_unlock;
2891 }
2892
2893 if (packet->rhf & RHF_TID_ERR) {
2894
2895 u32 tlen = rhf_pkt_len(packet->rhf);
2896
2897
2898 if (tlen < 24)
2899 goto r_unlock;
2900
2901
2902
2903
2904
2905 if (lnh == HFI1_LRH_GRH)
2906 goto r_unlock;
2907
2908 if (tid_rdma_tid_err(packet, rcv_type))
2909 goto r_unlock;
2910 }
2911
2912
2913 if (opcode == TID_OP(READ_RESP)) {
2914 ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
2915 ibpsn = mask_psn(ibpsn);
2916 ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
2917 ibpsn);
2918 goto r_unlock;
2919 }
2920
2921
2922
2923
2924
2925
2926 spin_lock(&qp->s_lock);
2927 qpriv = qp->priv;
2928 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
2929 qpriv->r_tid_tail == qpriv->r_tid_head)
2930 goto unlock;
2931 e = &qp->s_ack_queue[qpriv->r_tid_tail];
2932 if (e->opcode != TID_OP(WRITE_REQ))
2933 goto unlock;
2934 req = ack_to_tid_req(e);
2935 if (req->comp_seg == req->cur_seg)
2936 goto unlock;
2937 flow = &req->flows[req->clear_tail];
2938 trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
2939 trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
2940 trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
2941 trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
2942 e->lpsn, req);
2943 trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
2944
2945 switch (rcv_type) {
2946 case RHF_RCV_TYPE_EXPECTED:
2947 switch (rte) {
2948 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2949 if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
2950 qpriv->s_flags |= HFI1_R_TID_SW_PSN;
2951 flow->flow_state.r_next_psn =
2952 read_r_next_psn(dd, rcd->ctxt,
2953 flow->idx);
2954 qpriv->r_next_psn_kdeth =
2955 flow->flow_state.r_next_psn;
2956 goto nak_psn;
2957 } else {
2958
2959
2960
2961
2962
2963
2964
2965
2966 diff = cmp_psn(psn,
2967 flow->flow_state.r_next_psn);
2968 if (diff > 0)
2969 goto nak_psn;
2970 else if (diff < 0)
2971 break;
2972
2973 qpriv->s_nak_state = 0;
2974
2975
2976
2977
2978
2979 if (psn == full_flow_psn(flow,
2980 flow->flow_state.lpsn))
2981 ret = false;
2982 flow->flow_state.r_next_psn =
2983 mask_psn(psn + 1);
2984 qpriv->r_next_psn_kdeth =
2985 flow->flow_state.r_next_psn;
2986 }
2987 break;
2988
2989 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2990 goto nak_psn;
2991
2992 default:
2993 break;
2994 }
2995 break;
2996
2997 case RHF_RCV_TYPE_ERROR:
2998 switch (rte) {
2999 case RHF_RTE_ERROR_OP_CODE_ERR:
3000 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
3001 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
3002 case RHF_RTE_ERROR_KHDR_KVER_ERR:
3003 case RHF_RTE_ERROR_CONTEXT_ERR:
3004 case RHF_RTE_ERROR_KHDR_TID_ERR:
3005 default:
3006 break;
3007 }
3008 default:
3009 break;
3010 }
3011
3012unlock:
3013 spin_unlock(&qp->s_lock);
3014r_unlock:
3015 spin_unlock_irqrestore(&qp->r_lock, flags);
3016rcu_unlock:
3017 rcu_read_unlock();
3018drop:
3019 return ret;
3020nak_psn:
3021 ibp->rvp.n_rc_seqnak++;
3022 if (!qpriv->s_nak_state) {
3023 qpriv->s_nak_state = IB_NAK_PSN_ERROR;
3024
3025 qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
3026 tid_rdma_trigger_ack(qp);
3027 }
3028 goto unlock;
3029}
3030
3031
3032
3033
3034
3035
3036
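/*
 * Reposition a TID RDMA request for retransmission after an RC
 * restart.  The flow containing the restart point is located (by IB
 * PSN for TID READ, by acked_tail for TID WRITE) and its pkt, sent,
 * tid_idx and tid_offset fields are recomputed by walking the flow's
 * TID entries up to the restart PSN.  For TID WRITE the software SGE
 * is advanced to match, and every later flow of this and of the
 * following TID WRITE WQEs (up to s_tid_cur) is reset so those
 * segments are rebuilt from scratch.
 */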
3037void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3038 u32 *bth2)
3039{
3040 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3041 struct tid_rdma_flow *flow;
3042 struct hfi1_qp_priv *qpriv = qp->priv;
3043 int diff, delta_pkts;
3044 u32 tididx = 0, i;
3045 u16 fidx;
3046
3047 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3048 *bth2 = mask_psn(qp->s_psn);
3049 flow = find_flow_ib(req, *bth2, &fidx);
3050 if (!flow) {
3051 trace_hfi1_msg_tid_restart_req(
3052 qp, "!!!!!! Could not find flow to restart: bth2 ",
3053 (u64)*bth2);
3054 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
3055 wqe->psn, wqe->lpsn,
3056 req);
3057 return;
3058 }
3059 } else {
3060 fidx = req->acked_tail;
3061 flow = &req->flows[fidx];
3062 *bth2 = mask_psn(req->r_ack_psn);
3063 }
3064
3065 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3066 delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
3067 else
3068 delta_pkts = delta_psn(*bth2,
3069 full_flow_psn(flow,
3070 flow->flow_state.spsn));
3071
3072 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3073 diff = delta_pkts + flow->resync_npkts;
3074
3075 flow->sent = 0;
3076 flow->pkt = 0;
3077 flow->tid_idx = 0;
3078 flow->tid_offset = 0;
3079 if (diff) {
3080 for (tididx = 0; tididx < flow->tidcnt; tididx++) {
3081 u32 tidentry = flow->tid_entry[tididx], tidlen,
3082 tidnpkts, npkts;
3083
3084 flow->tid_offset = 0;
3085 tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
3086 tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
3087 npkts = min_t(u32, diff, tidnpkts);
3088 flow->pkt += npkts;
3089 flow->sent += (npkts == tidnpkts ? tidlen :
3090 npkts * qp->pmtu);
3091 flow->tid_offset += npkts * qp->pmtu;
3092 diff -= npkts;
3093 if (!diff)
3094 break;
3095 }
3096 }
3097 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3098 rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
3099 flow->sent, 0);
3100
3101
3102
3103
3104
3105
3106
3107 flow->pkt -= flow->resync_npkts;
3108 }
3109
3110 if (flow->tid_offset ==
3111 EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
3112 tididx++;
3113 flow->tid_offset = 0;
3114 }
3115 flow->tid_idx = tididx;
3116 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3117
3118 req->flow_idx = fidx;
3119 else
3120 req->clear_tail = fidx;
3121
3122 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3123 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
3124 wqe->lpsn, req);
3125 req->state = TID_REQUEST_ACTIVE;
3126 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3127
3128 fidx = CIRC_NEXT(fidx, MAX_FLOWS);
3129 i = qpriv->s_tid_tail;
3130 do {
3131 for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
3132 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
3133 req->flows[fidx].sent = 0;
3134 req->flows[fidx].pkt = 0;
3135 req->flows[fidx].tid_idx = 0;
3136 req->flows[fidx].tid_offset = 0;
3137 req->flows[fidx].resync_npkts = 0;
3138 }
3139 if (i == qpriv->s_tid_cur)
3140 break;
3141 do {
3142 i = (++i == qp->s_size ? 0 : i);
3143 wqe = rvt_get_swqe_ptr(qp, i);
3144 } while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
3145 req = wqe_to_tid_req(wqe);
3146 req->cur_seg = req->ack_seg;
3147 fidx = req->acked_tail;
3148
3149 req->clear_tail = fidx;
3150 } while (1);
3151 }
3152}
3153
3154void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
3155{
3156 int i, ret;
3157 struct hfi1_qp_priv *qpriv = qp->priv;
3158 struct tid_flow_state *fs;
3159
3160 if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
3161 return;
3162
3163
3164
3165
3166
3167 fs = &qpriv->flow_state;
3168 if (fs->index != RXE_NUM_TID_FLOWS)
3169 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3170
3171 for (i = qp->s_acked; i != qp->s_head;) {
3172 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
3173
3174 if (++i == qp->s_size)
3175 i = 0;
3176
3177 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
3178 continue;
3179 do {
3180 struct hfi1_swqe_priv *priv = wqe->priv;
3181
3182 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3183 } while (!ret);
3184 }
3185 for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
3186 struct rvt_ack_entry *e = &qp->s_ack_queue[i];
3187
3188 if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
3189 i = 0;
3190
3191 if (e->opcode != TID_OP(WRITE_REQ))
3192 continue;
3193 do {
3194 struct hfi1_ack_priv *priv = e->priv;
3195
3196 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3197 } while (!ret);
3198 }
3199}
3200
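/*
 * Decide whether @wqe must wait for the previous WQE before it may
 * be sent.  A preceding TID RDMA WRITE whose segments have not all
 * been acknowledged blocks any following request, and a TID RDMA
 * READ must also wait until a directly preceding legacy RDMA READ
 * has been acknowledged.  Returns true and sets
 * HFI1_S_TID_WAIT_INTERLCK when the send has to be deferred.
 */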
3201bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
3202{
3203 struct rvt_swqe *prev;
3204 struct hfi1_qp_priv *priv = qp->priv;
3205 u32 s_prev;
3206 struct tid_rdma_request *req;
3207
3208 s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
3209 prev = rvt_get_swqe_ptr(qp, s_prev);
3210
3211 switch (wqe->wr.opcode) {
3212 case IB_WR_SEND:
3213 case IB_WR_SEND_WITH_IMM:
3214 case IB_WR_SEND_WITH_INV:
3215 case IB_WR_ATOMIC_CMP_AND_SWP:
3216 case IB_WR_ATOMIC_FETCH_AND_ADD:
3217 case IB_WR_RDMA_WRITE:
3218 case IB_WR_RDMA_WRITE_WITH_IMM:
3219 switch (prev->wr.opcode) {
3220 case IB_WR_TID_RDMA_WRITE:
3221 req = wqe_to_tid_req(prev);
3222 if (req->ack_seg != req->total_segs)
3223 goto interlock;
3224 default:
3225 break;
3226 }
3227 break;
3228 case IB_WR_RDMA_READ:
3229 if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
3230 break;
3231 fallthrough;
3232 case IB_WR_TID_RDMA_READ:
3233 switch (prev->wr.opcode) {
3234 case IB_WR_RDMA_READ:
3235 if (qp->s_acked != qp->s_cur)
3236 goto interlock;
3237 break;
3238 case IB_WR_TID_RDMA_WRITE:
3239 req = wqe_to_tid_req(prev);
3240 if (req->ack_seg != req->total_segs)
3241 goto interlock;
3242 default:
3243 break;
3244 }
3245 default:
3246 break;
3247 }
3248 return false;
3249
3250interlock:
3251 priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
3252 return true;
3253}
3254
3255
3256static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
3257 struct rvt_sge *sge, int num_sge)
3258{
3259 int i;
3260
3261 for (i = 0; i < num_sge; i++, sge++) {
3262 trace_hfi1_sge_check_align(qp, i, sge);
3263 if ((u64)sge->vaddr & ~PAGE_MASK ||
3264 sge->sge_length & ~PAGE_MASK)
3265 return false;
3266 }
3267 return true;
3268}
3269
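/*
 * Called when a new send WQE is set up: if TID RDMA has been
 * negotiated with the peer and this is a 9B, non-loopback path, an
 * RDMA READ with page-aligned SGEs, or an RDMA WRITE with a
 * page-aligned address and page-multiple length, is converted to its
 * TID RDMA equivalent.  The flow ring is allocated and the request's
 * segment size, segment count and adjusted last PSN are precomputed.
 */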
3270void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
3271{
3272 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
3273 struct hfi1_swqe_priv *priv = wqe->priv;
3274 struct tid_rdma_params *remote;
3275 enum ib_wr_opcode new_opcode;
3276 bool do_tid_rdma = false;
3277 struct hfi1_pportdata *ppd = qpriv->rcd->ppd;
3278
3279 if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
3280 ppd->lid)
3281 return;
3282 if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
3283 return;
3284
3285 rcu_read_lock();
3286 remote = rcu_dereference(qpriv->tid_rdma.remote);
3287
3288
3289
3290
3291 if (!remote)
3292 goto exit;
3293
3294 if (wqe->wr.opcode == IB_WR_RDMA_READ) {
3295 if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
3296 wqe->wr.num_sge)) {
3297 new_opcode = IB_WR_TID_RDMA_READ;
3298 do_tid_rdma = true;
3299 }
3300 } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
3301
3302
3303
3304
3305
3306
3307 if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
3308 !(wqe->length & ~PAGE_MASK)) {
3309 new_opcode = IB_WR_TID_RDMA_WRITE;
3310 do_tid_rdma = true;
3311 }
3312 }
3313
3314 if (do_tid_rdma) {
3315 if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
3316 goto exit;
3317 wqe->wr.opcode = new_opcode;
3318 priv->tid_req.seg_len =
3319 min_t(u32, remote->max_len, wqe->length);
3320 priv->tid_req.total_segs =
3321 DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
3322
3323 wqe->lpsn = wqe->psn;
3324 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3325 priv->tid_req.n_flows = remote->max_read;
3326 qpriv->tid_r_reqs++;
3327 wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
3328 } else {
3329 wqe->lpsn += priv->tid_req.total_segs - 1;
3330 atomic_inc(&qpriv->n_requests);
3331 }
3332
3333 priv->tid_req.cur_seg = 0;
3334 priv->tid_req.comp_seg = 0;
3335 priv->tid_req.ack_seg = 0;
3336 priv->tid_req.state = TID_REQUEST_INACTIVE;
3337
3338
3339
3340
3341
3342
3343 priv->tid_req.acked_tail = priv->tid_req.setup_head;
3344 trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
3345 wqe->psn, wqe->lpsn,
3346 &priv->tid_req);
3347 }
3348exit:
3349 rcu_read_unlock();
3350}
3351
3352
3353
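/*
 * Build the header of a TID RDMA WRITE REQUEST: the RETH carries the
 * remaining virtual address and length, KDETH gets the remote JKEY,
 * and BTH1 is rewritten with the peer's TID RDMA QP.  The QP enters
 * the TID_OP(WRITE_REQ) state and waits for the WRITE RESPONSE; the
 * data length is zeroed because the request carries no payload.
 * Returns the header size in 32-bit words.
 */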
3354u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3355 struct ib_other_headers *ohdr,
3356 u32 *bth1, u32 *bth2, u32 *len)
3357{
3358 struct hfi1_qp_priv *qpriv = qp->priv;
3359 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3360 struct tid_rdma_params *remote;
3361
3362 rcu_read_lock();
3363 remote = rcu_dereference(qpriv->tid_rdma.remote);
3364
3365
3366
3367
3368 req->n_flows = remote->max_write;
3369 req->state = TID_REQUEST_ACTIVE;
3370
3371 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
3372 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
3373 ohdr->u.tid_rdma.w_req.reth.vaddr =
3374 cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
3375 ohdr->u.tid_rdma.w_req.reth.rkey =
3376 cpu_to_be32(wqe->rdma_wr.rkey);
3377 ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
3378 ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
3379 *bth1 &= ~RVT_QPN_MASK;
3380 *bth1 |= remote->qp;
3381 qp->s_state = TID_OP(WRITE_REQ);
3382 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
3383 *bth2 |= IB_BTH_REQ_ACK;
3384 *len = 0;
3385
3386 rcu_read_unlock();
3387 return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
3388}
3389
3390static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
3391{
3392 /*
3393  * Heuristic weight used while a QP waits for a hardware flow: the
3394  * number of segments that one flow's KDETH PSN space can carry
3395  * ((PSN space * pmtu) / segment size).  Multiplied by the QP's
3396  * position in the flow queue, it gives the number of segments'
3397  * worth of RNR back-off to request.
3398  */
3400 return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
3401}
3402
3403static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
3404 struct tid_queue *queue)
3405{
3406 return qpriv->tid_enqueue - queue->dequeue;
3407}
3408
3409
3410
3411
3412
3413
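/*
 * Translate "back off for @to_seg segments" into an IB RNR timeout
 * code: estimate how long that many maximum-sized segments take at
 * the port's current egress rate and return the smallest RNR table
 * entry that covers it (code 0, the largest timeout, if none does).
 */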
3414static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
3415{
3416 struct hfi1_qp_priv *qpriv = qp->priv;
3417 u64 timeout;
3418 u32 bytes_per_us;
3419 u8 i;
3420
3421 bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
3422 timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
3423
3424
3425
3426
3427 for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
3428 if (rvt_rnr_tbl_to_usec(i) >= timeout)
3429 return i;
3430 return 0;
3431}
3432
3433 /*
3434  * Central allocator for TID RDMA WRITE responder resources.  Walk
3435  * the ack queue from r_tid_alloc towards r_tid_head and, for each
3436  * pending TID WRITE REQUEST, allocate a hardware flow (if the QP
3437  * does not already hold one) and expected-receive TID entries for
3438  * the next segment.  Allocation stops at the local max_write limit,
3439  * at a KDETH PSN sync point, when the flow ring is full (a TID ACK
3440  * is triggered instead), or when TID entries run out.  If nothing
3441  * could be allocated while in the receive interrupt context, an RNR
3442  * NAK with a timeout derived from the queue position is set up and
3443  * deferred.
3444  */
3452static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
3453{
3454 struct tid_rdma_request *req;
3455 struct hfi1_qp_priv *qpriv = qp->priv;
3456 struct hfi1_ctxtdata *rcd = qpriv->rcd;
3457 struct tid_rdma_params *local = &qpriv->tid_rdma.local;
3458 struct rvt_ack_entry *e;
3459 u32 npkts, to_seg;
3460 bool last;
3461 int ret = 0;
3462
3463 lockdep_assert_held(&qp->s_lock);
3464
3465 while (1) {
3466 trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
3467 trace_hfi1_tid_write_rsp_alloc_res(qp);
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480 if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
3481 break;
3482
3483
3484 if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
3485
3486 if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
3487 !qpriv->alloc_w_segs) {
3488 hfi1_kern_clear_hw_flow(rcd, qp);
3489 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3490 }
3491 break;
3492 }
3493
3494 e = &qp->s_ack_queue[qpriv->r_tid_alloc];
3495 if (e->opcode != TID_OP(WRITE_REQ))
3496 goto next_req;
3497 req = ack_to_tid_req(e);
3498 trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
3499 e->lpsn, req);
3500
3501 if (req->alloc_seg >= req->total_segs)
3502 goto next_req;
3503
3504
3505 if (qpriv->alloc_w_segs >= local->max_write)
3506 break;
3507
3508
3509 if (qpriv->sync_pt && qpriv->alloc_w_segs)
3510 break;
3511
3512
3513 if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
3514 hfi1_kern_clear_hw_flow(rcd, qp);
3515 qpriv->sync_pt = false;
3516 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3517 }
3518
3519
3520 if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
3521 ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
3522 if (ret) {
3523 to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
3524 position_in_queue(qpriv,
3525 &rcd->flow_queue);
3526 break;
3527 }
3528 }
3529
3530 npkts = rvt_div_round_up_mtu(qp, req->seg_len);
3531
3532
3533
3534
3535
3536 if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
3537 qpriv->sync_pt = true;
3538 break;
3539 }
3540
3541
3542
3543
3544
3545
3546
3547
3548 if (!CIRC_SPACE(req->setup_head, req->acked_tail,
3549 MAX_FLOWS)) {
3550 ret = -EAGAIN;
3551 to_seg = MAX_FLOWS >> 1;
3552 tid_rdma_trigger_ack(qp);
3553 break;
3554 }
3555
3556
3557 ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
3558 if (ret == -EAGAIN)
3559 to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
3560 if (ret)
3561 break;
3562
3563 qpriv->alloc_w_segs++;
3564 req->alloc_seg++;
3565 continue;
3566next_req:
3567
3568 if (++qpriv->r_tid_alloc >
3569 rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3570 qpriv->r_tid_alloc = 0;
3571 }
3572
3573
3574
3575
3576
3577
3578 if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
3579 goto send_rnr_nak;
3580
3581 return;
3582
3583send_rnr_nak:
3584 lockdep_assert_held(&qp->r_lock);
3585
3586
3587 qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;
3588
3589
3590 qp->r_psn = e->psn + req->alloc_seg;
3591 qp->r_ack_psn = qp->r_psn;
3592
3593
3594
3595
3596
3597 qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
3598 if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3599 qp->r_head_ack_queue = 0;
3600 qpriv->r_tid_head = qp->r_head_ack_queue;
3601
3602
3603
3604
3605
3606 qp->s_nak_state = qp->r_nak_state;
3607 qp->s_ack_psn = qp->r_ack_psn;
3608
3609
3610
3611
3612 qp->s_flags &= ~(RVT_S_ACK_PENDING);
3613
3614 trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
3615
3616
3617
3618
3619
3620
3621
3622 qpriv->rnr_nak_state = TID_RNR_NAK_SEND;
3623
3624
3625
3626
3627
3628
3629 rc_defered_ack(rcd, qp);
3630}
3631
3632void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
3633{
3634 /*
3635  * Responder-side handler for a TID RDMA WRITE REQUEST.  The request
3636  * is validated (sequence, permissions, page-aligned length, rkey),
3637  * an ack queue entry is initialized with one tid_rdma_request
3638  * covering all segments of the transfer, and r_psn is advanced past
3639  * the whole request.  If an RNR NAK was pending, a retransmission
3640  * of the request that caused it re-uses the same ack queue entry
3641  * and the NAK state is cleared.  Finally, TID resources are
3642  * allocated for as many segments as possible and the send engine is
3643  * scheduled to emit the WRITE RESPONSE.
3644  */
3647 struct hfi1_ctxtdata *rcd = packet->rcd;
3648 struct rvt_qp *qp = packet->qp;
3649 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
3650 struct ib_other_headers *ohdr = packet->ohdr;
3651 struct rvt_ack_entry *e;
3652 unsigned long flags;
3653 struct ib_reth *reth;
3654 struct hfi1_qp_priv *qpriv = qp->priv;
3655 struct tid_rdma_request *req;
3656 u32 bth0, psn, len, rkey, num_segs;
3657 bool fecn;
3658 u8 next;
3659 u64 vaddr;
3660 int diff;
3661
3662 bth0 = be32_to_cpu(ohdr->bth[0]);
3663 if (hfi1_ruc_check_hdr(ibp, packet))
3664 return;
3665
3666 fecn = process_ecn(qp, packet);
3667 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
3668 trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
3669
3670 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
3671 rvt_comm_est(qp);
3672
3673 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3674 goto nack_inv;
3675
3676 reth = &ohdr->u.tid_rdma.w_req.reth;
3677 vaddr = be64_to_cpu(reth->vaddr);
3678 len = be32_to_cpu(reth->length);
3679
3680 num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
3681 diff = delta_psn(psn, qp->r_psn);
3682 if (unlikely(diff)) {
3683 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
3684 return;
3685 }
3686
3687
3688
3689
3690
3691
3692 if (qpriv->rnr_nak_state)
3693 qp->r_head_ack_queue = qp->r_head_ack_queue ?
3694 qp->r_head_ack_queue - 1 :
3695 rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
3696
3697
3698 next = qp->r_head_ack_queue + 1;
3699 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3700 next = 0;
3701 spin_lock_irqsave(&qp->s_lock, flags);
3702 if (unlikely(next == qp->s_acked_ack_queue)) {
3703 if (!qp->s_ack_queue[next].sent)
3704 goto nack_inv_unlock;
3705 update_ack_queue(qp, next);
3706 }
3707 e = &qp->s_ack_queue[qp->r_head_ack_queue];
3708 req = ack_to_tid_req(e);
3709
3710
3711 if (qpriv->rnr_nak_state) {
3712 qp->r_nak_state = 0;
3713 qp->s_nak_state = 0;
3714 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
3715 qp->r_psn = e->lpsn + 1;
3716 req->state = TID_REQUEST_INIT;
3717 goto update_head;
3718 }
3719
3720 release_rdma_sge_mr(e);
3721
3722
3723 if (!len || len & ~PAGE_MASK)
3724 goto nack_inv_unlock;
3725
3726 rkey = be32_to_cpu(reth->rkey);
3727 qp->r_len = len;
3728
3729 if (e->opcode == TID_OP(WRITE_REQ) &&
3730 (req->setup_head != req->clear_tail ||
3731 req->clear_tail != req->acked_tail))
3732 goto nack_inv_unlock;
3733
3734 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
3735 rkey, IB_ACCESS_REMOTE_WRITE)))
3736 goto nack_acc;
3737
3738 qp->r_psn += num_segs - 1;
3739
3740 e->opcode = (bth0 >> 24) & 0xff;
3741 e->psn = psn;
3742 e->lpsn = qp->r_psn;
3743 e->sent = 0;
3744
3745 req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
3746 req->state = TID_REQUEST_INIT;
3747 req->cur_seg = 0;
3748 req->comp_seg = 0;
3749 req->ack_seg = 0;
3750 req->alloc_seg = 0;
3751 req->isge = 0;
3752 req->seg_len = qpriv->tid_rdma.local.max_len;
3753 req->total_len = len;
3754 req->total_segs = num_segs;
3755 req->r_flow_psn = e->psn;
3756 req->ss.sge = e->rdma_sge;
3757 req->ss.num_sge = 1;
3758
3759 req->flow_idx = req->setup_head;
3760 req->clear_tail = req->setup_head;
3761 req->acked_tail = req->setup_head;
3762
3763 qp->r_state = e->opcode;
3764 qp->r_nak_state = 0;
3765
3766
3767
3768
3769
3770 qp->r_msn++;
3771 qp->r_psn++;
3772
3773 trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
3774 req);
3775
3776 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
3777 qpriv->r_tid_tail = qp->r_head_ack_queue;
3778 } else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
3779 struct tid_rdma_request *ptr;
3780
3781 e = &qp->s_ack_queue[qpriv->r_tid_tail];
3782 ptr = ack_to_tid_req(e);
3783
3784 if (e->opcode != TID_OP(WRITE_REQ) ||
3785 ptr->comp_seg == ptr->total_segs) {
3786 if (qpriv->r_tid_tail == qpriv->r_tid_ack)
3787 qpriv->r_tid_ack = qp->r_head_ack_queue;
3788 qpriv->r_tid_tail = qp->r_head_ack_queue;
3789 }
3790 }
3791update_head:
3792 qp->r_head_ack_queue = next;
3793 qpriv->r_tid_head = qp->r_head_ack_queue;
3794
3795 hfi1_tid_write_alloc_resources(qp, true);
3796 trace_hfi1_tid_write_rsp_rcv_req(qp);
3797
3798
3799 qp->s_flags |= RVT_S_RESP_PENDING;
3800 if (fecn)
3801 qp->s_flags |= RVT_S_ECN;
3802 hfi1_schedule_send(qp);
3803
3804 spin_unlock_irqrestore(&qp->s_lock, flags);
3805 return;
3806
3807nack_inv_unlock:
3808 spin_unlock_irqrestore(&qp->s_lock, flags);
3809nack_inv:
3810 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
3811 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
3812 qp->r_ack_psn = qp->r_psn;
3813
3814 rc_defered_ack(rcd, qp);
3815 return;
3816nack_acc:
3817 spin_unlock_irqrestore(&qp->s_lock, flags);
3818 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
3819 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
3820 qp->r_ack_psn = qp->r_psn;
3821}
3822
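/*
 * Build a TID RDMA WRITE RESPONSE for ack entry @e.  TID and flow
 * resources are allocated (or reused on a resend), the response
 * payload is pointed at the flow's TID entry array, and the header
 * advertises the flow's KDETH generation/start PSN and flow index so
 * the requester can address its WRITE DATA packets.  The TID reap
 * timer is started or extended to bound how long the pinned TID
 * entries may wait for data.  Returns the header size in 32-bit
 * words, or 0 if no segment can be responded to yet.
 */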
3823u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
3824 struct ib_other_headers *ohdr, u32 *bth1,
3825 u32 bth2, u32 *len,
3826 struct rvt_sge_state **ss)
3827{
3828 struct hfi1_ack_priv *epriv = e->priv;
3829 struct tid_rdma_request *req = &epriv->tid_req;
3830 struct hfi1_qp_priv *qpriv = qp->priv;
3831 struct tid_rdma_flow *flow = NULL;
3832 u32 resp_len = 0, hdwords = 0;
3833 void *resp_addr = NULL;
3834 struct tid_rdma_params *remote;
3835
3836 trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
3837 req);
3838 trace_hfi1_tid_write_rsp_build_resp(qp);
3839 trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
3840 flow = &req->flows[req->flow_idx];
3841 switch (req->state) {
3842 default:
3843
3844
3845
3846
3847 hfi1_tid_write_alloc_resources(qp, false);
3848
3849
3850 if (req->cur_seg >= req->alloc_seg)
3851 goto done;
3852
3853
3854
3855
3856
3857 if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
3858 goto done;
3859
3860 req->state = TID_REQUEST_ACTIVE;
3861 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3862 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3863 hfi1_add_tid_reap_timer(qp);
3864 break;
3865
3866 case TID_REQUEST_RESEND_ACTIVE:
3867 case TID_REQUEST_RESEND:
3868 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3869 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3870 if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
3871 req->state = TID_REQUEST_ACTIVE;
3872
3873 hfi1_mod_tid_reap_timer(qp);
3874 break;
3875 }
3876 flow->flow_state.resp_ib_psn = bth2;
3877 resp_addr = (void *)flow->tid_entry;
3878 resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
3879 req->cur_seg++;
3880
3881 memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
3882 epriv->ss.sge.vaddr = resp_addr;
3883 epriv->ss.sge.sge_length = resp_len;
3884 epriv->ss.sge.length = epriv->ss.sge.sge_length;
3885
3886
3887
3888
3889 epriv->ss.sge.mr = NULL;
3890 epriv->ss.sge.m = 0;
3891 epriv->ss.sge.n = 0;
3892
3893 epriv->ss.sg_list = NULL;
3894 epriv->ss.total_len = epriv->ss.sge.sge_length;
3895 epriv->ss.num_sge = 1;
3896
3897 *ss = &epriv->ss;
3898 *len = epriv->ss.total_len;
3899
3900
3901 rcu_read_lock();
3902 remote = rcu_dereference(qpriv->tid_rdma.remote);
3903
3904 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
3905 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
3906 ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
3907 ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
3908 cpu_to_be32((flow->flow_state.generation <<
3909 HFI1_KDETH_BTH_SEQ_SHIFT) |
3910 (flow->flow_state.spsn &
3911 HFI1_KDETH_BTH_SEQ_MASK));
3912 ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
3913 cpu_to_be32(qpriv->tid_rdma.local.qp |
3914 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
3915 TID_RDMA_DESTQP_FLOW_SHIFT) |
3916 qpriv->rcd->ctxt);
3917 ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
3918 *bth1 = remote->qp;
3919 rcu_read_unlock();
3920 hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
3921 qpriv->pending_tid_w_segs++;
3922done:
3923 return hdwords;
3924}
3925
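/*
 * TID "reap" timer: once a WRITE RESPONSE has advertised TID entries
 * to the requester, those entries stay pinned until the data
 * arrives.  The timer below reclaims them if the requester goes
 * silent: on expiry the hardware flow and all ack-queue TID
 * resources are freed and the QP is put into error
 * (IB_EVENT_QP_FATAL, IB_WC_RESP_TIMEOUT_ERR).
 */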
3926static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
3927{
3928 struct hfi1_qp_priv *qpriv = qp->priv;
3929
3930 lockdep_assert_held(&qp->s_lock);
3931 if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
3932 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3933 qpriv->s_tid_timer.expires = jiffies +
3934 qpriv->tid_timer_timeout_jiffies;
3935 add_timer(&qpriv->s_tid_timer);
3936 }
3937}
3938
3939static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
3940{
3941 struct hfi1_qp_priv *qpriv = qp->priv;
3942
3943 lockdep_assert_held(&qp->s_lock);
3944 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3945 mod_timer(&qpriv->s_tid_timer, jiffies +
3946 qpriv->tid_timer_timeout_jiffies);
3947}
3948
3949static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
3950{
3951 struct hfi1_qp_priv *qpriv = qp->priv;
3952 int rval = 0;
3953
3954 lockdep_assert_held(&qp->s_lock);
3955 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3956 rval = del_timer(&qpriv->s_tid_timer);
3957 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3958 }
3959 return rval;
3960}
3961
3962void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
3963{
3964 struct hfi1_qp_priv *qpriv = qp->priv;
3965
3966 del_timer_sync(&qpriv->s_tid_timer);
3967 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3968}
3969
3970static void hfi1_tid_timeout(struct timer_list *t)
3971{
3972 struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
3973 struct rvt_qp *qp = qpriv->owner;
3974 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
3975 unsigned long flags;
3976 u32 i;
3977
3978 spin_lock_irqsave(&qp->r_lock, flags);
3979 spin_lock(&qp->s_lock);
3980 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3981 dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
3982 qp->ibqp.qp_num, __func__, __LINE__);
3983 trace_hfi1_msg_tid_timeout(
3984 qp, "resource timeout = ",
3985 (u64)qpriv->tid_timer_timeout_jiffies);
3986 hfi1_stop_tid_reap_timer(qp);
3987
3988
3989
3990
3991 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3992 for (i = 0; i < rvt_max_atomic(rdi); i++) {
3993 struct tid_rdma_request *req =
3994 ack_to_tid_req(&qp->s_ack_queue[i]);
3995
3996 hfi1_kern_exp_rcv_clear_all(req);
3997 }
3998 spin_unlock(&qp->s_lock);
3999 if (qp->ibqp.event_handler) {
4000 struct ib_event ev;
4001
4002 ev.device = qp->ibqp.device;
4003 ev.element.qp = &qp->ibqp;
4004 ev.event = IB_EVENT_QP_FATAL;
4005 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
4006 }
4007 rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
4008 goto unlock_r_lock;
4009 }
4010 spin_unlock(&qp->s_lock);
4011unlock_r_lock:
4012 spin_unlock_irqrestore(&qp->r_lock, flags);
4013}
4014
4015void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
4016{
4017 /*
4018  * Requester-side handler for a TID RDMA WRITE RESPONSE.  After the
4019  * usual PSN/ordering checks and RC ACK processing, the advertised
4020  * TID entries are copied out of the packet into the next free flow
4021  * (setup_head) together with the flow's KDETH generation and start
4022  * PSN, and are validated against the segment length.  The current
4023  * WQE pointer is advanced when the request has been fully responded
4024  * to, and the TID send engine is scheduled to start sending WRITE
4025  * DATA.
4026  */
4028 struct ib_other_headers *ohdr = packet->ohdr;
4029 struct rvt_qp *qp = packet->qp;
4030 struct hfi1_qp_priv *qpriv = qp->priv;
4031 struct hfi1_ctxtdata *rcd = packet->rcd;
4032 struct rvt_swqe *wqe;
4033 struct tid_rdma_request *req;
4034 struct tid_rdma_flow *flow;
4035 enum ib_wc_status status;
4036 u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
4037 bool fecn;
4038 unsigned long flags;
4039
4040 fecn = process_ecn(qp, packet);
4041 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4042 aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
4043 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4044
4045 spin_lock_irqsave(&qp->s_lock, flags);
4046
4047
4048 if (cmp_psn(psn, qp->s_next_psn) >= 0)
4049 goto ack_done;
4050
4051
4052 if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
4053 goto ack_done;
4054
4055 if (unlikely(qp->s_acked == qp->s_tail))
4056 goto ack_done;
4057
4058
4059
4060
4061
4062
4063 if (qp->r_flags & RVT_R_RDMAR_SEQ) {
4064 if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
4065 goto ack_done;
4066 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
4067 }
4068
4069 wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
4070 if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
4071 goto ack_op_err;
4072
4073 req = wqe_to_tid_req(wqe);
4074
4075
4076
4077
4078
4079 if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
4080 goto ack_done;
4081
4082
4083
4084
4085
4086
4087
4088
4089 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
4090 goto ack_done;
4091
4092 trace_hfi1_ack(qp, psn);
4093
4094 flow = &req->flows[req->setup_head];
4095 flow->pkt = 0;
4096 flow->tid_idx = 0;
4097 flow->tid_offset = 0;
4098 flow->sent = 0;
4099 flow->resync_npkts = 0;
4100 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
4101 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
4102 TID_RDMA_DESTQP_FLOW_MASK;
4103 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
4104 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4105 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
4106 flow->flow_state.resp_ib_psn = psn;
4107 flow->length = min_t(u32, req->seg_len,
4108 (wqe->length - (req->comp_seg * req->seg_len)));
4109
4110 flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
4111 flow->flow_state.lpsn = flow->flow_state.spsn +
4112 flow->npkts - 1;
4113
4114 pktlen = packet->tlen - (packet->hlen + 4);
4115 if (pktlen > sizeof(flow->tid_entry)) {
4116 status = IB_WC_LOC_LEN_ERR;
4117 goto ack_err;
4118 }
4119 memcpy(flow->tid_entry, packet->ebuf, pktlen);
4120 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
4121 trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
4122
4123 req->comp_seg++;
4124 trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
4125
4126
4127
4128
4129 for (i = 0; i < flow->tidcnt; i++) {
4130 trace_hfi1_tid_entry_rcv_write_resp(
4131 qp, i, flow->tid_entry[i]);
4132 if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
4133 status = IB_WC_LOC_LEN_ERR;
4134 goto ack_err;
4135 }
4136 tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
4137 }
4138 if (tidlen * PAGE_SIZE < flow->length) {
4139 status = IB_WC_LOC_LEN_ERR;
4140 goto ack_err;
4141 }
4142
4143 trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
4144 wqe->lpsn, req);
4145
4146
4147
4148
4149 if (!cmp_psn(psn, wqe->psn)) {
4150 req->r_last_acked = mask_psn(wqe->psn - 1);
4151
4152 req->acked_tail = req->setup_head;
4153 }
4154
4155
4156 req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
4157 req->state = TID_REQUEST_ACTIVE;
4158
4159
4160
4161
4162
4163
4164
4165
4166 if (qpriv->s_tid_cur != qpriv->s_tid_head &&
4167 req->comp_seg == req->total_segs) {
4168 for (i = qpriv->s_tid_cur + 1; ; i++) {
4169 if (i == qp->s_size)
4170 i = 0;
4171 wqe = rvt_get_swqe_ptr(qp, i);
4172 if (i == qpriv->s_tid_head)
4173 break;
4174 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
4175 break;
4176 }
4177 qpriv->s_tid_cur = i;
4178 }
4179 qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
4180 hfi1_schedule_tid_send(qp);
4181 goto ack_done;
4182
4183ack_op_err:
4184 status = IB_WC_LOC_QP_OP_ERR;
4185ack_err:
4186 rvt_error_qp(qp, status);
4187ack_done:
4188 if (fecn)
4189 qp->s_flags |= RVT_S_ECN;
4190 spin_unlock_irqrestore(&qp->s_lock, flags);
4191}
4192
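/*
 * Build one TID RDMA WRITE DATA packet from the flow at
 * req->clear_tail: KDETH is programmed from the current TID entry
 * and the negotiated JKEY, BTH2 carries the flow's KDETH PSN, and
 * the per-flow progress counters are advanced.  The last packet of a
 * segment requests an ACK, and the request is marked
 * TID_REQUEST_SYNC if the next segment would overflow the KDETH PSN
 * space.  Returns true when this was the last packet of the segment.
 */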
4193bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
4194 struct ib_other_headers *ohdr,
4195 u32 *bth1, u32 *bth2, u32 *len)
4196{
4197 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4198 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
4199 struct tid_rdma_params *remote;
4200 struct rvt_qp *qp = req->qp;
4201 struct hfi1_qp_priv *qpriv = qp->priv;
4202 u32 tidentry = flow->tid_entry[flow->tid_idx];
4203 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
4204 struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
4205 u32 next_offset, om = KDETH_OM_LARGE;
4206 bool last_pkt;
4207
4208 if (!tidlen) {
4209 hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
4210 rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
4211 }
4212
4213 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
4214 flow->sent += *len;
4215 next_offset = flow->tid_offset + *len;
4216 last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
4217 next_offset >= tidlen) || (flow->sent >= flow->length);
4218 trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
4219 trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
4220
4221 rcu_read_lock();
4222 remote = rcu_dereference(qpriv->tid_rdma.remote);
4223 KDETH_RESET(wd->kdeth0, KVER, 0x1);
4224 KDETH_SET(wd->kdeth0, SH, !last_pkt);
4225 KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
4226 KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
4227 KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
4228 KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
4229 KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
4230 KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
4231 wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
4232 rcu_read_unlock();
4233
4234 *bth1 = flow->tid_qpn;
4235 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
4236 HFI1_KDETH_BTH_SEQ_MASK) |
4237 (flow->flow_state.generation <<
4238 HFI1_KDETH_BTH_SEQ_SHIFT));
4239 if (last_pkt) {
4240
4241 if (flow->flow_state.lpsn + 1 +
4242 rvt_div_round_up_mtu(qp, req->seg_len) >
4243 MAX_TID_FLOW_PSN)
4244 req->state = TID_REQUEST_SYNC;
4245 *bth2 |= IB_BTH_REQ_ACK;
4246 }
4247
4248 if (next_offset >= tidlen) {
4249 flow->tid_offset = 0;
4250 flow->tid_idx++;
4251 } else {
4252 flow->tid_offset = next_offset;
4253 }
4254 return last_pkt;
4255}
4256
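/*
 * Responder-side handler for TID WRITE DATA.  The payload has
 * already been placed by the expected-receive hardware; software
 * only tracks the KDETH PSN.  Out-of-sequence packets are NAKed
 * (or, when a FECN forced the packet onto the eager path, copied to
 * the destination by hand).  When the last packet of a segment
 * arrives, the segment's TID resources are freed, WRITE_DATA_LAST
 * advances the ack-queue tail, further segments are allocated if
 * possible, and a TID ACK is scheduled.
 */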
4257void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
4258{
4259 struct rvt_qp *qp = packet->qp;
4260 struct hfi1_qp_priv *priv = qp->priv;
4261 struct hfi1_ctxtdata *rcd = priv->rcd;
4262 struct ib_other_headers *ohdr = packet->ohdr;
4263 struct rvt_ack_entry *e;
4264 struct tid_rdma_request *req;
4265 struct tid_rdma_flow *flow;
4266 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4267 unsigned long flags;
4268 u32 psn, next;
4269 u8 opcode;
4270 bool fecn;
4271
4272 fecn = process_ecn(qp, packet);
4273 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4274 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4275
4276
4277
4278
4279
4280 spin_lock_irqsave(&qp->s_lock, flags);
4281 e = &qp->s_ack_queue[priv->r_tid_tail];
4282 req = ack_to_tid_req(e);
4283 flow = &req->flows[req->clear_tail];
4284 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
4285 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
4286
4287 if (cmp_psn(psn, flow->flow_state.r_next_psn))
4288 goto send_nak;
4289
4290 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4291
4292
4293
4294
4295
4296
4297
4298 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
4299 struct rvt_sge_state ss;
4300 u32 len;
4301 u32 tlen = packet->tlen;
4302 u16 hdrsize = packet->hlen;
4303 u8 pad = packet->pad;
4304 u8 extra_bytes = pad + packet->extra_byte +
4305 (SIZE_OF_CRC << 2);
4306 u32 pmtu = qp->pmtu;
4307
4308 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
4309 goto send_nak;
4310 len = req->comp_seg * req->seg_len;
4311 len += delta_psn(psn,
4312 full_flow_psn(flow, flow->flow_state.spsn)) *
4313 pmtu;
4314 if (unlikely(req->total_len - len < pmtu))
4315 goto send_nak;
4316
4317
4318
4319
4320
4321 ss.sge = e->rdma_sge;
4322 ss.sg_list = NULL;
4323 ss.num_sge = 1;
4324 ss.total_len = req->total_len;
4325 rvt_skip_sge(&ss, len, false);
4326 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
4327 false);
4328
4329 priv->r_next_psn_kdeth = mask_psn(psn + 1);
4330 priv->s_flags |= HFI1_R_TID_SW_PSN;
4331 }
4332 goto exit;
4333 }
4334 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4335 hfi1_kern_exp_rcv_clear(req);
4336 priv->alloc_w_segs--;
4337 rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
4338 req->comp_seg++;
4339 priv->s_nak_state = 0;
4340
4341
4342
4343
4344
4345
4346
4347
4348 trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
4349 trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
4350 req);
4351 trace_hfi1_tid_write_rsp_rcv_data(qp);
4352 validate_r_tid_ack(priv);
4353
4354 if (opcode == TID_OP(WRITE_DATA_LAST)) {
4355 release_rdma_sge_mr(e);
4356 for (next = priv->r_tid_tail + 1; ; next++) {
4357 if (next > rvt_size_atomic(&dev->rdi))
4358 next = 0;
4359 if (next == priv->r_tid_head)
4360 break;
4361 e = &qp->s_ack_queue[next];
4362 if (e->opcode == TID_OP(WRITE_REQ))
4363 break;
4364 }
4365 priv->r_tid_tail = next;
4366 if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
4367 qp->s_acked_ack_queue = 0;
4368 }
4369
4370 hfi1_tid_write_alloc_resources(qp, true);
4371
4372
4373
4374
4375
4376 if (req->cur_seg < req->total_segs ||
4377 qp->s_tail_ack_queue != qp->r_head_ack_queue) {
4378 qp->s_flags |= RVT_S_RESP_PENDING;
4379 hfi1_schedule_send(qp);
4380 }
4381
4382 priv->pending_tid_w_segs--;
4383 if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
4384 if (priv->pending_tid_w_segs)
4385 hfi1_mod_tid_reap_timer(req->qp);
4386 else
4387 hfi1_stop_tid_reap_timer(req->qp);
4388 }
4389
4390done:
4391 tid_rdma_schedule_ack(qp);
4392exit:
4393 priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
4394 if (fecn)
4395 qp->s_flags |= RVT_S_ECN;
4396 spin_unlock_irqrestore(&qp->s_lock, flags);
4397 return;
4398
4399send_nak:
4400 if (!priv->s_nak_state) {
4401 priv->s_nak_state = IB_NAK_PSN_ERROR;
4402 priv->s_nak_psn = flow->flow_state.r_next_psn;
4403 tid_rdma_trigger_ack(qp);
4404 }
4405 goto done;
4406}
4407
4408static bool hfi1_tid_rdma_is_resync_psn(u32 psn)
4409{
4410 return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
4411 HFI1_KDETH_BTH_SEQ_MASK);
4412}
4413
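/*
 * Build a TID RDMA ACK for received WRITE DATA.  BTH2 and the AETH
 * depend on what is being acknowledged: a RESYNC (acknowledge the
 * last PSN of the previous generation), a NAK (carry s_nak_psn and
 * the NAK code), or a normal ACK of the flow's last PSN.  The flow
 * index/QP and the requester's response PSN are echoed back, and for
 * a RESYNC the saved next KDETH PSN is advertised in tid_flow_psn.
 */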
4414u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
4415 struct ib_other_headers *ohdr, u16 iflow,
4416 u32 *bth1, u32 *bth2)
4417{
4418 struct hfi1_qp_priv *qpriv = qp->priv;
4419 struct tid_flow_state *fs = &qpriv->flow_state;
4420 struct tid_rdma_request *req = ack_to_tid_req(e);
4421 struct tid_rdma_flow *flow = &req->flows[iflow];
4422 struct tid_rdma_params *remote;
4423
4424 rcu_read_lock();
4425 remote = rcu_dereference(qpriv->tid_rdma.remote);
4426 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4427 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4428 *bth1 = remote->qp;
4429 rcu_read_unlock();
4430
4431 if (qpriv->resync) {
4432 *bth2 = mask_psn((fs->generation <<
4433 HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4434 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4435 } else if (qpriv->s_nak_state) {
4436 *bth2 = mask_psn(qpriv->s_nak_psn);
4437 ohdr->u.tid_rdma.ack.aeth =
4438 cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
4439 (qpriv->s_nak_state <<
4440 IB_AETH_CREDIT_SHIFT));
4441 } else {
4442 *bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
4443 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4444 }
4445 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4446 ohdr->u.tid_rdma.ack.tid_flow_qp =
4447 cpu_to_be32(qpriv->tid_rdma.local.qp |
4448 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
4449 TID_RDMA_DESTQP_FLOW_SHIFT) |
4450 qpriv->rcd->ctxt);
4451
4452 ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
4453 ohdr->u.tid_rdma.ack.verbs_psn =
4454 cpu_to_be32(flow->flow_state.resp_ib_psn);
4455
4456 if (qpriv->resync) {
4457
4458
4459
4460
4461
4462
4463 if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
4464 ohdr->u.tid_rdma.ack.tid_flow_psn =
4465 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4466 } else {
4467
4468
4469
4470
4471
4472
4473 qpriv->r_next_psn_kdeth_save =
4474 qpriv->r_next_psn_kdeth - 1;
4475 ohdr->u.tid_rdma.ack.tid_flow_psn =
4476 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4477 qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
4478 }
4479 qpriv->resync = false;
4480 }
4481
4482 return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
4483}
4484
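/*
 * Requester-side handler for TID RDMA ACK packets.  Segments covered
 * by the acknowledged KDETH PSN are retired, possibly completing
 * whole TID WRITE WQEs.  A plain ACK restarts or stops the retry
 * timer; an ACK of a RESYNC rewrites the generation and PSNs of
 * every not-yet-acknowledged flow so the outstanding WRITE DATA is
 * retransmitted in the new generation; a NAK rewinds cur_seg to the
 * acknowledged segment and restarts the request from
 * TID_OP(WRITE_REQ).
 */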
4485void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
4486{
4487 struct ib_other_headers *ohdr = packet->ohdr;
4488 struct rvt_qp *qp = packet->qp;
4489 struct hfi1_qp_priv *qpriv = qp->priv;
4490 struct rvt_swqe *wqe;
4491 struct tid_rdma_request *req;
4492 struct tid_rdma_flow *flow;
4493 u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
4494 unsigned long flags;
4495 u16 fidx;
4496
4497 trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
4498 process_ecn(qp, packet);
4499 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4500 aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
4501 req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
4502 resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
4503
4504 spin_lock_irqsave(&qp->s_lock, flags);
4505 trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
4506
4507
4508 if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
4509 cmp_psn(psn, qpriv->s_resync_psn))
4510 goto ack_op_err;
4511
4512 ack_psn = req_psn;
4513 if (hfi1_tid_rdma_is_resync_psn(psn))
4514 ack_kpsn = resync_psn;
4515 else
4516 ack_kpsn = psn;
4517 if (aeth >> 29) {
4518 ack_psn--;
4519 ack_kpsn--;
4520 }
4521
4522 if (unlikely(qp->s_acked == qp->s_tail))
4523 goto ack_op_err;
4524
4525 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4526
4527 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4528 goto ack_op_err;
4529
4530 req = wqe_to_tid_req(wqe);
4531 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4532 wqe->lpsn, req);
4533 flow = &req->flows[req->acked_tail];
4534 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4535
4536
4537 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
4538 cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
4539 goto ack_op_err;
4540
4541 while (cmp_psn(ack_kpsn,
4542 full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
4543 req->ack_seg < req->cur_seg) {
4544 req->ack_seg++;
4545
4546 req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
4547 req->r_last_acked = flow->flow_state.resp_ib_psn;
4548 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4549 wqe->lpsn, req);
4550 if (req->ack_seg == req->total_segs) {
4551 req->state = TID_REQUEST_COMPLETE;
4552 wqe = do_rc_completion(qp, wqe,
4553 to_iport(qp->ibqp.device,
4554 qp->port_num));
4555 trace_hfi1_sender_rcv_tid_ack(qp);
4556 atomic_dec(&qpriv->n_tid_requests);
4557 if (qp->s_acked == qp->s_tail)
4558 break;
4559 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4560 break;
4561 req = wqe_to_tid_req(wqe);
4562 }
4563 flow = &req->flows[req->acked_tail];
		trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
	}

	trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
				       wqe->lpsn, req);
	switch (aeth >> 29) {
	case 0:         /* Ack */
		if (qpriv->s_flags & RVT_S_WAIT_ACK)
			qpriv->s_flags &= ~RVT_S_WAIT_ACK;
		if (!hfi1_tid_rdma_is_resync_psn(psn)) {
			/* Keep the retry timer armed while segments remain unacked */
			if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
			    req->ack_seg < req->cur_seg)
				hfi1_mod_tid_retry_timer(qp);
			else
				hfi1_stop_tid_retry_timer(qp);
			hfi1_schedule_send(qp);
		} else {
			u32 spsn, fpsn, last_acked, generation;
			struct tid_rdma_request *rptr;

			/* ACK(RESYNC) */
			hfi1_stop_tid_retry_timer(qp);
			/* Allow new requests (see hfi1_make_tid_rdma_pkt) */
			qp->s_flags &= ~HFI1_S_WAIT_HALT;
			/*
			 * Clear RVT_S_SEND_ONE in case the TID RDMA ACK
			 * arrives after the retry timer has fired again, so
			 * that no further RESYNC requests are sent and no
			 * more TID ACK packets are waited for.
			 */
			qpriv->s_flags &= ~RVT_S_SEND_ONE;
			hfi1_schedule_send(qp);

			if ((qp->s_acked == qpriv->s_tid_tail &&
			     req->ack_seg == req->total_segs) ||
			    qp->s_acked == qp->s_tail) {
				qpriv->s_state = TID_OP(WRITE_DATA_LAST);
				goto done;
			}

			if (req->ack_seg == req->comp_seg) {
				qpriv->s_state = TID_OP(WRITE_DATA);
				goto done;
			}

			/*
			 * The PSN to start with is the next PSN after the
			 * RESYNC PSN.
			 */
			psn = mask_psn(psn + 1);
			generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
			spsn = 0;

			/*
			 * Update to the currently acked WQE if the ACK
			 * arrives in the middle of a request.
			 */
			if (delta_psn(ack_psn, wqe->lpsn))
				wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			req = wqe_to_tid_req(wqe);
			flow = &req->flows[req->acked_tail];
			/*
			 * RESYNC re-numbers the PSN ranges of all remaining
			 * segments. Track how many packets lie between the
			 * real start of the segment and the RESYNC point in
			 * flow->resync_npkts so the SGE can be rewound
			 * correctly later.
			 */
			fpsn = full_flow_psn(flow, flow->flow_state.spsn);
			req->r_ack_psn = psn;
			/*
			 * If the flow's generation does not match the RESYNC
			 * generation, the next segment starts a new
			 * generation; adjust resync_psn accordingly.
			 */
			if (flow->flow_state.generation !=
			    (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
				resync_psn = mask_psn(fpsn - 1);
			flow->resync_npkts +=
				delta_psn(mask_psn(resync_psn + 1), fpsn);
			/*
			 * Renumber all packet sequence number ranges based
			 * on the new generation.
			 */
			last_acked = qp->s_acked;
			rptr = req;
			while (1) {
				/* Start from the last acked segment */
				for (fidx = rptr->acked_tail;
				     CIRC_CNT(rptr->setup_head, fidx,
					      MAX_FLOWS);
				     fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
					u32 lpsn;
					u32 gen;

					flow = &rptr->flows[fidx];
					gen = flow->flow_state.generation;
					if (WARN_ON(gen == generation &&
						    flow->flow_state.spsn !=
						     spsn))
						continue;
					lpsn = flow->flow_state.lpsn;
					lpsn = full_flow_psn(flow, lpsn);
					flow->npkts =
						delta_psn(lpsn,
							  mask_psn(resync_psn)
							  );
					flow->flow_state.generation =
						generation;
					flow->flow_state.spsn = spsn;
					flow->flow_state.lpsn =
						flow->flow_state.spsn +
						flow->npkts - 1;
					flow->pkt = 0;
					spsn += flow->npkts;
					resync_psn += flow->npkts;
					trace_hfi1_tid_flow_rcv_tid_ack(qp,
									fidx,
									flow);
				}
				if (++last_acked == qpriv->s_tid_cur + 1)
					break;
				if (last_acked == qp->s_size)
					last_acked = 0;
				wqe = rvt_get_swqe_ptr(qp, last_acked);
				rptr = wqe_to_tid_req(wqe);
			}
			req->cur_seg = req->ack_seg;
			qpriv->s_tid_tail = qp->s_acked;
			qpriv->s_state = TID_OP(WRITE_REQ);
			hfi1_schedule_tid_send(qp);
		}
done:
		qpriv->s_retry = qp->s_retry_cnt;
		break;

	case 3:         /* NAK */
		hfi1_stop_tid_retry_timer(qp);
		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
			IB_AETH_CREDIT_MASK) {
		case 0: /* PSN sequence error */
			if (!req->flows)
				break;
			flow = &req->flows[req->acked_tail];
			flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
			if (cmp_psn(psn, flpsn) > 0)
				break;
			trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
							flow);
			req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
			req->cur_seg = req->ack_seg;
			qpriv->s_tid_tail = qp->s_acked;
			qpriv->s_state = TID_OP(WRITE_REQ);
			qpriv->s_retry = qp->s_retry_cnt;
			hfi1_schedule_tid_send(qp);
			break;

		default:
			break;
		}
		break;

	default:
		break;
	}

ack_op_err:
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

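/*
 * TID RDMA retry timer helpers. The timer runs while the requester is
 * waiting for TID ACKs; on expiry, hfi1_tid_retry_timeout() either sends
 * a RESYNC or fails the request once the retries are exhausted.
 */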
void hfi1_add_tid_retry_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_qp *ibqp = &qp->ibqp;
	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);

	lockdep_assert_held(&qp->s_lock);
	if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
		priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
		priv->s_tid_retry_timer.expires = jiffies +
			priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
		add_timer(&priv->s_tid_retry_timer);
	}
}

static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_qp *ibqp = &qp->ibqp;
	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);

	lockdep_assert_held(&qp->s_lock);
	priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
	mod_timer(&priv->s_tid_retry_timer, jiffies +
		  priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
}

static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	int rval = 0;

	lockdep_assert_held(&qp->s_lock);
	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
		rval = del_timer(&priv->s_tid_retry_timer);
		priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
	}
	return rval;
}

void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	del_timer_sync(&priv->s_tid_retry_timer);
	priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}

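/*
 * TID retry timer expiry handler. If retries remain, switch the QP to
 * sending a single RESYNC request and halt further TID RDMA WRITE
 * requests until the RESYNC is acknowledged; otherwise complete the
 * pending work request with IB_WC_RETRY_EXC_ERR and move the QP to the
 * error state.
 */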
static void hfi1_tid_retry_timeout(struct timer_list *t)
{
	struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
	struct rvt_qp *qp = priv->owner;
	struct rvt_swqe *wqe;
	unsigned long flags;
	struct tid_rdma_request *req;

	spin_lock_irqsave(&qp->r_lock, flags);
	spin_lock(&qp->s_lock);
	trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
		hfi1_stop_tid_retry_timer(qp);
		if (!priv->s_retry) {
			trace_hfi1_msg_tid_retry_timeout(
				qp,
				"Exhausted retries. Tid retry timeout = ",
				(u64)priv->tid_retry_timeout_jiffies);

			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		} else {
			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			req = wqe_to_tid_req(wqe);
			trace_hfi1_tid_req_tid_retry_timeout(
				qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);

			priv->s_flags &= ~RVT_S_WAIT_ACK;
			/* Only one packet (the RESYNC) will be sent */
			priv->s_flags |= RVT_S_SEND_ONE;
			/*
			 * No additional request shall be made by this QP
			 * until the RESYNC request has been acknowledged.
			 */
			qp->s_flags |= HFI1_S_WAIT_HALT;
			priv->s_state = TID_OP(RESYNC);
			priv->s_retry--;
			hfi1_schedule_tid_send(qp);
		}
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_lock, flags);
}

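/*
 * Build a TID RDMA RESYNC packet into @ohdr. The BTH PSN (*bth2) is set
 * to the last PSN of the next flow generation so the responder can
 * re-synchronize its flow generation tracking.
 */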
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       struct ib_other_headers *ohdr, u32 *bth1,
			       u32 *bth2, u16 fidx)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_params *remote;
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow = &req->flows[fidx];
	u32 generation;

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
	ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 = remote->qp;
	rcu_read_unlock();

	generation = kern_flow_generation_next(flow->flow_state.generation);
	*bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
	qpriv->s_resync_psn = *bth2;
	*bth2 |= IB_BTH_REQ_ACK;
	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);

	return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
}

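/*
 * Process an incoming TID RDMA RESYNC request on the responder side:
 * validate the requested flow generation, reprogram (or record) the HW
 * flow generation, renumber the flow PSN ranges of all pending TID RDMA
 * WRITE responses, and schedule a TID ACK back to the requester.
 */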
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
{
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ctxtdata *rcd = qpriv->rcd;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct rvt_ack_entry *e;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	struct tid_flow_state *fs = &qpriv->flow_state;
	u32 psn, generation, idx, gen_next;
	bool fecn;
	unsigned long flags;

	fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));

	generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
	spin_lock_irqsave(&qp->s_lock, flags);

	gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
		generation : kern_flow_generation_next(fs->generation);
	/*
	 * Only accept a RESYNC whose generation is the next expected one
	 * or the one just before it.
	 */
	if (generation != mask_generation(gen_next - 1) &&
	    generation != gen_next)
		goto bail;
	/* A RESYNC is already being processed */
	if (qpriv->resync)
		goto bail;

	spin_lock(&rcd->exp_lock);
	if (fs->index >= RXE_NUM_TID_FLOWS) {
		/*
		 * If we don't have a flow, save the generation so it can be
		 * applied when a new flow is allocated.
		 */
		fs->generation = generation;
	} else {
		/* Reprogram the QP flow with the new generation */
		rcd->flows[fs->index].generation = generation;
		fs->generation = kern_setup_hw_flow(rcd, fs->index);
	}
	fs->psn = 0;
	/*
	 * Disable SW PSN checking since a RESYNC is equivalent to starting
	 * over with a fresh flow generation.
	 */
	qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
	trace_hfi1_tid_write_rsp_rcv_resync(qp);

	/*
	 * Reset the TID flow state of every pending TID RDMA WRITE response
	 * so that packets sent after the RESYNC use the new generation.
	 */
	for (idx = qpriv->r_tid_tail; ; idx++) {
		u16 flow_idx;

		if (idx > rvt_size_atomic(&dev->rdi))
			idx = 0;
		e = &qp->s_ack_queue[idx];
		if (e->opcode == TID_OP(WRITE_REQ)) {
			req = ack_to_tid_req(e);
			trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
						      e->lpsn, req);

			/* Renumber each flow that is not yet fully received */
			for (flow_idx = req->clear_tail;
			     CIRC_CNT(req->setup_head, flow_idx,
				      MAX_FLOWS);
			     flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
				u32 lpsn;
				u32 next;

				flow = &req->flows[flow_idx];
				lpsn = full_flow_psn(flow,
						     flow->flow_state.lpsn);
				next = flow->flow_state.r_next_psn;
				flow->npkts = delta_psn(lpsn, next - 1);
				flow->flow_state.generation = fs->generation;
				flow->flow_state.spsn = fs->psn;
				flow->flow_state.lpsn =
					flow->flow_state.spsn + flow->npkts - 1;
				flow->flow_state.r_next_psn =
					full_flow_psn(flow,
						      flow->flow_state.spsn);
				fs->psn += flow->npkts;
				trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
							       flow);
			}
		}
		if (idx == qp->s_tail_ack_queue)
			break;
	}

	spin_unlock(&rcd->exp_lock);
	qpriv->resync = true;
	/* Clear any pending NAK before generating the TID ACK */
	qpriv->s_nak_state = 0;
	tid_rdma_trigger_ack(qp);
bail:
	if (fecn)
		qp->s_flags |= RVT_S_ECN;
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

/*
 * Advance s_tid_tail past the current request to the next TID RDMA WRITE
 * WQE (or to s_tid_cur if none is found) and set the TID send state to
 * TID_OP(WRITE_RESP).
 */
static void update_tid_tail(struct rvt_qp *qp)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 i;
	struct rvt_swqe *wqe;

	lockdep_assert_held(&qp->s_lock);
	/* Can't move beyond s_tid_cur */
	if (priv->s_tid_tail == priv->s_tid_cur)
		return;
	for (i = priv->s_tid_tail + 1; ; i++) {
		if (i == qp->s_size)
			i = 0;

		if (i == priv->s_tid_cur)
			break;
		wqe = rvt_get_swqe_ptr(qp, i);
		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
			break;
	}
	priv->s_tid_tail = i;
	priv->s_state = TID_OP(WRITE_RESP);
}

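/*
 * Build the next TID RDMA packet for this QP: a TID ACK (via
 * make_tid_rdma_ack()), TID RDMA WRITE DATA/WRITE DATA LAST, or a
 * RESYNC, depending on the current TID send state. Returns 1 if a packet
 * was built, 0 otherwise.
 */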
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct rvt_swqe *wqe;
	u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
	struct ib_other_headers *ohdr;
	struct rvt_sge_state *ss = &qp->s_sge;
	struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
	struct tid_rdma_request *req = ack_to_tid_req(e);
	bool last = false;
	u8 opcode = TID_OP(WRITE_DATA);

	lockdep_assert_held(&qp->s_lock);
	trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
	/*
	 * Prioritize the sending of new requests and responses over the
	 * sending of TID RDMA data packets.
	 */
	if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
	     atomic_read(&priv->n_requests) &&
	     !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
			     HFI1_S_ANY_WAIT_IO))) ||
	    (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
	     !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
		struct iowait_work *iowork;

		iowork = iowait_get_ib_work(&priv->s_iowait);
		ps->s_txreq = get_waiting_verbs_txreq(iowork);
		if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
			priv->s_flags |= HFI1_S_TID_BUSY_SET;
			return 1;
		}
	}

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;

	if ((priv->s_flags & RVT_S_ACK_PENDING) &&
	    make_tid_rdma_ack(qp, ohdr, ps))
		return 1;

	/*
	 * Bail out if we cannot send data. This check must come after the
	 * call to make_tid_rdma_ack() because a responding QP may still
	 * need to send a TID RDMA ACK even when it cannot send TID RDMA
	 * WRITE DATA.
	 */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
		goto bail;

	if (priv->s_flags & RVT_S_WAIT_ACK)
		goto bail;

	/* Check whether there is anything to do */
	if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
		goto bail;
	wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
	req = wqe_to_tid_req(wqe);
	trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
					wqe->lpsn, req);
	switch (priv->s_state) {
	case TID_OP(WRITE_REQ):
	case TID_OP(WRITE_RESP):
		priv->tid_ss.sge = wqe->sg_list[0];
		priv->tid_ss.sg_list = wqe->sg_list + 1;
		priv->tid_ss.num_sge = wqe->wr.num_sge;
		priv->tid_ss.total_len = wqe->length;

		if (priv->s_state == TID_OP(WRITE_REQ))
			hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
		priv->s_state = TID_OP(WRITE_DATA);
		fallthrough;

	case TID_OP(WRITE_DATA):
		/*
		 * TID RDMA WRITE DATA is sent only for segments for which a
		 * TID RDMA WRITE RESP has been received (cur_seg < comp_seg).
		 * On the last packet of a segment, advance the flow pointers
		 * and, if this was the final segment, switch to WRITE DATA
		 * LAST and advance s_tid_tail.
		 */
		trace_hfi1_sender_make_tid_pkt(qp);
		trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
		req = wqe_to_tid_req(wqe);
		len = wqe->length;

		if (!req->comp_seg || req->cur_seg == req->comp_seg)
			goto bail;

		trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
						wqe->psn, wqe->lpsn, req);
		last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
						  &len);

		if (last) {
			/* Move the pointer to the next flow */
			req->clear_tail = CIRC_NEXT(req->clear_tail,
						    MAX_FLOWS);
			if (++req->cur_seg < req->total_segs) {
				if (!CIRC_CNT(req->setup_head, req->clear_tail,
					      MAX_FLOWS))
					qp->s_flags |= HFI1_S_WAIT_TID_RESP;
			} else {
				priv->s_state = TID_OP(WRITE_DATA_LAST);
				opcode = TID_OP(WRITE_DATA_LAST);

				/* Advance s_tid_tail now */
				update_tid_tail(qp);
			}
		}
		hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
		ss = &priv->tid_ss;
		break;

	case TID_OP(RESYNC):
		trace_hfi1_sender_make_tid_pkt(qp);
		/* Use the generation from the most recently received response */
		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
		req = wqe_to_tid_req(wqe);
		/* If no response was received for this WQE, use the previous one */
		if (!req->comp_seg) {
			wqe = rvt_get_swqe_ptr(qp,
					       (!priv->s_tid_cur ? qp->s_size :
						priv->s_tid_cur) - 1);
			req = wqe_to_tid_req(wqe);
		}
		hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
						     &bth2,
						     CIRC_PREV(req->setup_head,
							       MAX_FLOWS));
		ss = NULL;
		len = 0;
		opcode = TID_OP(RESYNC);
		break;

	default:
		goto bail;
	}
	if (priv->s_flags & RVT_S_SEND_ONE) {
		priv->s_flags &= ~RVT_S_SEND_ONE;
		priv->s_flags |= RVT_S_WAIT_ACK;
		bth2 |= IB_BTH_REQ_ACK;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = ss;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
			     middle, ps);
	return 1;
bail:
	hfi1_put_txreq(ps->s_txreq);
bail_no_tx:
	ps->s_txreq = NULL;
	priv->s_flags &= ~RVT_S_BUSY;
	/*
	 * If a txreq could not be acquired, the QP will be woken up later.
	 * Record which work item should be woken.
	 */
	iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
	return 0;
}

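/*
 * Build a TID RDMA ACK packet acknowledging the TID RDMA WRITE request
 * segments that have been received so far. Returns 1 if an ACK was
 * built and 0 if there is nothing to acknowledge.
 */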
static int make_tid_rdma_ack(struct rvt_qp *qp,
			     struct ib_other_headers *ohdr,
			     struct hfi1_pkt_state *ps)
{
	struct rvt_ack_entry *e;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	u32 hwords, next;
	u32 len = 0;
	u32 bth1 = 0, bth2 = 0;
	int middle = 0;
	u16 flow;
	struct tid_rdma_request *req, *nreq;

	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
	/* Don't send an ACK if we aren't supposed to */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto bail;

	/* Header size in 32-bit words: LRH + BTH = (8 + 12) / 4 */
	hwords = 5;

	e = &qp->s_ack_queue[qpriv->r_tid_ack];
	req = ack_to_tid_req(e);
	/*
	 * In the RESYNC case, step back one ack entry when the current one
	 * has either not been acked at all or has been fully acked, so the
	 * do-while loop below starts from the segment the RESYNC applies
	 * to.
	 */
	if (qpriv->resync) {
		if (!req->ack_seg || req->ack_seg == req->total_segs)
			qpriv->r_tid_ack = !qpriv->r_tid_ack ?
				rvt_size_atomic(&dev->rdi) :
				qpriv->r_tid_ack - 1;
		e = &qp->s_ack_queue[qpriv->r_tid_ack];
		req = ack_to_tid_req(e);
	}

	trace_hfi1_rsp_make_tid_ack(qp, e->psn);
	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
					req);
	/*
	 * Bail out if there is nothing new to acknowledge: no NAK pending,
	 * no RESYNC, and no newly completed segments.
	 */
	if (!qpriv->s_nak_state && !qpriv->resync &&
	    req->ack_seg == req->comp_seg)
		goto bail;

	do {
		/*
		 * Account for the segments received since the last ACK and
		 * advance the acked flow index to match.
		 */
		req->ack_seg +=
			/* Get an up-to-date count */
			CIRC_CNT(req->clear_tail, req->acked_tail,
				 MAX_FLOWS);
		/* Advance the acked index */
		req->acked_tail = req->clear_tail;
		/*
		 * req->clear_tail points to the segment currently being
		 * received, so the segment being acknowledged is the one
		 * just before it.
		 */
		flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
		if (req->ack_seg != req->total_segs)
			break;
		req->state = TID_REQUEST_COMPLETE;

		next = qpriv->r_tid_ack + 1;
		if (next > rvt_size_atomic(&dev->rdi))
			next = 0;
		qpriv->r_tid_ack = next;
		if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
			break;
		nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
		if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
			break;

		/* Move to the next ack entry */
		e = &qp->s_ack_queue[qpriv->r_tid_ack];
		req = ack_to_tid_req(e);
	} while (1);

	/*
	 * If a NAK is pending, or a RESYNC is in progress and data has
	 * already been received past the end of the last acked flow,
	 * re-read the ack entry at r_tid_ack and ACK from its acked_tail
	 * flow.
	 */
	if (qpriv->s_nak_state ||
	    (qpriv->resync &&
	     !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
	     (cmp_psn(qpriv->r_next_psn_kdeth - 1,
		      full_flow_psn(&req->flows[flow],
				    req->flows[flow].flow_state.lpsn)) > 0))) {
		e = &qp->s_ack_queue[qpriv->r_tid_ack];
		req = ack_to_tid_req(e);
		flow = req->acked_tail;
	} else if (req->ack_seg == req->total_segs &&
		   qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;

	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
					req);
	hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
						&bth2);
	len = 0;
	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = qpriv->s_sde;
	ps->s_txreq->s_cur_size = len;
	ps->s_txreq->ss = NULL;
	hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
			     ps);
	ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
	return 1;
bail:
	/*
	 * Ensure the updates above are visible before RVT_S_ACK_PENDING is
	 * cleared.
	 */
	smp_wmb();
	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
	return 0;
}

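/*
 * Check whether the TID RDMA send engine may run for this QP: it must
 * not be busy or blocked on I/O, and there must be either a queued TID
 * txreq, a pending response, or no TID send-wait flags set.
 */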
static int hfi1_send_tid_ok(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	return !(priv->s_flags & RVT_S_BUSY ||
		 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
		(verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
		 (priv->s_flags & RVT_S_RESP_PENDING) ||
		 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
}

void _hfi1_do_tid_send(struct work_struct *work)
{
	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
	struct rvt_qp *qp = iowait_to_qp(w->iow);

	hfi1_do_tid_send(qp);
}

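/*
 * Main TID RDMA send loop: repeatedly build TID RDMA packets with
 * hfi1_make_tid_rdma_pkt() and hand them to hfi1_verbs_send(), dropping
 * and re-acquiring the QP s_lock around each send, until there is
 * nothing left to send or the engine must yield.
 */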
static void hfi1_do_tid_send(struct rvt_qp *qp)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);
	ps.wait = iowait_get_tid_work(&priv->s_iowait);
	ps.in_thread = false;
	ps.timeout_int = qp->timeout_jiffies / 8;

	trace_hfi1_rc_do_tid_send(qp, false);
	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request */
	if (!hfi1_send_tid_ok(qp)) {
		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	priv->s_flags |= RVT_S_BUSY;

	ps.timeout = jiffies + ps.timeout_int;
	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
		cpumask_first(cpumask_of_node(ps.ppd->dd->node));
	ps.pkts_sent = false;

	/* Make sure any pre-built packet is handled first */
	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
	do {
		/* Check for a constructed packet to be sent */
		if (ps.s_txreq) {
			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
				qp->s_flags |= RVT_S_BUSY;
				ps.wait = iowait_get_ib_work(&priv->s_iowait);
			}
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);

			/*
			 * If the packet cannot be sent now, return and the
			 * send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;

			/* Allow other tasks to run */
			if (hfi1_schedule_send_yield(qp, &ps, true))
				return;

			spin_lock_irqsave(&qp->s_lock, ps.flags);
			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
				qp->s_flags &= ~RVT_S_BUSY;
				priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
				ps.wait = iowait_get_tid_work(&priv->s_iowait);
				if (iowait_flag_set(&priv->s_iowait,
						    IOWAIT_PENDING_IB))
					hfi1_schedule_send(qp);
			}
		}
	} while (hfi1_make_tid_rdma_pkt(qp, &ps));
	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = ppd->dd;

	if ((dd->flags & HFI1_SHUTDOWN))
		return true;

	return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
				   priv->s_sde ?
				   priv->s_sde->cpu :
				   cpumask_first(cpumask_of_node(dd->node)));
}

/**
 * hfi1_schedule_tid_send - schedule progress on the TID RDMA state machine
 * @qp: the QP
 *
 * Schedule TID RDMA progress for @qp. The caller must hold the s_lock.
 * Unlike hfi1_schedule_send(), this uses hfi1_send_tid_ok() rather than
 * hfi1_send_ok(), since the IB and TID RDMA state machines would
 * otherwise contend over the RVT_S_BUSY flag.
 *
 * Return: true if progress was scheduled, false otherwise.
 */
bool hfi1_schedule_tid_send(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	if (hfi1_send_tid_ok(qp)) {
		/*
		 * Queue the qp for TID RDMA work; the return value is
		 * ignored because the qp ends up on the work queue either
		 * way.
		 */
		_hfi1_schedule_tid_send(qp);
		return true;
	}
	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
				IOWAIT_PENDING_TID);
	return false;
}

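/*
 * Responder-side interlock: if the previous entry in the ack queue is a
 * TID RDMA WRITE request that has not been fully acknowledged, defer the
 * current (TID) RDMA READ request by setting HFI1_R_TID_WAIT_INTERLCK.
 * Returns true if the request must wait.
 */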
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
{
	struct rvt_ack_entry *prev;
	struct tid_rdma_request *req;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	u32 s_prev;

	s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
		(qp->s_tail_ack_queue - 1);
	prev = &qp->s_ack_queue[s_prev];

	if ((e->opcode == TID_OP(READ_REQ) ||
	     e->opcode == OP(RDMA_READ_REQUEST)) &&
	    prev->opcode == TID_OP(WRITE_REQ)) {
		req = ack_to_tid_req(prev);
		if (req->ack_seg != req->total_segs) {
			priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
			return true;
		}
	}
	return false;
}

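/*
 * Read the next expected PSN for the given hardware flow directly from
 * the receive context's TID flow table CSR.
 */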
static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
{
	u64 reg;

	/*
	 * The only sane way to get the amount of progress is to read the
	 * HW flow state.
	 */
	reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
	return mask_psn(reg);
}

static void tid_rdma_rcv_err(struct hfi1_packet *packet,
			     struct ib_other_headers *ohdr,
			     struct rvt_qp *qp, u32 psn, int diff, bool fecn)
{
	unsigned long flags;

	tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
	if (fecn) {
		spin_lock_irqsave(&qp->s_lock, flags);
		qp->s_flags |= RVT_S_ECN;
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
}

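/*
 * When an eager-delivered packet arrives with FECN set and software PSN
 * tracking is not in use, refresh the flow's r_next_psn from the
 * hardware flow table so the software view stays in sync.
 */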
static void update_r_next_psn_fecn(struct hfi1_packet *packet,
				   struct hfi1_qp_priv *priv,
				   struct hfi1_ctxtdata *rcd,
				   struct tid_rdma_flow *flow,
				   bool fecn)
{
	/*
	 * A packet delivered here as an eager packet (RSM rule + FECN)
	 * requires r_next_psn to be read back from the HW flow.
	 */
	if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
	    !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
		struct hfi1_devdata *dd = rcd->dd;

		flow->flow_state.r_next_psn =
			read_r_next_psn(dd, rcd->ctxt, flow->idx);
	}
}
