/*
 * Private interfaces for the client-side RPC-over-RDMA transport
 * (sunrpc/xprtrdma).
 */

#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
#define _LINUX_SUNRPC_XPRT_RDMA_H

#include <linux/wait.h>			/* wait_queue_head_t, etc */
#include <linux/spinlock.h>		/* spinlock_t, etc */
#include <linux/atomic.h>		/* atomic_t, etc */
#include <linux/workqueue.h>		/* struct work_struct */

#include <rdma/rdma_cm.h>		/* RDMA connection api */
#include <rdma/ib_verbs.h>		/* RDMA verbs api */

#include <linux/sunrpc/clnt.h>		/* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h>	/* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h>	/* xprt parameters */

#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */

#define RPCRDMA_BIND_TO		(60U * HZ)
#define RPCRDMA_INIT_REEST_TO	(5U * HZ)
#define RPCRDMA_MAX_REEST_TO	(30U * HZ)
#define RPCRDMA_IDLE_DISC_TO	(5U * 60 * HZ)

/*
 * Interface Adapter -- one per transport instance
 */
struct rpcrdma_ia {
	const struct rpcrdma_memreg_ops	*ri_ops;
	struct ib_device	*ri_device;
	struct rdma_cm_id	*ri_id;
	struct ib_pd		*ri_pd;
	struct completion	ri_done;
	struct completion	ri_remove_done;
	int			ri_async_rc;
	unsigned int		ri_max_segs;
	unsigned int		ri_max_frmr_depth;
	unsigned int		ri_max_inline_write;
	unsigned int		ri_max_inline_read;
	unsigned int		ri_max_send_sges;
	bool			ri_reminv_expected;
	bool			ri_implicit_roundup;
	enum ib_mr_type		ri_mrtype;
	unsigned long		ri_flags;
	struct ib_qp_attr	ri_qp_attr;
	struct ib_qp_init_attr	ri_qp_init_attr;
};

enum {
	RPCRDMA_IAF_REMOVING = 0,
};

/*
 * RDMA Endpoint -- one per transport instance
 */
struct rpcrdma_ep {
	atomic_t		rep_cqcount;
	int			rep_cqinit;
	int			rep_connected;
	struct ib_qp_init_attr	rep_attr;
	wait_queue_head_t	rep_connect_wait;
	struct rpcrdma_connect_private	rep_cm_private;
	struct rdma_conn_param	rep_remote_cma;
	struct sockaddr_storage	rep_remote_addr;
	struct delayed_work	rep_connect_worker;
};

static inline void
rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
{
	atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
}

/* Ask for a Send completion only once every rep_cqinit posts, so
 * Send Queue resources are reclaimed without signaling every
 * Work Request.
 */
static inline void
rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
{
	send_wr->send_flags = 0;
	if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
		rpcrdma_init_cqcount(ep, 0);
		send_wr->send_flags = IB_SEND_SIGNALED;
	}
}
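
/* A minimal sketch (not part of the original header) of how a send
 * path might consume the helper above; the call site is hypothetical,
 * though the names come from the structures in this file:
 *
 *	rpcrdma_set_signaled(ep, &req->rl_send_wr);
 *	rc = ib_post_send(ia->ri_id->qp, &req->rl_send_wr, &bad_wr);
 *
 * Only every rep_cqinit-th Send WR carries IB_SEND_SIGNALED, so the
 * provider raises one Send completion per batch instead of one per
 * post, while the CQ is still drained often enough to reclaim SQEs.
 */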

/* Pre-allocate extra Work Requests for handling backward receives
 * and sends. This is a fixed value because the Work Queues are
 * allocated when the forward channel is set up.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS		(8)
#else
#define RPCRDMA_BACKWARD_WRS		(0)
#endif

/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
 *
 * The below structure appears at the front of a large region of
 * kmalloc'd memory, which always starts on a good alignment boundary.
 */
struct rpcrdma_regbuf {
	struct ib_sge		rg_iov;
	struct ib_device	*rg_device;
	enum dma_data_direction	rg_direction;
	__be32			rg_base[0] __attribute__ ((aligned(256)));
};

static inline u64
rdmab_addr(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.addr;
}

static inline u32
rdmab_length(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.length;
}

static inline u32
rdmab_lkey(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.lkey;
}

static inline struct rpcrdma_msg *
rdmab_to_msg(struct rpcrdma_regbuf *rb)
{
	return (struct rpcrdma_msg *)rb->rg_base;
}

static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
	return rb->rg_device;
}

#define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)

/* To ensure a transport can always make forward progress,
 * the number of RDMA segments allowed in header chunk lists
 * is capped at 8. This prevents less-capable devices from
 * overrunning the Send buffer while building chunk lists.
 *
 * Elements of the Read list take up more room than the Write
 * list or Reply chunk, so 8 segments keeps the transport
 * header within the smallest inline threshold.
 */
enum {
	RPCRDMA_MAX_HDR_SEGS = 8,
	RPCRDMA_HDRBUF_SIZE = 256,
};

/*
 * struct rpcrdma_rep -- this structure encapsulates state required
 * to receive and complete an RPC Reply, asynchronously. It needs
 * several pieces of state:
 *
 *   o receive buffer and ib_sge (donated to provider)
 *   o status of receive (success or not, length, inv rkey)
 *   o bookkeeping state to get run by reply handler (XDR stream)
 *
 * These structures are allocated during transport initialization.
 * N of these are associated with a transport instance, managed by
 * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
 */
struct rpcrdma_rep {
	struct ib_cqe		rr_cqe;
	int			rr_wc_flags;
	u32			rr_inv_rkey;
	struct rpcrdma_regbuf	*rr_rdmabuf;
	struct rpcrdma_xprt	*rr_rxprt;
	struct work_struct	rr_work;
	struct xdr_buf		rr_hdrbuf;
	struct xdr_stream	rr_stream;
	struct list_head	rr_list;
	struct ib_recv_wr	rr_recv_wr;
};

/*
 * struct rpcrdma_mw - external memory region metadata
 *
 * An external memory region is any buffer or page that is registered
 * on the fly (ie, not pre-registered). Each rpcrdma_buffer keeps a
 * list of free MWs anchored in rb_mws; the memory registration ops
 * assign MWs to the segments of an rpcrdma_req while the RPC is
 * pending, and recover them when the RPC completes.
 */
enum rpcrdma_frmr_state {
	FRMR_IS_INVALID,	/* ready to be used */
	FRMR_IS_VALID,		/* in use */
	FRMR_FLUSHED_FR,	/* flushed FASTREG WR */
	FRMR_FLUSHED_LI,	/* flushed LOCALINV WR */
};

struct rpcrdma_frmr {
	struct ib_mr		*fr_mr;
	struct ib_cqe		fr_cqe;
	enum rpcrdma_frmr_state	fr_state;
	struct completion	fr_linv_done;
	union {
		struct ib_reg_wr	fr_regwr;
		struct ib_send_wr	fr_invwr;
	};
};

struct rpcrdma_fmr {
	struct ib_fmr		*fm_mr;
	u64			*fm_physaddrs;
};

struct rpcrdma_mw {
	struct list_head	mw_list;
	struct scatterlist	*mw_sg;
	int			mw_nents;
	enum dma_data_direction	mw_dir;
	unsigned long		mw_flags;
	union {
		struct rpcrdma_fmr	fmr;
		struct rpcrdma_frmr	frmr;
	};
	struct rpcrdma_xprt	*mw_xprt;
	u32			mw_handle;
	u32			mw_length;
	u64			mw_offset;
	struct list_head	mw_all;
};

/* mw_flags */
enum {
	RPCRDMA_MW_F_RI		= 1,
};

/*
 * struct rpcrdma_req -- structure central to the request/reply sequence.
 *
 * N of these are associated with a transport instance, and stored in
 * struct rpcrdma_buffer. N is the max number of outstanding requests.
 *
 * It includes pre-registered buffer memory for send AND recv.
 * The recv buffer, however, is not owned by this structure, and
 * is "donated" to the hardware when a recv is posted. When a
 * reply is handled, the recv buffer used is given back to the
 * struct rpcrdma_req associated with the request.
 *
 * In addition to the basic memory, this structure includes an array
 * of iovs for send operations. The reason is that the iovs passed to
 * ib_post_{send,recv} must not be modified until the work request
 * completes.
 */

/* Maximum number of page-sized "segments" per chunk list to be
 * registered or invalidated. The data segment count accommodates a
 * 1 MB payload that is not page-aligned (hence the extra segment);
 * the iov segments cover the xdr_buf head and tail.
 */
enum {
	RPCRDMA_MAX_IOV_SEGS	= 3,
	RPCRDMA_MAX_DATA_SEGS	= ((1 * 1024 * 1024) / PAGE_SIZE) + 1,
	RPCRDMA_MAX_SEGS	= RPCRDMA_MAX_DATA_SEGS +
				  RPCRDMA_MAX_IOV_SEGS,
};

struct rpcrdma_mr_seg {		/* chunk descriptors */
	u32		mr_len;		/* length of chunk or segment */
	struct page	*mr_page;	/* owning page, if any */
	char		*mr_offset;	/* kva if no page, else offset */
};

/* The Send SGE array is provisioned to send a maximum size
 * inline request:
 * - RPC-over-RDMA header
 * - xdr_buf head iovec
 * - RPCRDMA_MAX_INLINE bytes, in pages
 * - xdr_buf tail iovec
 *
 * The actual number of array elements consumed by each RPC
 * depends on the device's max_sge limit.
 */
enum {
	RPCRDMA_MIN_SEND_SGES = 3,
	RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
	RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
};

struct rpcrdma_buffer;		/* forward reference */
struct rpcrdma_req {
	struct list_head	rl_list;
	unsigned int		rl_mapped_sges;
	unsigned int		rl_connect_cookie;
	struct rpcrdma_buffer	*rl_buffer;
	struct rpcrdma_rep	*rl_reply;
	struct xdr_stream	rl_stream;
	struct xdr_buf		rl_hdrbuf;
	struct ib_send_wr	rl_send_wr;
	struct ib_sge		rl_send_sge[RPCRDMA_MAX_SEND_SGES];
	struct rpcrdma_regbuf	*rl_rdmabuf;	/* xprt header */
	struct rpcrdma_regbuf	*rl_sendbuf;	/* rq_snd_buf */
	struct rpcrdma_regbuf	*rl_recvbuf;	/* rq_rcv_buf */

	struct ib_cqe		rl_cqe;
	struct list_head	rl_all;
	bool			rl_backchannel;

	struct list_head	rl_registered;	/* registered segments */
	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
};

static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
	rqst->rq_xprtdata = req;
}

static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
{
	return rqst->rq_xprtdata;
}
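
/* A sketch of the intended pairing (the call sites shown here are
 * hypothetical): the transport stashes its per-request state in the
 * generic rpc_rqst when the buffer is allocated, then recovers it in
 * later transport methods:
 *
 *	rpcrdma_set_xprtdata(rqst, req);		// allocation path
 *	...
 *	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);	// send/free paths
 */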

static inline void
rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
{
	list_add_tail(&mw->mw_list, list);
}

static inline struct rpcrdma_mw *
rpcrdma_pop_mw(struct list_head *list)
{
	struct rpcrdma_mw *mw;

	mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
	list_del(&mw->mw_list);
	return mw;
}
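
/* Neither helper takes a lock of its own, and rpcrdma_pop_mw() must
 * not run on an empty list (list_first_entry() is undefined there).
 * A sketch of a caller, assuming rb_mwlock guards the rb_mws free
 * list as documented in struct rpcrdma_buffer below:
 *
 *	spin_lock(&buf->rb_mwlock);
 *	if (!list_empty(&buf->rb_mws))
 *		mw = rpcrdma_pop_mw(&buf->rb_mws);
 *	spin_unlock(&buf->rb_mwlock);
 */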

/*
 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory
 * for requests/replies, and client/server credits.
 *
 * One of these is associated with a transport instance
 */
struct rpcrdma_buffer {
	spinlock_t		rb_mwlock;	/* protect rb_mws list */
	struct list_head	rb_mws;
	struct list_head	rb_all;

	spinlock_t		rb_lock;	/* protect buf lists */
	int			rb_send_count, rb_recv_count;
	struct list_head	rb_send_bufs;
	struct list_head	rb_recv_bufs;
	u32			rb_max_requests;
	atomic_t		rb_credits;	/* most recent credit grant */

	u32			rb_bc_srv_max_requests;
	spinlock_t		rb_reqslock;	/* protect rb_allreqs */
	struct list_head	rb_allreqs;

	u32			rb_bc_max_requests;

	spinlock_t		rb_recovery_lock; /* protect rb_stale_mrs */
	struct list_head	rb_stale_mrs;
	struct delayed_work	rb_recovery_worker;
	struct delayed_work	rb_refresh_worker;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)

/*
 * Internal structure for transport instance creation. This
 * exists primarily for modularity.
 *
 * This data should be set with mount options
 */
struct rpcrdma_create_data_internal {
	struct sockaddr_storage	addr;	/* RDMA server address */
	unsigned int	max_requests;	/* max requests (slots) in flight */
	unsigned int	rsize;		/* mount rsize - max read hdr+data */
	unsigned int	wsize;		/* mount wsize - max write hdr+data */
	unsigned int	inline_rsize;	/* max non-rdma read data payload */
	unsigned int	inline_wsize;	/* max non-rdma write data payload */
	unsigned int	padding;	/* non-rdma write header padding */
};

/*
 * Statistics for RPCRDMA
 */
struct rpcrdma_stats {
	/* accessed when sending a call */
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;
	unsigned long long	total_rdma_request;

	/* rarely accessed error counters */
	unsigned long long	pullup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		mrs_recovered;
	unsigned long		mrs_orphaned;
	unsigned long		mrs_allocated;

	/* accessed when receiving a reply */
	unsigned long long	total_rdma_reply;
	unsigned long long	fixup_copy_count;
	unsigned long		local_inv_needed;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;
};

/*
 * Per-registration mode operations
 */
struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
	struct rpcrdma_mr_seg *
			(*ro_map)(struct rpcrdma_xprt *,
				  struct rpcrdma_mr_seg *, int, bool,
				  struct rpcrdma_mw **);
	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
					 struct list_head *);
	void		(*ro_unmap_safe)(struct rpcrdma_xprt *,
					 struct rpcrdma_req *, bool);
	void		(*ro_recover_mr)(struct rpcrdma_mw *);
	int		(*ro_open)(struct rpcrdma_ia *,
				   struct rpcrdma_ep *,
				   struct rpcrdma_create_data_internal *);
	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
	int		(*ro_init_mr)(struct rpcrdma_ia *,
				      struct rpcrdma_mw *);
	void		(*ro_release_mr)(struct rpcrdma_mw *);
	const char	*ro_displayname;
	const int	ro_send_w_inv_ok;
};

extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;

/*
 * RPCRDMA transport -- encapsulates the structures above for
 * integration with RPC.
 *
 * The contained structures are embedded, not pointers,
 * for convenience. This structure need not be visible externally.
 *
 * It is allocated and initialized during mount, and released
 * during unmount.
 */
struct rpcrdma_xprt {
	struct rpc_xprt		rx_xprt;
	struct rpcrdma_ia	rx_ia;
	struct rpcrdma_ep	rx_ep;
	struct rpcrdma_buffer	rx_buf;
	struct rpcrdma_create_data_internal rx_data;
	struct delayed_work	rx_connect_worker;
	struct rpcrdma_stats	rx_stats;
};

#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
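
/* A sketch of how a transport method recovers the rpcrdma_xprt that
 * embeds the generic rpc_xprt handed in by the RPC core (the function
 * shown is illustrative, not declared in this header):
 *
 *	static void example_close(struct rpc_xprt *xprt)
 *	{
 *		struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 *
 *		rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
 *	}
 */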

/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 enhances certain unaligned read/write performance.
 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;

/* This setting controls the hunt for a supported memory
 * registration strategy.
 */
extern unsigned int xprt_rdma_memreg_strategy;

/*
 * Interface Adapter calls - xprtrdma/verbs.c
 */
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);

/*
 * Endpoint calls - xprtrdma/verbs.c
 */
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
				struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_conn_func(struct rpcrdma_ep *ep);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);

int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);

/*
 * Buffer calls - xprtrdma/verbs.c
 */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);

struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);

void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);

struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
					    gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);

static inline bool
rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
{
	return rb->rg_device != NULL;
}

static inline bool
rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (likely(rpcrdma_regbuf_is_mapped(rb)))
		return true;
	return __rpcrdma_dma_map_regbuf(ia, rb);
}
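
/* Combined with the rdmab_* accessors above, a typical pattern for
 * using a regbuf might look like this (a sketch; error handling
 * elided, sge is a caller-owned struct ib_sge):
 *
 *	if (!rpcrdma_dma_map_regbuf(ia, rb))
 *		return false;
 *	sge->addr   = rdmab_addr(rb);
 *	sge->length = rdmab_length(rb);
 *	sge->lkey   = rdmab_lkey(rb);
 *
 * The likely() fast path means a regbuf is DMA-mapped at most once:
 * __rpcrdma_dma_map_regbuf() records rg_device on success, which is
 * exactly what rpcrdma_regbuf_is_mapped() tests.
 */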

int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);

int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void);

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */
static inline enum dma_data_direction
rpcrdma_data_dir(bool writing)
{
	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}
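
/* "writing" means the device will write into the memory being mapped
 * (for example, a payload the server returns via RDMA Write), hence
 * DMA_FROM_DEVICE; memory the device reads and sends goes the other
 * way. A sketch of a registration path using it, built from the
 * structures above (the call site is hypothetical):
 *
 *	mw->mw_dir = rpcrdma_data_dir(writing);
 *	dma_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg,
 *				  mw->mw_nents, mw->mw_dir);
 */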

/*
 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
 */

enum rpcrdma_chunktype {
	rpcrdma_noch = 0,
	rpcrdma_readch,
	rpcrdma_areadch,
	rpcrdma_writech,
	rpcrdma_replych
};

bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
			       u32, struct xdr_buf *, enum rpcrdma_chunktype);
void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_reply_handler(struct work_struct *work);

static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
	xdr->head[0].iov_len = len;
	xdr->len = len;
}

/* RPC/RDMA module init - xprtrdma/transport.c
 */
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void rpcrdma_connect_worker(struct work_struct *work);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);

/* Backchannel calls - xprtrdma/backchannel.c
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

extern struct xprt_class xprt_rdma_bc;

#endif				/* _LINUX_SUNRPC_XPRT_RDMA_H */