1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
43#define _LINUX_SUNRPC_XPRT_RDMA_H
44
45#include <linux/wait.h>
46#include <linux/spinlock.h>
47#include <linux/atomic.h>
48#include <linux/kref.h>
49#include <linux/workqueue.h>
50#include <linux/llist.h>
51
52#include <rdma/rdma_cm.h>
53#include <rdma/ib_verbs.h>
54
55#include <linux/sunrpc/clnt.h>
56#include <linux/sunrpc/rpc_rdma_cid.h>
57#include <linux/sunrpc/rpc_rdma.h>
58#include <linux/sunrpc/xprtrdma.h>
59
/* RDMA connection-manager setup parameters */
#define RDMA_RESOLVE_TIMEOUT (5000)	/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2)	/* retries if not same device */

/* RPC-over-RDMA transport timeouts, in jiffies */
#define RPCRDMA_BIND_TO (60U * HZ)	/* bind timeout */
#define RPCRDMA_INIT_REEST_TO (5U * HZ)	/* initial reestablish timeout */
#define RPCRDMA_MAX_REEST_TO (30U * HZ)	/* max reestablish timeout */
#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)	/* idle disconnect timeout */
67
68
69
70
/*
 * struct rpcrdma_ep -- RDMA endpoint state, one per transport instance.
 *
 * Bundles the RDMA CM connection, protection domain, QP attributes,
 * and the connection parameters negotiated with the peer.
 */
struct rpcrdma_ep {
	struct kref re_kref;		/* endpoint lifetime */
	struct rdma_cm_id *re_id;	/* connection identifier */
	struct ib_pd *re_pd;		/* protection domain */
	unsigned int re_max_rdma_segs;
	unsigned int re_max_fr_depth;	/* max pages per fast registration */
	bool re_implicit_roundup;
	enum ib_mr_type re_mrtype;
	struct completion re_done;	/* signals async CM event handled */
	unsigned int re_send_count;	/* Sends posted since last signal */
	unsigned int re_send_batch;	/* Sends between signaled WRs */
	unsigned int re_max_inline_send;
	unsigned int re_max_inline_recv;
	int re_async_rc;		/* rc from async CM events */
	int re_connect_status;
	/* NOTE(review): presumably positive when connected, negative errno
	 * on failure -- confirm against the connect path in verbs.c */
	atomic_t re_receiving;		/* Receive-posting in progress */
	atomic_t re_force_disconnect;
	struct ib_qp_init_attr re_attr;
	wait_queue_head_t re_connect_wait;	/* waiters for connect result */
	struct rpc_xprt *re_xprt;
	struct rpcrdma_connect_private
				re_cm_private;	/* RPC-RDMA CM private data */
	struct rdma_conn_param re_remote_cma;
	int re_receive_count;		/* Receives currently posted */
	unsigned int re_max_requests;	/* depends on device caps */
	unsigned int re_inline_send;	/* negotiated with peer */
	unsigned int re_inline_recv;	/* negotiated with peer */

	atomic_t re_completion_ids;	/* generator for completion IDs */
};
101
102
103
104
105
106
107
/* Work Requests reserved for backchannel operations (zero when the
 * backchannel is not configured).
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS (32)
#else
#define RPCRDMA_BACKWARD_WRS (0)
#endif
113
114
115
116
/*
 * struct rpcrdma_regbuf -- a kmalloc'd buffer (rg_data) together with
 * the ib_sge (addr/length/lkey) that describes it to the device.
 * rg_device is non-NULL only while the buffer is DMA-mapped.
 */
struct rpcrdma_regbuf {
	struct ib_sge rg_iov;		/* addr/length/lkey for the device */
	struct ib_device *rg_device;	/* device mapped for; NULL if unmapped */
	enum dma_data_direction rg_direction;
	void *rg_data;			/* the backing buffer itself */
};
123
124static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
125{
126 return rb->rg_iov.addr;
127}
128
129static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
130{
131 return rb->rg_iov.length;
132}
133
134static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
135{
136 return rb->rg_iov.lkey;
137}
138
139static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
140{
141 return rb->rg_device;
142}
143
144static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
145{
146 return rb->rg_data;
147}
148
/* Default allocation flags: GFP_NOIO avoids recursing into I/O during
 * memory reclaim; __GFP_NOWARN suppresses allocation-failure warnings.
 */
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/* Maximum number of chunk segments accommodated in a transport header
 * (NOTE(review): confirm against the marshaling code in rpc_rdma.c).
 */
enum {
	RPCRDMA_MAX_HDR_SEGS = 16,
};
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/*
 * struct rpcrdma_rep -- state needed to receive and complete an RPC
 * Reply asynchronously: the Receive buffer and WR, decoded header
 * fields (xid/vers/proc), receive status, and back-references to the
 * transport and the rpc_rqst being answered.
 */
struct rpcrdma_rep {
	struct ib_cqe rr_cqe;		/* Receive completion handler */
	struct rpc_rdma_cid rr_cid;	/* completion ID for tracing */

	__be32 rr_xid;			/* from the RPC-over-RDMA header */
	__be32 rr_vers;
	__be32 rr_proc;
	int rr_wc_flags;		/* flags from the work completion */
	u32 rr_inv_rkey;		/* rkey remotely invalidated, if any */
	bool rr_temp;			/* temporary rep; not to be reused */
	struct rpcrdma_regbuf *rr_rdmabuf;	/* the receive buffer */
	struct rpcrdma_xprt *rr_rxprt;	/* owning transport */
	struct rpc_rqst *rr_rqst;	/* matched RPC request */
	struct xdr_buf rr_hdrbuf;	/* for decoding the header */
	struct xdr_stream rr_stream;
	struct llist_node rr_node;	/* link on rb_free_reps */
	struct ib_recv_wr rr_recv_wr;
	struct list_head rr_all;	/* link on rb_all_reps */
};
204
205
206
207
208
209
210
/* Maximum number of Receive WRs chained together when replenishing
 * posted Receives.
 */
enum {
	RPCRDMA_MAX_RECV_BATCH = 7,
};
214
215
216
/*
 * struct rpcrdma_sendctx -- per-Send state: the completion handler,
 * the owning request, and a flexible array of SGEs to DMA-unmap when
 * the Send completes (sc_unmap_count of them are mapped).
 */
struct rpcrdma_req;
struct rpcrdma_sendctx {
	struct ib_cqe sc_cqe;		/* Send completion handler */
	struct rpc_rdma_cid sc_cid;	/* completion ID for tracing */
	struct rpcrdma_req *sc_req;	/* request this Send belongs to */
	unsigned int sc_unmap_count;	/* number of mapped SGEs below */
	struct ib_sge sc_sges[];	/* flexible array of Send SGEs */
};
225
226
227
228
229
230
231
/*
 * struct rpcrdma_mr -- state for one externally-registered memory
 * region: the ib_mr itself, the scatterlist it maps, and the work
 * requests used to register and invalidate it.
 */
struct rpcrdma_req;
struct rpcrdma_mr {
	struct list_head mr_list;	/* link on free/registered lists */
	struct rpcrdma_req *mr_req;	/* request currently using this MR */

	struct ib_mr *mr_ibmr;
	struct ib_device *mr_device;
	struct scatterlist *mr_sg;	/* segments backing this MR */
	int mr_nents;			/* entries mapped in mr_sg */
	enum dma_data_direction mr_dir;
	struct ib_cqe mr_cqe;		/* reg/inv completion handler */
	struct completion mr_linv_done;	/* signals local inv complete */
	union {				/* WR storage: registration or */
		struct ib_reg_wr mr_regwr;	/* invalidation, never both */
		struct ib_send_wr mr_invwr;
	};
	struct rpcrdma_xprt *mr_xprt;	/* owning transport */
	u32 mr_handle;			/* advertised rkey */
	u32 mr_length;			/* advertised length */
	u64 mr_offset;			/* advertised offset */
	struct list_head mr_all;	/* link on rb_all_mrs */
	struct rpc_rdma_cid mr_cid;	/* completion ID for tracing */
};
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/* Segment provisioning: enough data segments for a 1MB payload at
 * PAGE_SIZE granularity (plus one for misalignment), plus a few iovec
 * segments for the head and tail of the xdr_buf.
 */
enum {
	RPCRDMA_MAX_IOV_SEGS = 3,
	RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1,
	RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS +
			   RPCRDMA_MAX_IOV_SEGS,
};
283
284
/* struct rpcrdma_mr_seg -- one page-sized piece of a payload to be
 * registered: a page, a byte length, and an offset.
 */
struct rpcrdma_mr_seg {
	u32 mr_len;		/* length of this segment */
	struct page *mr_page;	/* underlying struct page */
	u64 mr_offset;		/* offset into the page */
};
290
291
292
293
294
295
296
297
298
299
300
/* Send SGE provisioning: one SGE for the transport header, one for the
 * head iovec, enough page SGEs for RPCRDMA_MAX_INLINE bytes of pages,
 * and one for the tail iovec. The minimum is header + head + tail.
 */
enum {
	RPCRDMA_MIN_SEND_SGES = 3,
	RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
	RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
};
306
/*
 * struct rpcrdma_req -- state central to one RPC Call/Reply exchange.
 * Embeds the generic rpc_rqst (rl_slot), the Send WR and buffers for
 * marshaling the Call, and the MRs registered on its behalf.
 */
struct rpcrdma_buffer;
struct rpcrdma_req {
	struct list_head rl_list;	/* link on rb_send_bufs */
	struct rpc_rqst rl_slot;	/* embedded generic request */
	struct rpcrdma_rep *rl_reply;	/* matched Reply, when it arrives */
	struct xdr_stream rl_stream;	/* for encoding the header */
	struct xdr_buf rl_hdrbuf;
	struct ib_send_wr rl_wr;	/* the Send work request */
	struct rpcrdma_sendctx *rl_sendctx;
	struct rpcrdma_regbuf *rl_rdmabuf;	/* transport header buffer */
	struct rpcrdma_regbuf *rl_sendbuf;	/* RPC Call buffer */
	struct rpcrdma_regbuf *rl_recvbuf;	/* RPC Reply buffer */

	struct list_head rl_all;	/* link on rb_allreqs */
	struct kref rl_kref;		/* pins req until Send completes */

	struct list_head rl_free_mrs;	/* MRs held for this request */
	struct list_head rl_registered;	/* MRs currently registered */
	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
327
/* Map a generic rpc_rqst back to its containing rpcrdma_req
 * (rl_slot is embedded in struct rpcrdma_req).
 */
static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
	return container_of(rqst, struct rpcrdma_req, rl_slot);
}
333
/* Add @mr to the head of @list. Paired with rpcrdma_mr_pop(), which
 * removes from the head, so recently pushed MRs are reused first.
 */
static inline void
rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
	list_add(&mr->mr_list, list);
}
339
340static inline struct rpcrdma_mr *
341rpcrdma_mr_pop(struct list_head *list)
342{
343 struct rpcrdma_mr *mr;
344
345 mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list);
346 if (mr)
347 list_del_init(&mr->mr_list);
348 return mr;
349}
350
351
352
353
354
355
356
/*
 * struct rpcrdma_buffer -- per-transport pools of pre-allocated
 * requests, replies, MRs, and send contexts, plus credit state.
 */
struct rpcrdma_buffer {
	spinlock_t rb_lock;		/* protects the lists below */
	struct list_head rb_send_bufs;	/* free rpcrdma_reqs */
	struct list_head rb_mrs;	/* free MRs */

	unsigned long rb_sc_head;	/* sendctx ring: producer index */
	unsigned long rb_sc_tail;	/* sendctx ring: consumer index */
	unsigned long rb_sc_last;	/* sendctx ring: last slot */
	struct rpcrdma_sendctx **rb_sc_ctxs;

	struct list_head rb_allreqs;	/* every allocated req */
	struct list_head rb_all_mrs;	/* every allocated MR */
	struct list_head rb_all_reps;	/* every allocated rep */

	struct llist_head rb_free_reps;	/* lock-free list of free reps */

	__be32 rb_max_requests;		/* wire-format credit limit */
	u32 rb_credits;			/* most recent credit grant */

	u32 rb_bc_srv_max_requests;	/* backchannel limits */
	u32 rb_bc_max_requests;

	struct work_struct rb_refresh_worker;	/* replenishes MRs */
};
381
382
383
384
/*
 * struct rpcrdma_stats -- transport statistics, grouped by access
 * pattern (send path, rare error counters, receive path).
 */
struct rpcrdma_stats {
	/* accessed when sending a call */
	unsigned long read_chunk_count;
	unsigned long write_chunk_count;
	unsigned long reply_chunk_count;
	unsigned long long total_rdma_request;

	/* rarely accessed error counters */
	unsigned long long pullup_copy_count;
	unsigned long hardway_register_count;
	unsigned long failed_marshal_count;
	unsigned long bad_reply_count;
	unsigned long mrs_recycled;
	unsigned long mrs_orphaned;
	unsigned long mrs_allocated;
	unsigned long empty_sendctx_q;

	/* accessed when receiving a reply */
	unsigned long long total_rdma_reply;
	unsigned long long fixup_copy_count;
	unsigned long reply_waits_for_send;
	unsigned long local_inv_needed;
	unsigned long nomsg_call_count;
	unsigned long bcall_count;
};
410
411
412
413
414
415
416
417
418
419
420
/*
 * struct rpcrdma_xprt -- the RPC-over-RDMA transport instance.
 * Embeds the generic rpc_xprt and aggregates the endpoint, buffer
 * pools, and statistics defined above.
 */
struct rpcrdma_xprt {
	struct rpc_xprt rx_xprt;	/* embedded generic transport */
	struct rpcrdma_ep *rx_ep;	/* RDMA endpoint */
	struct rpcrdma_buffer rx_buf;	/* pre-allocated resources */
	struct delayed_work rx_connect_worker;	/* (re)connect work */
	struct rpc_timeout rx_timeout;
	struct rpcrdma_stats rx_stats;
};

/* Map an embedded rpc_xprt back to its containing rpcrdma_xprt */
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
431
432static inline const char *
433rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
434{
435 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
436}
437
438static inline const char *
439rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
440{
441 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
442}
443
444
445
446
/* Transport tunables, defined elsewhere in xprtrdma.
 * NOTE(review): presumably exposed as module parameters -- confirm in
 * transport.c.
 */
extern int xprt_rdma_pad_optimize;

/* Memory registration strategy selector */
extern unsigned int xprt_rdma_memreg_strategy;
453
454
455
456
/*
 * Endpoint calls -- connection setup and teardown
 */
void rpcrdma_force_disconnect(struct rpcrdma_ep *ep);
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);

void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
463
464
465
466
467struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
468 gfp_t flags);
469int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
470void rpcrdma_req_destroy(struct rpcrdma_req *req);
471int rpcrdma_buffer_create(struct rpcrdma_xprt *);
472void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
473struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
474
475struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
476void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
477
478struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
479void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
480 struct rpcrdma_req *req);
481void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
482void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
483
484bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
485 gfp_t flags);
486bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
487 struct rpcrdma_regbuf *rb);
488
489
490
491
492
493
494static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
495{
496 return rb->rg_device != NULL;
497}
498
499
500
501
502
503
504
505
506static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
507 struct rpcrdma_regbuf *rb)
508{
509 if (likely(rpcrdma_regbuf_is_mapped(rb)))
510 return true;
511 return __rpcrdma_regbuf_dma_map(r_xprt, rb);
512}
513
514
515
516
517
518static inline enum dma_data_direction
519rpcrdma_data_dir(bool writing)
520{
521 return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
522}
523
524
525
/*
 * Memory registration calls -- FRWR (Fast Registration Work Request)
 * memory registration mode
 */
void frwr_reset(struct rpcrdma_req *req);
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
void frwr_mr_release(struct rpcrdma_mr *mr);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_mr_seg *seg,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr *mr);
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
538
539
540
541
542
/*
 * enum rpcrdma_chunktype -- how an outgoing RPC Call's payload is
 * conveyed, as decided during marshaling.
 */
enum rpcrdma_chunktype {
	rpcrdma_noch = 0,	/* no chunk; entirely inline */
	rpcrdma_noch_pullup,	/* inline; args pulled up into send buffer */
	rpcrdma_noch_mapped,	/* inline; args DMA-mapped in place */
	rpcrdma_readch,		/* Read chunk for the argument payload */
	rpcrdma_areadch,	/* entire Call via a Read chunk */
	rpcrdma_writech,	/* Write chunk for the result payload */
	rpcrdma_replych		/* entire Reply via a Reply chunk */
};
552
/*
 * RPC/RDMA protocol calls -- marshaling and reply handling
 */
int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
			      struct rpcrdma_req *req, u32 hdrlen,
			      struct xdr_buf *xdr,
			      enum rpcrdma_chunktype rtype);
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
564
565static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
566{
567 xdr->head[0].iov_len = len;
568 xdr->len = len;
569}
570
571
572
/*
 * RPC transport layer -- registration and administrative entry points
 */
extern unsigned int xprt_rdma_max_inline_read;
extern unsigned int xprt_rdma_max_inline_write;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void xprt_rdma_close(struct rpc_xprt *xprt);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
581
582
583
/*
 * Backchannel calls -- only built when CONFIG_SUNRPC_BACKCHANNEL is set.
 *
 * Parameter names added to all prototypes: they document each
 * declaration in place and do not affect the ABI or any caller.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt);
unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *r_xprt, unsigned int count);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
			     struct rpcrdma_rep *rep);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst);
void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs);
#endif

extern struct xprt_class xprt_rdma_bc;
596
597#endif
598