1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
41#define _LINUX_SUNRPC_XPRT_RDMA_H
42
43#include <linux/wait.h>
44#include <linux/spinlock.h>
45#include <linux/atomic.h>
46#include <linux/workqueue.h>
47
48#include <rdma/rdma_cm.h>
49#include <rdma/ib_verbs.h>
50
51#include <linux/sunrpc/clnt.h>
52#include <linux/sunrpc/rpc_rdma.h>
53#include <linux/sunrpc/xprtrdma.h>
54
/*
 * Connection establishment and re-establishment tunables.
 * NOTE(review): RDMA_RESOLVE_TIMEOUT is presumably in milliseconds
 * (rdma_cm convention) and the RPCRDMA_* values are in jiffies (HZ
 * multiples) -- confirm against the call sites in verbs.c/transport.c.
 */
#define RDMA_RESOLVE_TIMEOUT	(5000)		/* address/route resolution */
#define RDMA_CONNECT_RETRY_MAX	(2)		/* connect retries before giving up */

#define RPCRDMA_BIND_TO		(60U * HZ)	/* bind timeout */
#define RPCRDMA_INIT_REEST_TO	(5U * HZ)	/* initial reconnect backoff */
#define RPCRDMA_MAX_REEST_TO	(30U * HZ)	/* cap on reconnect backoff */
#define RPCRDMA_IDLE_DISC_TO	(5U * 60 * HZ)	/* idle disconnect: 5 minutes */
62
63
64
65
/*
 * struct rpcrdma_ia -- "interface adapter": per-connection RDMA device,
 * protection domain, and connection-manager state.
 */
struct rpcrdma_ia {
	const struct rpcrdma_memreg_ops	*ri_ops; /* memreg strategy: one of the
						  * fmr/frwr/physical ops tables
						  * declared below */
	rwlock_t		ri_qplock;	/* NOTE(review): presumably guards
						 * QP replacement on reconnect --
						 * confirm in verbs.c */
	struct ib_device	*ri_device;
	struct rdma_cm_id	*ri_id;		/* RDMA CM connection identity */
	struct ib_pd		*ri_pd;		/* protection domain */
	struct ib_mr		*ri_dma_mr;	/* DMA MR, when the memreg mode uses one */
	struct completion	ri_done;	/* completed when ri_async_rc is valid */
	int			ri_async_rc;	/* result delivered with ri_done */
	unsigned int		ri_max_frmr_depth; /* max pages per FRMR registration */
	struct ib_qp_attr	ri_qp_attr;
	struct ib_qp_init_attr	ri_qp_init_attr;
};
79
80
81
82
83
/*
 * struct rpcrdma_ep -- RDMA endpoint: per-connection QP/CQ and
 * connection state.
 */
struct rpcrdma_ep {
	atomic_t		rep_cqcount;	/* countdown; see INIT_CQCOUNT/DECR_CQCOUNT */
	int			rep_cqinit;	/* refill value for rep_cqcount */
	int			rep_connected;	/* connection state, waited on via
						 * rep_connect_wait */
	struct ib_qp_init_attr	rep_attr;	/* attributes used to create the QP */
	wait_queue_head_t	rep_connect_wait;
	struct rdma_conn_param	rep_remote_cma;	/* CM parameters for the remote peer */
	struct sockaddr_storage	rep_remote_addr;
	struct delayed_work	rep_connect_worker; /* deferred (re)connect work */
};

/* Reset the endpoint's completion countdown to its initial value. */
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
/* Decrement the countdown and return the new value; callers act on <= 0. */
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
97
98
99
100
101
/*
 * Extra resources reserved for backchannel (server-initiated) requests;
 * zero when backchannel support is compiled out. Name suggests these
 * are additional work requests -- confirm at the posting sites.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS		(8)
#else
#define RPCRDMA_BACKWARD_WRS		(0)
#endif
107
108
109
110
111
112
113
114struct rpcrdma_regbuf {
115 size_t rg_size;
116 struct rpcrdma_req *rg_owner;
117 struct ib_sge rg_iov;
118 __be32 rg_base[0] __attribute__ ((aligned(256)));
119};
120
121static inline u64
122rdmab_addr(struct rpcrdma_regbuf *rb)
123{
124 return rb->rg_iov.addr;
125}
126
127static inline u32
128rdmab_length(struct rpcrdma_regbuf *rb)
129{
130 return rb->rg_iov.length;
131}
132
133static inline u32
134rdmab_lkey(struct rpcrdma_regbuf *rb)
135{
136 return rb->rg_iov.lkey;
137}
138
139static inline struct rpcrdma_msg *
140rdmab_to_msg(struct rpcrdma_regbuf *rb)
141{
142 return (struct rpcrdma_msg *)rb->rg_base;
143}
144
/* Default allocation flags: no I/O recursion, no allocation-failure warning. */
#define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)

/*
 * Data-segment limits: enough page-sized segments for 1MB of payload,
 * plus two extra segments -- presumably for the xdr_buf head and tail,
 * confirm against the marshaling code.
 */
#define RPCRDMA_MAX_DATA_SEGS	((1 * 1024 * 1024) / PAGE_SIZE)
#define RPCRDMA_MAX_SEGS 	(RPCRDMA_MAX_DATA_SEGS + 2)
166
struct rpcrdma_buffer;

/*
 * struct rpcrdma_rep -- one receive buffer and the metadata needed to
 * process the reply it carries.
 */
struct rpcrdma_rep {
	struct ib_cqe		rr_cqe;		/* receive completion hook */
	unsigned int		rr_len;		/* bytes received; see RPCRDMA_BAD_LEN */
	struct ib_device	*rr_device;
	struct rpcrdma_xprt	*rr_rxprt;	/* owning transport */
	struct work_struct	rr_work;	/* deferred reply processing --
						 * TODO confirm handler */
	struct list_head	rr_list;	/* link on rb_recv_bufs */
	struct rpcrdma_regbuf	*rr_rdmabuf;	/* the registered receive buffer */
};

/* rr_len value marking a reply that could not be handled. */
#define RPCRDMA_BAD_LEN		(~0U)
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/* Registration life cycle of one FRMR. */
enum rpcrdma_frmr_state {
	FRMR_IS_INVALID,
	FRMR_IS_VALID,
	FRMR_IS_STALE,		/* NOTE(review): presumably set after a flushed
				 * or failed completion -- confirm in frwr_ops */
};

/* Per-MR state for the FRWR registration mode. */
struct rpcrdma_frmr {
	struct scatterlist		*sg;		/* pages being registered */
	int				sg_nents;	/* entries of sg in use */
	struct ib_mr			*fr_mr;
	struct ib_cqe			fr_cqe;		/* registration/invalidate
							 * completion hook */
	enum rpcrdma_frmr_state		fr_state;
	struct completion		fr_linv_done;	/* local-invalidate done --
							 * TODO confirm waiter */
	struct work_struct		fr_work;
	struct rpcrdma_xprt		*fr_xprt;
	union {				/* a work request is either a REG or an
					 * INV; never both at once */
		struct ib_reg_wr	fr_regwr;
		struct ib_send_wr	fr_invwr;
	};
};

/* Per-MR state for the FMR registration mode. */
struct rpcrdma_fmr {
	struct ib_fmr		*fmr;
	u64			*physaddrs;
};

/*
 * struct rpcrdma_mw -- one memory-registration object, holding either
 * FMR or FRWR state depending on the active strategy.
 */
struct rpcrdma_mw {
	union {
		struct rpcrdma_fmr	fmr;
		struct rpcrdma_frmr	frmr;
	};
	struct list_head	mw_list;	/* link on the free list (rb_mws) */
	struct list_head	mw_all;		/* link on the all-MWs list (rb_all) */
};
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*
 * struct rpcrdma_mr_seg -- one segment of RPC data to be registered
 * with the device and described to the peer as part of a chunk.
 */
struct rpcrdma_mr_seg {
	struct rpcrdma_mw	*rl_mw;		/* MW registered for this segment */
	u64			mr_base;	/* registration result: base */
	u32			mr_rkey;	/* registration result: rkey */
	u32			mr_len;		/* length of segment */
	int			mr_nsegs;	/* segments coalesced under this MW --
						 * TODO confirm */
	enum dma_data_direction	mr_dir;		/* set by rpcrdma_map_one() */
	dma_addr_t		mr_dma;		/* DMA address from mapping */
	size_t			mr_dmalen;	/* bytes mapped (== mr_len at map time) */
	struct page		*mr_page;	/* page-based source, or NULL ... */
	char			*mr_offset;	/* ... then kva-based source */
};

/* Number of sge entries available for a send: header + payload. */
#define RPCRDMA_MAX_IOVS	(2)

/*
 * struct rpcrdma_req -- send-side state for one outstanding RPC.
 */
struct rpcrdma_req {
	struct list_head	rl_free;	/* link on the free list (rb_send_bufs) */
	unsigned int		rl_niovs;	/* entries of rl_send_iov in use */
	unsigned int		rl_nchunks;	/* entries of rl_segments in use */
	unsigned int		rl_connect_cookie; /* detects reconnects between
						    * marshal and send -- TODO confirm */
	struct rpcrdma_buffer	*rl_buffer;	/* owning buffer pool */
	struct rpcrdma_rep	*rl_reply;	/* matched reply, when one arrives */
	struct ib_sge		rl_send_iov[RPCRDMA_MAX_IOVS];
	struct rpcrdma_regbuf	*rl_rdmabuf;	/* RPC/RDMA header buffer */
	struct rpcrdma_regbuf	*rl_sendbuf;	/* RPC call payload buffer */
	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];

	struct ib_cqe		rl_cqe;		/* send completion hook */
	struct list_head	rl_all;		/* link on rb_allreqs */
	bool			rl_backchannel;	/* request belongs to the backchannel */
};
284
285static inline struct rpcrdma_req *
286rpcr_to_rdmar(struct rpc_rqst *rqst)
287{
288 void *buffer = rqst->rq_buffer;
289 struct rpcrdma_regbuf *rb;
290
291 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base);
292 return rb->rg_owner;
293}
294
295
296
297
298
299
300
/*
 * struct rpcrdma_buffer -- per-transport pools of pre-allocated
 * requests, replies, and memory-registration objects.
 */
struct rpcrdma_buffer {
	spinlock_t		rb_mwlock;	/* presumably protects rb_mws/rb_all --
						 * confirm in verbs.c */
	struct list_head	rb_mws;		/* free rpcrdma_mw objects */
	struct list_head	rb_all;		/* every allocated rpcrdma_mw */
	char			*rb_pool;

	spinlock_t		rb_lock;	/* presumably protects the two buffer
						 * lists below -- confirm */
	struct list_head	rb_send_bufs;	/* free rpcrdma_req objects */
	struct list_head	rb_recv_bufs;	/* free rpcrdma_rep objects */
	u32			rb_max_requests;
	atomic_t		rb_credits;	/* NOTE(review): looks like RPC/RDMA
						 * flow-control credits -- confirm */

	u32			rb_bc_srv_max_requests;
	spinlock_t		rb_reqslock;	/* presumably protects rb_allreqs */
	struct list_head	rb_allreqs;	/* every allocated rpcrdma_req */

	u32			rb_bc_max_requests;
};
/* Map a buffer pool back to its transport's interface adapter. */
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
320
321
322
323
324
325
326
/*
 * struct rpcrdma_create_data_internal -- parameters captured when the
 * transport is created.
 */
struct rpcrdma_create_data_internal {
	struct sockaddr_storage	addr;		/* server address */
	unsigned int	max_requests;		/* max outstanding requests */
	unsigned int	rsize;			/* read size */
	unsigned int	wsize;			/* write size */
	unsigned int	inline_rsize;		/* max inline read payload */
	unsigned int	inline_wsize;		/* max inline write payload */
	unsigned int	padding;		/* inline pad value */
};

/* Largest reply that can be received inline (no read chunk needed). */
#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
	(rpcx_to_rdmad(rq->rq_xprt).inline_rsize)

/* Largest call that can be sent inline (no write/position chunk needed). */
#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
	(rpcx_to_rdmad(rq->rq_xprt).inline_wsize)

#define RPCRDMA_INLINE_PAD_VALUE(rq)\
	rpcx_to_rdmad(rq->rq_xprt).padding
345
346
347
348
/*
 * struct rpcrdma_stats -- per-transport counters, presumably surfaced
 * through xprt_rdma_print_stats() (declared below).
 */
struct rpcrdma_stats {
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;

	unsigned long long	total_rdma_request;
	unsigned long long	total_rdma_reply;

	unsigned long long	pullup_copy_count;
	unsigned long long	fixup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;	/* backchannel calls received */
};
365
366
367
368
struct rpcrdma_xprt;

/*
 * struct rpcrdma_memreg_ops -- per-strategy memory registration
 * operations; ri_ops in struct rpcrdma_ia selects one of the three
 * implementations declared below.
 */
struct rpcrdma_memreg_ops {
	int		(*ro_map)(struct rpcrdma_xprt *,
				  struct rpcrdma_mr_seg *, int, bool); /* register segments */
	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
					 struct rpcrdma_req *);	/* invalidate, waiting
								 * for completion */
	int		(*ro_unmap)(struct rpcrdma_xprt *,
				    struct rpcrdma_mr_seg *);	/* invalidate one segment */
	int		(*ro_open)(struct rpcrdma_ia *,
				   struct rpcrdma_ep *,
				   struct rpcrdma_create_data_internal *); /* per-connection setup */
	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);	/* max pages per registration */
	int		(*ro_init)(struct rpcrdma_xprt *);	/* allocate MWs */
	void		(*ro_destroy)(struct rpcrdma_buffer *);	/* release MWs */
	const char	*ro_displayname;			/* for diagnostics */
};

extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
389
390
391
392
393
394
395
396
397
398
399
/*
 * struct rpcrdma_xprt -- one RPC-over-RDMA transport instance. Embeds
 * the generic rpc_xprt plus the adapter, endpoint, buffer pools,
 * creation parameters, and counters defined above.
 */
struct rpcrdma_xprt {
	struct rpc_xprt		rx_xprt;	/* generic transport; must allow
						 * container_of in rpcx_to_rdmax */
	struct rpcrdma_ia	rx_ia;
	struct rpcrdma_ep	rx_ep;
	struct rpcrdma_buffer	rx_buf;
	struct rpcrdma_create_data_internal rx_data;
	struct delayed_work	rx_connect_worker;
	struct rpcrdma_stats	rx_stats;
};

/* Recover the rpcrdma_xprt from an embedded rpc_xprt. */
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
/* Shorthand for a transport's creation parameters. */
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
412
413
414
415
extern int xprt_rdma_pad_optimize;	/* module-scope tunable */

/*
 * Interface adapter open/close.
 */
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
void rpcrdma_ia_close(struct rpcrdma_ia *);

/*
 * Endpoint lifetime, connection management, and posting of work
 * requests.
 */
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
				struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);

int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_rep *);

/*
 * Buffer-pool lifetime and element get/put.
 */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);

struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);

/* Allocate/free a DMA-mapped rpcrdma_regbuf. */
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
					    size_t, gfp_t);
void rpcrdma_free_regbuf(struct rpcrdma_ia *,
			 struct rpcrdma_regbuf *);

unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);

/* Workqueue setup/teardown for FRWR recovery and receive handling. */
int frwr_alloc_recovery_wq(void);
void frwr_destroy_recovery_wq(void);

int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void);
467
468
469
470
471
472void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);
473
474static inline enum dma_data_direction
475rpcrdma_data_dir(bool writing)
476{
477 return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
478}
479
480static inline void
481rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
482 enum dma_data_direction direction)
483{
484 seg->mr_dir = direction;
485 seg->mr_dmalen = seg->mr_len;
486
487 if (seg->mr_page)
488 seg->mr_dma = ib_dma_map_page(device,
489 seg->mr_page, offset_in_page(seg->mr_offset),
490 seg->mr_dmalen, seg->mr_dir);
491 else
492 seg->mr_dma = ib_dma_map_single(device,
493 seg->mr_offset,
494 seg->mr_dmalen, seg->mr_dir);
495
496 if (ib_dma_mapping_error(device, seg->mr_dma))
497 rpcrdma_mapping_error(seg);
498}
499
500static inline void
501rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
502{
503 if (seg->mr_page)
504 ib_dma_unmap_page(device,
505 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
506 else
507 ib_dma_unmap_single(device,
508 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
509}
510
511
512
513
/*
 * Connection-event and reply-processing entry points.
 */
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);

/* Marshal an RPC call into an RPC/RDMA header. */
int rpcrdma_marshal_req(struct rpc_rqst *);

/*
 * Transport-level entry points.
 */
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);

/*
 * Backchannel support, present only with CONFIG_SUNRPC_BACKCHANNEL.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif

extern struct xprt_class xprt_rdma_bc;
545
546#endif
547