#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
#define _LINUX_SUNRPC_XPRT_RDMA_H

#include <linux/wait.h>			/* wait_queue_head_t, etc */
#include <linux/spinlock.h>		/* spinlock_t, etc */
#include <linux/atomic.h>		/* atomic_t, etc */
#include <linux/workqueue.h>		/* struct work_struct */

#include <rdma/rdma_cm.h>		/* RDMA connection api */
#include <rdma/ib_verbs.h>		/* RDMA verbs api */

#include <linux/sunrpc/clnt.h>		/* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h>	/* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h>	/* xprt parameters */

#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */

/*
 * Interface Adapter -- one per transport instance
 */
struct rpcrdma_ia {
	const struct rpcrdma_memreg_ops	*ri_ops;
	rwlock_t		ri_qplock;
	struct ib_device	*ri_device;
	struct rdma_cm_id	*ri_id;
	struct ib_pd		*ri_pd;
	struct ib_mr		*ri_dma_mr;
	struct completion	ri_done;
	int			ri_async_rc;
	unsigned int		ri_max_frmr_depth;
	struct ib_device_attr	ri_devattr;
	struct ib_qp_attr	ri_qp_attr;
	struct ib_qp_init_attr	ri_qp_init_attr;
};

/*
 * RDMA Endpoint -- one per transport instance
 */

#define RPCRDMA_WC_BUDGET	(128)
#define RPCRDMA_POLLSIZE	(16)

struct rpcrdma_ep {
	atomic_t		rep_cqcount;
	int			rep_cqinit;
	int			rep_connected;
	struct ib_qp_init_attr	rep_attr;
	wait_queue_head_t	rep_connect_wait;
	struct rdma_conn_param	rep_remote_cma;
	struct sockaddr_storage	rep_remote_addr;
	struct delayed_work	rep_connect_worker;
	struct ib_wc		rep_send_wcs[RPCRDMA_POLLSIZE];
	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
};

/*
 * Force a signaled SEND Work Request every so often,
 * in case the provider needs to do some housekeeping.
 */
#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)

#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
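
/* A hedged sketch of how a send path is expected to consume this
 * budget: post SENDs unsignaled until the counter runs out, then
 * request a signaled completion and reset the count.
 *
 *	if (DECR_CQCOUNT(ep) > 0)
 *		send_wr.send_flags = 0;
 *	else {
 *		INIT_CQCOUNT(ep);
 *		send_wr.send_flags = IB_SEND_SIGNALED;
 *	}
 */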

/* Force completion handler to ignore the signal
 */
#define RPCRDMA_IGNORE_COMPLETION	(0ULL)

/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
 *
 * The below structure appears at the front of a large region of
 * kmalloc'd memory, which always starts on a good alignment boundary.
 */
struct rpcrdma_regbuf {
	size_t			rg_size;
	struct rpcrdma_req	*rg_owner;
	struct ib_sge		rg_iov;
	__be32			rg_base[0] __attribute__ ((aligned(256)));
};

static inline u64
rdmab_addr(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.addr;
}

static inline u32
rdmab_length(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.length;
}

static inline u32
rdmab_lkey(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.lkey;
}

static inline struct rpcrdma_msg *
rdmab_to_msg(struct rpcrdma_regbuf *rb)
{
	return (struct rpcrdma_msg *)rb->rg_base;
}
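
/* Illustrative only (fill_sge is a hypothetical helper, not part of
 * this file): the accessors above provide everything needed to build
 * an ib_sge that refers to a registered buffer.
 *
 *	static void fill_sge(struct ib_sge *sge, struct rpcrdma_regbuf *rb)
 *	{
 *		sge->addr = rdmab_addr(rb);
 *		sge->length = rdmab_length(rb);
 *		sge->lkey = rdmab_lkey(rb);
 *	}
 */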

/*
 * struct rpcrdma_rep -- this structure encapsulates state required to recv
 * and complete a reply, asynchronously. It needs several pieces of
 * state:
 *   o recv buffer (posted to provider)
 *   o ib_sge (also donated to provider)
 *   o status of reply (length, success or not)
 *   o bookkeeping state to get run by tasklet (list, etc)
 *
 * These are allocated during initialization, per-transport instance;
 * however, the tasklet execution list itself is global, as it should
 * always be pretty short.
 *
 * N of these are associated with a transport instance, and stored in
 * struct rpcrdma_buffer. N is the max number of outstanding requests.
 */

#define RPCRDMA_MAX_DATA_SEGS	((1 * 1024 * 1024) / PAGE_SIZE)
#define RPCRDMA_MAX_SEGS	(RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */

struct rpcrdma_buffer;

struct rpcrdma_rep {
	unsigned int		rr_len;
	struct ib_device	*rr_device;
	struct rpcrdma_xprt	*rr_rxprt;
	struct list_head	rr_list;
	struct rpcrdma_regbuf	*rr_rdmabuf;
};

/*
 * struct rpcrdma_mw - external memory region metadata
 *
 * An external memory region is any buffer or page that is registered
 * on the fly (ie, not pre-registered).
 *
 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
 * track of registration metadata while each RPC is pending.
 * rpcrdma_deregister_external() uses this metadata to unmap and
 * release these resources when an RPC is complete.
 */
enum rpcrdma_frmr_state {
	FRMR_IS_INVALID,	/* ready to be used */
	FRMR_IS_VALID,		/* in use */
	FRMR_IS_STALE,		/* failed completion */
};

struct rpcrdma_frmr {
	struct ib_fast_reg_page_list	*fr_pgl;
	struct ib_mr			*fr_mr;
	enum rpcrdma_frmr_state		fr_state;
	struct work_struct		fr_work;
	struct rpcrdma_xprt		*fr_xprt;
};

struct rpcrdma_fmr {
	struct ib_fmr		*fmr;
	u64			*physaddrs;
};

struct rpcrdma_mw {
	union {
		struct rpcrdma_fmr	fmr;
		struct rpcrdma_frmr	frmr;
	} r;
	void			(*mw_sendcompletion)(struct ib_wc *);
	struct list_head	mw_list;
	struct list_head	mw_all;
};

/*
 * struct rpcrdma_req -- structure central to the request/reply sequence.
 *
 * N of these are associated with a transport instance, and stored in
 * struct rpcrdma_buffer. N is the max number of outstanding requests.
 *
 * It includes pre-registered buffer memory for send AND recv.
 * The recv buffer, however, is *not* owned by this structure, and
 * is "donated" to the hardware when a recv is posted. When a
 * reply is handled, the recv buffer used is given back to the
 * struct rpcrdma_req associated with the request.
 *
 * In addition to the basic memory, this structure includes an array
 * of iovs for send operations. The reason is that the iovs passed to
 * ib_post_{send,recv} must not be modified until the work request
 * completes.
 *
 * NOTES:
 *   o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we
 *     marshal. The number needed varies depending on the iov lists that
 *     are passed to us, the memory registration mode we are in, and if
 *     physical addressing is used, the layout.
 */
struct rpcrdma_mr_seg {		/* chunk descriptors */
	struct rpcrdma_mw *rl_mw;	/* registered MR */
	u64		mr_base;	/* registration result */
	u32		mr_rkey;	/* registration result */
	u32		mr_len;		/* length of chunk or segment */
	int		mr_nsegs;	/* number of segments in chunk or 0 */
	enum dma_data_direction	mr_dir;	/* segment mapping direction */
	dma_addr_t	mr_dma;		/* segment mapping address */
	size_t		mr_dmalen;	/* segment mapping length */
	struct page	*mr_page;	/* owning page, if any */
	char		*mr_offset;	/* kva if no page, else offset */
};

#define RPCRDMA_MAX_IOVS	(2)

struct rpcrdma_req {
	unsigned int		rl_niovs;
	unsigned int		rl_nchunks;
	unsigned int		rl_connect_cookie;
	struct rpcrdma_buffer	*rl_buffer;
	struct rpcrdma_rep	*rl_reply;	/* holder for reply buffer */
	struct ib_sge		rl_send_iov[RPCRDMA_MAX_IOVS];
	struct rpcrdma_regbuf	*rl_rdmabuf;
	struct rpcrdma_regbuf	*rl_sendbuf;
	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
};

static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
{
	void *buffer = rqst->rq_buffer;
	struct rpcrdma_regbuf *rb;

	rb = container_of(buffer, struct rpcrdma_regbuf, rg_base);
	return rb->rg_owner;
}
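
/* rqst->rq_buffer is the rg_base[] payload of a struct rpcrdma_regbuf,
 * so container_of() recovers the regbuf header, whose rg_owner points
 * back to the owning rpcrdma_req. Illustrative caller:
 *
 *	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 */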

/*
 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
 * inline requests/replies, and client/server credits.
 *
 * One of these is associated with a transport instance
 */
struct rpcrdma_buffer {
	spinlock_t		rb_mwlock;	/* protect rb_mws list */
	struct list_head	rb_mws;
	struct list_head	rb_all;
	char			*rb_pool;

	spinlock_t		rb_lock;	/* protect buf arrays */
	u32			rb_max_requests;
	int			rb_send_index;
	int			rb_recv_index;
	struct rpcrdma_req	**rb_send_bufs;
	struct rpcrdma_rep	**rb_recv_bufs;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)

/*
 * Internal structure for transport instance creation. This
 * exists primarily for modularity.
 *
 * This data should be set with mount options
 */
struct rpcrdma_create_data_internal {
	struct sockaddr_storage	addr;	/* RDMA server address */
	unsigned int	max_requests;	/* max requests (slots) in flight */
	unsigned int	rsize;		/* mount rsize - max read hdr+data */
	unsigned int	wsize;		/* mount wsize - max write hdr+data */
	unsigned int	inline_rsize;	/* max non-rdma read data payload */
	unsigned int	inline_wsize;	/* max non-rdma write data payload */
	unsigned int	padding;	/* non-rdma write header padding */
};

#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
	(rpcx_to_rdmad(rq->rq_xprt).inline_rsize)

#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
	(rpcx_to_rdmad(rq->rq_xprt).inline_wsize)

#define RPCRDMA_INLINE_PAD_VALUE(rq)\
	rpcx_to_rdmad(rq->rq_xprt).padding
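
/* A hedged sketch of how marshaling code is expected to apply these
 * thresholds (the real decision lives in rpc_rdma.c; the two callees
 * below are hypothetical): a call that fits under the inline write
 * threshold goes as inline data, anything larger needs chunk lists.
 *
 *	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
 *		send_inline(rqst);
 *	else
 *		marshal_chunk_lists(rqst);
 */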

/*
 * Statistics for RPCRDMA
 */
struct rpcrdma_stats {
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;

	unsigned long long	total_rdma_request;
	unsigned long long	total_rdma_reply;

	unsigned long long	pullup_copy_count;
	unsigned long long	fixup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		nomsg_call_count;
};

/*
 * Per-registration mode operations
 */
struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
	int		(*ro_map)(struct rpcrdma_xprt *,
				  struct rpcrdma_mr_seg *, int, bool);
	int		(*ro_unmap)(struct rpcrdma_xprt *,
				    struct rpcrdma_mr_seg *);
	int		(*ro_open)(struct rpcrdma_ia *,
				   struct rpcrdma_ep *,
				   struct rpcrdma_create_data_internal *);
	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
	int		(*ro_init)(struct rpcrdma_xprt *);
	void		(*ro_destroy)(struct rpcrdma_buffer *);
	const char	*ro_displayname;
};

extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
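
/* One of the three ops tables above is chosen when the interface
 * adapter is opened and cached in ri_ops; callers then dispatch
 * through it rather than naming a registration strategy directly.
 * A hedged sketch of the dispatch:
 *
 *	nsegs = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, writing);
 */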

/*
 * RPCRDMA transport -- encapsulates the structures above for
 * integration with RPC.
 *
 * The contained structures are embedded, not pointers,
 * for convenience. This structure need not be visible externally.
 *
 * It is allocated and initialized during mount, and released
 * during unmount.
 */
struct rpcrdma_xprt {
	struct rpc_xprt		rx_xprt;
	struct rpcrdma_ia	rx_ia;
	struct rpcrdma_ep	rx_ep;
	struct rpcrdma_buffer	rx_buf;
	struct rpcrdma_create_data_internal rx_data;
	struct delayed_work	rx_connect_worker;
	struct rpcrdma_stats	rx_stats;
};

#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
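
/* rpcx_to_rdmax() converts the generic struct rpc_xprt seen by the RPC
 * client back into the containing RDMA transport. Illustrative use:
 *
 *	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 *	unsigned int rsize = rpcx_to_rdmad(xprt).inline_rsize;
 */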

/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 enhances certain unaligned read/write performance.
 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;

/*
 * Interface Adapter calls - xprtrdma/verbs.c
 */
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
void rpcrdma_ia_close(struct rpcrdma_ia *);

/*
 * Endpoint calls - xprtrdma/verbs.c
 */
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
				struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);

int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_rep *);

/*
 * Buffer calls - xprtrdma/verbs.c
 */
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);

struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);

struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
					    size_t, gfp_t);
void rpcrdma_free_regbuf(struct rpcrdma_ia *,
			 struct rpcrdma_regbuf *);

unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);

int frwr_alloc_recovery_wq(void);
void frwr_destroy_recovery_wq(void);

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);

static inline enum dma_data_direction
rpcrdma_data_dir(bool writing)
{
	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}

static inline void
rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
		enum dma_data_direction direction)
{
	seg->mr_dir = direction;
	seg->mr_dmalen = seg->mr_len;

	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);

	if (ib_dma_mapping_error(device, seg->mr_dma))
		rpcrdma_mapping_error(seg);
}

static inline void
rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(device,
				  seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(device,
				    seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
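
/* A hedged usage sketch: a registration strategy maps each segment for
 * DMA before posting work, and must later unmap it with the same device
 * and the direction recorded in mr_dir.
 *
 *	rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
 *	... post the work request and wait for its completion ...
 *	rpcrdma_unmap_one(ia->ri_device, seg);
 */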

/*
 * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
 */
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);

/*
 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
 */
int rpcrdma_marshal_req(struct rpc_rqst *);

/* RPC/RDMA module init - xprtrdma/transport.c
 */
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);

/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;

#endif				/* _LINUX_SUNRPC_XPRT_RDMA_H */