1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32#include <linux/module.h>
33#include <linux/buffer_head.h>
34
35#include <linux/genhd.h>
36#include <linux/blkdev.h>
37#include <linux/hash.h>
38
39#include "blocklayout.h"
40
41#define NFSDBG_FACILITY NFSDBG_PNFS_LD
42
43static int decode_sector_number(__be32 **rp, sector_t *sp)
44{
45 uint64_t s;
46
47 *rp = xdr_decode_hyper(*rp, &s);
48 if (s & 0x1ff) {
49 printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
50 return -1;
51 }
52 *sp = s >> SECTOR_SHIFT;
53 return 0;
54}
55
56
57
58
59int nfs4_blkdev_put(struct block_device *bdev)
60{
61 dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
62 MINOR(bdev->bd_dev));
63 return blkdev_put(bdev, FMODE_READ);
64}
65
66ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
67 size_t mlen)
68{
69 struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
70 nfs_net_id);
71
72 if (mlen != sizeof (struct bl_dev_msg))
73 return -EINVAL;
74
75 if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
76 return -EFAULT;
77
78 wake_up(&nn->bl_wq);
79
80 return mlen;
81}
82
83void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
84{
85 struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
86
87 if (msg->errno >= 0)
88 return;
89 wake_up(bl_pipe_msg->bl_wq);
90}
91
92
93
94
95struct pnfs_block_dev *
96nfs4_blk_decode_device(struct nfs_server *server,
97 struct pnfs_device *dev)
98{
99 struct pnfs_block_dev *rv;
100 struct block_device *bd = NULL;
101 struct bl_pipe_msg bl_pipe_msg;
102 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
103 struct bl_msg_hdr bl_msg = {
104 .type = BL_DEVICE_MOUNT,
105 .totallen = dev->mincount,
106 };
107 uint8_t *dataptr;
108 DECLARE_WAITQUEUE(wq, current);
109 int offset, len, i, rc;
110 struct net *net = server->nfs_client->cl_net;
111 struct nfs_net *nn = net_generic(net, nfs_net_id);
112 struct bl_dev_msg *reply = &nn->bl_mount_reply;
113
114 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
115 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
116 dev->mincount);
117
118 bl_pipe_msg.bl_wq = &nn->bl_wq;
119 memset(msg, 0, sizeof(*msg));
120 msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
121 if (!msg->data) {
122 rv = ERR_PTR(-ENOMEM);
123 goto out;
124 }
125
126 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
127 dataptr = (uint8_t *) msg->data;
128 len = dev->mincount;
129 offset = sizeof(bl_msg);
130 for (i = 0; len > 0; i++) {
131 memcpy(&dataptr[offset], page_address(dev->pages[i]),
132 len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE);
133 len -= PAGE_CACHE_SIZE;
134 offset += PAGE_CACHE_SIZE;
135 }
136 msg->len = sizeof(bl_msg) + dev->mincount;
137
138 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
139 add_wait_queue(&nn->bl_wq, &wq);
140 rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
141 if (rc < 0) {
142 remove_wait_queue(&nn->bl_wq, &wq);
143 rv = ERR_PTR(rc);
144 goto out;
145 }
146
147 set_current_state(TASK_UNINTERRUPTIBLE);
148 schedule();
149 __set_current_state(TASK_RUNNING);
150 remove_wait_queue(&nn->bl_wq, &wq);
151
152 if (reply->status != BL_DEVICE_REQUEST_PROC) {
153 dprintk("%s failed to open device: %d\n",
154 __func__, reply->status);
155 rv = ERR_PTR(-EINVAL);
156 goto out;
157 }
158
159 bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor),
160 FMODE_READ, NULL);
161 if (IS_ERR(bd)) {
162 dprintk("%s failed to open device : %ld\n", __func__,
163 PTR_ERR(bd));
164 rv = ERR_CAST(bd);
165 goto out;
166 }
167
168 rv = kzalloc(sizeof(*rv), GFP_NOFS);
169 if (!rv) {
170 rv = ERR_PTR(-ENOMEM);
171 goto out;
172 }
173
174 rv->bm_mdev = bd;
175 memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
176 rv->net = net;
177 dprintk("%s Created device %s with bd_block_size %u\n",
178 __func__,
179 bd->bd_disk->disk_name,
180 bd->bd_block_size);
181
182out:
183 kfree(msg->data);
184 return rv;
185}
186
187
188static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
189 struct nfs4_deviceid *id)
190{
191 struct block_device *rv = NULL;
192 struct block_mount_id *mid;
193 struct pnfs_block_dev *dev;
194
195 dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
196 mid = BLK_ID(lo);
197 spin_lock(&mid->bm_lock);
198 list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
199 if (memcmp(id->data, dev->bm_mdevid.data,
200 NFS4_DEVICEID4_SIZE) == 0) {
201 rv = dev->bm_mdev;
202 goto out;
203 }
204 }
205 out:
206 spin_unlock(&mid->bm_lock);
207 dprintk("%s returning %p\n", __func__, rv);
208 return rv;
209}
210
211
/*
 * Running state used by verify_extent() while the extents of one
 * LAYOUTGET reply are checked in order.  The three positions are seeded
 * from the layout range offset shifted right by SECTOR_SHIFT.
 */
struct layout_verification {
	u32 mode;	/* iomode of the layout; compared against IOMODE_READ */
	u64 start;	/* file offset the next contiguous extent must begin at */
	u64 inval;	/* READ_DATA extents (non-read mode) may not start below this */
	u64 cowread;	/* end of the last READ_DATA extent seen in non-read mode */
};
218
219
220
221
222static int verify_extent(struct pnfs_block_extent *be,
223 struct layout_verification *lv)
224{
225 if (lv->mode == IOMODE_READ) {
226 if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
227 be->be_state == PNFS_BLOCK_INVALID_DATA)
228 return -EIO;
229 if (be->be_f_offset != lv->start)
230 return -EIO;
231 lv->start += be->be_length;
232 return 0;
233 }
234
235 if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
236 if (be->be_f_offset != lv->start)
237 return -EIO;
238 if (lv->cowread > lv->start)
239 return -EIO;
240 lv->start += be->be_length;
241 lv->inval = lv->start;
242 return 0;
243 } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
244 if (be->be_f_offset != lv->start)
245 return -EIO;
246 lv->start += be->be_length;
247 return 0;
248 } else if (be->be_state == PNFS_BLOCK_READ_DATA) {
249 if (be->be_f_offset > lv->start)
250 return -EIO;
251 if (be->be_f_offset < lv->inval)
252 return -EIO;
253 if (be->be_f_offset < lv->cowread)
254 return -EIO;
255
256
257
258 lv->inval = lv->inval + be->be_length;
259 lv->cowread = be->be_f_offset + be->be_length;
260 return 0;
261 } else
262 return -EIO;
263}
264
265
266int
267nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
268 struct nfs4_layoutget_res *lgr, gfp_t gfp_flags)
269{
270 struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
271 int i, status = -EIO;
272 uint32_t count;
273 struct pnfs_block_extent *be = NULL, *save;
274 struct xdr_stream stream;
275 struct xdr_buf buf;
276 struct page *scratch;
277 __be32 *p;
278 struct layout_verification lv = {
279 .mode = lgr->range.iomode,
280 .start = lgr->range.offset >> SECTOR_SHIFT,
281 .inval = lgr->range.offset >> SECTOR_SHIFT,
282 .cowread = lgr->range.offset >> SECTOR_SHIFT,
283 };
284 LIST_HEAD(extents);
285
286 dprintk("---> %s\n", __func__);
287
288 scratch = alloc_page(gfp_flags);
289 if (!scratch)
290 return -ENOMEM;
291
292 xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
293 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
294
295 p = xdr_inline_decode(&stream, 4);
296 if (unlikely(!p))
297 goto out_err;
298
299 count = be32_to_cpup(p++);
300
301 dprintk("%s enter, number of extents %i\n", __func__, count);
302 p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count);
303 if (unlikely(!p))
304 goto out_err;
305
306
307
308
309
310 for (i = 0; i < count; i++) {
311 be = bl_alloc_extent();
312 if (!be) {
313 status = -ENOMEM;
314 goto out_err;
315 }
316 memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE);
317 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
318 be->be_mdev = translate_devid(lo, &be->be_devid);
319 if (!be->be_mdev)
320 goto out_err;
321
322
323
324
325 if (decode_sector_number(&p, &be->be_f_offset) < 0)
326 goto out_err;
327 if (decode_sector_number(&p, &be->be_length) < 0)
328 goto out_err;
329 if (decode_sector_number(&p, &be->be_v_offset) < 0)
330 goto out_err;
331 be->be_state = be32_to_cpup(p++);
332 if (be->be_state == PNFS_BLOCK_INVALID_DATA)
333 be->be_inval = &bl->bl_inval;
334 if (verify_extent(be, &lv)) {
335 dprintk("%s verify failed\n", __func__);
336 goto out_err;
337 }
338 list_add_tail(&be->be_node, &extents);
339 }
340 if (lgr->range.offset + lgr->range.length !=
341 lv.start << SECTOR_SHIFT) {
342 dprintk("%s Final length mismatch\n", __func__);
343 be = NULL;
344 goto out_err;
345 }
346 if (lv.start < lv.cowread) {
347 dprintk("%s Final uncovered COW extent\n", __func__);
348 be = NULL;
349 goto out_err;
350 }
351
352
353
354 spin_lock(&bl->bl_ext_lock);
355 list_for_each_entry_safe(be, save, &extents, be_node) {
356 list_del(&be->be_node);
357 status = bl_add_merge_extent(bl, be);
358 if (status) {
359 spin_unlock(&bl->bl_ext_lock);
360
361
362
363
364 be = NULL;
365 goto out_err;
366 }
367 }
368 spin_unlock(&bl->bl_ext_lock);
369 status = 0;
370 out:
371 __free_page(scratch);
372 dprintk("%s returns %i\n", __func__, status);
373 return status;
374
375 out_err:
376 bl_put_extent(be);
377 while (!list_empty(&extents)) {
378 be = list_first_entry(&extents, struct pnfs_block_extent,
379 be_node);
380 list_del(&be->be_node);
381 bl_put_extent(be);
382 }
383 goto out;
384}
385