1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30#include <linux/ceph/libceph.h>
31#include <linux/ceph/osd_client.h>
32#include <linux/ceph/mon_client.h>
33#include <linux/ceph/decode.h>
34#include <linux/parser.h>
35
36#include <linux/kernel.h>
37#include <linux/device.h>
38#include <linux/module.h>
39#include <linux/fs.h>
40#include <linux/blkdev.h>
41
42#include "rbd_types.h"
43
44
45
46
47
48
49
50#define SECTOR_SHIFT 9
51#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
52
53#define RBD_DRV_NAME "rbd"
54#define RBD_DRV_NAME_LONG "rbd (rados block device)"
55
56#define RBD_MINORS_PER_MAJOR 256
57
58#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
59#define RBD_MAX_POOL_NAME_LEN 64
60#define RBD_MAX_SNAP_NAME_LEN 32
61#define RBD_MAX_OPT_LEN 1024
62
63#define RBD_SNAP_HEAD_NAME "-"
64
65
66
67
68
69
70
71#define DEV_NAME_LEN 32
72#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
73
74#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
75
76
77
78
/*
 * In-core image header, decoded from the on-disk (format 1) header
 * object.  Protected by the owning device's header_rwsem.
 */
struct rbd_image_header {
	u64 image_size;		/* size of the (head) image, in bytes */
	char block_name[32];	/* prefix for data object names
				   ("<block_name>.<seg>", see rbd_get_segment) */
	__u8 obj_order;		/* log2 of the object (segment) size */
	__u8 crypt_type;	/* on-disk crypt type (carried, not used here) */
	__u8 comp_type;		/* on-disk compression type (carried, not used) */
	struct ceph_snap_context *snapc;	/* snapshot ids, newest first */
	size_t snap_names_len;	/* total bytes in snap_names below */
	u64 snap_seq;		/* snap sequence number from the header */
	u32 total_snaps;	/* number of snapshots */

	char *snap_names;	/* packed NUL-terminated names, one per snap,
				   parallel to snapc->snaps[] (see snap_by_name) */
	u64 *snap_sizes;	/* per-snapshot image sizes, in bytes */

	u64 obj_version;	/* version of the header object last read */
};
95
/* rbd-specific mount options (see rbdopt_tokens) */
struct rbd_options {
	int notify_timeout;	/* seconds; default RBD_NOTIFY_TIMEOUT_DEFAULT */
};
99
100
101
102
/*
 * A shared ceph client instance.  Clients with identical connection
 * options are shared between mapped devices via rbd_client_list
 * (see __rbd_client_find), refcounted by kref.
 */
struct rbd_client {
	struct ceph_client *client;	/* the underlying ceph cluster handle */
	struct rbd_options *rbd_opts;	/* owned; freed in rbd_client_release */
	struct kref kref;
	struct list_head node;		/* entry on rbd_client_list */
};
109
110
111
112
/* Completion status of one segment request within a collection */
struct rbd_req_status {
	int done;	/* nonzero once the segment request completed */
	int rc;		/* its result code */
	u64 bytes;	/* bytes handled, reported to the block layer */
};
118
119
120
121
122struct rbd_req_coll {
123 int total;
124 int num_done;
125 struct kref kref;
126 struct rbd_req_status status[0];
127};
128
129
130
131
/* Per-osd-request context for a single object (segment) I/O */
struct rbd_request {
	struct request *rq;		/* blk layer request, may be NULL for
					   synchronous/internal requests */
	struct bio *bio;		/* cloned bio chain for this segment */
	struct page **pages;		/* page vector for synchronous I/O */
	u64 len;			/* request length in bytes */
	int coll_index;			/* slot in the collection below */
	struct rbd_req_coll *coll;	/* collection this belongs to, or NULL */
};
140
/* One snapshot of the image, exposed as a sysfs child device */
struct rbd_snap {
	struct device dev;	/* sysfs representation */
	const char *name;	/* snapshot name */
	size_t size;		/* image size at the snapshot, in bytes */
	struct list_head node;	/* entry on rbd_device.snaps */
	u64 id;			/* snapshot id */
};
148
149
150
151
/*
 * Per-mapped-device state; one of these exists for each /dev/rbd<id>.
 */
struct rbd_device {
	int id;			/* device unique id */

	int major;		/* assigned block major number */
	struct gendisk *disk;
	struct request_queue *q;

	struct rbd_client *rbd_client;	/* shared ceph client handle */

	char name[DEV_NAME_LEN];	/* block device name, e.g. "rbd3" */

	spinlock_t lock;

	struct rbd_image_header header;	/* decoded image header */
	char obj[RBD_MAX_OBJ_NAME_LEN];		/* rbd image name */
	int obj_len;
	char obj_md_name[RBD_MAX_MD_NAME_LEN];	/* header object name */
	char pool_name[RBD_MAX_POOL_NAME_LEN];
	int poolid;

	struct ceph_osd_event *watch_event;	/* watch on the header object */
	struct ceph_osd_request *watch_request;	/* lingering watch request */

	/* protects updating the header */
	struct rw_semaphore header_rwsem;
	char snap_name[RBD_MAX_SNAP_NAME_LEN];	/* mapped snapshot, or "-" */
	u32 cur_snap;	/* (total_snaps - index) of the mapped snapshot;
			   0 means the writable head is mapped
			   (see rbd_header_set_snap / snap_index) */

	int read_only;	/* nonzero when a snapshot is mapped */

	struct list_head node;	/* entry on rbd_dev_list */

	/* snapshots of this image (struct rbd_snap) */
	struct list_head snaps;

	/* sysfs representation */
	struct device dev;
};
190
/* Serializes control operations (client create, header refresh) */
static DEFINE_MUTEX(ctl_mutex);

/* All mapped rbd devices; protected by rbd_dev_list_lock */
static LIST_HEAD(rbd_dev_list);
static DEFINE_SPINLOCK(rbd_dev_list_lock);

/* Shared ceph clients; protected by rbd_client_list_lock */
static LIST_HEAD(rbd_client_list);
static DEFINE_SPINLOCK(rbd_client_list_lock);

/* Forward declarations */
static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
static void rbd_dev_release(struct device *dev);
static ssize_t rbd_snap_add(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf,
			    size_t count);
static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
				  struct rbd_snap *snap);

static ssize_t rbd_add(struct bus_type *bus, const char *buf,
		       size_t count);
static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
			  size_t count);

/* Control files /sys/bus/rbd/{add,remove}; write-only, root-only */
static struct bus_attribute rbd_bus_attrs[] = {
	__ATTR(add, S_IWUSR, NULL, rbd_add),
	__ATTR(remove, S_IWUSR, NULL, rbd_remove),
	__ATTR_NULL
};

static struct bus_type rbd_bus_type = {
	.name = "rbd",
	.bus_attrs = rbd_bus_attrs,
};
223
/*
 * Release method for rbd_root_dev below.  The device is statically
 * allocated, so there is nothing to free; the method only exists so
 * the driver core has a non-NULL release callback.
 */
static void rbd_root_dev_release(struct device *dev)
{
}
227
/* Root of the rbd sysfs device hierarchy; parent of all rbd devices */
static struct device rbd_root_dev = {
	.init_name = "rbd",
	.release = rbd_root_dev_release,
};
232
233
/* Take a reference on the device's embedded struct device */
static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
{
	return get_device(&rbd_dev->dev);
}
238
/* Drop a reference taken by rbd_get_dev() */
static void rbd_put_dev(struct rbd_device *rbd_dev)
{
	put_device(&rbd_dev->dev);
}
243
244static int __rbd_update_snaps(struct rbd_device *rbd_dev);
245
246static int rbd_open(struct block_device *bdev, fmode_t mode)
247{
248 struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
249
250 rbd_get_dev(rbd_dev);
251
252 set_device_ro(bdev, rbd_dev->read_only);
253
254 if ((mode & FMODE_WRITE) && rbd_dev->read_only)
255 return -EROFS;
256
257 return 0;
258}
259
/* Block device release: drop the reference taken in rbd_open() */
static int rbd_release(struct gendisk *disk, fmode_t mode)
{
	struct rbd_device *rbd_dev = disk->private_data;

	rbd_put_dev(rbd_dev);

	return 0;
}
268
/* Block device operations for /dev/rbd<N> */
static const struct block_device_operations rbd_bd_ops = {
	.owner = THIS_MODULE,
	.open = rbd_open,
	.release = rbd_release,
};
274
275
276
277
278
279static struct rbd_client *rbd_client_create(struct ceph_options *opt,
280 struct rbd_options *rbd_opts)
281{
282 struct rbd_client *rbdc;
283 int ret = -ENOMEM;
284
285 dout("rbd_client_create\n");
286 rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
287 if (!rbdc)
288 goto out_opt;
289
290 kref_init(&rbdc->kref);
291 INIT_LIST_HEAD(&rbdc->node);
292
293 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
294
295 rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
296 if (IS_ERR(rbdc->client))
297 goto out_mutex;
298 opt = NULL;
299
300 ret = ceph_open_session(rbdc->client);
301 if (ret < 0)
302 goto out_err;
303
304 rbdc->rbd_opts = rbd_opts;
305
306 spin_lock(&rbd_client_list_lock);
307 list_add_tail(&rbdc->node, &rbd_client_list);
308 spin_unlock(&rbd_client_list_lock);
309
310 mutex_unlock(&ctl_mutex);
311
312 dout("rbd_client_create created %p\n", rbdc);
313 return rbdc;
314
315out_err:
316 ceph_destroy_client(rbdc->client);
317out_mutex:
318 mutex_unlock(&ctl_mutex);
319 kfree(rbdc);
320out_opt:
321 if (opt)
322 ceph_destroy_options(opt);
323 return ERR_PTR(ret);
324}
325
326
327
328
/*
 * Find an existing ceph client whose connection options match @opt.
 * Returns NULL when sharing is disabled (CEPH_OPT_NOSHARE) or no
 * match exists.  Caller must hold rbd_client_list_lock.
 */
static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
{
	struct rbd_client *client_node;

	if (opt->flags & CEPH_OPT_NOSHARE)
		return NULL;

	list_for_each_entry(client_node, &rbd_client_list, node)
		if (ceph_compare_options(opt, client_node->client) == 0)
			return client_node;
	return NULL;
}
341
342
343
344
/*
 * rbd-specific mount option tokens.  Tokens ordered before
 * Opt_last_int take an integer argument; tokens between Opt_last_int
 * and Opt_last_string would take a string argument (none currently).
 */
enum {
	Opt_notify_timeout,
	Opt_last_int,
	/* int args above */
	Opt_last_string,
	/* string args above */
};

static match_table_t rbdopt_tokens = {
	{Opt_notify_timeout, "notify_timeout=%d"},
	/* int args above */
	/* string args above */
	{-1, NULL}
};
359
/*
 * Parse one rbd-specific option (callback handed to
 * ceph_parse_options()).  @private is the struct rbd_options being
 * filled in.  Returns 0 on success, negative errno on bad input.
 */
static int parse_rbd_opts_token(char *c, void *private)
{
	struct rbd_options *rbdopt = private;
	substring_t argstr[MAX_OPT_ARGS];
	int token, intval, ret;

	token = match_token(c, rbdopt_tokens, argstr);
	if (token < 0)
		return -EINVAL;

	/* decode the argument per the token's class (see the enum) */
	if (token < Opt_last_int) {
		ret = match_int(&argstr[0], &intval);
		if (ret < 0) {
			pr_err("bad mount option arg (not int) "
			       "at '%s'\n", c);
			return ret;
		}
		dout("got int token %d val %d\n", token, intval);
	} else if (token > Opt_last_int && token < Opt_last_string) {
		dout("got string token %d val %s\n", token,
		     argstr[0].from);
	} else {
		dout("got token %d\n", token);
	}

	switch (token) {
	case Opt_notify_timeout:
		rbdopt->notify_timeout = intval;
		break;
	default:
		BUG_ON(token);	/* table and switch out of sync */
	}
	return 0;
}
394
395
396
397
398
/*
 * Get a ceph client for the given monitor address and option string:
 * reuse an existing matching client (taking a reference) or create a
 * new one.  On reuse or error, the locally allocated rbd_opts is
 * freed here; on successful create it is owned by the new client.
 *
 * NOTE(review): rbd_client_list_lock is dropped before
 * rbd_client_create() runs, so two racing callers with identical
 * options can each create a client — confirm that duplicate clients
 * are acceptable here.
 */
static struct rbd_client *rbd_get_client(const char *mon_addr,
					 size_t mon_addr_len,
					 char *options)
{
	struct rbd_client *rbdc;
	struct ceph_options *opt;
	struct rbd_options *rbd_opts;

	rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
	if (!rbd_opts)
		return ERR_PTR(-ENOMEM);

	rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;

	opt = ceph_parse_options(options, mon_addr,
				 mon_addr + mon_addr_len,
				 parse_rbd_opts_token, rbd_opts);
	if (IS_ERR(opt)) {
		kfree(rbd_opts);
		return ERR_CAST(opt);
	}

	spin_lock(&rbd_client_list_lock);
	rbdc = __rbd_client_find(opt);
	if (rbdc) {
		/* found a matching client: share it */
		kref_get(&rbdc->kref);
		spin_unlock(&rbd_client_list_lock);

		ceph_destroy_options(opt);
		kfree(rbd_opts);

		return rbdc;
	}
	spin_unlock(&rbd_client_list_lock);

	rbdc = rbd_client_create(opt, rbd_opts);

	if (IS_ERR(rbdc))
		kfree(rbd_opts);

	return rbdc;
}
442
443
444
445
446
447
/*
 * Destroy a ceph client when its last reference is dropped (kref_put
 * callback).  Called with rbd_client_list_lock held by
 * rbd_put_client(), which protects the list_del().
 */
static void rbd_client_release(struct kref *kref)
{
	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);

	dout("rbd_release_client %p\n", rbdc);
	list_del(&rbdc->node);

	ceph_destroy_client(rbdc->client);
	kfree(rbdc->rbd_opts);
	kfree(rbdc);
}
459
460
461
462
463
/*
 * Drop this device's reference on its shared ceph client; the client
 * is destroyed (under the list lock) when this was the last user.
 */
static void rbd_put_client(struct rbd_device *rbd_dev)
{
	spin_lock(&rbd_client_list_lock);
	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
	spin_unlock(&rbd_client_list_lock);
	rbd_dev->rbd_client = NULL;
}
471
472
473
474
475static void rbd_coll_release(struct kref *kref)
476{
477 struct rbd_req_coll *coll =
478 container_of(kref, struct rbd_req_coll, kref);
479
480 dout("rbd_coll_release %p\n", coll);
481 kfree(coll);
482}
483
484
485
486
487
488static int rbd_header_from_disk(struct rbd_image_header *header,
489 struct rbd_image_header_ondisk *ondisk,
490 int allocated_snaps,
491 gfp_t gfp_flags)
492{
493 int i;
494 u32 snap_count;
495
496 if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
497 return -ENXIO;
498
499 snap_count = le32_to_cpu(ondisk->snap_count);
500 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
501 snap_count * sizeof (*ondisk),
502 gfp_flags);
503 if (!header->snapc)
504 return -ENOMEM;
505
506 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
507 if (snap_count) {
508 header->snap_names = kmalloc(header->snap_names_len,
509 GFP_KERNEL);
510 if (!header->snap_names)
511 goto err_snapc;
512 header->snap_sizes = kmalloc(snap_count * sizeof(u64),
513 GFP_KERNEL);
514 if (!header->snap_sizes)
515 goto err_names;
516 } else {
517 header->snap_names = NULL;
518 header->snap_sizes = NULL;
519 }
520 memcpy(header->block_name, ondisk->block_name,
521 sizeof(ondisk->block_name));
522
523 header->image_size = le64_to_cpu(ondisk->image_size);
524 header->obj_order = ondisk->options.order;
525 header->crypt_type = ondisk->options.crypt_type;
526 header->comp_type = ondisk->options.comp_type;
527
528 atomic_set(&header->snapc->nref, 1);
529 header->snap_seq = le64_to_cpu(ondisk->snap_seq);
530 header->snapc->num_snaps = snap_count;
531 header->total_snaps = snap_count;
532
533 if (snap_count && allocated_snaps == snap_count) {
534 for (i = 0; i < snap_count; i++) {
535 header->snapc->snaps[i] =
536 le64_to_cpu(ondisk->snaps[i].id);
537 header->snap_sizes[i] =
538 le64_to_cpu(ondisk->snaps[i].image_size);
539 }
540
541
542 memcpy(header->snap_names, &ondisk->snaps[i],
543 header->snap_names_len);
544 }
545
546 return 0;
547
548err_names:
549 kfree(header->snap_names);
550err_snapc:
551 kfree(header->snapc);
552 return -ENOMEM;
553}
554
/*
 * Convert a device's cur_snap number back into the snapshot's index
 * in snapc->snaps[]; inverse of the mapping established in
 * rbd_header_set_snap() (cur_snap = total_snaps - index).
 */
static int snap_index(struct rbd_image_header *header, int snap_num)
{
	return header->total_snaps - snap_num;
}
559
/*
 * Snapshot id of the currently mapped snapshot, or 0 when the
 * writable head image is mapped (cur_snap == 0).
 */
static u64 cur_snap_id(struct rbd_device *rbd_dev)
{
	struct rbd_image_header *header = &rbd_dev->header;

	if (!rbd_dev->cur_snap)
		return 0;

	return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
}
569
/*
 * Look up a snapshot by name.  snap_names holds one NUL-terminated
 * name per snapshot, packed back to back and parallel to
 * snapc->snaps[] and snap_sizes[].  On a match, store the snapshot
 * id in *seq and image size in *size (each if non-NULL) and return
 * the snapshot's index; return -ENOENT when not found.
 */
static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
			u64 *seq, u64 *size)
{
	int i;
	char *p = header->snap_names;

	for (i = 0; i < header->total_snaps; i++) {
		if (!strcmp(snap_name, p)) {

			/* found the snapshot */

			if (seq)
				*seq = header->snapc->snaps[i];
			if (size)
				*size = header->snap_sizes[i];
			return i;
		}
		p += strlen(p) + 1;	/* step to the next packed name */
	}
	return -ENOENT;
}
591
/*
 * Select the snapshot (or head) named in dev->snap_name as the
 * current mapping: set the snap context sequence, cur_snap and
 * read_only accordingly, and return the mapped image size via *size
 * (if non-NULL).  Returns 0 on success or -ENOENT for an unknown
 * snapshot name.  Takes header_rwsem for write.
 */
static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
{
	struct rbd_image_header *header = &dev->header;
	struct ceph_snap_context *snapc = header->snapc;
	int ret = -ENOENT;

	/* snap_name must be able to hold the reserved head name */
	BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));

	down_write(&dev->header_rwsem);

	if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
		    sizeof (RBD_SNAP_HEAD_NAME))) {
		/* mapping the head: writable, full image size */
		if (header->total_snaps)
			snapc->seq = header->snap_seq;
		else
			snapc->seq = 0;
		dev->cur_snap = 0;
		dev->read_only = 0;
		if (size)
			*size = header->image_size;
	} else {
		/* mapping a snapshot: read-only, size at snap time */
		ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
		if (ret < 0)
			goto done;

		dev->cur_snap = header->total_snaps - ret;
		dev->read_only = 1;
	}

	ret = 0;
done:
	up_write(&dev->header_rwsem);
	return ret;
}
626
627static void rbd_header_free(struct rbd_image_header *header)
628{
629 kfree(header->snapc);
630 kfree(header->snap_names);
631 kfree(header->snap_sizes);
632}
633
634
635
636
637static u64 rbd_get_segment(struct rbd_image_header *header,
638 const char *block_name,
639 u64 ofs, u64 len,
640 char *seg_name, u64 *segofs)
641{
642 u64 seg = ofs >> header->obj_order;
643
644 if (seg_name)
645 snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
646 "%s.%012llx", block_name, seg);
647
648 ofs = ofs & ((1 << header->obj_order) - 1);
649 len = min_t(u64, len, (1 << header->obj_order) - ofs);
650
651 if (segofs)
652 *segofs = ofs;
653
654 return len;
655}
656
657static int rbd_get_num_segments(struct rbd_image_header *header,
658 u64 ofs, u64 len)
659{
660 u64 start_seg = ofs >> header->obj_order;
661 u64 end_seg = (ofs + len - 1) >> header->obj_order;
662 return end_seg - start_seg + 1;
663}
664
665
666
667
668static u64 rbd_obj_bytes(struct rbd_image_header *header)
669{
670 return 1 << header->obj_order;
671}
672
673
674
675
676
677static void bio_chain_put(struct bio *chain)
678{
679 struct bio *tmp;
680
681 while (chain) {
682 tmp = chain;
683 chain = chain->bi_next;
684 bio_put(tmp);
685 }
686}
687
688
689
690
/*
 * Zero every byte of the bio chain at or beyond @start_ofs (a byte
 * offset from the start of the chain).  Used by the read completion
 * path to blank out data beyond what the osd returned.
 */
static void zero_bio_chain(struct bio *chain, int start_ofs)
{
	struct bio_vec *bv;
	unsigned long flags;
	void *buf;
	int i;
	int pos = 0;	/* running byte offset within the chain */

	while (chain) {
		bio_for_each_segment(bv, chain, i) {
			if (pos + bv->bv_len > start_ofs) {
				/* zero only from start_ofs onward in
				 * a segment straddling the boundary */
				int remainder = max(start_ofs - pos, 0);
				buf = bvec_kmap_irq(bv, &flags);
				memset(buf + remainder, 0,
				       bv->bv_len - remainder);
				bvec_kunmap_irq(buf, &flags);
			}
			pos += bv->bv_len;
		}

		chain = chain->bi_next;
	}
}
714
715
716
717
718
/*
 * Clone the leading @len bytes of a bio chain.
 *
 * On return, *old points at the first bio not fully consumed and
 * *next at the bio to continue from — either *old itself or the
 * second half of a split bio (the split pair is returned via *bp and
 * must be released by the caller; any previous pair in *bp is
 * released here first).  Returns the cloned chain, or NULL on
 * allocation/split failure (partially built clones are freed).
 */
static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
				   struct bio_pair **bp,
				   int len, gfp_t gfpmask)
{
	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
	int total = 0;

	if (*bp) {
		bio_pair_release(*bp);
		*bp = NULL;
	}

	while (old_chain && (total < len)) {
		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
		if (!tmp)
			goto err_out;

		if (total + old_chain->bi_size > len) {
			struct bio_pair *bp;

			/*
			 * this split can only happen with a single paged bio,
			 * split_bio will BUG_ON if this is not the case
			 */

			dout("bio_chain_clone split! total=%d remaining=%d"
			     "bi_size=%d\n",
			     (int)total, (int)len-total,
			     (int)old_chain->bi_size);

			/* split the bio; we'll release it either in the next
			   call, or it will have to be released outside */
			bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
			if (!bp)
				goto err_out;

			__bio_clone(tmp, &bp->bio1);

			*next = &bp->bio2;
		} else {
			__bio_clone(tmp, old_chain);
			*next = old_chain->bi_next;
		}

		tmp->bi_bdev = NULL;
		/* don't sleep on further allocations in this chain */
		gfpmask &= ~__GFP_WAIT;
		tmp->bi_next = NULL;

		/* append the clone to the new chain */
		if (!new_chain) {
			new_chain = tail = tmp;
		} else {
			tail->bi_next = tmp;
			tail = tmp;
		}
		old_chain = old_chain->bi_next;

		total += tmp->bi_size;
	}

	BUG_ON(total < len);

	if (tail)
		tail->bi_next = NULL;

	*old = old_chain;

	return new_chain;

err_out:
	dout("bio_chain_clone with err\n");
	bio_chain_put(new_chain);
	return NULL;
}
791
792
793
794
795static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
796 int num_ops,
797 int opcode,
798 u32 payload_len)
799{
800 *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
801 GFP_NOIO);
802 if (!*ops)
803 return -ENOMEM;
804 (*ops)[0].op = opcode;
805
806
807
808
809 (*ops)[0].payload_len = payload_len;
810 return 0;
811}
812
/* Free an op vector allocated by rbd_create_rw_ops() */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	kfree(ops);
}
817
/*
 * Record the completion of segment @index of a collected block
 * request, then complete — in order — every leading run of finished
 * segments against the block layer.  Drops one collection reference
 * per segment actually completed.  With no collection, the whole
 * request is completed at once.
 */
static void rbd_coll_end_req_index(struct request *rq,
				   struct rbd_req_coll *coll,
				   int index,
				   int ret, u64 len)
{
	struct request_queue *q;
	int min, max, i;

	dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
	     coll, index, ret, len);

	if (!rq)
		return;

	if (!coll) {
		blk_end_request(rq, ret, len);
		return;
	}

	q = rq->q;

	/* queue lock protects the collection's status array */
	spin_lock_irq(q->queue_lock);
	coll->status[index].done = 1;
	coll->status[index].rc = ret;
	coll->status[index].bytes = len;
	max = min = coll->num_done;
	/* extend the completed prefix as far as possible */
	while (max < coll->total && coll->status[max].done)
		max++;

	for (i = min; i<max; i++) {
		__blk_end_request(rq, coll->status[i].rc,
				  coll->status[i].bytes);
		coll->num_done++;
		kref_put(&coll->kref, rbd_coll_release);
	}
	spin_unlock_irq(q->queue_lock);
}
855
/* Complete this request's slot in its collection (if any) */
static void rbd_coll_end_req(struct rbd_request *req,
			     int ret, u64 len)
{
	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
}
861
862
863
864
865static int rbd_do_request(struct request *rq,
866 struct rbd_device *dev,
867 struct ceph_snap_context *snapc,
868 u64 snapid,
869 const char *obj, u64 ofs, u64 len,
870 struct bio *bio,
871 struct page **pages,
872 int num_pages,
873 int flags,
874 struct ceph_osd_req_op *ops,
875 int num_reply,
876 struct rbd_req_coll *coll,
877 int coll_index,
878 void (*rbd_cb)(struct ceph_osd_request *req,
879 struct ceph_msg *msg),
880 struct ceph_osd_request **linger_req,
881 u64 *ver)
882{
883 struct ceph_osd_request *req;
884 struct ceph_file_layout *layout;
885 int ret;
886 u64 bno;
887 struct timespec mtime = CURRENT_TIME;
888 struct rbd_request *req_data;
889 struct ceph_osd_request_head *reqhead;
890 struct ceph_osd_client *osdc;
891
892 req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
893 if (!req_data) {
894 if (coll)
895 rbd_coll_end_req_index(rq, coll, coll_index,
896 -ENOMEM, len);
897 return -ENOMEM;
898 }
899
900 if (coll) {
901 req_data->coll = coll;
902 req_data->coll_index = coll_index;
903 }
904
905 dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
906
907 down_read(&dev->header_rwsem);
908
909 osdc = &dev->rbd_client->client->osdc;
910 req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
911 false, GFP_NOIO, pages, bio);
912 if (!req) {
913 up_read(&dev->header_rwsem);
914 ret = -ENOMEM;
915 goto done_pages;
916 }
917
918 req->r_callback = rbd_cb;
919
920 req_data->rq = rq;
921 req_data->bio = bio;
922 req_data->pages = pages;
923 req_data->len = len;
924
925 req->r_priv = req_data;
926
927 reqhead = req->r_request->front.iov_base;
928 reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
929
930 strncpy(req->r_oid, obj, sizeof(req->r_oid));
931 req->r_oid_len = strlen(req->r_oid);
932
933 layout = &req->r_file_layout;
934 memset(layout, 0, sizeof(*layout));
935 layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
936 layout->fl_stripe_count = cpu_to_le32(1);
937 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
938 layout->fl_pg_preferred = cpu_to_le32(-1);
939 layout->fl_pg_pool = cpu_to_le32(dev->poolid);
940 ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
941 req, ops);
942
943 ceph_osdc_build_request(req, ofs, &len,
944 ops,
945 snapc,
946 &mtime,
947 req->r_oid, req->r_oid_len);
948 up_read(&dev->header_rwsem);
949
950 if (linger_req) {
951 ceph_osdc_set_request_linger(osdc, req);
952 *linger_req = req;
953 }
954
955 ret = ceph_osdc_start_request(osdc, req, false);
956 if (ret < 0)
957 goto done_err;
958
959 if (!rbd_cb) {
960 ret = ceph_osdc_wait_request(osdc, req);
961 if (ver)
962 *ver = le64_to_cpu(req->r_reassert_version.version);
963 dout("reassert_ver=%lld\n",
964 le64_to_cpu(req->r_reassert_version.version));
965 ceph_osdc_put_request(req);
966 }
967 return ret;
968
969done_err:
970 bio_chain_put(req_data->bio);
971 ceph_osdc_put_request(req);
972done_pages:
973 rbd_coll_end_req(req_data, ret, len);
974 kfree(req_data);
975 return ret;
976}
977
978
979
980
/*
 * Completion callback for asynchronous object I/O issued by
 * rbd_do_op().  Decodes the reply, converts ENOENT on read into
 * zero-filled data (the object simply doesn't exist yet), pads short
 * reads with zeroes, then completes the collection slot and releases
 * the request.
 */
static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	struct rbd_request *req_data = req->r_priv;
	struct ceph_osd_reply_head *replyhead;
	struct ceph_osd_op *op;
	__s32 rc;
	u64 bytes;
	int read_op;

	/* parse reply */
	replyhead = msg->front.iov_base;
	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
	op = (void *)(replyhead + 1);
	rc = le32_to_cpu(replyhead->result);
	bytes = le64_to_cpu(op->extent.length);
	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);

	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);

	if (rc == -ENOENT && read_op) {
		/* nonexistent object reads back as zeroes */
		zero_bio_chain(req_data->bio, 0);
		rc = 0;
	} else if (rc == 0 && read_op && bytes < req_data->len) {
		/* short read: zero the remainder */
		zero_bio_chain(req_data->bio, bytes);
		bytes = req_data->len;
	}

	rbd_coll_end_req(req_data, rc, bytes);

	if (req_data->bio)
		bio_chain_put(req_data->bio);

	ceph_osdc_put_request(req);
	kfree(req_data);
}
1016
/* Completion callback that just drops the request (fire-and-forget) */
static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	ceph_osdc_put_request(req);
}
1021
1022
1023
1024
/*
 * Synchronously operate on object @obj, staging data between @buf
 * and a temporary page vector.  If @orig_ops is NULL a single op of
 * @opcode is built here (with @buf copied in for writes) and freed
 * again; otherwise @orig_ops is used as-is and remains owned by the
 * caller.  On reads, the returned byte count's worth of data is
 * copied back into @buf.  Returns the request result (byte count
 * for reads) or a negative errno.
 */
static int rbd_req_sync_op(struct rbd_device *dev,
			   struct ceph_snap_context *snapc,
			   u64 snapid,
			   int opcode,
			   int flags,
			   struct ceph_osd_req_op *orig_ops,
			   int num_reply,
			   const char *obj,
			   u64 ofs, u64 len,
			   char *buf,
			   struct ceph_osd_request **linger_req,
			   u64 *ver)
{
	int ret;
	struct page **pages;
	int num_pages;
	struct ceph_osd_req_op *ops = orig_ops;
	u32 payload_len;

	num_pages = calc_pages_for(ofs, len);
	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	if (!orig_ops) {
		/* build a single op; only writes carry a payload */
		payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
		ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
		if (ret < 0)
			goto done;

		if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
			ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
			if (ret < 0)
				goto done_ops;
		}
	}

	ret = rbd_do_request(NULL, dev, snapc, snapid,
			     obj, ofs, len, NULL,
			     pages, num_pages,
			     flags,
			     ops,
			     2,
			     NULL, 0,
			     NULL,	/* no callback: wait synchronously */
			     linger_req, ver);
	if (ret < 0)
		goto done_ops;

	if ((flags & CEPH_OSD_FLAG_READ) && buf)
		ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);

done_ops:
	if (!orig_ops)
		rbd_destroy_ops(ops);
done:
	ceph_release_page_vector(pages, num_pages);
	return ret;
}
1084
1085
1086
1087
/*
 * Perform asynchronous I/O on one object (segment) of the image:
 * map the image extent to the object name/offset, build a one-op
 * request and submit it with rbd_req_cb() as completion.  The
 * caller must have sized the extent to fit within a single object
 * (enforced by the BUG_ON below).
 */
static int rbd_do_op(struct request *rq,
		     struct rbd_device *rbd_dev ,
		     struct ceph_snap_context *snapc,
		     u64 snapid,
		     int opcode, int flags, int num_reply,
		     u64 ofs, u64 len,
		     struct bio *bio,
		     struct rbd_req_coll *coll,
		     int coll_index)
{
	char *seg_name;
	u64 seg_ofs;
	u64 seg_len;
	int ret;
	struct ceph_osd_req_op *ops;
	u32 payload_len;

	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
	if (!seg_name)
		return -ENOMEM;

	seg_len = rbd_get_segment(&rbd_dev->header,
				  rbd_dev->header.block_name,
				  ofs, len,
				  seg_name, &seg_ofs);

	/* only writes carry a payload */
	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);

	ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
	if (ret < 0)
		goto done;

	/* we've taken care of segment sizes earlier when we
	   cloned the bios. We should never have a segment
	   truncated at this point */
	BUG_ON(seg_len < len);

	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
			     seg_name, seg_ofs, seg_len,
			     bio,
			     NULL, 0,
			     flags,
			     ops,
			     num_reply,
			     coll, coll_index,
			     rbd_req_cb, 0, NULL);

	rbd_destroy_ops(ops);
done:
	kfree(seg_name);
	return ret;
}
1140
1141
1142
1143
/*
 * Asynchronously write an extent of one object of the image.
 */
static int rbd_req_write(struct request *rq,
			 struct rbd_device *rbd_dev,
			 struct ceph_snap_context *snapc,
			 u64 ofs, u64 len,
			 struct bio *bio,
			 struct rbd_req_coll *coll,
			 int coll_index)
{
	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
			 CEPH_OSD_OP_WRITE,
			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			 2,
			 ofs, len, bio, coll, coll_index);
}
1158
1159
1160
1161
/*
 * Asynchronously read an extent of one object of the image;
 * @snapid 0 means read the head.
 */
static int rbd_req_read(struct request *rq,
			struct rbd_device *rbd_dev,
			u64 snapid,
			u64 ofs, u64 len,
			struct bio *bio,
			struct rbd_req_coll *coll,
			int coll_index)
{
	return rbd_do_op(rq, rbd_dev, NULL,
			 (snapid ? snapid : CEPH_NOSNAP),
			 CEPH_OSD_OP_READ,
			 CEPH_OSD_FLAG_READ,
			 2,
			 ofs, len, bio, coll, coll_index);
}
1177
1178
1179
1180
/*
 * Synchronously read @len bytes at @ofs of object @obj into @buf;
 * @snapid 0 means read the head.
 * NOTE(review): @snapc is accepted but not used here — confirm it
 * can be dropped from callers.
 */
static int rbd_req_sync_read(struct rbd_device *dev,
			     struct ceph_snap_context *snapc,
			     u64 snapid,
			     const char *obj,
			     u64 ofs, u64 len,
			     char *buf,
			     u64 *ver)
{
	return rbd_req_sync_op(dev, NULL,
			       (snapid ? snapid : CEPH_NOSNAP),
			       CEPH_OSD_OP_READ,
			       CEPH_OSD_FLAG_READ,
			       NULL,
			       1, obj, ofs, len, buf, NULL, ver);
}
1196
1197
1198
1199
/*
 * Acknowledge a notification received on @obj so the notifying peer
 * stops waiting for us.  Fire-and-forget: completion just drops the
 * request (rbd_simple_req_cb).
 */
static int rbd_req_sync_notify_ack(struct rbd_device *dev,
				   u64 ver,
				   u64 notify_id,
				   const char *obj)
{
	struct ceph_osd_req_op *ops;
	struct page **pages = NULL;
	int ret;

	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
	if (ret < 0)
		return ret;

	ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
	ops[0].watch.cookie = notify_id;
	ops[0].watch.flag = 0;

	ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
			     obj, 0, 0, NULL,
			     pages, 0,
			     CEPH_OSD_FLAG_READ,
			     ops,
			     1,
			     NULL, 0,
			     rbd_simple_req_cb, 0, NULL);

	rbd_destroy_ops(ops);
	return ret;
}
1229
/*
 * Watch callback on the image header object: something changed the
 * header, so refresh the snapshot state (under ctl_mutex) and ack
 * the notification so the notifier doesn't stall on us.
 */
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
	struct rbd_device *dev = (struct rbd_device *)data;
	int rc;

	if (!dev)
		return;

	dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
	     notify_id, (int)opcode);
	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
	rc = __rbd_update_snaps(dev);
	mutex_unlock(&ctl_mutex);
	if (rc)
		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
			   " update snaps: %d\n", dev->major, rc);

	/* always ack, even if the refresh failed */
	rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
}
1249
1250
1251
1252
/*
 * Register a watch on the image header object @obj so that header
 * changes trigger rbd_watch_cb().  The lingering request is kept in
 * dev->watch_request and the event in dev->watch_event until
 * rbd_req_sync_unwatch() tears them down.
 */
static int rbd_req_sync_watch(struct rbd_device *dev,
			      const char *obj,
			      u64 ver)
{
	struct ceph_osd_req_op *ops;
	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;

	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
	if (ret < 0)
		return ret;

	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
				     (void *)dev, &dev->watch_event);
	if (ret < 0)
		goto fail;

	ops[0].watch.ver = cpu_to_le64(ver);
	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
	ops[0].watch.flag = 1;	/* register, not unregister */

	ret = rbd_req_sync_op(dev, NULL,
			      CEPH_NOSNAP,
			      0,
			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			      ops,
			      1, obj, 0, 0, NULL,
			      &dev->watch_request, NULL);

	if (ret < 0)
		goto fail_event;

	rbd_destroy_ops(ops);
	return 0;

fail_event:
	ceph_osdc_cancel_event(dev->watch_event);
	dev->watch_event = NULL;
fail:
	rbd_destroy_ops(ops);
	return ret;
}
1294
1295
1296
1297
/*
 * Unregister the watch set up by rbd_req_sync_watch() and cancel the
 * associated osd event.
 */
static int rbd_req_sync_unwatch(struct rbd_device *dev,
				const char *obj)
{
	struct ceph_osd_req_op *ops;

	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
	if (ret < 0)
		return ret;

	ops[0].watch.ver = 0;
	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
	ops[0].watch.flag = 0;	/* unregister */

	ret = rbd_req_sync_op(dev, NULL,
			      CEPH_NOSNAP,
			      0,
			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			      ops,
			      1, obj, 0, 0, NULL, NULL, NULL);

	/* cancel the event even if the unwatch op failed */
	rbd_destroy_ops(ops);
	ceph_osdc_cancel_event(dev->watch_event);
	dev->watch_event = NULL;
	return ret;
}
1323
/* Context passed as event data to rbd_notify_cb() */
struct rbd_notify_info {
	struct rbd_device *dev;
};
1327
1328static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
1329{
1330 struct rbd_device *dev = (struct rbd_device *)data;
1331 if (!dev)
1332 return;
1333
1334 dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
1335 notify_id, (int)opcode);
1336}
1337
1338
1339
1340
/*
 * Send a notify on @obj and wait for it to complete.
 * NOTE(review): the result of ceph_osdc_wait_event() is logged but
 * discarded — a timeout still makes this return 0.  Confirm whether
 * that is intentional.
 */
static int rbd_req_sync_notify(struct rbd_device *dev,
			       const char *obj)
{
	struct ceph_osd_req_op *ops;
	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
	struct ceph_osd_event *event;
	struct rbd_notify_info info;
	int payload_len = sizeof(u32) + sizeof(u32);
	int ret;

	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
	if (ret < 0)
		return ret;

	info.dev = dev;

	/* one-shot event; data is &info (see rbd_notify_cb) */
	ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
				     (void *)&info, &event);
	if (ret < 0)
		goto fail;

	ops[0].watch.ver = 1;
	ops[0].watch.flag = 1;
	ops[0].watch.cookie = event->cookie;
	ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
	ops[0].watch.timeout = 12;

	ret = rbd_req_sync_op(dev, NULL,
			      CEPH_NOSNAP,
			      0,
			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			      ops,
			      1, obj, 0, 0, NULL, NULL, NULL);
	if (ret < 0)
		goto fail_event;

	ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT);
	dout("ceph_osdc_wait_event returned %d\n", ret);
	rbd_destroy_ops(ops);
	return 0;

fail_event:
	ceph_osdc_cancel_event(event);
fail:
	rbd_destroy_ops(ops);
	return ret;
}
1388
1389
1390
1391
/*
 * Synchronously invoke an OSD class method (@cls.@method) on object
 * @obj, passing @len bytes of @data as input.  @ver, if non-NULL,
 * receives the object version from the reply.
 */
static int rbd_req_sync_exec(struct rbd_device *dev,
			     const char *obj,
			     const char *cls,
			     const char *method,
			     const char *data,
			     int len,
			     u64 *ver)
{
	struct ceph_osd_req_op *ops;
	int cls_len = strlen(cls);
	int method_len = strlen(method);
	/* payload carries class name + method name + input data */
	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
				    cls_len + method_len + len);
	if (ret < 0)
		return ret;

	ops[0].cls.class_name = cls;
	ops[0].cls.class_len = (__u8)cls_len;
	ops[0].cls.method_name = method;
	ops[0].cls.method_len = (__u8)method_len;
	ops[0].cls.argc = 0;
	ops[0].cls.indata = data;
	ops[0].cls.indata_len = len;

	ret = rbd_req_sync_op(dev, NULL,
			      CEPH_NOSNAP,
			      0,
			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			      ops,
			      1, obj, 0, 0, NULL, NULL, ver);

	rbd_destroy_ops(ops);

	dout("cls_exec returned %d\n", ret);
	return ret;
}
1428
1429static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
1430{
1431 struct rbd_req_coll *coll =
1432 kzalloc(sizeof(struct rbd_req_coll) +
1433 sizeof(struct rbd_req_status) * num_reqs,
1434 GFP_ATOMIC);
1435
1436 if (!coll)
1437 return NULL;
1438 coll->total = num_reqs;
1439 kref_init(&coll->kref);
1440 return coll;
1441}
1442
1443
1444
1445
/*
 * Block request function: split each block-layer request into
 * per-object (segment) I/Os, clone the bio chain per segment, and
 * fire them asynchronously; a collection object tracks their
 * in-order completion.  Called with the queue lock held; the lock
 * is dropped while building/submitting and retaken before fetching
 * the next request.
 */
static void rbd_rq_fn(struct request_queue *q)
{
	struct rbd_device *rbd_dev = q->queuedata;
	struct request *rq;
	struct bio_pair *bp = NULL;

	while ((rq = blk_fetch_request(q))) {
		struct bio *bio;
		struct bio *rq_bio, *next_bio = NULL;
		bool do_write;
		int size, op_size = 0;
		u64 ofs;
		int num_segs, cur_seg = 0;
		struct rbd_req_coll *coll;

		/* peek at request from block queue */
		if (!rq)
			break;

		dout("fetched request\n");

		/* filter out block requests we don't understand */
		if ((rq->cmd_type != REQ_TYPE_FS)) {
			__blk_end_request_all(rq, 0);
			continue;
		}

		/* deduce our operation (read, write) */
		do_write = (rq_data_dir(rq) == WRITE);

		size = blk_rq_bytes(rq);
		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
		rq_bio = rq->bio;
		if (do_write && rbd_dev->read_only) {
			__blk_end_request_all(rq, -EROFS);
			continue;
		}

		spin_unlock_irq(q->queue_lock);

		dout("%s 0x%x bytes at 0x%llx\n",
		     do_write ? "write" : "read",
		     size, blk_rq_pos(rq) * SECTOR_SIZE);

		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
		coll = rbd_alloc_coll(num_segs);
		if (!coll) {
			spin_lock_irq(q->queue_lock);
			__blk_end_request_all(rq, -ENOMEM);
			continue;
		}

		do {
			/* a bio clone to be passed down to OSD req */
			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
			op_size = rbd_get_segment(&rbd_dev->header,
						  rbd_dev->header.block_name,
						  ofs, size,
						  NULL, NULL);
			/* one collection ref per in-flight segment */
			kref_get(&coll->kref);
			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
					      op_size, GFP_ATOMIC);
			if (!bio) {
				rbd_coll_end_req_index(rq, coll, cur_seg,
						       -ENOMEM, op_size);
				goto next_seg;
			}

			/* init OSD command: write or read */
			if (do_write)
				rbd_req_write(rq, rbd_dev,
					      rbd_dev->header.snapc,
					      ofs,
					      op_size, bio,
					      coll, cur_seg);
			else
				rbd_req_read(rq, rbd_dev,
					     cur_snap_id(rbd_dev),
					     ofs,
					     op_size, bio,
					     coll, cur_seg);

next_seg:
			size -= op_size;
			ofs += op_size;

			cur_seg++;
			rq_bio = next_bio;
		} while (size > 0);
		/* drop the allocation's own reference */
		kref_put(&coll->kref, rbd_coll_release);

		if (bp)
			bio_pair_release(bp);
		spin_lock_irq(q->queue_lock);
	}
}
1543
1544
1545
1546
1547
1548
/*
 * Tell the block layer how many bytes of @bvec may be merged into the
 * bio described by @bmd without the resulting bio crossing an rbd
 * object (chunk) boundary.
 */
static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
			  struct bio_vec *bvec)
{
	struct rbd_device *rbd_dev = q->queuedata;
	unsigned int chunk_sectors;
	sector_t sector;
	unsigned int bio_sectors;
	int max;

	/* object size in sectors; obj_order is log2(object bytes) */
	chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
	sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
	bio_sectors = bmd->bi_size >> SECTOR_SHIFT;

	/* bytes left in the current chunk after the existing bio data */
	max = (chunk_sectors - ((sector & (chunk_sectors - 1))
				 + bio_sectors)) << SECTOR_SHIFT;
	if (max < 0)
		max = 0;
	/* always allow at least one bio_vec into an empty bio */
	if (max <= bvec->bv_len && bio_sectors == 0)
		return bvec->bv_len;
	return max;
}
1570
/*
 * Tear down the gendisk and request queue for @rbd_dev and free the
 * in-core image header.  Safe to call if the disk was never created.
 */
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
	struct gendisk *disk = rbd_dev->disk;

	if (!disk)
		return;

	rbd_header_free(&rbd_dev->header);

	/* only del_gendisk() if add_disk() actually ran */
	if (disk->flags & GENHD_FL_UP)
		del_gendisk(disk);
	if (disk->queue)
		blk_cleanup_queue(disk->queue);
	put_disk(disk);
}
1586
1587
1588
1589
/*
 * Read the on-disk image header for @rbd_dev and decode it into
 * @header.  The on-disk header size depends on the snapshot count,
 * which can change between reads, so the read is retried with a
 * larger buffer until a consistent snapshot count is observed.
 *
 * Returns 0 on success or a negative errno.
 */
static int rbd_read_header(struct rbd_device *rbd_dev,
			   struct rbd_image_header *header)
{
	ssize_t rc;
	struct rbd_image_header_ondisk *dh;
	int snap_count = 0;
	u64 ver;
	size_t len;

	/*
	 * First read only the fixed part of the header; once the
	 * snapshot count is known the full size can be computed.
	 */
	len = sizeof (*dh);
	while (1) {
		dh = kmalloc(len, GFP_KERNEL);
		if (!dh)
			return -ENOMEM;

		rc = rbd_req_sync_read(rbd_dev,
				       NULL, CEPH_NOSNAP,
				       rbd_dev->obj_md_name,
				       0, len,
				       (char *)dh, &ver);
		if (rc < 0)
			goto out_dh;

		rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
		if (rc < 0) {
			if (rc == -ENXIO)
				pr_warning("unrecognized header format"
					   " for image %s", rbd_dev->obj);
			goto out_dh;
		}

		/* done if the snapshot count didn't change under us */
		if (snap_count == header->total_snaps)
			break;

		/* retry with a buffer large enough for every snapshot */
		snap_count = header->total_snaps;
		len = sizeof (*dh) +
			snap_count * sizeof(struct rbd_image_snap_ondisk) +
			header->snap_names_len;

		rbd_header_free(header);
		kfree(dh);
	}
	header->obj_version = ver;

out_dh:
	kfree(dh);
	return rc;
}
1643
1644
1645
1646
1647static int rbd_header_add_snap(struct rbd_device *dev,
1648 const char *snap_name,
1649 gfp_t gfp_flags)
1650{
1651 int name_len = strlen(snap_name);
1652 u64 new_snapid;
1653 int ret;
1654 void *data, *p, *e;
1655 u64 ver;
1656 struct ceph_mon_client *monc;
1657
1658
1659 if (dev->cur_snap)
1660 return -EINVAL;
1661
1662 monc = &dev->rbd_client->client->monc;
1663 ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
1664 dout("created snapid=%lld\n", new_snapid);
1665 if (ret < 0)
1666 return ret;
1667
1668 data = kmalloc(name_len + 16, gfp_flags);
1669 if (!data)
1670 return -ENOMEM;
1671
1672 p = data;
1673 e = data + name_len + 16;
1674
1675 ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
1676 ceph_encode_64_safe(&p, e, new_snapid, bad);
1677
1678 ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
1679 data, p - data, &ver);
1680
1681 kfree(data);
1682
1683 if (ret < 0)
1684 return ret;
1685
1686 dev->header.snapc->seq = new_snapid;
1687
1688 return 0;
1689bad:
1690 return -ERANGE;
1691}
1692
1693static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
1694{
1695 struct rbd_snap *snap;
1696
1697 while (!list_empty(&rbd_dev->snaps)) {
1698 snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
1699 __rbd_remove_snap_dev(rbd_dev, snap);
1700 }
1701}
1702
1703
1704
1705
/*
 * Re-read the image header from the OSDs and swap it into the in-core
 * copy, preserving which snapshot (or head) the device is following.
 *
 * Takes header_rwsem for write while the in-core header is replaced.
 * NOTE(review): set_capacity() runs before the rwsem is taken —
 * presumably safe because only the header fields need the lock;
 * confirm.
 */
static int __rbd_update_snaps(struct rbd_device *rbd_dev)
{
	int ret;
	struct rbd_image_header h;
	u64 snap_seq;
	int follow_seq = 0;

	ret = rbd_read_header(rbd_dev, &h);
	if (ret < 0)
		return ret;

	/* resized? */
	set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);

	down_write(&rbd_dev->header_rwsem);

	snap_seq = rbd_dev->header.snapc->seq;
	if (rbd_dev->header.total_snaps &&
	    rbd_dev->header.snapc->snaps[0] == snap_seq)
		/* pointing at the head, so we have to follow it */
		follow_seq = 1;

	/* free the old snapshot bookkeeping before installing the new */
	kfree(rbd_dev->header.snapc);
	kfree(rbd_dev->header.snap_names);
	kfree(rbd_dev->header.snap_sizes);

	rbd_dev->header.total_snaps = h.total_snaps;
	rbd_dev->header.snapc = h.snapc;
	rbd_dev->header.snap_names = h.snap_names;
	rbd_dev->header.snap_names_len = h.snap_names_len;
	rbd_dev->header.snap_sizes = h.snap_sizes;
	if (follow_seq)
		rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
	else
		rbd_dev->header.snapc->seq = snap_seq;

	ret = __rbd_init_snaps_header(rbd_dev);

	up_write(&rbd_dev->header_rwsem);

	return ret;
}
1749
/*
 * Read the image header, build the snapshot device list, and create
 * the gendisk plus request queue for the mapped image.
 */
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
	struct gendisk *disk;
	struct request_queue *q;
	int rc;
	u64 segment_size;
	u64 total_size = 0;

	/* contact OSD, request size info about the object being mapped */
	rc = rbd_read_header(rbd_dev, &rbd_dev->header);
	if (rc)
		return rc;

	/* no locking needed: the device is not visible anywhere yet */
	rc = __rbd_init_snaps_header(rbd_dev);
	if (rc)
		return rc;

	rc = rbd_header_set_snap(rbd_dev, &total_size);
	if (rc)
		return rc;

	/* create gendisk info */
	rc = -ENOMEM;
	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
	if (!disk)
		goto out;

	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
		 rbd_dev->id);
	disk->major = rbd_dev->major;
	disk->first_minor = 0;
	disk->fops = &rbd_bd_ops;
	disk->private_data = rbd_dev;

	/* init request queue */
	rc = -ENOMEM;
	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
	if (!q)
		goto out_disk;

	/* We use the default size, but let's be explicit about it. */
	blk_queue_physical_block_size(q, SECTOR_SIZE);

	/* set io sizes to object size */
	segment_size = rbd_obj_bytes(&rbd_dev->header);
	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
	blk_queue_max_segment_size(q, segment_size);
	blk_queue_io_min(q, segment_size);
	blk_queue_io_opt(q, segment_size);

	blk_queue_merge_bvec(q, rbd_merge_bvec);
	disk->queue = q;

	q->queuedata = rbd_dev;

	rbd_dev->disk = disk;
	rbd_dev->q = q;

	/* finally, announce the disk to the world */
	set_capacity(disk, total_size / SECTOR_SIZE);
	add_disk(disk);

	pr_info("%s: added with size 0x%llx\n",
		disk->disk_name, (unsigned long long)total_size);
	return 0;

out_disk:
	put_disk(disk);
out:
	return rc;
}
1822
1823
1824
1825
1826
/* Map a sysfs struct device embedded in an rbd_device back to it. */
static struct rbd_device *dev_to_rbd_dev(struct device *dev)
{
	return container_of(dev, struct rbd_device, dev);
}
1831
1832static ssize_t rbd_size_show(struct device *dev,
1833 struct device_attribute *attr, char *buf)
1834{
1835 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1836
1837 return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
1838}
1839
1840static ssize_t rbd_major_show(struct device *dev,
1841 struct device_attribute *attr, char *buf)
1842{
1843 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1844
1845 return sprintf(buf, "%d\n", rbd_dev->major);
1846}
1847
1848static ssize_t rbd_client_id_show(struct device *dev,
1849 struct device_attribute *attr, char *buf)
1850{
1851 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1852
1853 return sprintf(buf, "client%lld\n",
1854 ceph_client_id(rbd_dev->rbd_client->client));
1855}
1856
1857static ssize_t rbd_pool_show(struct device *dev,
1858 struct device_attribute *attr, char *buf)
1859{
1860 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1861
1862 return sprintf(buf, "%s\n", rbd_dev->pool_name);
1863}
1864
1865static ssize_t rbd_name_show(struct device *dev,
1866 struct device_attribute *attr, char *buf)
1867{
1868 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1869
1870 return sprintf(buf, "%s\n", rbd_dev->obj);
1871}
1872
1873static ssize_t rbd_snap_show(struct device *dev,
1874 struct device_attribute *attr,
1875 char *buf)
1876{
1877 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1878
1879 return sprintf(buf, "%s\n", rbd_dev->snap_name);
1880}
1881
1882static ssize_t rbd_image_refresh(struct device *dev,
1883 struct device_attribute *attr,
1884 const char *buf,
1885 size_t size)
1886{
1887 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1888 int rc;
1889 int ret = size;
1890
1891 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1892
1893 rc = __rbd_update_snaps(rbd_dev);
1894 if (rc < 0)
1895 ret = rc;
1896
1897 mutex_unlock(&ctl_mutex);
1898 return ret;
1899}
1900
/* Per-device sysfs attributes, under /sys/bus/rbd/devices/<id>/. */
static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);

static struct attribute *rbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_major.attr,
	&dev_attr_client_id.attr,
	&dev_attr_pool.attr,
	&dev_attr_name.attr,
	&dev_attr_current_snap.attr,
	&dev_attr_refresh.attr,
	&dev_attr_create_snap.attr,
	NULL
};

static struct attribute_group rbd_attr_group = {
	.attrs = rbd_attrs,
};

static const struct attribute_group *rbd_attr_groups[] = {
	&rbd_attr_group,
	NULL
};

/*
 * Intentionally empty: the rbd_device is freed by rbd_dev_release(),
 * which rbd_bus_add_dev() installs directly as dev->release (and the
 * driver core prefers dev->release over the type's release).
 */
static void rbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type rbd_device_type = {
	.name = "rbd",
	.groups = rbd_attr_groups,
	.release = rbd_sysfs_dev_release,
};
1940
1941
1942
1943
1944
1945
1946static ssize_t rbd_snap_size_show(struct device *dev,
1947 struct device_attribute *attr,
1948 char *buf)
1949{
1950 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1951
1952 return sprintf(buf, "%zd\n", snap->size);
1953}
1954
1955static ssize_t rbd_snap_id_show(struct device *dev,
1956 struct device_attribute *attr,
1957 char *buf)
1958{
1959 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1960
1961 return sprintf(buf, "%llu\n", (unsigned long long) snap->id);
1962}
1963
/* Per-snapshot sysfs attributes. */
static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);

static struct attribute *rbd_snap_attrs[] = {
	&dev_attr_snap_size.attr,
	&dev_attr_snap_id.attr,
	NULL,
};

static struct attribute_group rbd_snap_attr_group = {
	.attrs = rbd_snap_attrs,
};

/* Release callback: frees the rbd_snap and its kstrdup'd name. */
static void rbd_snap_dev_release(struct device *dev)
{
	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
	kfree(snap->name);
	kfree(snap);
}

static const struct attribute_group *rbd_snap_attr_groups[] = {
	&rbd_snap_attr_group,
	NULL
};

static struct device_type rbd_snap_device_type = {
	.groups = rbd_snap_attr_groups,
	.release = rbd_snap_dev_release,
};
1993
/*
 * Take a snapshot off the device's list and unregister its sysfs
 * node; the rbd_snap itself is freed by rbd_snap_dev_release().
 */
static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
				  struct rbd_snap *snap)
{
	list_del(&snap->node);
	device_unregister(&snap->dev);
}
2000
2001static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
2002 struct rbd_snap *snap,
2003 struct device *parent)
2004{
2005 struct device *dev = &snap->dev;
2006 int ret;
2007
2008 dev->type = &rbd_snap_device_type;
2009 dev->parent = parent;
2010 dev->release = rbd_snap_dev_release;
2011 dev_set_name(dev, "snap_%s", snap->name);
2012 ret = device_register(dev);
2013
2014 return ret;
2015}
2016
2017static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
2018 int i, const char *name,
2019 struct rbd_snap **snapp)
2020{
2021 int ret;
2022 struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
2023 if (!snap)
2024 return -ENOMEM;
2025 snap->name = kstrdup(name, GFP_KERNEL);
2026 snap->size = rbd_dev->header.snap_sizes[i];
2027 snap->id = rbd_dev->header.snapc->snaps[i];
2028 if (device_is_registered(&rbd_dev->dev)) {
2029 ret = rbd_register_snap_dev(rbd_dev, snap,
2030 &rbd_dev->dev);
2031 if (ret < 0)
2032 goto err;
2033 }
2034 *snapp = snap;
2035 return 0;
2036err:
2037 kfree(snap->name);
2038 kfree(snap);
2039 return ret;
2040}
2041
2042
2043
2044
/*
 * Given @name pointing at a snapshot name inside the packed,
 * NUL-separated name buffer that begins at @start, return a pointer
 * to the name immediately preceding it, or NULL if @name is already
 * at (or too close to) the start of the buffer.
 */
const char *rbd_prev_snap_name(const char *name, const char *start)
{
	const char *p;

	/* need room for at least a one-char name plus its NUL */
	if (name < start + 2)
		return NULL;

	/* step over the NUL that terminates the previous name */
	for (p = name - 2; *p; p--)
		if (p == start)
			return start;

	/* p sits on the NUL ending the name before the previous one */
	return p + 1;
}
2058
2059
2060
2061
2062
2063
2064
2065
2066
/*
 * Reconcile the device's snapshot device list with the snapshot
 * context in the (already updated) in-core header: remove snaps that
 * no longer exist, keep those that do, and create devices for new
 * ones.  Both the list and the snap context are ordered, and snap
 * names are walked backwards through the packed name buffer.
 */
static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
{
	const char *name, *first_name;
	int i = rbd_dev->header.total_snaps;
	struct rbd_snap *snap, *old_snap = NULL;
	int ret;
	struct list_head *p, *n;

	first_name = rbd_dev->header.snap_names;
	name = first_name + rbd_dev->header.snap_names_len;

	/* walk existing snap devices newest-first, in step with snapc */
	list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
		u64 cur_id;

		old_snap = list_entry(p, struct rbd_snap, node);

		if (i)
			cur_id = rbd_dev->header.snapc->snaps[i - 1];

		if (!i || old_snap->id < cur_id) {
			/* old_snap->id was skipped, thus was removed */
			__rbd_remove_snap_dev(rbd_dev, old_snap);
			continue;
		}
		if (old_snap->id == cur_id) {
			/* we have this snapshot already */
			i--;
			name = rbd_prev_snap_name(name, first_name);
			continue;
		}
		for (; i > 0;
		     i--, name = rbd_prev_snap_name(name, first_name)) {
			if (!name) {
				WARN_ON(1);
				return -EINVAL;
			}
			cur_id = rbd_dev->header.snapc->snaps[i];
			/* snapshot removal? handle it above */
			if (cur_id >= old_snap->id)
				break;
			/* a new snapshot */
			ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
			if (ret < 0)
				return ret;

			/* note that we add it backward */
			list_add(&snap->node, n);
			p = &snap->node;
		}
	}
	/* done walking the old snap list; add whatever remains */
	for (; i > 0; i--) {
		name = rbd_prev_snap_name(name, first_name);
		if (!name) {
			WARN_ON(1);
			return -EINVAL;
		}
		ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
		if (ret < 0)
			return ret;
		list_add(&snap->node, &rbd_dev->snaps);
	}

	return 0;
}
2132
/*
 * Register the rbd device, and a device node for each of its existing
 * snapshots, on the rbd bus.
 *
 * NOTE(review): if registering a snapshot fails, the loop just stops;
 * the main device stays registered and earlier snap devices are not
 * unwound here — confirm the caller's error path (rbd_bus_del_dev)
 * covers this.
 */
static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
{
	int ret;
	struct device *dev;
	struct rbd_snap *snap;

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
	dev = &rbd_dev->dev;

	dev->bus = &rbd_bus_type;
	dev->type = &rbd_device_type;
	dev->parent = &rbd_root_dev;
	dev->release = rbd_dev_release;
	dev_set_name(dev, "%d", rbd_dev->id);
	ret = device_register(dev);
	if (ret < 0)
		goto out;

	list_for_each_entry(snap, &rbd_dev->snaps, node) {
		ret = rbd_register_snap_dev(rbd_dev, snap,
					    &rbd_dev->dev);
		if (ret < 0)
			break;
	}
out:
	mutex_unlock(&ctl_mutex);
	return ret;
}
2161
/*
 * Remove the device from sysfs; final teardown and the kfree happen
 * in rbd_dev_release() when the last reference drops.
 */
static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
{
	device_unregister(&rbd_dev->dev);
}
2166
2167static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
2168{
2169 int ret, rc;
2170
2171 do {
2172 ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
2173 rbd_dev->header.obj_version);
2174 if (ret == -ERANGE) {
2175 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2176 rc = __rbd_update_snaps(rbd_dev);
2177 mutex_unlock(&ctl_mutex);
2178 if (rc < 0)
2179 return rc;
2180 }
2181 } while (ret == -ERANGE);
2182
2183 return ret;
2184}
2185
/* Highest device id handed out so far; ids start at 1. */
static atomic64_t rbd_id_max = ATOMIC64_INIT(0);

/*
 * Assign @rbd_dev the next device id (one above the current maximum)
 * and add it to the global device list.
 */
static void rbd_id_get(struct rbd_device *rbd_dev)
{
	rbd_dev->id = atomic64_inc_return(&rbd_id_max);

	spin_lock(&rbd_dev_list_lock);
	list_add_tail(&rbd_dev->node, &rbd_dev_list);
	spin_unlock(&rbd_dev_list_lock);
}
2200
2201
2202
2203
2204
2205static void rbd_id_put(struct rbd_device *rbd_dev)
2206{
2207 struct list_head *tmp;
2208 int rbd_id = rbd_dev->id;
2209 int max_id;
2210
2211 BUG_ON(rbd_id < 1);
2212
2213 spin_lock(&rbd_dev_list_lock);
2214 list_del_init(&rbd_dev->node);
2215
2216
2217
2218
2219
2220 if (rbd_id != atomic64_read(&rbd_id_max)) {
2221 spin_unlock(&rbd_dev_list_lock);
2222 return;
2223 }
2224
2225
2226
2227
2228
2229
2230 max_id = 0;
2231 list_for_each_prev(tmp, &rbd_dev_list) {
2232 struct rbd_device *rbd_dev;
2233
2234 rbd_dev = list_entry(tmp, struct rbd_device, node);
2235 if (rbd_id > max_id)
2236 max_id = rbd_id;
2237 }
2238 spin_unlock(&rbd_dev_list_lock);
2239
2240
2241
2242
2243
2244
2245
2246 atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id);
2247}
2248
2249
2250
2251
2252
2253
2254
/*
 * Advance *buf past any leading whitespace and return the length of
 * the token that starts there (0 at end of string).  *buf is left
 * pointing at the first non-whitespace character.
 */
static inline size_t next_token(const char **buf)
{
	/* whitespace characters that separate tokens */
	static const char spaces[] = " \f\n\r\t\v";

	*buf += strspn(*buf, spaces);
	return strcspn(*buf, spaces);
}
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
/*
 * Copy the next whitespace-separated token from *buf into @token,
 * NUL-terminating it, and advance *buf past the token.  Returns the
 * token's length; if it is >= @token_size nothing is copied (the
 * caller must check the return value against its buffer size).
 */
static inline size_t copy_token(const char **buf,
				char *token,
				size_t token_size)
{
	size_t len = next_token(buf);

	if (len < token_size) {
		memcpy(token, *buf, len);
		token[len] = '\0';
	}
	*buf += len;

	return len;
}
2297
2298
2299
2300
2301
2302
2303
/*
 * Parse the "<mon addrs> <options> <pool> <image> [<snap>]" string
 * written to the bus "add" attribute.
 *
 * *mon_addrs is set to point INTO @buf (not copied); *mon_addrs_size
 * is the address token's length plus one.  Pool, image and snapshot
 * names are copied into @rbd_dev, and the header object name is
 * derived from the image name.  Returns 0 or -EINVAL on bad input.
 */
static int rbd_add_parse_args(struct rbd_device *rbd_dev,
			      const char *buf,
			      const char **mon_addrs,
			      size_t *mon_addrs_size,
			      char *options,
			      size_t options_size)
{
	size_t len;

	/* the monitor address list is returned in place, not copied */
	len = next_token(&buf);
	if (!len)
		return -EINVAL;
	*mon_addrs_size = len + 1;
	*mon_addrs = buf;

	buf += len;

	len = copy_token(&buf, options, options_size);
	if (!len || len >= options_size)
		return -EINVAL;

	len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
	if (!len || len >= sizeof (rbd_dev->pool_name))
		return -EINVAL;

	len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
	if (!len || len >= sizeof (rbd_dev->obj))
		return -EINVAL;

	/* the header object name is "<image>" + RBD_SUFFIX */
	rbd_dev->obj_len = len;

	BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
			< RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
	sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);

	/*
	 * The snapshot name is optional; when absent, map the head
	 * (RBD_SNAP_HEAD_NAME).
	 */
	len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
	if (!len)
		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
		       sizeof (RBD_SNAP_HEAD_NAME));
	else if (len >= sizeof (rbd_dev->snap_name))
		return -EINVAL;

	return 0;
}
2356
2357static ssize_t rbd_add(struct bus_type *bus,
2358 const char *buf,
2359 size_t count)
2360{
2361 struct rbd_device *rbd_dev;
2362 const char *mon_addrs = NULL;
2363 size_t mon_addrs_size = 0;
2364 char *options = NULL;
2365 struct ceph_osd_client *osdc;
2366 int rc = -ENOMEM;
2367
2368 if (!try_module_get(THIS_MODULE))
2369 return -ENODEV;
2370
2371 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
2372 if (!rbd_dev)
2373 goto err_nomem;
2374 options = kmalloc(count, GFP_KERNEL);
2375 if (!options)
2376 goto err_nomem;
2377
2378
2379 spin_lock_init(&rbd_dev->lock);
2380 INIT_LIST_HEAD(&rbd_dev->node);
2381 INIT_LIST_HEAD(&rbd_dev->snaps);
2382 init_rwsem(&rbd_dev->header_rwsem);
2383
2384 init_rwsem(&rbd_dev->header_rwsem);
2385
2386
2387 rbd_id_get(rbd_dev);
2388
2389
2390 BUILD_BUG_ON(DEV_NAME_LEN
2391 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
2392 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
2393
2394
2395 rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
2396 options, count);
2397 if (rc)
2398 goto err_put_id;
2399
2400 rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
2401 options);
2402 if (IS_ERR(rbd_dev->rbd_client)) {
2403 rc = PTR_ERR(rbd_dev->rbd_client);
2404 goto err_put_id;
2405 }
2406
2407
2408 osdc = &rbd_dev->rbd_client->client->osdc;
2409 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
2410 if (rc < 0)
2411 goto err_out_client;
2412 rbd_dev->poolid = rc;
2413
2414
2415 rc = register_blkdev(0, rbd_dev->name);
2416 if (rc < 0)
2417 goto err_out_client;
2418 rbd_dev->major = rc;
2419
2420 rc = rbd_bus_add_dev(rbd_dev);
2421 if (rc)
2422 goto err_out_blkdev;
2423
2424
2425
2426
2427
2428
2429
2430 rc = rbd_init_disk(rbd_dev);
2431 if (rc)
2432 goto err_out_bus;
2433
2434 rc = rbd_init_watch_dev(rbd_dev);
2435 if (rc)
2436 goto err_out_bus;
2437
2438 return count;
2439
2440err_out_bus:
2441
2442
2443 rbd_bus_del_dev(rbd_dev);
2444 kfree(options);
2445 return rc;
2446
2447err_out_blkdev:
2448 unregister_blkdev(rbd_dev->major, rbd_dev->name);
2449err_out_client:
2450 rbd_put_client(rbd_dev);
2451err_put_id:
2452 rbd_id_put(rbd_dev);
2453err_nomem:
2454 kfree(options);
2455 kfree(rbd_dev);
2456
2457 dout("Error adding device %s\n", buf);
2458 module_put(THIS_MODULE);
2459
2460 return (ssize_t) rc;
2461}
2462
2463static struct rbd_device *__rbd_get_dev(unsigned long id)
2464{
2465 struct list_head *tmp;
2466 struct rbd_device *rbd_dev;
2467
2468 spin_lock(&rbd_dev_list_lock);
2469 list_for_each(tmp, &rbd_dev_list) {
2470 rbd_dev = list_entry(tmp, struct rbd_device, node);
2471 if (rbd_dev->id == id) {
2472 spin_unlock(&rbd_dev_list_lock);
2473 return rbd_dev;
2474 }
2475 }
2476 spin_unlock(&rbd_dev_list_lock);
2477 return NULL;
2478}
2479
/*
 * Release callback for the rbd device's struct device; runs when the
 * last sysfs reference goes away.  Tears down the watch, the client,
 * the disk and the block-device number, then frees the rbd_device.
 */
static void rbd_dev_release(struct device *dev)
{
	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);

	if (rbd_dev->watch_request) {
		struct ceph_client *client = rbd_dev->rbd_client->client;

		ceph_osdc_unregister_linger_request(&client->osdc,
						    rbd_dev->watch_request);
	}
	if (rbd_dev->watch_event)
		rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);

	rbd_put_client(rbd_dev);

	/* clean up and free blkdev */
	rbd_free_disk(rbd_dev);
	unregister_blkdev(rbd_dev->major, rbd_dev->name);

	/* done with the id, and with the rbd_dev */
	rbd_id_put(rbd_dev);
	kfree(rbd_dev);

	/* release module reference taken in rbd_add() */
	module_put(THIS_MODULE);
}
2506
/*
 * Bus "remove" handler: tear down the rbd device whose id was written
 * to the attribute.  Returns @count on success, -ENOENT if no such
 * device, or -EINVAL for an unparsable/out-of-range id.
 */
static ssize_t rbd_remove(struct bus_type *bus,
			  const char *buf,
			  size_t count)
{
	struct rbd_device *rbd_dev = NULL;
	int target_id, rc;
	unsigned long ul;
	int ret = count;

	rc = strict_strtoul(buf, 10, &ul);
	if (rc)
		return rc;

	/* convert to int; reject if anything was lost in conversion */
	target_id = (int) ul;
	if (target_id != ul)
		return -EINVAL;

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);

	rbd_dev = __rbd_get_dev(target_id);
	if (!rbd_dev) {
		ret = -ENOENT;
		goto done;
	}

	__rbd_remove_all_snaps(rbd_dev);
	rbd_bus_del_dev(rbd_dev);

done:
	mutex_unlock(&ctl_mutex);
	return ret;
}
2540
/*
 * sysfs "create_snap" handler: create a named snapshot of the image,
 * refresh the in-core snapshot list, and notify other watchers.
 */
static ssize_t rbd_snap_add(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf,
			    size_t count)
{
	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
	int ret;
	char *name = kmalloc(count + 1, GFP_KERNEL);
	if (!name)
		return -ENOMEM;

	/*
	 * NOTE(review): with size "count" snprintf copies at most
	 * count-1 bytes, dropping the last byte of buf — presumably
	 * the trailing newline from sysfs; confirm input always ends
	 * in '\n' (otherwise a real character is lost).
	 */
	snprintf(name, count, "%s", buf);

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);

	ret = rbd_header_add_snap(rbd_dev,
				  name, GFP_KERNEL);
	if (ret < 0)
		goto err_unlock;

	ret = __rbd_update_snaps(rbd_dev);
	if (ret < 0)
		goto err_unlock;

	/* notify watchers only after releasing the control mutex */
	mutex_unlock(&ctl_mutex);

	/* best effort: a failed notify does not fail the snap create */
	rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);

	ret = count;
	kfree(name);
	return ret;

err_unlock:
	mutex_unlock(&ctl_mutex);
	kfree(name);
	return ret;
}
2581
2582
2583
2584
2585
2586static int rbd_sysfs_init(void)
2587{
2588 int ret;
2589
2590 ret = device_register(&rbd_root_dev);
2591 if (ret < 0)
2592 return ret;
2593
2594 ret = bus_register(&rbd_bus_type);
2595 if (ret < 0)
2596 device_unregister(&rbd_root_dev);
2597
2598 return ret;
2599}
2600
/* Tear down the sysfs bus and root device, reverse of rbd_sysfs_init(). */
static void rbd_sysfs_cleanup(void)
{
	bus_unregister(&rbd_bus_type);
	device_unregister(&rbd_root_dev);
}
2606
/* Module entry point: register the rbd sysfs bus and root device. */
int __init rbd_init(void)
{
	int rc;

	rc = rbd_sysfs_init();
	if (rc)
		return rc;
	pr_info("loaded " RBD_DRV_NAME_LONG "\n");
	return 0;
}
2617
/* Module exit: tear down the sysfs registration. */
void __exit rbd_exit(void)
{
	rbd_sysfs_cleanup();
}
2622
/* Module registration and metadata. */
module_init(rbd_init);
module_exit(rbd_exit);

MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_DESCRIPTION("rados block device");

/* following authorship retained from original osdblk.c */
MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");

MODULE_LICENSE("GPL");
2634