1
2
3
4
5
6
7
8
9
10#include "qemu/osdep.h"
11#include <glusterfs/api/glfs.h>
12#include "block/block_int.h"
13#include "qapi/error.h"
14#include "qapi/qmp/qerror.h"
15#include "qemu/uri.h"
16#include "qemu/error-report.h"
17#include "qemu/cutils.h"
18
/* Keys recognised in the top-level runtime / create option sets. */
#define GLUSTER_OPT_FILENAME        "filename"
#define GLUSTER_OPT_VOLUME          "volume"
#define GLUSTER_OPT_PATH            "path"
#define GLUSTER_OPT_DEBUG           "debug"
#define GLUSTER_OPT_LOGFILE         "logfile"

/* Keys recognised inside each "server.N." sub-dictionary. */
#define GLUSTER_OPT_SERVER_PATTERN  "server."
#define GLUSTER_OPT_TYPE            "type"
#define GLUSTER_OPT_HOST            "host"
#define GLUSTER_OPT_PORT            "port"
#define GLUSTER_OPT_SOCKET          "socket"
/* Accepted by the inet option list but rejected by the JSON parser. */
#define GLUSTER_OPT_TO              "to"
#define GLUSTER_OPT_IPV4            "ipv4"
#define GLUSTER_OPT_IPV6            "ipv6"

/* Defaults and limits. */
#define GLUSTER_DEFAULT_PORT        24007   /* used when a URI has no port */
#define GLUSTER_DEBUG_DEFAULT       4       /* log level when none is given */
#define GLUSTER_DEBUG_MAX           9       /* log levels are clamped to 0..9 */
/* NOTE(review): presumably makes libgfapi log to stderr -- confirm. */
#define GLUSTER_LOGFILE_DEFAULT     "-"

/* Hint appended to per-server parse errors; takes the server array index. */
#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
38
39typedef struct GlusterAIOCB {
40 int64_t size;
41 int ret;
42 Coroutine *coroutine;
43 AioContext *aio_context;
44} GlusterAIOCB;
45
/* Per-BlockDriverState driver data (bs->opaque). */
typedef struct BDRVGlusterState {
    /* Connection handle, shared per volume through the preopened list. */
    struct glfs *glfs;
    /* Open handle of the image file, or NULL. */
    struct glfs_fd *fd;
    /* Heap copy of the libgfapi logfile path; freed on close. */
    char *logfile;
    /* Result of qemu_gluster_test_seek() at open time. */
    bool supports_seek_data;
    /* libgfapi log level, clamped to 0..GLUSTER_DEBUG_MAX. */
    int debug;
} BDRVGlusterState;
53
/*
 * Temporary state built by reopen_prepare; either adopted by
 * reopen_commit or released by reopen_abort.
 */
typedef struct BDRVGlusterReopenState {
    /* Connection handle obtained for the reopen. */
    struct glfs *glfs;
    /* Image file handle opened with the new flags. */
    struct glfs_fd *fd;
} BDRVGlusterReopenState;
58
59
60typedef struct GlfsPreopened {
61 char *volume;
62 glfs_t *fs;
63 int ref;
64} GlfsPreopened;
65
66typedef struct ListElement {
67 QLIST_ENTRY(ListElement) list;
68 GlfsPreopened saved;
69} ListElement;
70
71static QLIST_HEAD(glfs_list, ListElement) glfs_list;
72
73static QemuOptsList qemu_gluster_create_opts = {
74 .name = "qemu-gluster-create-opts",
75 .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
76 .desc = {
77 {
78 .name = BLOCK_OPT_SIZE,
79 .type = QEMU_OPT_SIZE,
80 .help = "Virtual disk size"
81 },
82 {
83 .name = BLOCK_OPT_PREALLOC,
84 .type = QEMU_OPT_STRING,
85 .help = "Preallocation mode (allowed values: off, full)"
86 },
87 {
88 .name = GLUSTER_OPT_DEBUG,
89 .type = QEMU_OPT_NUMBER,
90 .help = "Gluster log level, valid range is 0-9",
91 },
92 {
93 .name = GLUSTER_OPT_LOGFILE,
94 .type = QEMU_OPT_STRING,
95 .help = "Logfile path of libgfapi",
96 },
97 { }
98 }
99};
100
101static QemuOptsList runtime_opts = {
102 .name = "gluster",
103 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
104 .desc = {
105 {
106 .name = GLUSTER_OPT_FILENAME,
107 .type = QEMU_OPT_STRING,
108 .help = "URL to the gluster image",
109 },
110 {
111 .name = GLUSTER_OPT_DEBUG,
112 .type = QEMU_OPT_NUMBER,
113 .help = "Gluster log level, valid range is 0-9",
114 },
115 {
116 .name = GLUSTER_OPT_LOGFILE,
117 .type = QEMU_OPT_STRING,
118 .help = "Logfile path of libgfapi",
119 },
120 { }
121 },
122};
123
124static QemuOptsList runtime_json_opts = {
125 .name = "gluster_json",
126 .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
127 .desc = {
128 {
129 .name = GLUSTER_OPT_VOLUME,
130 .type = QEMU_OPT_STRING,
131 .help = "name of gluster volume where VM image resides",
132 },
133 {
134 .name = GLUSTER_OPT_PATH,
135 .type = QEMU_OPT_STRING,
136 .help = "absolute path to image file in gluster volume",
137 },
138 {
139 .name = GLUSTER_OPT_DEBUG,
140 .type = QEMU_OPT_NUMBER,
141 .help = "Gluster log level, valid range is 0-9",
142 },
143 { }
144 },
145};
146
147static QemuOptsList runtime_type_opts = {
148 .name = "gluster_type",
149 .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
150 .desc = {
151 {
152 .name = GLUSTER_OPT_TYPE,
153 .type = QEMU_OPT_STRING,
154 .help = "inet|unix",
155 },
156 { }
157 },
158};
159
160static QemuOptsList runtime_unix_opts = {
161 .name = "gluster_unix",
162 .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
163 .desc = {
164 {
165 .name = GLUSTER_OPT_SOCKET,
166 .type = QEMU_OPT_STRING,
167 .help = "socket file path)",
168 },
169 { }
170 },
171};
172
173static QemuOptsList runtime_inet_opts = {
174 .name = "gluster_inet",
175 .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
176 .desc = {
177 {
178 .name = GLUSTER_OPT_TYPE,
179 .type = QEMU_OPT_STRING,
180 .help = "inet|unix",
181 },
182 {
183 .name = GLUSTER_OPT_HOST,
184 .type = QEMU_OPT_STRING,
185 .help = "host address (hostname/ipv4/ipv6 addresses)",
186 },
187 {
188 .name = GLUSTER_OPT_PORT,
189 .type = QEMU_OPT_STRING,
190 .help = "port number on which glusterd is listening (default 24007)",
191 },
192 {
193 .name = "to",
194 .type = QEMU_OPT_NUMBER,
195 .help = "max port number, not supported by gluster",
196 },
197 {
198 .name = "ipv4",
199 .type = QEMU_OPT_BOOL,
200 .help = "ipv4 bool value, not supported by gluster",
201 },
202 {
203 .name = "ipv6",
204 .type = QEMU_OPT_BOOL,
205 .help = "ipv6 bool value, not supported by gluster",
206 },
207 { }
208 },
209};
210
211static void glfs_set_preopened(const char *volume, glfs_t *fs)
212{
213 ListElement *entry = NULL;
214
215 entry = g_new(ListElement, 1);
216
217 entry->saved.volume = g_strdup(volume);
218
219 entry->saved.fs = fs;
220 entry->saved.ref = 1;
221
222 QLIST_INSERT_HEAD(&glfs_list, entry, list);
223}
224
225static glfs_t *glfs_find_preopened(const char *volume)
226{
227 ListElement *entry = NULL;
228
229 QLIST_FOREACH(entry, &glfs_list, list) {
230 if (strcmp(entry->saved.volume, volume) == 0) {
231 entry->saved.ref++;
232 return entry->saved.fs;
233 }
234 }
235
236 return NULL;
237}
238
239static void glfs_clear_preopened(glfs_t *fs)
240{
241 ListElement *entry = NULL;
242 ListElement *next;
243
244 if (fs == NULL) {
245 return;
246 }
247
248 QLIST_FOREACH_SAFE(entry, &glfs_list, list, next) {
249 if (entry->saved.fs == fs) {
250 if (--entry->saved.ref) {
251 return;
252 }
253
254 QLIST_REMOVE(entry, list);
255
256 glfs_fini(entry->saved.fs);
257 g_free(entry->saved.volume);
258 g_free(entry);
259 }
260 }
261}
262
263static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
264{
265 char *p, *q;
266
267 if (!path) {
268 return -EINVAL;
269 }
270
271
272 p = q = path + strspn(path, "/");
273 p += strcspn(p, "/");
274 if (*p == '\0') {
275 return -EINVAL;
276 }
277 gconf->volume = g_strndup(q, p - q);
278
279
280 p += strspn(p, "/");
281 if (*p == '\0') {
282 return -EINVAL;
283 }
284 gconf->path = g_strdup(p);
285 return 0;
286}
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
321 const char *filename)
322{
323 SocketAddress *gsconf;
324 URI *uri;
325 QueryParams *qp = NULL;
326 bool is_unix = false;
327 int ret = 0;
328
329 uri = uri_parse(filename);
330 if (!uri) {
331 return -EINVAL;
332 }
333
334 gconf->server = g_new0(SocketAddressList, 1);
335 gconf->server->value = gsconf = g_new0(SocketAddress, 1);
336
337
338 if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
339 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
340 } else if (!strcmp(uri->scheme, "gluster+tcp")) {
341 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
342 } else if (!strcmp(uri->scheme, "gluster+unix")) {
343 gsconf->type = SOCKET_ADDRESS_TYPE_UNIX;
344 is_unix = true;
345 } else if (!strcmp(uri->scheme, "gluster+rdma")) {
346 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
347 warn_report("rdma feature is not supported, falling back to tcp");
348 } else {
349 ret = -EINVAL;
350 goto out;
351 }
352
353 ret = parse_volume_options(gconf, uri->path);
354 if (ret < 0) {
355 goto out;
356 }
357
358 qp = query_params_parse(uri->query);
359 if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
360 ret = -EINVAL;
361 goto out;
362 }
363
364 if (is_unix) {
365 if (uri->server || uri->port) {
366 ret = -EINVAL;
367 goto out;
368 }
369 if (strcmp(qp->p[0].name, "socket")) {
370 ret = -EINVAL;
371 goto out;
372 }
373 gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
374 } else {
375 gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
376 if (uri->port) {
377 gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
378 } else {
379 gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
380 }
381 }
382
383out:
384 if (qp) {
385 query_params_free(qp);
386 }
387 uri_free(uri);
388 return ret;
389}
390
391static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
392 Error **errp)
393{
394 struct glfs *glfs;
395 int ret;
396 int old_errno;
397 SocketAddressList *server;
398 unsigned long long port;
399
400 glfs = glfs_find_preopened(gconf->volume);
401 if (glfs) {
402 return glfs;
403 }
404
405 glfs = glfs_new(gconf->volume);
406 if (!glfs) {
407 goto out;
408 }
409
410 glfs_set_preopened(gconf->volume, glfs);
411
412 for (server = gconf->server; server; server = server->next) {
413 switch (server->value->type) {
414 case SOCKET_ADDRESS_TYPE_UNIX:
415 ret = glfs_set_volfile_server(glfs, "unix",
416 server->value->u.q_unix.path, 0);
417 break;
418 case SOCKET_ADDRESS_TYPE_INET:
419 if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
420 port > 65535) {
421 error_setg(errp, "'%s' is not a valid port number",
422 server->value->u.inet.port);
423 errno = EINVAL;
424 goto out;
425 }
426 ret = glfs_set_volfile_server(glfs, "tcp",
427 server->value->u.inet.host,
428 (int)port);
429 break;
430 case SOCKET_ADDRESS_TYPE_VSOCK:
431 case SOCKET_ADDRESS_TYPE_FD:
432 default:
433 abort();
434 }
435
436 if (ret < 0) {
437 goto out;
438 }
439 }
440
441 ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug);
442 if (ret < 0) {
443 goto out;
444 }
445
446 ret = glfs_init(glfs);
447 if (ret) {
448 error_setg(errp, "Gluster connection for volume %s, path %s failed"
449 " to connect", gconf->volume, gconf->path);
450 for (server = gconf->server; server; server = server->next) {
451 if (server->value->type == SOCKET_ADDRESS_TYPE_UNIX) {
452 error_append_hint(errp, "hint: failed on socket %s ",
453 server->value->u.q_unix.path);
454 } else {
455 error_append_hint(errp, "hint: failed on host %s and port %s ",
456 server->value->u.inet.host,
457 server->value->u.inet.port);
458 }
459 }
460
461 error_append_hint(errp, "Please refer to gluster logs for more info\n");
462
463
464 if (errno == 0) {
465 errno = EINVAL;
466 }
467
468 goto out;
469 }
470 return glfs;
471
472out:
473 if (glfs) {
474 old_errno = errno;
475 glfs_clear_preopened(glfs);
476 errno = old_errno;
477 }
478 return NULL;
479}
480
481
482
483
484static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
485 QDict *options, Error **errp)
486{
487 QemuOpts *opts;
488 SocketAddress *gsconf = NULL;
489 SocketAddressList *curr = NULL;
490 QDict *backing_options = NULL;
491 Error *local_err = NULL;
492 char *str = NULL;
493 const char *ptr;
494 int i, type, num_servers;
495
496
497 opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
498 qemu_opts_absorb_qdict(opts, options, &local_err);
499 if (local_err) {
500 goto out;
501 }
502
503 num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
504 if (num_servers < 1) {
505 error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
506 goto out;
507 }
508
509 ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
510 if (!ptr) {
511 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
512 goto out;
513 }
514 gconf->volume = g_strdup(ptr);
515
516 ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
517 if (!ptr) {
518 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
519 goto out;
520 }
521 gconf->path = g_strdup(ptr);
522 qemu_opts_del(opts);
523
524 for (i = 0; i < num_servers; i++) {
525 str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
526 qdict_extract_subqdict(options, &backing_options, str);
527
528
529 opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
530 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
531 if (local_err) {
532 goto out;
533 }
534
535 ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
536 if (!ptr) {
537 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
538 error_append_hint(&local_err, GERR_INDEX_HINT, i);
539 goto out;
540
541 }
542 gsconf = g_new0(SocketAddress, 1);
543 if (!strcmp(ptr, "tcp")) {
544 ptr = "inet";
545 }
546 type = qapi_enum_parse(&SocketAddressType_lookup, ptr, -1, NULL);
547 if (type != SOCKET_ADDRESS_TYPE_INET
548 && type != SOCKET_ADDRESS_TYPE_UNIX) {
549 error_setg(&local_err,
550 "Parameter '%s' may be 'inet' or 'unix'",
551 GLUSTER_OPT_TYPE);
552 error_append_hint(&local_err, GERR_INDEX_HINT, i);
553 goto out;
554 }
555 gsconf->type = type;
556 qemu_opts_del(opts);
557
558 if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) {
559
560 opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
561 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
562 if (local_err) {
563 goto out;
564 }
565
566 ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
567 if (!ptr) {
568 error_setg(&local_err, QERR_MISSING_PARAMETER,
569 GLUSTER_OPT_HOST);
570 error_append_hint(&local_err, GERR_INDEX_HINT, i);
571 goto out;
572 }
573 gsconf->u.inet.host = g_strdup(ptr);
574 ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
575 if (!ptr) {
576 error_setg(&local_err, QERR_MISSING_PARAMETER,
577 GLUSTER_OPT_PORT);
578 error_append_hint(&local_err, GERR_INDEX_HINT, i);
579 goto out;
580 }
581 gsconf->u.inet.port = g_strdup(ptr);
582
583
584
585
586 ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
587 if (ptr) {
588 gsconf->u.inet.has_to = true;
589 }
590 ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
591 if (ptr) {
592 gsconf->u.inet.has_ipv4 = true;
593 }
594 ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
595 if (ptr) {
596 gsconf->u.inet.has_ipv6 = true;
597 }
598 if (gsconf->u.inet.has_to) {
599 error_setg(&local_err, "Parameter 'to' not supported");
600 goto out;
601 }
602 if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
603 error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
604 goto out;
605 }
606 qemu_opts_del(opts);
607 } else {
608
609 opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
610 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
611 if (local_err) {
612 goto out;
613 }
614
615 ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
616 if (!ptr) {
617 error_setg(&local_err, QERR_MISSING_PARAMETER,
618 GLUSTER_OPT_SOCKET);
619 error_append_hint(&local_err, GERR_INDEX_HINT, i);
620 goto out;
621 }
622 gsconf->u.q_unix.path = g_strdup(ptr);
623 qemu_opts_del(opts);
624 }
625
626 if (gconf->server == NULL) {
627 gconf->server = g_new0(SocketAddressList, 1);
628 gconf->server->value = gsconf;
629 curr = gconf->server;
630 } else {
631 curr->next = g_new0(SocketAddressList, 1);
632 curr->next->value = gsconf;
633 curr = curr->next;
634 }
635 gsconf = NULL;
636
637 QDECREF(backing_options);
638 backing_options = NULL;
639 g_free(str);
640 str = NULL;
641 }
642
643 return 0;
644
645out:
646 error_propagate(errp, local_err);
647 qapi_free_SocketAddress(gsconf);
648 qemu_opts_del(opts);
649 g_free(str);
650 QDECREF(backing_options);
651 errno = EINVAL;
652 return -errno;
653}
654
655static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
656 const char *filename,
657 QDict *options, Error **errp)
658{
659 int ret;
660 if (filename) {
661 ret = qemu_gluster_parse_uri(gconf, filename);
662 if (ret < 0) {
663 error_setg(errp, "invalid URI");
664 error_append_hint(errp, "Usage: file=gluster[+transport]://"
665 "[host[:port]]volume/path[?socket=...]"
666 "[,file.debug=N]"
667 "[,file.logfile=/path/filename.log]\n");
668 errno = -ret;
669 return NULL;
670 }
671 } else {
672 ret = qemu_gluster_parse_json(gconf, options, errp);
673 if (ret < 0) {
674 error_append_hint(errp, "Usage: "
675 "-drive driver=qcow2,file.driver=gluster,"
676 "file.volume=testvol,file.path=/path/a.qcow2"
677 "[,file.debug=9]"
678 "[,file.logfile=/path/filename.log],"
679 "file.server.0.type=inet,"
680 "file.server.0.host=1.2.3.4,"
681 "file.server.0.port=24007,"
682 "file.server.1.transport=unix,"
683 "file.server.1.socket=/var/run/glusterd.socket ..."
684 "\n");
685 errno = -ret;
686 return NULL;
687 }
688
689 }
690
691 return qemu_gluster_glfs_init(gconf, errp);
692}
693
694
695
696
697static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
698{
699 GlusterAIOCB *acb = (GlusterAIOCB *)arg;
700
701 if (!ret || ret == acb->size) {
702 acb->ret = 0;
703 } else if (ret < 0) {
704 acb->ret = -errno;
705 } else {
706 acb->ret = -EIO;
707 }
708
709 aio_co_schedule(acb->aio_context, acb->coroutine);
710}
711
712static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
713{
714 assert(open_flags != NULL);
715
716 *open_flags |= O_BINARY;
717
718 if (bdrv_flags & BDRV_O_RDWR) {
719 *open_flags |= O_RDWR;
720 } else {
721 *open_flags |= O_RDONLY;
722 }
723
724 if ((bdrv_flags & BDRV_O_NOCACHE)) {
725 *open_flags |= O_DIRECT;
726 }
727}
728
729
730
731
732
733
734
735
/*
 * Probe whether @fd supports SEEK_DATA.
 *
 * Returns true only when seeking for data at end-of-file fails with
 * ENXIO.  Returns false when the end of the file cannot be determined,
 * when the probe yields anything else, or when the platform does not
 * define SEEK_HOLE and SEEK_DATA.
 */
static bool qemu_gluster_test_seek(struct glfs_fd *fd)
{
#if defined SEEK_HOLE && defined SEEK_DATA
    off_t end = glfs_lseek(fd, 0, SEEK_END);

    if (end < 0) {
        /* Cannot even find EOF; treat as unsupported. */
        return false;
    }

    return glfs_lseek(fd, end, SEEK_DATA) < 0 && errno == ENXIO;
#else
    return false;
#endif
}
755
756static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
757 int bdrv_flags, Error **errp)
758{
759 BDRVGlusterState *s = bs->opaque;
760 int open_flags = 0;
761 int ret = 0;
762 BlockdevOptionsGluster *gconf = NULL;
763 QemuOpts *opts;
764 Error *local_err = NULL;
765 const char *filename, *logfile;
766
767 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
768 qemu_opts_absorb_qdict(opts, options, &local_err);
769 if (local_err) {
770 error_propagate(errp, local_err);
771 ret = -EINVAL;
772 goto out;
773 }
774
775 filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);
776
777 s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
778 GLUSTER_DEBUG_DEFAULT);
779 if (s->debug < 0) {
780 s->debug = 0;
781 } else if (s->debug > GLUSTER_DEBUG_MAX) {
782 s->debug = GLUSTER_DEBUG_MAX;
783 }
784
785 gconf = g_new0(BlockdevOptionsGluster, 1);
786 gconf->debug = s->debug;
787 gconf->has_debug = true;
788
789 logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE);
790 s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT);
791
792 gconf->logfile = g_strdup(s->logfile);
793 gconf->has_logfile = true;
794
795 s->glfs = qemu_gluster_init(gconf, filename, options, errp);
796 if (!s->glfs) {
797 ret = -errno;
798 goto out;
799 }
800
801#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
802
803
804
805
806
807
808 ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
809 "resync-failed-syncs-after-fsync",
810 "on");
811 if (ret < 0) {
812 error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
813 ret = -errno;
814 goto out;
815 }
816#endif
817
818 qemu_gluster_parse_flags(bdrv_flags, &open_flags);
819
820 s->fd = glfs_open(s->glfs, gconf->path, open_flags);
821 if (!s->fd) {
822 ret = -errno;
823 }
824
825 s->supports_seek_data = qemu_gluster_test_seek(s->fd);
826
827out:
828 qemu_opts_del(opts);
829 qapi_free_BlockdevOptionsGluster(gconf);
830 if (!ret) {
831 return ret;
832 }
833 g_free(s->logfile);
834 if (s->fd) {
835 glfs_close(s->fd);
836 }
837
838 glfs_clear_preopened(s->glfs);
839
840 return ret;
841}
842
843static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
844 BlockReopenQueue *queue, Error **errp)
845{
846 int ret = 0;
847 BDRVGlusterState *s;
848 BDRVGlusterReopenState *reop_s;
849 BlockdevOptionsGluster *gconf;
850 int open_flags = 0;
851
852 assert(state != NULL);
853 assert(state->bs != NULL);
854
855 s = state->bs->opaque;
856
857 state->opaque = g_new0(BDRVGlusterReopenState, 1);
858 reop_s = state->opaque;
859
860 qemu_gluster_parse_flags(state->flags, &open_flags);
861
862 gconf = g_new0(BlockdevOptionsGluster, 1);
863 gconf->debug = s->debug;
864 gconf->has_debug = true;
865 gconf->logfile = g_strdup(s->logfile);
866 gconf->has_logfile = true;
867 reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
868 if (reop_s->glfs == NULL) {
869 ret = -errno;
870 goto exit;
871 }
872
873#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
874 ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
875 "resync-failed-syncs-after-fsync", "on");
876 if (ret < 0) {
877 error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
878 ret = -errno;
879 goto exit;
880 }
881#endif
882
883 reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
884 if (reop_s->fd == NULL) {
885
886 ret = -errno;
887 goto exit;
888 }
889
890exit:
891
892 qapi_free_BlockdevOptionsGluster(gconf);
893 return ret;
894}
895
896static void qemu_gluster_reopen_commit(BDRVReopenState *state)
897{
898 BDRVGlusterReopenState *reop_s = state->opaque;
899 BDRVGlusterState *s = state->bs->opaque;
900
901
902
903 if (s->fd) {
904 glfs_close(s->fd);
905 }
906
907 glfs_clear_preopened(s->glfs);
908
909
910 s->fd = reop_s->fd;
911 s->glfs = reop_s->glfs;
912
913 g_free(state->opaque);
914 state->opaque = NULL;
915
916 return;
917}
918
919
920static void qemu_gluster_reopen_abort(BDRVReopenState *state)
921{
922 BDRVGlusterReopenState *reop_s = state->opaque;
923
924 if (reop_s == NULL) {
925 return;
926 }
927
928 if (reop_s->fd) {
929 glfs_close(reop_s->fd);
930 }
931
932 glfs_clear_preopened(reop_s->glfs);
933
934 g_free(state->opaque);
935 state->opaque = NULL;
936
937 return;
938}
939
940#ifdef CONFIG_GLUSTERFS_ZEROFILL
941static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
942 int64_t offset,
943 int size,
944 BdrvRequestFlags flags)
945{
946 int ret;
947 GlusterAIOCB acb;
948 BDRVGlusterState *s = bs->opaque;
949
950 acb.size = size;
951 acb.ret = 0;
952 acb.coroutine = qemu_coroutine_self();
953 acb.aio_context = bdrv_get_aio_context(bs);
954
955 ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
956 if (ret < 0) {
957 return -errno;
958 }
959
960 qemu_coroutine_yield();
961 return acb.ret;
962}
963#endif
964
965static int qemu_gluster_create(const char *filename,
966 QemuOpts *opts, Error **errp)
967{
968 BlockdevOptionsGluster *gconf;
969 struct glfs *glfs;
970 struct glfs_fd *fd;
971 int ret = 0;
972 PreallocMode prealloc;
973 int64_t total_size = 0;
974 char *tmp = NULL;
975 Error *local_err = NULL;
976
977 gconf = g_new0(BlockdevOptionsGluster, 1);
978 gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
979 GLUSTER_DEBUG_DEFAULT);
980 if (gconf->debug < 0) {
981 gconf->debug = 0;
982 } else if (gconf->debug > GLUSTER_DEBUG_MAX) {
983 gconf->debug = GLUSTER_DEBUG_MAX;
984 }
985 gconf->has_debug = true;
986
987 gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE);
988 if (!gconf->logfile) {
989 gconf->logfile = g_strdup(GLUSTER_LOGFILE_DEFAULT);
990 }
991 gconf->has_logfile = true;
992
993 glfs = qemu_gluster_init(gconf, filename, NULL, errp);
994 if (!glfs) {
995 ret = -errno;
996 goto out;
997 }
998
999 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1000 BDRV_SECTOR_SIZE);
1001
1002 tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
1003 prealloc = qapi_enum_parse(&PreallocMode_lookup, tmp, PREALLOC_MODE_OFF,
1004 &local_err);
1005 g_free(tmp);
1006 if (local_err) {
1007 error_propagate(errp, local_err);
1008 ret = -EINVAL;
1009 goto out;
1010 }
1011
1012 fd = glfs_creat(glfs, gconf->path,
1013 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
1014 if (!fd) {
1015 ret = -errno;
1016 goto out;
1017 }
1018
1019 switch (prealloc) {
1020#ifdef CONFIG_GLUSTERFS_FALLOCATE
1021 case PREALLOC_MODE_FALLOC:
1022 if (glfs_fallocate(fd, 0, 0, total_size)) {
1023 error_setg(errp, "Could not preallocate data for the new file");
1024 ret = -errno;
1025 }
1026 break;
1027#endif
1028#ifdef CONFIG_GLUSTERFS_ZEROFILL
1029 case PREALLOC_MODE_FULL:
1030 if (!glfs_ftruncate(fd, total_size)) {
1031 if (glfs_zerofill(fd, 0, total_size)) {
1032 error_setg(errp, "Could not zerofill the new file");
1033 ret = -errno;
1034 }
1035 } else {
1036 error_setg(errp, "Could not resize file");
1037 ret = -errno;
1038 }
1039 break;
1040#endif
1041 case PREALLOC_MODE_OFF:
1042 if (glfs_ftruncate(fd, total_size) != 0) {
1043 ret = -errno;
1044 error_setg(errp, "Could not resize file");
1045 }
1046 break;
1047 default:
1048 ret = -EINVAL;
1049 error_setg(errp, "Unsupported preallocation mode: %s",
1050 PreallocMode_str(prealloc));
1051 break;
1052 }
1053
1054 if (glfs_close(fd) != 0) {
1055 ret = -errno;
1056 }
1057out:
1058 qapi_free_BlockdevOptionsGluster(gconf);
1059 glfs_clear_preopened(glfs);
1060 return ret;
1061}
1062
1063static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
1064 int64_t sector_num, int nb_sectors,
1065 QEMUIOVector *qiov, int write)
1066{
1067 int ret;
1068 GlusterAIOCB acb;
1069 BDRVGlusterState *s = bs->opaque;
1070 size_t size = nb_sectors * BDRV_SECTOR_SIZE;
1071 off_t offset = sector_num * BDRV_SECTOR_SIZE;
1072
1073 acb.size = size;
1074 acb.ret = 0;
1075 acb.coroutine = qemu_coroutine_self();
1076 acb.aio_context = bdrv_get_aio_context(bs);
1077
1078 if (write) {
1079 ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
1080 gluster_finish_aiocb, &acb);
1081 } else {
1082 ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
1083 gluster_finish_aiocb, &acb);
1084 }
1085
1086 if (ret < 0) {
1087 return -errno;
1088 }
1089
1090 qemu_coroutine_yield();
1091 return acb.ret;
1092}
1093
1094static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
1095 PreallocMode prealloc, Error **errp)
1096{
1097 int ret;
1098 BDRVGlusterState *s = bs->opaque;
1099
1100 if (prealloc != PREALLOC_MODE_OFF) {
1101 error_setg(errp, "Unsupported preallocation mode '%s'",
1102 PreallocMode_str(prealloc));
1103 return -ENOTSUP;
1104 }
1105
1106 ret = glfs_ftruncate(s->fd, offset);
1107 if (ret < 0) {
1108 ret = -errno;
1109 error_setg_errno(errp, -ret, "Failed to truncate file");
1110 return ret;
1111 }
1112
1113 return 0;
1114}
1115
1116static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
1117 int64_t sector_num,
1118 int nb_sectors,
1119 QEMUIOVector *qiov)
1120{
1121 return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
1122}
1123
1124static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
1125 int64_t sector_num,
1126 int nb_sectors,
1127 QEMUIOVector *qiov)
1128{
1129 return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
1130}
1131
1132static void qemu_gluster_close(BlockDriverState *bs)
1133{
1134 BDRVGlusterState *s = bs->opaque;
1135
1136 g_free(s->logfile);
1137 if (s->fd) {
1138 glfs_close(s->fd);
1139 s->fd = NULL;
1140 }
1141 glfs_clear_preopened(s->glfs);
1142}
1143
1144static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
1145{
1146 int ret;
1147 GlusterAIOCB acb;
1148 BDRVGlusterState *s = bs->opaque;
1149
1150 acb.size = 0;
1151 acb.ret = 0;
1152 acb.coroutine = qemu_coroutine_self();
1153 acb.aio_context = bdrv_get_aio_context(bs);
1154
1155 ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
1156 if (ret < 0) {
1157 ret = -errno;
1158 goto error;
1159 }
1160
1161 qemu_coroutine_yield();
1162 if (acb.ret < 0) {
1163 ret = acb.ret;
1164 goto error;
1165 }
1166
1167 return acb.ret;
1168
1169error:
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183 qemu_gluster_close(bs);
1184 bs->drv = NULL;
1185 return ret;
1186}
1187
1188#ifdef CONFIG_GLUSTERFS_DISCARD
1189static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
1190 int64_t offset, int size)
1191{
1192 int ret;
1193 GlusterAIOCB acb;
1194 BDRVGlusterState *s = bs->opaque;
1195
1196 acb.size = 0;
1197 acb.ret = 0;
1198 acb.coroutine = qemu_coroutine_self();
1199 acb.aio_context = bdrv_get_aio_context(bs);
1200
1201 ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
1202 if (ret < 0) {
1203 return -errno;
1204 }
1205
1206 qemu_coroutine_yield();
1207 return acb.ret;
1208}
1209#endif
1210
1211static int64_t qemu_gluster_getlength(BlockDriverState *bs)
1212{
1213 BDRVGlusterState *s = bs->opaque;
1214 int64_t ret;
1215
1216 ret = glfs_lseek(s->fd, 0, SEEK_END);
1217 if (ret < 0) {
1218 return -errno;
1219 } else {
1220 return ret;
1221 }
1222}
1223
1224static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
1225{
1226 BDRVGlusterState *s = bs->opaque;
1227 struct stat st;
1228 int ret;
1229
1230 ret = glfs_fstat(s->fd, &st);
1231 if (ret < 0) {
1232 return -errno;
1233 } else {
1234 return st.st_blocks * 512;
1235 }
1236}
1237
1238static int qemu_gluster_has_zero_init(BlockDriverState *bs)
1239{
1240
1241 return 0;
1242}
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256static int find_allocation(BlockDriverState *bs, off_t start,
1257 off_t *data, off_t *hole)
1258{
1259 BDRVGlusterState *s = bs->opaque;
1260
1261 if (!s->supports_seek_data) {
1262 goto exit;
1263 }
1264
1265#if defined SEEK_HOLE && defined SEEK_DATA
1266 off_t offs;
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279 offs = glfs_lseek(s->fd, start, SEEK_DATA);
1280 if (offs < 0) {
1281 return -errno;
1282 }
1283
1284 if (offs < start) {
1285
1286
1287
1288
1289 return -EIO;
1290 }
1291
1292 if (offs > start) {
1293
1294 *hole = start;
1295 *data = offs;
1296 return 0;
1297 }
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318 offs = glfs_lseek(s->fd, start, SEEK_HOLE);
1319 if (offs < 0) {
1320 return -errno;
1321 }
1322
1323 if (offs < start) {
1324
1325
1326
1327
1328 return -EIO;
1329 }
1330
1331 if (offs > start) {
1332
1333
1334
1335
1336
1337
1338 *data = start;
1339 *hole = offs;
1340 return 0;
1341 }
1342
1343
1344 return -EBUSY;
1345#endif
1346
1347exit:
1348 return -ENOTSUP;
1349}
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366static int64_t coroutine_fn qemu_gluster_co_get_block_status(
1367 BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
1368 BlockDriverState **file)
1369{
1370 BDRVGlusterState *s = bs->opaque;
1371 off_t start, data = 0, hole = 0;
1372 int64_t total_size;
1373 int ret = -EINVAL;
1374
1375 if (!s->fd) {
1376 return ret;
1377 }
1378
1379 start = sector_num * BDRV_SECTOR_SIZE;
1380 total_size = bdrv_getlength(bs);
1381 if (total_size < 0) {
1382 return total_size;
1383 } else if (start >= total_size) {
1384 *pnum = 0;
1385 return 0;
1386 } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
1387 nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
1388 }
1389
1390 ret = find_allocation(bs, start, &data, &hole);
1391 if (ret == -ENXIO) {
1392
1393 *pnum = nb_sectors;
1394 ret = BDRV_BLOCK_ZERO;
1395 } else if (ret < 0) {
1396
1397 *pnum = nb_sectors;
1398 ret = BDRV_BLOCK_DATA;
1399 } else if (data == start) {
1400
1401
1402 *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
1403 ret = BDRV_BLOCK_DATA;
1404 } else {
1405
1406 assert(hole == start);
1407 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1408 ret = BDRV_BLOCK_ZERO;
1409 }
1410
1411 *file = bs;
1412
1413 return ret | BDRV_BLOCK_OFFSET_VALID | start;
1414}
1415
1416
1417static BlockDriver bdrv_gluster = {
1418 .format_name = "gluster",
1419 .protocol_name = "gluster",
1420 .instance_size = sizeof(BDRVGlusterState),
1421 .bdrv_needs_filename = false,
1422 .bdrv_file_open = qemu_gluster_open,
1423 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1424 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1425 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1426 .bdrv_close = qemu_gluster_close,
1427 .bdrv_create = qemu_gluster_create,
1428 .bdrv_getlength = qemu_gluster_getlength,
1429 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1430 .bdrv_truncate = qemu_gluster_truncate,
1431 .bdrv_co_readv = qemu_gluster_co_readv,
1432 .bdrv_co_writev = qemu_gluster_co_writev,
1433 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1434 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1435#ifdef CONFIG_GLUSTERFS_DISCARD
1436 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1437#endif
1438#ifdef CONFIG_GLUSTERFS_ZEROFILL
1439 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1440#endif
1441 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1442 .create_opts = &qemu_gluster_create_opts,
1443};
1444
1445static BlockDriver bdrv_gluster_tcp = {
1446 .format_name = "gluster",
1447 .protocol_name = "gluster+tcp",
1448 .instance_size = sizeof(BDRVGlusterState),
1449 .bdrv_needs_filename = false,
1450 .bdrv_file_open = qemu_gluster_open,
1451 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1452 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1453 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1454 .bdrv_close = qemu_gluster_close,
1455 .bdrv_create = qemu_gluster_create,
1456 .bdrv_getlength = qemu_gluster_getlength,
1457 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1458 .bdrv_truncate = qemu_gluster_truncate,
1459 .bdrv_co_readv = qemu_gluster_co_readv,
1460 .bdrv_co_writev = qemu_gluster_co_writev,
1461 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1462 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1463#ifdef CONFIG_GLUSTERFS_DISCARD
1464 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1465#endif
1466#ifdef CONFIG_GLUSTERFS_ZEROFILL
1467 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1468#endif
1469 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1470 .create_opts = &qemu_gluster_create_opts,
1471};
1472
1473static BlockDriver bdrv_gluster_unix = {
1474 .format_name = "gluster",
1475 .protocol_name = "gluster+unix",
1476 .instance_size = sizeof(BDRVGlusterState),
1477 .bdrv_needs_filename = true,
1478 .bdrv_file_open = qemu_gluster_open,
1479 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1480 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1481 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1482 .bdrv_close = qemu_gluster_close,
1483 .bdrv_create = qemu_gluster_create,
1484 .bdrv_getlength = qemu_gluster_getlength,
1485 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1486 .bdrv_truncate = qemu_gluster_truncate,
1487 .bdrv_co_readv = qemu_gluster_co_readv,
1488 .bdrv_co_writev = qemu_gluster_co_writev,
1489 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1490 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1491#ifdef CONFIG_GLUSTERFS_DISCARD
1492 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1493#endif
1494#ifdef CONFIG_GLUSTERFS_ZEROFILL
1495 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1496#endif
1497 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1498 .create_opts = &qemu_gluster_create_opts,
1499};
1500
1501
1502
1503
1504
1505
1506
1507static BlockDriver bdrv_gluster_rdma = {
1508 .format_name = "gluster",
1509 .protocol_name = "gluster+rdma",
1510 .instance_size = sizeof(BDRVGlusterState),
1511 .bdrv_needs_filename = true,
1512 .bdrv_file_open = qemu_gluster_open,
1513 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1514 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1515 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1516 .bdrv_close = qemu_gluster_close,
1517 .bdrv_create = qemu_gluster_create,
1518 .bdrv_getlength = qemu_gluster_getlength,
1519 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1520 .bdrv_truncate = qemu_gluster_truncate,
1521 .bdrv_co_readv = qemu_gluster_co_readv,
1522 .bdrv_co_writev = qemu_gluster_co_writev,
1523 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1524 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1525#ifdef CONFIG_GLUSTERFS_DISCARD
1526 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1527#endif
1528#ifdef CONFIG_GLUSTERFS_ZEROFILL
1529 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1530#endif
1531 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1532 .create_opts = &qemu_gluster_create_opts,
1533};
1534
1535static void bdrv_gluster_init(void)
1536{
1537 bdrv_register(&bdrv_gluster_rdma);
1538 bdrv_register(&bdrv_gluster_unix);
1539 bdrv_register(&bdrv_gluster_tcp);
1540 bdrv_register(&bdrv_gluster);
1541}
1542
1543block_init(bdrv_gluster_init);
1544