1
2
3
4
5
6
7
8
9
10#include "qemu/osdep.h"
11#include <glusterfs/api/glfs.h>
12#include "block/block_int.h"
13#include "qapi/error.h"
14#include "qapi/qmp/qerror.h"
15#include "qapi/util.h"
16#include "qemu/uri.h"
17#include "qemu/error-report.h"
18#include "qemu/cutils.h"
19
/* Option keys understood by the driver (URI form, JSON form, per-server). */
#define GLUSTER_OPT_FILENAME        "filename"
#define GLUSTER_OPT_VOLUME          "volume"
#define GLUSTER_OPT_PATH            "path"
#define GLUSTER_OPT_TYPE            "type"
#define GLUSTER_OPT_SERVER_PATTERN  "server."
#define GLUSTER_OPT_HOST            "host"
#define GLUSTER_OPT_PORT            "port"
#define GLUSTER_OPT_TO              "to"
#define GLUSTER_OPT_IPV4            "ipv4"
#define GLUSTER_OPT_IPV6            "ipv6"
#define GLUSTER_OPT_SOCKET          "socket"
#define GLUSTER_OPT_DEBUG           "debug"
#define GLUSTER_OPT_LOGFILE         "logfile"

/* Defaults and limits applied when an option is absent or out of range. */
#define GLUSTER_DEFAULT_PORT        24007
#define GLUSTER_DEBUG_DEFAULT       4
#define GLUSTER_DEBUG_MAX           9
#define GLUSTER_LOGFILE_DEFAULT     "-"

/* Hint appended to JSON parse errors; takes the 'server' array index. */
#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
39
40typedef struct GlusterAIOCB {
41 int64_t size;
42 int ret;
43 Coroutine *coroutine;
44 AioContext *aio_context;
45} GlusterAIOCB;
46
/* Per-BlockDriverState driver state (bs->opaque). */
typedef struct BDRVGlusterState {
    struct glfs *glfs;          /* volume connection, shared via glfs_list */
    struct glfs_fd *fd;         /* open image file, NULL when not open */
    char *logfile;              /* libgfapi log file path, owned by the state */
    bool supports_seek_data;    /* result of qemu_gluster_test_seek() */
    int debug;                  /* log level, clamped to 0..GLUSTER_DEBUG_MAX */
} BDRVGlusterState;
54
/*
 * Connection and file handle opened by reopen_prepare; adopted by
 * reopen_commit or released by reopen_abort.
 */
typedef struct BDRVGlusterReopenState {
    struct glfs *glfs;          /* connection obtained for the reopen */
    struct glfs_fd *fd;         /* image file opened with the new flags */
} BDRVGlusterReopenState;
59
60
61typedef struct GlfsPreopened {
62 char *volume;
63 glfs_t *fs;
64 int ref;
65} GlfsPreopened;
66
67typedef struct ListElement {
68 QLIST_ENTRY(ListElement) list;
69 GlfsPreopened saved;
70} ListElement;
71
72static QLIST_HEAD(glfs_list, ListElement) glfs_list;
73
74static QemuOptsList qemu_gluster_create_opts = {
75 .name = "qemu-gluster-create-opts",
76 .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
77 .desc = {
78 {
79 .name = BLOCK_OPT_SIZE,
80 .type = QEMU_OPT_SIZE,
81 .help = "Virtual disk size"
82 },
83 {
84 .name = BLOCK_OPT_PREALLOC,
85 .type = QEMU_OPT_STRING,
86 .help = "Preallocation mode (allowed values: off, full)"
87 },
88 {
89 .name = GLUSTER_OPT_DEBUG,
90 .type = QEMU_OPT_NUMBER,
91 .help = "Gluster log level, valid range is 0-9",
92 },
93 {
94 .name = GLUSTER_OPT_LOGFILE,
95 .type = QEMU_OPT_STRING,
96 .help = "Logfile path of libgfapi",
97 },
98 { }
99 }
100};
101
102static QemuOptsList runtime_opts = {
103 .name = "gluster",
104 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
105 .desc = {
106 {
107 .name = GLUSTER_OPT_FILENAME,
108 .type = QEMU_OPT_STRING,
109 .help = "URL to the gluster image",
110 },
111 {
112 .name = GLUSTER_OPT_DEBUG,
113 .type = QEMU_OPT_NUMBER,
114 .help = "Gluster log level, valid range is 0-9",
115 },
116 {
117 .name = GLUSTER_OPT_LOGFILE,
118 .type = QEMU_OPT_STRING,
119 .help = "Logfile path of libgfapi",
120 },
121 { }
122 },
123};
124
125static QemuOptsList runtime_json_opts = {
126 .name = "gluster_json",
127 .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
128 .desc = {
129 {
130 .name = GLUSTER_OPT_VOLUME,
131 .type = QEMU_OPT_STRING,
132 .help = "name of gluster volume where VM image resides",
133 },
134 {
135 .name = GLUSTER_OPT_PATH,
136 .type = QEMU_OPT_STRING,
137 .help = "absolute path to image file in gluster volume",
138 },
139 {
140 .name = GLUSTER_OPT_DEBUG,
141 .type = QEMU_OPT_NUMBER,
142 .help = "Gluster log level, valid range is 0-9",
143 },
144 { }
145 },
146};
147
148static QemuOptsList runtime_type_opts = {
149 .name = "gluster_type",
150 .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
151 .desc = {
152 {
153 .name = GLUSTER_OPT_TYPE,
154 .type = QEMU_OPT_STRING,
155 .help = "inet|unix",
156 },
157 { }
158 },
159};
160
161static QemuOptsList runtime_unix_opts = {
162 .name = "gluster_unix",
163 .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
164 .desc = {
165 {
166 .name = GLUSTER_OPT_SOCKET,
167 .type = QEMU_OPT_STRING,
168 .help = "socket file path)",
169 },
170 { }
171 },
172};
173
174static QemuOptsList runtime_inet_opts = {
175 .name = "gluster_inet",
176 .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
177 .desc = {
178 {
179 .name = GLUSTER_OPT_TYPE,
180 .type = QEMU_OPT_STRING,
181 .help = "inet|unix",
182 },
183 {
184 .name = GLUSTER_OPT_HOST,
185 .type = QEMU_OPT_STRING,
186 .help = "host address (hostname/ipv4/ipv6 addresses)",
187 },
188 {
189 .name = GLUSTER_OPT_PORT,
190 .type = QEMU_OPT_STRING,
191 .help = "port number on which glusterd is listening (default 24007)",
192 },
193 {
194 .name = "to",
195 .type = QEMU_OPT_NUMBER,
196 .help = "max port number, not supported by gluster",
197 },
198 {
199 .name = "ipv4",
200 .type = QEMU_OPT_BOOL,
201 .help = "ipv4 bool value, not supported by gluster",
202 },
203 {
204 .name = "ipv6",
205 .type = QEMU_OPT_BOOL,
206 .help = "ipv6 bool value, not supported by gluster",
207 },
208 { }
209 },
210};
211
212static void glfs_set_preopened(const char *volume, glfs_t *fs)
213{
214 ListElement *entry = NULL;
215
216 entry = g_new(ListElement, 1);
217
218 entry->saved.volume = g_strdup(volume);
219
220 entry->saved.fs = fs;
221 entry->saved.ref = 1;
222
223 QLIST_INSERT_HEAD(&glfs_list, entry, list);
224}
225
226static glfs_t *glfs_find_preopened(const char *volume)
227{
228 ListElement *entry = NULL;
229
230 QLIST_FOREACH(entry, &glfs_list, list) {
231 if (strcmp(entry->saved.volume, volume) == 0) {
232 entry->saved.ref++;
233 return entry->saved.fs;
234 }
235 }
236
237 return NULL;
238}
239
240static void glfs_clear_preopened(glfs_t *fs)
241{
242 ListElement *entry = NULL;
243 ListElement *next;
244
245 if (fs == NULL) {
246 return;
247 }
248
249 QLIST_FOREACH_SAFE(entry, &glfs_list, list, next) {
250 if (entry->saved.fs == fs) {
251 if (--entry->saved.ref) {
252 return;
253 }
254
255 QLIST_REMOVE(entry, list);
256
257 glfs_fini(entry->saved.fs);
258 g_free(entry->saved.volume);
259 g_free(entry);
260 }
261 }
262}
263
264static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
265{
266 char *p, *q;
267
268 if (!path) {
269 return -EINVAL;
270 }
271
272
273 p = q = path + strspn(path, "/");
274 p += strcspn(p, "/");
275 if (*p == '\0') {
276 return -EINVAL;
277 }
278 gconf->volume = g_strndup(q, p - q);
279
280
281 p += strspn(p, "/");
282 if (*p == '\0') {
283 return -EINVAL;
284 }
285 gconf->path = g_strdup(p);
286 return 0;
287}
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
322 const char *filename)
323{
324 SocketAddress *gsconf;
325 URI *uri;
326 QueryParams *qp = NULL;
327 bool is_unix = false;
328 int ret = 0;
329
330 uri = uri_parse(filename);
331 if (!uri) {
332 return -EINVAL;
333 }
334
335 gconf->server = g_new0(SocketAddressList, 1);
336 gconf->server->value = gsconf = g_new0(SocketAddress, 1);
337
338
339 if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
340 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
341 } else if (!strcmp(uri->scheme, "gluster+tcp")) {
342 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
343 } else if (!strcmp(uri->scheme, "gluster+unix")) {
344 gsconf->type = SOCKET_ADDRESS_TYPE_UNIX;
345 is_unix = true;
346 } else if (!strcmp(uri->scheme, "gluster+rdma")) {
347 gsconf->type = SOCKET_ADDRESS_TYPE_INET;
348 warn_report("rdma feature is not supported, falling back to tcp");
349 } else {
350 ret = -EINVAL;
351 goto out;
352 }
353
354 ret = parse_volume_options(gconf, uri->path);
355 if (ret < 0) {
356 goto out;
357 }
358
359 qp = query_params_parse(uri->query);
360 if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
361 ret = -EINVAL;
362 goto out;
363 }
364
365 if (is_unix) {
366 if (uri->server || uri->port) {
367 ret = -EINVAL;
368 goto out;
369 }
370 if (strcmp(qp->p[0].name, "socket")) {
371 ret = -EINVAL;
372 goto out;
373 }
374 gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
375 } else {
376 gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
377 if (uri->port) {
378 gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
379 } else {
380 gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
381 }
382 }
383
384out:
385 if (qp) {
386 query_params_free(qp);
387 }
388 uri_free(uri);
389 return ret;
390}
391
392static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
393 Error **errp)
394{
395 struct glfs *glfs;
396 int ret;
397 int old_errno;
398 SocketAddressList *server;
399 unsigned long long port;
400
401 glfs = glfs_find_preopened(gconf->volume);
402 if (glfs) {
403 return glfs;
404 }
405
406 glfs = glfs_new(gconf->volume);
407 if (!glfs) {
408 goto out;
409 }
410
411 glfs_set_preopened(gconf->volume, glfs);
412
413 for (server = gconf->server; server; server = server->next) {
414 switch (server->value->type) {
415 case SOCKET_ADDRESS_TYPE_UNIX:
416 ret = glfs_set_volfile_server(glfs, "unix",
417 server->value->u.q_unix.path, 0);
418 break;
419 case SOCKET_ADDRESS_TYPE_INET:
420 if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
421 port > 65535) {
422 error_setg(errp, "'%s' is not a valid port number",
423 server->value->u.inet.port);
424 errno = EINVAL;
425 goto out;
426 }
427 ret = glfs_set_volfile_server(glfs, "tcp",
428 server->value->u.inet.host,
429 (int)port);
430 break;
431 case SOCKET_ADDRESS_TYPE_VSOCK:
432 case SOCKET_ADDRESS_TYPE_FD:
433 default:
434 abort();
435 }
436
437 if (ret < 0) {
438 goto out;
439 }
440 }
441
442 ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug);
443 if (ret < 0) {
444 goto out;
445 }
446
447 ret = glfs_init(glfs);
448 if (ret) {
449 error_setg(errp, "Gluster connection for volume %s, path %s failed"
450 " to connect", gconf->volume, gconf->path);
451 for (server = gconf->server; server; server = server->next) {
452 if (server->value->type == SOCKET_ADDRESS_TYPE_UNIX) {
453 error_append_hint(errp, "hint: failed on socket %s ",
454 server->value->u.q_unix.path);
455 } else {
456 error_append_hint(errp, "hint: failed on host %s and port %s ",
457 server->value->u.inet.host,
458 server->value->u.inet.port);
459 }
460 }
461
462 error_append_hint(errp, "Please refer to gluster logs for more info\n");
463
464
465 if (errno == 0) {
466 errno = EINVAL;
467 }
468
469 goto out;
470 }
471 return glfs;
472
473out:
474 if (glfs) {
475 old_errno = errno;
476 glfs_clear_preopened(glfs);
477 errno = old_errno;
478 }
479 return NULL;
480}
481
482
483
484
485static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
486 QDict *options, Error **errp)
487{
488 QemuOpts *opts;
489 SocketAddress *gsconf = NULL;
490 SocketAddressList *curr = NULL;
491 QDict *backing_options = NULL;
492 Error *local_err = NULL;
493 char *str = NULL;
494 const char *ptr;
495 int i, type, num_servers;
496
497
498 opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
499 qemu_opts_absorb_qdict(opts, options, &local_err);
500 if (local_err) {
501 goto out;
502 }
503
504 num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
505 if (num_servers < 1) {
506 error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
507 goto out;
508 }
509
510 ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
511 if (!ptr) {
512 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
513 goto out;
514 }
515 gconf->volume = g_strdup(ptr);
516
517 ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
518 if (!ptr) {
519 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
520 goto out;
521 }
522 gconf->path = g_strdup(ptr);
523 qemu_opts_del(opts);
524
525 for (i = 0; i < num_servers; i++) {
526 str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
527 qdict_extract_subqdict(options, &backing_options, str);
528
529
530 opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
531 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
532 if (local_err) {
533 goto out;
534 }
535
536 ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
537 if (!ptr) {
538 error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
539 error_append_hint(&local_err, GERR_INDEX_HINT, i);
540 goto out;
541
542 }
543 gsconf = g_new0(SocketAddress, 1);
544 if (!strcmp(ptr, "tcp")) {
545 ptr = "inet";
546 }
547 type = qapi_enum_parse(SocketAddressType_lookup, ptr,
548 SOCKET_ADDRESS_TYPE__MAX, -1, NULL);
549 if (type != SOCKET_ADDRESS_TYPE_INET
550 && type != SOCKET_ADDRESS_TYPE_UNIX) {
551 error_setg(&local_err,
552 "Parameter '%s' may be 'inet' or 'unix'",
553 GLUSTER_OPT_TYPE);
554 error_append_hint(&local_err, GERR_INDEX_HINT, i);
555 goto out;
556 }
557 gsconf->type = type;
558 qemu_opts_del(opts);
559
560 if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) {
561
562 opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
563 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
564 if (local_err) {
565 goto out;
566 }
567
568 ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
569 if (!ptr) {
570 error_setg(&local_err, QERR_MISSING_PARAMETER,
571 GLUSTER_OPT_HOST);
572 error_append_hint(&local_err, GERR_INDEX_HINT, i);
573 goto out;
574 }
575 gsconf->u.inet.host = g_strdup(ptr);
576 ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
577 if (!ptr) {
578 error_setg(&local_err, QERR_MISSING_PARAMETER,
579 GLUSTER_OPT_PORT);
580 error_append_hint(&local_err, GERR_INDEX_HINT, i);
581 goto out;
582 }
583 gsconf->u.inet.port = g_strdup(ptr);
584
585
586
587
588 ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
589 if (ptr) {
590 gsconf->u.inet.has_to = true;
591 }
592 ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
593 if (ptr) {
594 gsconf->u.inet.has_ipv4 = true;
595 }
596 ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
597 if (ptr) {
598 gsconf->u.inet.has_ipv6 = true;
599 }
600 if (gsconf->u.inet.has_to) {
601 error_setg(&local_err, "Parameter 'to' not supported");
602 goto out;
603 }
604 if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
605 error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
606 goto out;
607 }
608 qemu_opts_del(opts);
609 } else {
610
611 opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
612 qemu_opts_absorb_qdict(opts, backing_options, &local_err);
613 if (local_err) {
614 goto out;
615 }
616
617 ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
618 if (!ptr) {
619 error_setg(&local_err, QERR_MISSING_PARAMETER,
620 GLUSTER_OPT_SOCKET);
621 error_append_hint(&local_err, GERR_INDEX_HINT, i);
622 goto out;
623 }
624 gsconf->u.q_unix.path = g_strdup(ptr);
625 qemu_opts_del(opts);
626 }
627
628 if (gconf->server == NULL) {
629 gconf->server = g_new0(SocketAddressList, 1);
630 gconf->server->value = gsconf;
631 curr = gconf->server;
632 } else {
633 curr->next = g_new0(SocketAddressList, 1);
634 curr->next->value = gsconf;
635 curr = curr->next;
636 }
637 gsconf = NULL;
638
639 QDECREF(backing_options);
640 backing_options = NULL;
641 g_free(str);
642 str = NULL;
643 }
644
645 return 0;
646
647out:
648 error_propagate(errp, local_err);
649 qapi_free_SocketAddress(gsconf);
650 qemu_opts_del(opts);
651 g_free(str);
652 QDECREF(backing_options);
653 errno = EINVAL;
654 return -errno;
655}
656
657static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
658 const char *filename,
659 QDict *options, Error **errp)
660{
661 int ret;
662 if (filename) {
663 ret = qemu_gluster_parse_uri(gconf, filename);
664 if (ret < 0) {
665 error_setg(errp, "invalid URI");
666 error_append_hint(errp, "Usage: file=gluster[+transport]://"
667 "[host[:port]]volume/path[?socket=...]"
668 "[,file.debug=N]"
669 "[,file.logfile=/path/filename.log]\n");
670 errno = -ret;
671 return NULL;
672 }
673 } else {
674 ret = qemu_gluster_parse_json(gconf, options, errp);
675 if (ret < 0) {
676 error_append_hint(errp, "Usage: "
677 "-drive driver=qcow2,file.driver=gluster,"
678 "file.volume=testvol,file.path=/path/a.qcow2"
679 "[,file.debug=9]"
680 "[,file.logfile=/path/filename.log],"
681 "file.server.0.type=inet,"
682 "file.server.0.host=1.2.3.4,"
683 "file.server.0.port=24007,"
684 "file.server.1.transport=unix,"
685 "file.server.1.socket=/var/run/glusterd.socket ..."
686 "\n");
687 errno = -ret;
688 return NULL;
689 }
690
691 }
692
693 return qemu_gluster_glfs_init(gconf, errp);
694}
695
696
697
698
699static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
700{
701 GlusterAIOCB *acb = (GlusterAIOCB *)arg;
702
703 if (!ret || ret == acb->size) {
704 acb->ret = 0;
705 } else if (ret < 0) {
706 acb->ret = -errno;
707 } else {
708 acb->ret = -EIO;
709 }
710
711 aio_co_schedule(acb->aio_context, acb->coroutine);
712}
713
714static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
715{
716 assert(open_flags != NULL);
717
718 *open_flags |= O_BINARY;
719
720 if (bdrv_flags & BDRV_O_RDWR) {
721 *open_flags |= O_RDWR;
722 } else {
723 *open_flags |= O_RDONLY;
724 }
725
726 if ((bdrv_flags & BDRV_O_NOCACHE)) {
727 *open_flags |= O_DIRECT;
728 }
729}
730
731
732
733
734
735
736
737
/*
 * Probe whether SEEK_DATA works on @fd.
 *
 * The probe seeks for data starting at end-of-file and reports support only
 * if that fails with ENXIO.  Without SEEK_HOLE/SEEK_DATA at build time, or
 * if the file size cannot be determined, the answer is false.
 */
static bool qemu_gluster_test_seek(struct glfs_fd *fd)
{
#if defined SEEK_HOLE && defined SEEK_DATA
    off_t end;
    off_t probe;

    end = glfs_lseek(fd, 0, SEEK_END);
    if (end < 0) {
        return false;
    }

    /* there is no data at EOF, so a working SEEK_DATA must fail here */
    probe = glfs_lseek(fd, end, SEEK_DATA);
    return probe < 0 && errno == ENXIO;
#else
    return false;
#endif
}
757
758static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
759 int bdrv_flags, Error **errp)
760{
761 BDRVGlusterState *s = bs->opaque;
762 int open_flags = 0;
763 int ret = 0;
764 BlockdevOptionsGluster *gconf = NULL;
765 QemuOpts *opts;
766 Error *local_err = NULL;
767 const char *filename, *logfile;
768
769 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
770 qemu_opts_absorb_qdict(opts, options, &local_err);
771 if (local_err) {
772 error_propagate(errp, local_err);
773 ret = -EINVAL;
774 goto out;
775 }
776
777 filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);
778
779 s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
780 GLUSTER_DEBUG_DEFAULT);
781 if (s->debug < 0) {
782 s->debug = 0;
783 } else if (s->debug > GLUSTER_DEBUG_MAX) {
784 s->debug = GLUSTER_DEBUG_MAX;
785 }
786
787 gconf = g_new0(BlockdevOptionsGluster, 1);
788 gconf->debug = s->debug;
789 gconf->has_debug = true;
790
791 logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE);
792 s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT);
793
794 gconf->logfile = g_strdup(s->logfile);
795 gconf->has_logfile = true;
796
797 s->glfs = qemu_gluster_init(gconf, filename, options, errp);
798 if (!s->glfs) {
799 ret = -errno;
800 goto out;
801 }
802
803#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
804
805
806
807
808
809
810 ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
811 "resync-failed-syncs-after-fsync",
812 "on");
813 if (ret < 0) {
814 error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
815 ret = -errno;
816 goto out;
817 }
818#endif
819
820 qemu_gluster_parse_flags(bdrv_flags, &open_flags);
821
822 s->fd = glfs_open(s->glfs, gconf->path, open_flags);
823 if (!s->fd) {
824 ret = -errno;
825 }
826
827 s->supports_seek_data = qemu_gluster_test_seek(s->fd);
828
829out:
830 qemu_opts_del(opts);
831 qapi_free_BlockdevOptionsGluster(gconf);
832 if (!ret) {
833 return ret;
834 }
835 g_free(s->logfile);
836 if (s->fd) {
837 glfs_close(s->fd);
838 }
839
840 glfs_clear_preopened(s->glfs);
841
842 return ret;
843}
844
845static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
846 BlockReopenQueue *queue, Error **errp)
847{
848 int ret = 0;
849 BDRVGlusterState *s;
850 BDRVGlusterReopenState *reop_s;
851 BlockdevOptionsGluster *gconf;
852 int open_flags = 0;
853
854 assert(state != NULL);
855 assert(state->bs != NULL);
856
857 s = state->bs->opaque;
858
859 state->opaque = g_new0(BDRVGlusterReopenState, 1);
860 reop_s = state->opaque;
861
862 qemu_gluster_parse_flags(state->flags, &open_flags);
863
864 gconf = g_new0(BlockdevOptionsGluster, 1);
865 gconf->debug = s->debug;
866 gconf->has_debug = true;
867 gconf->logfile = g_strdup(s->logfile);
868 gconf->has_logfile = true;
869 reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
870 if (reop_s->glfs == NULL) {
871 ret = -errno;
872 goto exit;
873 }
874
875#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
876 ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
877 "resync-failed-syncs-after-fsync", "on");
878 if (ret < 0) {
879 error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
880 ret = -errno;
881 goto exit;
882 }
883#endif
884
885 reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
886 if (reop_s->fd == NULL) {
887
888 ret = -errno;
889 goto exit;
890 }
891
892exit:
893
894 qapi_free_BlockdevOptionsGluster(gconf);
895 return ret;
896}
897
898static void qemu_gluster_reopen_commit(BDRVReopenState *state)
899{
900 BDRVGlusterReopenState *reop_s = state->opaque;
901 BDRVGlusterState *s = state->bs->opaque;
902
903
904
905 if (s->fd) {
906 glfs_close(s->fd);
907 }
908
909 glfs_clear_preopened(s->glfs);
910
911
912 s->fd = reop_s->fd;
913 s->glfs = reop_s->glfs;
914
915 g_free(state->opaque);
916 state->opaque = NULL;
917
918 return;
919}
920
921
922static void qemu_gluster_reopen_abort(BDRVReopenState *state)
923{
924 BDRVGlusterReopenState *reop_s = state->opaque;
925
926 if (reop_s == NULL) {
927 return;
928 }
929
930 if (reop_s->fd) {
931 glfs_close(reop_s->fd);
932 }
933
934 glfs_clear_preopened(reop_s->glfs);
935
936 g_free(state->opaque);
937 state->opaque = NULL;
938
939 return;
940}
941
#ifdef CONFIG_GLUSTERFS_ZEROFILL
/*
 * Zero @size bytes starting at @offset using glfs_zerofill_async() and wait
 * for completion.  @flags is not used.
 *
 * Returns 0 on success or a negative errno.
 */
static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
                                                      int64_t offset,
                                                      int size,
                                                      BdrvRequestFlags flags)
{
    BDRVGlusterState *s = bs->opaque;
    GlusterAIOCB acb = {
        .size = size,
        .ret = 0,
        .coroutine = qemu_coroutine_self(),
        .aio_context = bdrv_get_aio_context(bs),
    };

    if (glfs_zerofill_async(s->fd, offset, size,
                            gluster_finish_aiocb, &acb) < 0) {
        return -errno;
    }

    /* resumed by gluster_finish_aiocb() once the request has completed */
    qemu_coroutine_yield();
    return acb.ret;
}
#endif
966
967static int qemu_gluster_create(const char *filename,
968 QemuOpts *opts, Error **errp)
969{
970 BlockdevOptionsGluster *gconf;
971 struct glfs *glfs;
972 struct glfs_fd *fd;
973 int ret = 0;
974 PreallocMode prealloc;
975 int64_t total_size = 0;
976 char *tmp = NULL;
977 Error *local_err = NULL;
978
979 gconf = g_new0(BlockdevOptionsGluster, 1);
980 gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
981 GLUSTER_DEBUG_DEFAULT);
982 if (gconf->debug < 0) {
983 gconf->debug = 0;
984 } else if (gconf->debug > GLUSTER_DEBUG_MAX) {
985 gconf->debug = GLUSTER_DEBUG_MAX;
986 }
987 gconf->has_debug = true;
988
989 gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE);
990 if (!gconf->logfile) {
991 gconf->logfile = g_strdup(GLUSTER_LOGFILE_DEFAULT);
992 }
993 gconf->has_logfile = true;
994
995 glfs = qemu_gluster_init(gconf, filename, NULL, errp);
996 if (!glfs) {
997 ret = -errno;
998 goto out;
999 }
1000
1001 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1002 BDRV_SECTOR_SIZE);
1003
1004 tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
1005 prealloc = qapi_enum_parse(PreallocMode_lookup, tmp,
1006 PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
1007 &local_err);
1008 g_free(tmp);
1009 if (local_err) {
1010 error_propagate(errp, local_err);
1011 ret = -EINVAL;
1012 goto out;
1013 }
1014
1015 fd = glfs_creat(glfs, gconf->path,
1016 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
1017 if (!fd) {
1018 ret = -errno;
1019 goto out;
1020 }
1021
1022 switch (prealloc) {
1023#ifdef CONFIG_GLUSTERFS_FALLOCATE
1024 case PREALLOC_MODE_FALLOC:
1025 if (glfs_fallocate(fd, 0, 0, total_size)) {
1026 error_setg(errp, "Could not preallocate data for the new file");
1027 ret = -errno;
1028 }
1029 break;
1030#endif
1031#ifdef CONFIG_GLUSTERFS_ZEROFILL
1032 case PREALLOC_MODE_FULL:
1033 if (!glfs_ftruncate(fd, total_size)) {
1034 if (glfs_zerofill(fd, 0, total_size)) {
1035 error_setg(errp, "Could not zerofill the new file");
1036 ret = -errno;
1037 }
1038 } else {
1039 error_setg(errp, "Could not resize file");
1040 ret = -errno;
1041 }
1042 break;
1043#endif
1044 case PREALLOC_MODE_OFF:
1045 if (glfs_ftruncate(fd, total_size) != 0) {
1046 ret = -errno;
1047 error_setg(errp, "Could not resize file");
1048 }
1049 break;
1050 default:
1051 ret = -EINVAL;
1052 error_setg(errp, "Unsupported preallocation mode: %s",
1053 PreallocMode_lookup[prealloc]);
1054 break;
1055 }
1056
1057 if (glfs_close(fd) != 0) {
1058 ret = -errno;
1059 }
1060out:
1061 qapi_free_BlockdevOptionsGluster(gconf);
1062 glfs_clear_preopened(glfs);
1063 return ret;
1064}
1065
1066static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
1067 int64_t sector_num, int nb_sectors,
1068 QEMUIOVector *qiov, int write)
1069{
1070 int ret;
1071 GlusterAIOCB acb;
1072 BDRVGlusterState *s = bs->opaque;
1073 size_t size = nb_sectors * BDRV_SECTOR_SIZE;
1074 off_t offset = sector_num * BDRV_SECTOR_SIZE;
1075
1076 acb.size = size;
1077 acb.ret = 0;
1078 acb.coroutine = qemu_coroutine_self();
1079 acb.aio_context = bdrv_get_aio_context(bs);
1080
1081 if (write) {
1082 ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
1083 gluster_finish_aiocb, &acb);
1084 } else {
1085 ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
1086 gluster_finish_aiocb, &acb);
1087 }
1088
1089 if (ret < 0) {
1090 return -errno;
1091 }
1092
1093 qemu_coroutine_yield();
1094 return acb.ret;
1095}
1096
1097static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
1098 PreallocMode prealloc, Error **errp)
1099{
1100 int ret;
1101 BDRVGlusterState *s = bs->opaque;
1102
1103 if (prealloc != PREALLOC_MODE_OFF) {
1104 error_setg(errp, "Unsupported preallocation mode '%s'",
1105 PreallocMode_lookup[prealloc]);
1106 return -ENOTSUP;
1107 }
1108
1109 ret = glfs_ftruncate(s->fd, offset);
1110 if (ret < 0) {
1111 ret = -errno;
1112 error_setg_errno(errp, -ret, "Failed to truncate file");
1113 return ret;
1114 }
1115
1116 return 0;
1117}
1118
1119static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
1120 int64_t sector_num,
1121 int nb_sectors,
1122 QEMUIOVector *qiov)
1123{
1124 return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
1125}
1126
1127static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
1128 int64_t sector_num,
1129 int nb_sectors,
1130 QEMUIOVector *qiov)
1131{
1132 return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
1133}
1134
1135static void qemu_gluster_close(BlockDriverState *bs)
1136{
1137 BDRVGlusterState *s = bs->opaque;
1138
1139 g_free(s->logfile);
1140 if (s->fd) {
1141 glfs_close(s->fd);
1142 s->fd = NULL;
1143 }
1144 glfs_clear_preopened(s->glfs);
1145}
1146
1147static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
1148{
1149 int ret;
1150 GlusterAIOCB acb;
1151 BDRVGlusterState *s = bs->opaque;
1152
1153 acb.size = 0;
1154 acb.ret = 0;
1155 acb.coroutine = qemu_coroutine_self();
1156 acb.aio_context = bdrv_get_aio_context(bs);
1157
1158 ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
1159 if (ret < 0) {
1160 ret = -errno;
1161 goto error;
1162 }
1163
1164 qemu_coroutine_yield();
1165 if (acb.ret < 0) {
1166 ret = acb.ret;
1167 goto error;
1168 }
1169
1170 return acb.ret;
1171
1172error:
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186 qemu_gluster_close(bs);
1187 bs->drv = NULL;
1188 return ret;
1189}
1190
#ifdef CONFIG_GLUSTERFS_DISCARD
/*
 * Discard @size bytes starting at @offset using glfs_discard_async() and
 * wait for completion.
 *
 * Returns 0 on success or a negative errno.
 */
static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
                                                 int64_t offset, int size)
{
    BDRVGlusterState *s = bs->opaque;
    GlusterAIOCB acb = {
        .size = 0,      /* only a zero result counts as success here */
        .ret = 0,
        .coroutine = qemu_coroutine_self(),
        .aio_context = bdrv_get_aio_context(bs),
    };

    if (glfs_discard_async(s->fd, offset, size,
                           gluster_finish_aiocb, &acb) < 0) {
        return -errno;
    }

    /* resumed by gluster_finish_aiocb() once the request has completed */
    qemu_coroutine_yield();
    return acb.ret;
}
#endif
1213
1214static int64_t qemu_gluster_getlength(BlockDriverState *bs)
1215{
1216 BDRVGlusterState *s = bs->opaque;
1217 int64_t ret;
1218
1219 ret = glfs_lseek(s->fd, 0, SEEK_END);
1220 if (ret < 0) {
1221 return -errno;
1222 } else {
1223 return ret;
1224 }
1225}
1226
1227static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
1228{
1229 BDRVGlusterState *s = bs->opaque;
1230 struct stat st;
1231 int ret;
1232
1233 ret = glfs_fstat(s->fd, &st);
1234 if (ret < 0) {
1235 return -errno;
1236 } else {
1237 return st.st_blocks * 512;
1238 }
1239}
1240
1241static int qemu_gluster_has_zero_init(BlockDriverState *bs)
1242{
1243
1244 return 0;
1245}
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259static int find_allocation(BlockDriverState *bs, off_t start,
1260 off_t *data, off_t *hole)
1261{
1262 BDRVGlusterState *s = bs->opaque;
1263
1264 if (!s->supports_seek_data) {
1265 goto exit;
1266 }
1267
1268#if defined SEEK_HOLE && defined SEEK_DATA
1269 off_t offs;
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282 offs = glfs_lseek(s->fd, start, SEEK_DATA);
1283 if (offs < 0) {
1284 return -errno;
1285 }
1286
1287 if (offs < start) {
1288
1289
1290
1291
1292 return -EIO;
1293 }
1294
1295 if (offs > start) {
1296
1297 *hole = start;
1298 *data = offs;
1299 return 0;
1300 }
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321 offs = glfs_lseek(s->fd, start, SEEK_HOLE);
1322 if (offs < 0) {
1323 return -errno;
1324 }
1325
1326 if (offs < start) {
1327
1328
1329
1330
1331 return -EIO;
1332 }
1333
1334 if (offs > start) {
1335
1336
1337
1338
1339
1340
1341 *data = start;
1342 *hole = offs;
1343 return 0;
1344 }
1345
1346
1347 return -EBUSY;
1348#endif
1349
1350exit:
1351 return -ENOTSUP;
1352}
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369static int64_t coroutine_fn qemu_gluster_co_get_block_status(
1370 BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
1371 BlockDriverState **file)
1372{
1373 BDRVGlusterState *s = bs->opaque;
1374 off_t start, data = 0, hole = 0;
1375 int64_t total_size;
1376 int ret = -EINVAL;
1377
1378 if (!s->fd) {
1379 return ret;
1380 }
1381
1382 start = sector_num * BDRV_SECTOR_SIZE;
1383 total_size = bdrv_getlength(bs);
1384 if (total_size < 0) {
1385 return total_size;
1386 } else if (start >= total_size) {
1387 *pnum = 0;
1388 return 0;
1389 } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
1390 nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
1391 }
1392
1393 ret = find_allocation(bs, start, &data, &hole);
1394 if (ret == -ENXIO) {
1395
1396 *pnum = nb_sectors;
1397 ret = BDRV_BLOCK_ZERO;
1398 } else if (ret < 0) {
1399
1400 *pnum = nb_sectors;
1401 ret = BDRV_BLOCK_DATA;
1402 } else if (data == start) {
1403
1404
1405 *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
1406 ret = BDRV_BLOCK_DATA;
1407 } else {
1408
1409 assert(hole == start);
1410 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1411 ret = BDRV_BLOCK_ZERO;
1412 }
1413
1414 *file = bs;
1415
1416 return ret | BDRV_BLOCK_OFFSET_VALID | start;
1417}
1418
1419
1420static BlockDriver bdrv_gluster = {
1421 .format_name = "gluster",
1422 .protocol_name = "gluster",
1423 .instance_size = sizeof(BDRVGlusterState),
1424 .bdrv_needs_filename = false,
1425 .bdrv_file_open = qemu_gluster_open,
1426 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1427 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1428 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1429 .bdrv_close = qemu_gluster_close,
1430 .bdrv_create = qemu_gluster_create,
1431 .bdrv_getlength = qemu_gluster_getlength,
1432 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1433 .bdrv_truncate = qemu_gluster_truncate,
1434 .bdrv_co_readv = qemu_gluster_co_readv,
1435 .bdrv_co_writev = qemu_gluster_co_writev,
1436 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1437 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1438#ifdef CONFIG_GLUSTERFS_DISCARD
1439 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1440#endif
1441#ifdef CONFIG_GLUSTERFS_ZEROFILL
1442 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1443#endif
1444 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1445 .create_opts = &qemu_gluster_create_opts,
1446};
1447
1448static BlockDriver bdrv_gluster_tcp = {
1449 .format_name = "gluster",
1450 .protocol_name = "gluster+tcp",
1451 .instance_size = sizeof(BDRVGlusterState),
1452 .bdrv_needs_filename = false,
1453 .bdrv_file_open = qemu_gluster_open,
1454 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1455 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1456 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1457 .bdrv_close = qemu_gluster_close,
1458 .bdrv_create = qemu_gluster_create,
1459 .bdrv_getlength = qemu_gluster_getlength,
1460 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1461 .bdrv_truncate = qemu_gluster_truncate,
1462 .bdrv_co_readv = qemu_gluster_co_readv,
1463 .bdrv_co_writev = qemu_gluster_co_writev,
1464 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1465 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1466#ifdef CONFIG_GLUSTERFS_DISCARD
1467 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1468#endif
1469#ifdef CONFIG_GLUSTERFS_ZEROFILL
1470 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1471#endif
1472 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1473 .create_opts = &qemu_gluster_create_opts,
1474};
1475
1476static BlockDriver bdrv_gluster_unix = {
1477 .format_name = "gluster",
1478 .protocol_name = "gluster+unix",
1479 .instance_size = sizeof(BDRVGlusterState),
1480 .bdrv_needs_filename = true,
1481 .bdrv_file_open = qemu_gluster_open,
1482 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1483 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1484 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1485 .bdrv_close = qemu_gluster_close,
1486 .bdrv_create = qemu_gluster_create,
1487 .bdrv_getlength = qemu_gluster_getlength,
1488 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1489 .bdrv_truncate = qemu_gluster_truncate,
1490 .bdrv_co_readv = qemu_gluster_co_readv,
1491 .bdrv_co_writev = qemu_gluster_co_writev,
1492 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1493 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1494#ifdef CONFIG_GLUSTERFS_DISCARD
1495 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1496#endif
1497#ifdef CONFIG_GLUSTERFS_ZEROFILL
1498 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1499#endif
1500 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1501 .create_opts = &qemu_gluster_create_opts,
1502};
1503
1504
1505
1506
1507
1508
1509
1510static BlockDriver bdrv_gluster_rdma = {
1511 .format_name = "gluster",
1512 .protocol_name = "gluster+rdma",
1513 .instance_size = sizeof(BDRVGlusterState),
1514 .bdrv_needs_filename = true,
1515 .bdrv_file_open = qemu_gluster_open,
1516 .bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
1517 .bdrv_reopen_commit = qemu_gluster_reopen_commit,
1518 .bdrv_reopen_abort = qemu_gluster_reopen_abort,
1519 .bdrv_close = qemu_gluster_close,
1520 .bdrv_create = qemu_gluster_create,
1521 .bdrv_getlength = qemu_gluster_getlength,
1522 .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
1523 .bdrv_truncate = qemu_gluster_truncate,
1524 .bdrv_co_readv = qemu_gluster_co_readv,
1525 .bdrv_co_writev = qemu_gluster_co_writev,
1526 .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
1527 .bdrv_has_zero_init = qemu_gluster_has_zero_init,
1528#ifdef CONFIG_GLUSTERFS_DISCARD
1529 .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
1530#endif
1531#ifdef CONFIG_GLUSTERFS_ZEROFILL
1532 .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
1533#endif
1534 .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
1535 .create_opts = &qemu_gluster_create_opts,
1536};
1537
1538static void bdrv_gluster_init(void)
1539{
1540 bdrv_register(&bdrv_gluster_rdma);
1541 bdrv_register(&bdrv_gluster_unix);
1542 bdrv_register(&bdrv_gluster_tcp);
1543 bdrv_register(&bdrv_gluster);
1544}
1545
1546block_init(bdrv_gluster_init);
1547