1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21
22#include "block/export.h"
23#include "qapi/error.h"
24#include "qemu/queue.h"
25#include "trace.h"
26#include "nbd-internal.h"
27#include "qemu/units.h"
28
/* Fixed meta context ids reported to clients by this server. */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
/*
 * Exported dirty bitmap number i is reported with id
 * NBD_META_ID_DIRTY_BITMAP + i, so this has to stay the largest fixed id.
 */
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * Upper bound on the number of block status extents: 1 MiB divided by 8.
 * NOTE(review): presumably 8 is the wire size of one extent, which would
 * make this "1 MiB of extent data" -- confirm against the macro's users.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
41
42static int system_errno_to_nbd_errno(int err)
43{
44 switch (err) {
45 case 0:
46 return NBD_SUCCESS;
47 case EPERM:
48 case EROFS:
49 return NBD_EPERM;
50 case EIO:
51 return NBD_EIO;
52 case ENOMEM:
53 return NBD_ENOMEM;
54#ifdef EDQUOT
55 case EDQUOT:
56#endif
57 case EFBIG:
58 case ENOSPC:
59 return NBD_ENOSPC;
60 case EOVERFLOW:
61 return NBD_EOVERFLOW;
62 case ENOTSUP:
63#if ENOTSUP != EOPNOTSUPP
64 case EOPNOTSUPP:
65#endif
66 return NBD_ENOTSUP;
67 case ESHUTDOWN:
68 return NBD_ESHUTDOWN;
69 case EINVAL:
70 default:
71 return NBD_EINVAL;
72 }
73}
74
75
76
typedef struct NBDRequestData NBDRequestData;

/* Bookkeeping for one in-flight request; created by nbd_request_get(). */
struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
    NBDClient *client;  /* owning client; a reference is held on it */
    uint8_t *data;      /* optional buffer, released with qemu_vfree() */
    bool complete;
};
85
/* State of one NBD export. */
struct NBDExport {
    BlockExport common;

    char *name;         /* sent in NBD_OPT_LIST replies; NULL is sent as "" */
    char *description;  /* optional; sent in list and info replies */
    uint64_t size;      /* sent to the client together with the flags */
    uint16_t nbdflags;  /* base transmission flags advertised to clients */
    QTAILQ_HEAD(, NBDClient) clients;   /* clients attached to this export */
    QTAILQ_ENTRY(NBDExport) next;       /* linkage in the global exports list */

    BlockBackend *eject_notifier_blk;
    Notifier eject_notifier;

    bool allocation_depth;              /* offer "qemu:allocation-depth" */
    BdrvDirtyBitmap **export_bitmaps;   /* offered as "qemu:dirty-bitmap:NAME" */
    size_t nr_export_bitmaps;           /* number of entries in export_bitmaps */
};
103
/* All known exports; walked when answering NBD_OPT_LIST. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
105
106
107
108
/*
 * Set of meta contexts selected by the client through
 * NBD_OPT_{LIST,SET}_META_CONTEXT for one export.
 */
typedef struct NBDExportMetaContexts {
    NBDExport *exp;         /* export the selection was made for */
    size_t count;           /* number of contexts reported to the client */
    bool base_allocation;   /* "base:allocation" selected */
    bool allocation_depth;  /* "qemu:allocation-depth" selected */
    bool *bitmaps;          /*
                             * one flag per exp->export_bitmaps[] entry,
                             * allocated with exp->nr_export_bitmaps elements
                             */
} NBDExportMetaContexts;
119
/* Per-connection server state. */
struct NBDClient {
    int refcount;   /* see nbd_client_get() / nbd_client_put() */
    void (*close_fn)(NBDClient *client, bool negotiated); /* run by client_close() */

    NBDExport *exp; /* export in use; set by NBD_OPT_EXPORT_NAME / NBD_OPT_GO */
    QCryptoTLSCreds *tlscreds;  /* non-NULL when TLS is configured */
    char *tlsauthz;
    QIOChannelSocket *sioc; /* The underlying data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */

    Coroutine *recv_coroutine;

    CoMutex send_lock;
    Coroutine *send_coroutine;

    bool read_yielding; /* true while nbd_read_eof() is yielding for input */
    bool quiescing;     /* set on AioContext detach, cleared on attach */

    QTAILQ_ENTRY(NBDClient) next;   /* linkage in NBDExport.clients */
    int nb_requests;    /* number of live NBDRequestData objects */
    bool closing;       /* set once client_close() has run */

    uint32_t check_align; /* If non-zero, check for aligned client requests */

    bool structured_reply;  /* NBD_OPT_STRUCTURED_REPLY was acknowledged */
    NBDExportMetaContexts export_meta;

    uint32_t opt; /* Current option being negotiated */
    uint32_t optlen; /* remaining length of data in ioc for the option being
                        negotiated now */
};
151
152static void nbd_client_receive_next_request(NBDClient *client);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
182 uint32_t type, uint32_t length)
183{
184 stq_be_p(&rep->magic, NBD_REP_MAGIC);
185 stl_be_p(&rep->option, option);
186 stl_be_p(&rep->type, type);
187 stl_be_p(&rep->length, length);
188}
189
190
191
192static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
193 uint32_t len, Error **errp)
194{
195 NBDOptionReply rep;
196
197 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
198 type, nbd_rep_lookup(type), len);
199
200 assert(len < NBD_MAX_BUFFER_SIZE);
201
202 set_be_option_rep(&rep, client->opt, type, len);
203 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
204}
205
206
207
208static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
209 Error **errp)
210{
211 return nbd_negotiate_send_rep_len(client, type, 0, errp);
212}
213
214
215
216static int GCC_FMT_ATTR(4, 0)
217nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
218 Error **errp, const char *fmt, va_list va)
219{
220 ERRP_GUARD();
221 g_autofree char *msg = NULL;
222 int ret;
223 size_t len;
224
225 msg = g_strdup_vprintf(fmt, va);
226 len = strlen(msg);
227 assert(len < NBD_MAX_STRING_SIZE);
228 trace_nbd_negotiate_send_rep_err(msg);
229 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
230 if (ret < 0) {
231 return ret;
232 }
233 if (nbd_write(client->ioc, msg, len, errp) < 0) {
234 error_prepend(errp, "write failed (error message): ");
235 return -EIO;
236 }
237
238 return 0;
239}
240
241
242
243
/*
 * Return a newly allocated copy of @name that is safe to embed in an error
 * message: names of 80 bytes or more are cut to their first 80 bytes and
 * suffixed with "...".  The caller frees the result.
 */
static char *
nbd_sanitize_name(const char *name)
{
    bool too_long = strnlen(name, 80) >= 80;

    if (too_long) {
        return g_strdup_printf("%.80s...", name);
    }
    return g_strdup(name);
}
253
254
255
256static int GCC_FMT_ATTR(4, 5)
257nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
258 Error **errp, const char *fmt, ...)
259{
260 va_list va;
261 int ret;
262
263 va_start(va, fmt);
264 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
265 va_end(va);
266 return ret;
267}
268
269
270
271
272static int GCC_FMT_ATTR(4, 0)
273nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
274 const char *fmt, va_list va)
275{
276 int ret = nbd_drop(client->ioc, client->optlen, errp);
277
278 client->optlen = 0;
279 if (!ret) {
280 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
281 }
282 return ret;
283}
284
285static int GCC_FMT_ATTR(4, 5)
286nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
287 const char *fmt, ...)
288{
289 int ret;
290 va_list va;
291
292 va_start(va, fmt);
293 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
294 va_end(va);
295
296 return ret;
297}
298
299static int GCC_FMT_ATTR(3, 4)
300nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
301{
302 int ret;
303 va_list va;
304
305 va_start(va, fmt);
306 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
307 va_end(va);
308
309 return ret;
310}
311
312
313
314
315
316static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
317 bool check_nul, Error **errp)
318{
319 if (size > client->optlen) {
320 return nbd_opt_invalid(client, errp,
321 "Inconsistent lengths in option %s",
322 nbd_opt_lookup(client->opt));
323 }
324 client->optlen -= size;
325 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
326 return -EIO;
327 }
328
329 if (check_nul && strnlen(buffer, size) != size) {
330 return nbd_opt_invalid(client, errp,
331 "Unexpected embedded NUL in option %s",
332 nbd_opt_lookup(client->opt));
333 }
334 return 1;
335}
336
337
338
339
340static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
341{
342 if (size > client->optlen) {
343 return nbd_opt_invalid(client, errp,
344 "Inconsistent lengths in option %s",
345 nbd_opt_lookup(client->opt));
346 }
347 client->optlen -= size;
348 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
349}
350
351
352
353
354
355
356
357
358
359
360
361
362
363static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
364 Error **errp)
365{
366 int ret;
367 uint32_t len;
368 g_autofree char *local_name = NULL;
369
370 *name = NULL;
371 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
372 if (ret <= 0) {
373 return ret;
374 }
375 len = cpu_to_be32(len);
376
377 if (len > NBD_MAX_STRING_SIZE) {
378 return nbd_opt_invalid(client, errp,
379 "Invalid name length: %" PRIu32, len);
380 }
381
382 local_name = g_malloc(len + 1);
383 ret = nbd_opt_read(client, local_name, len, true, errp);
384 if (ret <= 0) {
385 return ret;
386 }
387 local_name[len] = '\0';
388
389 if (length) {
390 *length = len;
391 }
392 *name = g_steal_pointer(&local_name);
393
394 return 1;
395}
396
397
398
399static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
400 Error **errp)
401{
402 ERRP_GUARD();
403 size_t name_len, desc_len;
404 uint32_t len;
405 const char *name = exp->name ? exp->name : "";
406 const char *desc = exp->description ? exp->description : "";
407 QIOChannel *ioc = client->ioc;
408 int ret;
409
410 trace_nbd_negotiate_send_rep_list(name, desc);
411 name_len = strlen(name);
412 desc_len = strlen(desc);
413 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
414 len = name_len + desc_len + sizeof(len);
415 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
416 if (ret < 0) {
417 return ret;
418 }
419
420 len = cpu_to_be32(name_len);
421 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
422 error_prepend(errp, "write failed (name length): ");
423 return -EINVAL;
424 }
425
426 if (nbd_write(ioc, name, name_len, errp) < 0) {
427 error_prepend(errp, "write failed (name buffer): ");
428 return -EINVAL;
429 }
430
431 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
432 error_prepend(errp, "write failed (description buffer): ");
433 return -EINVAL;
434 }
435
436 return 0;
437}
438
439
440
441static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
442{
443 NBDExport *exp;
444 assert(client->opt == NBD_OPT_LIST);
445
446
447 QTAILQ_FOREACH(exp, &exports, next) {
448 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
449 return -EINVAL;
450 }
451 }
452
453 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
454}
455
456static void nbd_check_meta_export(NBDClient *client)
457{
458 if (client->exp != client->export_meta.exp) {
459 client->export_meta.count = 0;
460 }
461}
462
463
464
465static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
466 Error **errp)
467{
468 ERRP_GUARD();
469 g_autofree char *name = NULL;
470 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
471 size_t len;
472 int ret;
473 uint16_t myflags;
474
475
476
477
478
479
480
481
482 trace_nbd_negotiate_handle_export_name();
483 if (client->optlen > NBD_MAX_STRING_SIZE) {
484 error_setg(errp, "Bad length received");
485 return -EINVAL;
486 }
487 name = g_malloc(client->optlen + 1);
488 if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
489 return -EIO;
490 }
491 name[client->optlen] = '\0';
492 client->optlen = 0;
493
494 trace_nbd_negotiate_handle_export_name_request(name);
495
496 client->exp = nbd_export_find(name);
497 if (!client->exp) {
498 error_setg(errp, "export not found");
499 return -EINVAL;
500 }
501
502 myflags = client->exp->nbdflags;
503 if (client->structured_reply) {
504 myflags |= NBD_FLAG_SEND_DF;
505 }
506 trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
507 stq_be_p(buf, client->exp->size);
508 stw_be_p(buf + 8, myflags);
509 len = no_zeroes ? 10 : sizeof(buf);
510 ret = nbd_write(client->ioc, buf, len, errp);
511 if (ret < 0) {
512 error_prepend(errp, "write failed: ");
513 return ret;
514 }
515
516 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
517 blk_exp_ref(&client->exp->common);
518 nbd_check_meta_export(client);
519
520 return 0;
521}
522
523
524
525
526static int nbd_negotiate_send_info(NBDClient *client,
527 uint16_t info, uint32_t length, void *buf,
528 Error **errp)
529{
530 int rc;
531
532 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
533 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
534 sizeof(info) + length, errp);
535 if (rc < 0) {
536 return rc;
537 }
538 info = cpu_to_be16(info);
539 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
540 return -EIO;
541 }
542 if (nbd_write(client->ioc, buf, length, errp) < 0) {
543 return -EIO;
544 }
545 return 0;
546}
547
548
549
550
551
552
553
554
555static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
556{
557 int ret;
558
559 assert(client->optlen);
560 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
561 nbd_opt_lookup(client->opt));
562 if (fatal && !ret) {
563 error_setg(errp, "option '%s' has unexpected length",
564 nbd_opt_lookup(client->opt));
565 return -EINVAL;
566 }
567 return ret;
568}
569
570
571
572
573static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
574{
575 int rc;
576 g_autofree char *name = NULL;
577 NBDExport *exp;
578 uint16_t requests;
579 uint16_t request;
580 uint32_t namelen = 0;
581 bool sendname = false;
582 bool blocksize = false;
583 uint32_t sizes[3];
584 char buf[sizeof(uint64_t) + sizeof(uint16_t)];
585 uint32_t check_align = 0;
586 uint16_t myflags;
587
588
589
590
591
592
593
594 rc = nbd_opt_read_name(client, &name, &namelen, errp);
595 if (rc <= 0) {
596 return rc;
597 }
598 trace_nbd_negotiate_handle_export_name_request(name);
599
600 rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
601 if (rc <= 0) {
602 return rc;
603 }
604 requests = be16_to_cpu(requests);
605 trace_nbd_negotiate_handle_info_requests(requests);
606 while (requests--) {
607 rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
608 if (rc <= 0) {
609 return rc;
610 }
611 request = be16_to_cpu(request);
612 trace_nbd_negotiate_handle_info_request(request,
613 nbd_info_lookup(request));
614
615
616
617 switch (request) {
618 case NBD_INFO_NAME:
619 sendname = true;
620 break;
621 case NBD_INFO_BLOCK_SIZE:
622 blocksize = true;
623 break;
624 }
625 }
626 if (client->optlen) {
627 return nbd_reject_length(client, false, errp);
628 }
629
630 exp = nbd_export_find(name);
631 if (!exp) {
632 g_autofree char *sane_name = nbd_sanitize_name(name);
633
634 return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
635 errp, "export '%s' not present",
636 sane_name);
637 }
638
639
640 if (sendname) {
641 rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
642 errp);
643 if (rc < 0) {
644 return rc;
645 }
646 }
647
648
649
650 if (exp->description) {
651 size_t len = strlen(exp->description);
652
653 assert(len <= NBD_MAX_STRING_SIZE);
654 rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
655 len, exp->description, errp);
656 if (rc < 0) {
657 return rc;
658 }
659 }
660
661
662
663
664
665 if (client->opt == NBD_OPT_INFO || blocksize) {
666 check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
667 } else {
668 sizes[0] = 1;
669 }
670 assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
671
672
673 sizes[1] = MAX(4096, sizes[0]);
674
675 sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
676 trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
677 sizes[0] = cpu_to_be32(sizes[0]);
678 sizes[1] = cpu_to_be32(sizes[1]);
679 sizes[2] = cpu_to_be32(sizes[2]);
680 rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
681 sizeof(sizes), sizes, errp);
682 if (rc < 0) {
683 return rc;
684 }
685
686
687 myflags = exp->nbdflags;
688 if (client->structured_reply) {
689 myflags |= NBD_FLAG_SEND_DF;
690 }
691 trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
692 stq_be_p(buf, exp->size);
693 stw_be_p(buf + 8, myflags);
694 rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
695 sizeof(buf), buf, errp);
696 if (rc < 0) {
697 return rc;
698 }
699
700
701
702
703
704
705
706 if (client->opt == NBD_OPT_INFO && !blocksize &&
707 blk_get_request_alignment(exp->common.blk) > 1) {
708 return nbd_negotiate_send_rep_err(client,
709 NBD_REP_ERR_BLOCK_SIZE_REQD,
710 errp,
711 "request NBD_INFO_BLOCK_SIZE to "
712 "use this export");
713 }
714
715
716 rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
717 if (rc < 0) {
718 return rc;
719 }
720
721 if (client->opt == NBD_OPT_GO) {
722 client->exp = exp;
723 client->check_align = check_align;
724 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
725 blk_exp_ref(&client->exp->common);
726 nbd_check_meta_export(client);
727 rc = 1;
728 }
729 return rc;
730}
731
732
733
734
735static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
736 Error **errp)
737{
738 QIOChannel *ioc;
739 QIOChannelTLS *tioc;
740 struct NBDTLSHandshakeData data = { 0 };
741
742 assert(client->opt == NBD_OPT_STARTTLS);
743
744 trace_nbd_negotiate_handle_starttls();
745 ioc = client->ioc;
746
747 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
748 return NULL;
749 }
750
751 tioc = qio_channel_tls_new_server(ioc,
752 client->tlscreds,
753 client->tlsauthz,
754 errp);
755 if (!tioc) {
756 return NULL;
757 }
758
759 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
760 trace_nbd_negotiate_handle_starttls_handshake();
761 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
762 qio_channel_tls_handshake(tioc,
763 nbd_tls_handshake,
764 &data,
765 NULL,
766 NULL);
767
768 if (!data.complete) {
769 g_main_loop_run(data.loop);
770 }
771 g_main_loop_unref(data.loop);
772 if (data.error) {
773 object_unref(OBJECT(tioc));
774 error_propagate(errp, data.error);
775 return NULL;
776 }
777
778 return QIO_CHANNEL(tioc);
779}
780
781
782
783
784
785
786
787static int nbd_negotiate_send_meta_context(NBDClient *client,
788 const char *context,
789 uint32_t context_id,
790 Error **errp)
791{
792 NBDOptionReplyMetaContext opt;
793 struct iovec iov[] = {
794 {.iov_base = &opt, .iov_len = sizeof(opt)},
795 {.iov_base = (void *)context, .iov_len = strlen(context)}
796 };
797
798 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
799 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
800 context_id = 0;
801 }
802
803 trace_nbd_negotiate_meta_query_reply(context, context_id);
804 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
805 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
806 stl_be_p(&opt.context_id, context_id);
807
808 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
809}
810
811
812
813
814
815static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
816 const char *query)
817{
818 if (!*query) {
819 trace_nbd_negotiate_meta_query_parse("empty");
820 return client->opt == NBD_OPT_LIST_META_CONTEXT;
821 }
822 if (strcmp(query, pattern) == 0) {
823 trace_nbd_negotiate_meta_query_parse(pattern);
824 return true;
825 }
826 trace_nbd_negotiate_meta_query_skip("pattern not matched");
827 return false;
828}
829
830
831
832
/*
 * If *@str starts with @prefix, advance *@str past it and return true;
 * otherwise leave *@str untouched and return false.
 */
static bool nbd_strshift(const char **str, const char *prefix)
{
    const size_t prefix_len = strlen(prefix);

    if (strncmp(*str, prefix, prefix_len) != 0) {
        return false;
    }
    *str += prefix_len;
    return true;
}
843
844
845
846
847
848
849static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
850 const char *query)
851{
852 if (!nbd_strshift(&query, "base:")) {
853 return false;
854 }
855 trace_nbd_negotiate_meta_query_parse("base:");
856
857 if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
858 meta->base_allocation = true;
859 }
860 return true;
861}
862
863
864
865
866
867
868
869static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
870 const char *query)
871{
872 size_t i;
873
874 if (!nbd_strshift(&query, "qemu:")) {
875 return false;
876 }
877 trace_nbd_negotiate_meta_query_parse("qemu:");
878
879 if (!*query) {
880 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
881 meta->allocation_depth = meta->exp->allocation_depth;
882 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
883 }
884 trace_nbd_negotiate_meta_query_parse("empty");
885 return true;
886 }
887
888 if (strcmp(query, "allocation-depth") == 0) {
889 trace_nbd_negotiate_meta_query_parse("allocation-depth");
890 meta->allocation_depth = meta->exp->allocation_depth;
891 return true;
892 }
893
894 if (nbd_strshift(&query, "dirty-bitmap:")) {
895 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
896 if (!*query) {
897 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
898 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
899 }
900 trace_nbd_negotiate_meta_query_parse("empty");
901 return true;
902 }
903
904 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
905 const char *bm_name;
906
907 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
908 if (strcmp(bm_name, query) == 0) {
909 meta->bitmaps[i] = true;
910 trace_nbd_negotiate_meta_query_parse(query);
911 return true;
912 }
913 }
914 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
915 return true;
916 }
917
918 trace_nbd_negotiate_meta_query_skip("unknown qemu context");
919 return true;
920}
921
922
923
924
925
926
927
928
929
930
931static int nbd_negotiate_meta_query(NBDClient *client,
932 NBDExportMetaContexts *meta, Error **errp)
933{
934 int ret;
935 g_autofree char *query = NULL;
936 uint32_t len;
937
938 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
939 if (ret <= 0) {
940 return ret;
941 }
942 len = cpu_to_be32(len);
943
944 if (len > NBD_MAX_STRING_SIZE) {
945 trace_nbd_negotiate_meta_query_skip("length too long");
946 return nbd_opt_skip(client, len, errp);
947 }
948
949 query = g_malloc(len + 1);
950 ret = nbd_opt_read(client, query, len, true, errp);
951 if (ret <= 0) {
952 return ret;
953 }
954 query[len] = '\0';
955
956 if (nbd_meta_base_query(client, meta, query)) {
957 return 1;
958 }
959 if (nbd_meta_qemu_query(client, meta, query)) {
960 return 1;
961 }
962
963 trace_nbd_negotiate_meta_query_skip("unknown namespace");
964 return 1;
965}
966
967
968
969
970
971static int nbd_negotiate_meta_queries(NBDClient *client,
972 NBDExportMetaContexts *meta, Error **errp)
973{
974 int ret;
975 g_autofree char *export_name = NULL;
976 g_autofree bool *bitmaps = NULL;
977 NBDExportMetaContexts local_meta = {0};
978 uint32_t nb_queries;
979 size_t i;
980 size_t count = 0;
981
982 if (!client->structured_reply) {
983 return nbd_opt_invalid(client, errp,
984 "request option '%s' when structured reply "
985 "is not negotiated",
986 nbd_opt_lookup(client->opt));
987 }
988
989 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
990
991 meta = &local_meta;
992 }
993
994 g_free(meta->bitmaps);
995 memset(meta, 0, sizeof(*meta));
996
997 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
998 if (ret <= 0) {
999 return ret;
1000 }
1001
1002 meta->exp = nbd_export_find(export_name);
1003 if (meta->exp == NULL) {
1004 g_autofree char *sane_name = nbd_sanitize_name(export_name);
1005
1006 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
1007 "export '%s' not present", sane_name);
1008 }
1009 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
1010 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1011 bitmaps = meta->bitmaps;
1012 }
1013
1014 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1015 if (ret <= 0) {
1016 return ret;
1017 }
1018 nb_queries = cpu_to_be32(nb_queries);
1019 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1020 export_name, nb_queries);
1021
1022 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1023
1024 meta->base_allocation = true;
1025 meta->allocation_depth = meta->exp->allocation_depth;
1026 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
1027 } else {
1028 for (i = 0; i < nb_queries; ++i) {
1029 ret = nbd_negotiate_meta_query(client, meta, errp);
1030 if (ret <= 0) {
1031 return ret;
1032 }
1033 }
1034 }
1035
1036 if (meta->base_allocation) {
1037 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1038 NBD_META_ID_BASE_ALLOCATION,
1039 errp);
1040 if (ret < 0) {
1041 return ret;
1042 }
1043 count++;
1044 }
1045
1046 if (meta->allocation_depth) {
1047 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
1048 NBD_META_ID_ALLOCATION_DEPTH,
1049 errp);
1050 if (ret < 0) {
1051 return ret;
1052 }
1053 count++;
1054 }
1055
1056 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
1057 const char *bm_name;
1058 g_autofree char *context = NULL;
1059
1060 if (!meta->bitmaps[i]) {
1061 continue;
1062 }
1063
1064 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
1065 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
1066
1067 ret = nbd_negotiate_send_meta_context(client, context,
1068 NBD_META_ID_DIRTY_BITMAP + i,
1069 errp);
1070 if (ret < 0) {
1071 return ret;
1072 }
1073 count++;
1074 }
1075
1076 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1077 if (ret == 0) {
1078 meta->count = count;
1079 }
1080
1081 return ret;
1082}
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093static int nbd_negotiate_options(NBDClient *client, Error **errp)
1094{
1095 uint32_t flags;
1096 bool fixedNewstyle = false;
1097 bool no_zeroes = false;
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114 if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
1115 return -EIO;
1116 }
1117 trace_nbd_negotiate_options_flags(flags);
1118 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
1119 fixedNewstyle = true;
1120 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1121 }
1122 if (flags & NBD_FLAG_C_NO_ZEROES) {
1123 no_zeroes = true;
1124 flags &= ~NBD_FLAG_C_NO_ZEROES;
1125 }
1126 if (flags != 0) {
1127 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1128 return -EINVAL;
1129 }
1130
1131 while (1) {
1132 int ret;
1133 uint32_t option, length;
1134 uint64_t magic;
1135
1136 if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1137 return -EINVAL;
1138 }
1139 trace_nbd_negotiate_options_check_magic(magic);
1140 if (magic != NBD_OPTS_MAGIC) {
1141 error_setg(errp, "Bad magic received");
1142 return -EINVAL;
1143 }
1144
1145 if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1146 return -EINVAL;
1147 }
1148 client->opt = option;
1149
1150 if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1151 return -EINVAL;
1152 }
1153 assert(!client->optlen);
1154 client->optlen = length;
1155
1156 if (length > NBD_MAX_BUFFER_SIZE) {
1157 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
1158 length, NBD_MAX_BUFFER_SIZE);
1159 return -EINVAL;
1160 }
1161
1162 trace_nbd_negotiate_options_check_option(option,
1163 nbd_opt_lookup(option));
1164 if (client->tlscreds &&
1165 client->ioc == (QIOChannel *)client->sioc) {
1166 QIOChannel *tioc;
1167 if (!fixedNewstyle) {
1168 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1169 return -EINVAL;
1170 }
1171 switch (option) {
1172 case NBD_OPT_STARTTLS:
1173 if (length) {
1174
1175
1176 return nbd_reject_length(client, true, errp);
1177 }
1178 tioc = nbd_negotiate_handle_starttls(client, errp);
1179 if (!tioc) {
1180 return -EIO;
1181 }
1182 ret = 0;
1183 object_unref(OBJECT(client->ioc));
1184 client->ioc = QIO_CHANNEL(tioc);
1185 break;
1186
1187 case NBD_OPT_EXPORT_NAME:
1188
1189 error_setg(errp, "Option 0x%x not permitted before TLS",
1190 option);
1191 return -EINVAL;
1192
1193 default:
1194
1195
1196
1197
1198
1199
1200 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
1201 option == NBD_OPT_ABORT ? NULL : errp,
1202 "Option 0x%" PRIx32
1203 " not permitted before TLS", option);
1204 if (option == NBD_OPT_ABORT) {
1205 return 1;
1206 }
1207 break;
1208 }
1209 } else if (fixedNewstyle) {
1210 switch (option) {
1211 case NBD_OPT_LIST:
1212 if (length) {
1213 ret = nbd_reject_length(client, false, errp);
1214 } else {
1215 ret = nbd_negotiate_handle_list(client, errp);
1216 }
1217 break;
1218
1219 case NBD_OPT_ABORT:
1220
1221
1222
1223 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
1224 return 1;
1225
1226 case NBD_OPT_EXPORT_NAME:
1227 return nbd_negotiate_handle_export_name(client, no_zeroes,
1228 errp);
1229
1230 case NBD_OPT_INFO:
1231 case NBD_OPT_GO:
1232 ret = nbd_negotiate_handle_info(client, errp);
1233 if (ret == 1) {
1234 assert(option == NBD_OPT_GO);
1235 return 0;
1236 }
1237 break;
1238
1239 case NBD_OPT_STARTTLS:
1240 if (length) {
1241 ret = nbd_reject_length(client, false, errp);
1242 } else if (client->tlscreds) {
1243 ret = nbd_negotiate_send_rep_err(client,
1244 NBD_REP_ERR_INVALID, errp,
1245 "TLS already enabled");
1246 } else {
1247 ret = nbd_negotiate_send_rep_err(client,
1248 NBD_REP_ERR_POLICY, errp,
1249 "TLS not configured");
1250 }
1251 break;
1252
1253 case NBD_OPT_STRUCTURED_REPLY:
1254 if (length) {
1255 ret = nbd_reject_length(client, false, errp);
1256 } else if (client->structured_reply) {
1257 ret = nbd_negotiate_send_rep_err(
1258 client, NBD_REP_ERR_INVALID, errp,
1259 "structured reply already negotiated");
1260 } else {
1261 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1262 client->structured_reply = true;
1263 }
1264 break;
1265
1266 case NBD_OPT_LIST_META_CONTEXT:
1267 case NBD_OPT_SET_META_CONTEXT:
1268 ret = nbd_negotiate_meta_queries(client, &client->export_meta,
1269 errp);
1270 break;
1271
1272 default:
1273 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
1274 "Unsupported option %" PRIu32 " (%s)",
1275 option, nbd_opt_lookup(option));
1276 break;
1277 }
1278 } else {
1279
1280
1281
1282
1283 switch (option) {
1284 case NBD_OPT_EXPORT_NAME:
1285 return nbd_negotiate_handle_export_name(client, no_zeroes,
1286 errp);
1287
1288 default:
1289 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
1290 option, nbd_opt_lookup(option));
1291 return -EINVAL;
1292 }
1293 }
1294 if (ret < 0) {
1295 return ret;
1296 }
1297 }
1298}
1299
1300
1301
1302
1303
1304
1305
1306
1307static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1308{
1309 ERRP_GUARD();
1310 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1311 int ret;
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327 qio_channel_set_blocking(client->ioc, false, NULL);
1328
1329 trace_nbd_negotiate_begin();
1330 memcpy(buf, "NBDMAGIC", 8);
1331
1332 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1333 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1334
1335 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1336 error_prepend(errp, "write failed: ");
1337 return -EINVAL;
1338 }
1339 ret = nbd_negotiate_options(client, errp);
1340 if (ret != 0) {
1341 if (ret < 0) {
1342 error_prepend(errp, "option negotiation failed: ");
1343 }
1344 return ret;
1345 }
1346
1347
1348 if (client->exp && client->exp->common.ctx) {
1349 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
1350 }
1351
1352 assert(!client->optlen);
1353 trace_nbd_negotiate_success();
1354
1355 return 0;
1356}
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366static inline int coroutine_fn
1367nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1368{
1369 bool partial = false;
1370
1371 assert(size);
1372 while (size > 0) {
1373 struct iovec iov = { .iov_base = buffer, .iov_len = size };
1374 ssize_t len;
1375
1376 len = qio_channel_readv(client->ioc, &iov, 1, errp);
1377 if (len == QIO_CHANNEL_ERR_BLOCK) {
1378 client->read_yielding = true;
1379 qio_channel_yield(client->ioc, G_IO_IN);
1380 client->read_yielding = false;
1381 if (client->quiescing) {
1382 return -EAGAIN;
1383 }
1384 continue;
1385 } else if (len < 0) {
1386 return -EIO;
1387 } else if (len == 0) {
1388 if (partial) {
1389 error_setg(errp,
1390 "Unexpected end-of-file before all bytes were read");
1391 return -EIO;
1392 } else {
1393 return 0;
1394 }
1395 }
1396
1397 partial = true;
1398 size -= len;
1399 buffer = (uint8_t *) buffer + len;
1400 }
1401 return 1;
1402}
1403
1404static int nbd_receive_request(NBDClient *client, NBDRequest *request,
1405 Error **errp)
1406{
1407 uint8_t buf[NBD_REQUEST_SIZE];
1408 uint32_t magic;
1409 int ret;
1410
1411 ret = nbd_read_eof(client, buf, sizeof(buf), errp);
1412 if (ret < 0) {
1413 return ret;
1414 }
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425 magic = ldl_be_p(buf);
1426 request->flags = lduw_be_p(buf + 4);
1427 request->type = lduw_be_p(buf + 6);
1428 request->handle = ldq_be_p(buf + 8);
1429 request->from = ldq_be_p(buf + 16);
1430 request->len = ldl_be_p(buf + 24);
1431
1432 trace_nbd_receive_request(magic, request->flags, request->type,
1433 request->from, request->len);
1434
1435 if (magic != NBD_REQUEST_MAGIC) {
1436 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1437 return -EINVAL;
1438 }
1439 return 0;
1440}
1441
/* Upper bound on live requests per client; asserted in nbd_request_get(). */
#define MAX_NBD_REQUESTS 16
1443
1444void nbd_client_get(NBDClient *client)
1445{
1446 client->refcount++;
1447}
1448
1449void nbd_client_put(NBDClient *client)
1450{
1451 if (--client->refcount == 0) {
1452
1453
1454
1455 assert(client->closing);
1456
1457 qio_channel_detach_aio_context(client->ioc);
1458 object_unref(OBJECT(client->sioc));
1459 object_unref(OBJECT(client->ioc));
1460 if (client->tlscreds) {
1461 object_unref(OBJECT(client->tlscreds));
1462 }
1463 g_free(client->tlsauthz);
1464 if (client->exp) {
1465 QTAILQ_REMOVE(&client->exp->clients, client, next);
1466 blk_exp_unref(&client->exp->common);
1467 }
1468 g_free(client->export_meta.bitmaps);
1469 g_free(client);
1470 }
1471}
1472
1473static void client_close(NBDClient *client, bool negotiated)
1474{
1475 if (client->closing) {
1476 return;
1477 }
1478
1479 client->closing = true;
1480
1481
1482
1483
1484 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1485 NULL);
1486
1487
1488 if (client->close_fn) {
1489 client->close_fn(client, negotiated);
1490 }
1491}
1492
1493static NBDRequestData *nbd_request_get(NBDClient *client)
1494{
1495 NBDRequestData *req;
1496
1497 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1498 client->nb_requests++;
1499
1500 req = g_new0(NBDRequestData, 1);
1501 nbd_client_get(client);
1502 req->client = client;
1503 return req;
1504}
1505
1506static void nbd_request_put(NBDRequestData *req)
1507{
1508 NBDClient *client = req->client;
1509
1510 if (req->data) {
1511 qemu_vfree(req->data);
1512 }
1513 g_free(req);
1514
1515 client->nb_requests--;
1516 nbd_client_receive_next_request(client);
1517
1518 nbd_client_put(client);
1519}
1520
1521static void blk_aio_attached(AioContext *ctx, void *opaque)
1522{
1523 NBDExport *exp = opaque;
1524 NBDClient *client;
1525
1526 trace_nbd_blk_aio_attached(exp->name, ctx);
1527
1528 exp->common.ctx = ctx;
1529
1530 QTAILQ_FOREACH(client, &exp->clients, next) {
1531 qio_channel_attach_aio_context(client->ioc, ctx);
1532
1533 assert(client->recv_coroutine == NULL);
1534 assert(client->send_coroutine == NULL);
1535
1536 if (client->quiescing) {
1537 client->quiescing = false;
1538 nbd_client_receive_next_request(client);
1539 }
1540 }
1541}
1542
/*
 * Bottom half run from blk_aio_detach() via aio_wait_bh_oneshot().
 *
 * For every client of the export passed as @opaque: detach its channel
 * from the AioContext, mark the client as quiescing, and make sure neither
 * its receive nor its send coroutine is still active before moving on.
 */
static void nbd_aio_detach_bh(void *opaque)
{
    NBDExport *exp = opaque;
    NBDClient *client;

    QTAILQ_FOREACH(client, &exp->clients, next) {
        qio_channel_detach_aio_context(client->ioc);
        client->quiescing = true;

        if (client->recv_coroutine) {
            if (client->read_yielding) {
                /*
                 * The receive coroutine is parked in qio_channel_yield()
                 * inside nbd_read_eof().  Re-enter it so that it observes
                 * the quiescing flag and bails out with -EAGAIN.
                 */
                qemu_aio_coroutine_enter(exp->common.ctx,
                                         client->recv_coroutine);
            } else {
                /* It is busy elsewhere: wait until it clears itself. */
                AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL);
            }
        }

        if (client->send_coroutine) {
            /* Let an in-progress reply finish (see nbd_co_send_iov()). */
            AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL);
        }
    }
}
1566
1567static void blk_aio_detach(void *opaque)
1568{
1569 NBDExport *exp = opaque;
1570
1571 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1572
1573 aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp);
1574
1575 exp->common.ctx = NULL;
1576}
1577
1578static void nbd_eject_notifier(Notifier *n, void *data)
1579{
1580 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1581
1582 blk_exp_request_shutdown(&exp->common);
1583}
1584
1585void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
1586{
1587 NBDExport *nbd_exp = container_of(exp, NBDExport, common);
1588 assert(exp->drv == &blk_exp_nbd);
1589 assert(nbd_exp->eject_notifier_blk == NULL);
1590
1591 blk_ref(blk);
1592 nbd_exp->eject_notifier_blk = blk;
1593 nbd_exp->eject_notifier.notify = nbd_eject_notifier;
1594 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
1595}
1596
/*
 * BlockExportDriver .create callback for NBD exports.
 *
 * Validates the options in @exp_args, initializes the NBDExport embedded
 * around @blk_exp (name, description, size, NBD flags, exported dirty
 * bitmaps), registers AioContext notifiers on the block backend and adds
 * the export to the global list.
 *
 * Returns 0 on success, or a negative errno value with @errp set.
 */
static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
                             Error **errp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    BlockExportOptionsNbd *arg = &exp_args->u.nbd;
    BlockBackend *blk = blk_exp->blk;
    int64_t size;
    uint64_t perm, shared_perm;
    bool readonly = !exp_args->writable;
    bool shared = !exp_args->writable;
    strList *bitmaps;
    size_t i;
    int ret;

    assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);

    if (!nbd_server_is_running()) {
        error_setg(errp, "NBD server not running");
        return -EINVAL;
    }

    /* The export name defaults to the node name. */
    if (!arg->has_name) {
        arg->name = exp_args->node_name;
    }

    if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "export name '%s' too long", arg->name);
        return -EINVAL;
    }

    if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "description '%s' too long", arg->description);
        return -EINVAL;
    }

    /* Export names must be unique within the server. */
    if (nbd_export_find(arg->name)) {
        error_setg(errp, "NBD server already has export named '%s'", arg->name);
        return -EEXIST;
    }

    size = blk_getlength(blk);
    if (size < 0) {
        error_setg_errno(errp, -size,
                         "Failed to determine the NBD export's length");
        return size;
    }

    /*
     * Keep the current permissions but stop sharing BLK_PERM_RESIZE --
     * presumably so that the image cannot be resized by other users while
     * it is exported (TODO confirm).
     */
    blk_get_perm(blk, &perm, &shared_perm);
    ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
    if (ret < 0) {
        return ret;
    }

    QTAILQ_INIT(&exp->clients);
    exp->name = g_strdup(arg->name);
    exp->description = g_strdup(arg->description);
    exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
                     NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
    if (readonly) {
        exp->nbdflags |= NBD_FLAG_READ_ONLY;
        /* shared has the same value as readonly here. */
        if (shared) {
            exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
        }
    } else {
        exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
                          NBD_FLAG_SEND_FAST_ZERO);
    }
    /* The advertised size is rounded down to a multiple of the sector size. */
    exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);

    /* First pass: count the requested bitmaps to size the array. */
    for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
        exp->nr_export_bitmaps++;
    }
    exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
    /* Second pass: resolve each bitmap name and validate it. */
    for (i = 0, bitmaps = arg->bitmaps; bitmaps;
         i++, bitmaps = bitmaps->next) {
        const char *bitmap = bitmaps->value;
        BlockDriverState *bs = blk_bs(blk);
        BdrvDirtyBitmap *bm = NULL;

        /*
         * Search the exported node first and then walk down through its
         * filter/COW children until a node owning the bitmap is found.
         */
        while (bs) {
            bm = bdrv_find_dirty_bitmap(bs, bitmap);
            if (bm != NULL) {
                break;
            }

            bs = bdrv_filter_or_cow_bs(bs);
        }

        if (bm == NULL) {
            ret = -ENOENT;
            error_setg(errp, "Bitmap '%s' is not found", bitmap);
            goto fail;
        }

        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
            ret = -EINVAL;
            goto fail;
        }

        /*
         * bs is the node on which the bitmap was found.  Reject an enabled
         * bitmap on a writable node when the export itself is read-only.
         */
        if (readonly && bdrv_is_writable(bs) &&
            bdrv_dirty_bitmap_enabled(bm)) {
            ret = -EINVAL;
            error_setg(errp,
                       "Enabled bitmap '%s' incompatible with readonly export",
                       bitmap);
            goto fail;
        }

        exp->export_bitmaps[i] = bm;
        assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
    }

    /*
     * Only mark the bitmaps busy once all of them have been validated, so
     * that the failure path above has nothing to undo on the bitmaps.
     */
    for (i = 0; i < exp->nr_export_bitmaps; i++) {
        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
    }

    exp->allocation_depth = arg->allocation_depth;

    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);

    QTAILQ_INSERT_TAIL(&exports, exp, next);

    return 0;

fail:
    /* Release everything allocated in this function. */
    g_free(exp->export_bitmaps);
    g_free(exp->name);
    g_free(exp->description);
    return ret;
}
1730
1731NBDExport *nbd_export_find(const char *name)
1732{
1733 NBDExport *exp;
1734 QTAILQ_FOREACH(exp, &exports, next) {
1735 if (strcmp(name, exp->name) == 0) {
1736 return exp;
1737 }
1738 }
1739
1740 return NULL;
1741}
1742
1743AioContext *
1744nbd_export_aio_context(NBDExport *exp)
1745{
1746 return exp->common.ctx;
1747}
1748
1749static void nbd_export_request_shutdown(BlockExport *blk_exp)
1750{
1751 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1752 NBDClient *client, *next;
1753
1754 blk_exp_ref(&exp->common);
1755
1756
1757
1758
1759
1760
1761
1762 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1763 client_close(client, true);
1764 }
1765 if (exp->name) {
1766 g_free(exp->name);
1767 exp->name = NULL;
1768 QTAILQ_REMOVE(&exports, exp, next);
1769 }
1770 blk_exp_unref(&exp->common);
1771}
1772
1773static void nbd_export_delete(BlockExport *blk_exp)
1774{
1775 size_t i;
1776 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1777
1778 assert(exp->name == NULL);
1779 assert(QTAILQ_EMPTY(&exp->clients));
1780
1781 g_free(exp->description);
1782 exp->description = NULL;
1783
1784 if (exp->common.blk) {
1785 if (exp->eject_notifier_blk) {
1786 notifier_remove(&exp->eject_notifier);
1787 blk_unref(exp->eject_notifier_blk);
1788 }
1789 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
1790 blk_aio_detach, exp);
1791 }
1792
1793 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1794 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
1795 }
1796}
1797
/*
 * Block export driver description for NBD exports: each instance is an
 * NBDExport, created, deleted and shut down through the callbacks below.
 */
const BlockExportDriver blk_exp_nbd = {
    .type               = BLOCK_EXPORT_TYPE_NBD,
    .instance_size      = sizeof(NBDExport),
    .create             = nbd_export_create,
    .delete             = nbd_export_delete,
    .request_shutdown   = nbd_export_request_shutdown,
};
1805
1806static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1807 unsigned niov, Error **errp)
1808{
1809 int ret;
1810
1811 g_assert(qemu_in_coroutine());
1812 qemu_co_mutex_lock(&client->send_lock);
1813 client->send_coroutine = qemu_coroutine_self();
1814
1815 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1816
1817 client->send_coroutine = NULL;
1818 qemu_co_mutex_unlock(&client->send_lock);
1819
1820 return ret;
1821}
1822
1823static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1824 uint64_t handle)
1825{
1826 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1827 stl_be_p(&reply->error, error);
1828 stq_be_p(&reply->handle, handle);
1829}
1830
1831static int nbd_co_send_simple_reply(NBDClient *client,
1832 uint64_t handle,
1833 uint32_t error,
1834 void *data,
1835 size_t len,
1836 Error **errp)
1837{
1838 NBDSimpleReply reply;
1839 int nbd_err = system_errno_to_nbd_errno(error);
1840 struct iovec iov[] = {
1841 {.iov_base = &reply, .iov_len = sizeof(reply)},
1842 {.iov_base = data, .iov_len = len}
1843 };
1844
1845 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1846 len);
1847 set_be_simple_reply(&reply, nbd_err, handle);
1848
1849 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1850}
1851
1852static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1853 uint16_t type, uint64_t handle, uint32_t length)
1854{
1855 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1856 stw_be_p(&chunk->flags, flags);
1857 stw_be_p(&chunk->type, type);
1858 stq_be_p(&chunk->handle, handle);
1859 stl_be_p(&chunk->length, length);
1860}
1861
1862static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1863 uint64_t handle,
1864 Error **errp)
1865{
1866 NBDStructuredReplyChunk chunk;
1867 struct iovec iov[] = {
1868 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1869 };
1870
1871 trace_nbd_co_send_structured_done(handle);
1872 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1873
1874 return nbd_co_send_iov(client, iov, 1, errp);
1875}
1876
1877static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1878 uint64_t handle,
1879 uint64_t offset,
1880 void *data,
1881 size_t size,
1882 bool final,
1883 Error **errp)
1884{
1885 NBDStructuredReadData chunk;
1886 struct iovec iov[] = {
1887 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1888 {.iov_base = data, .iov_len = size}
1889 };
1890
1891 assert(size);
1892 trace_nbd_co_send_structured_read(handle, offset, data, size);
1893 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1894 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1895 sizeof(chunk) - sizeof(chunk.h) + size);
1896 stq_be_p(&chunk.offset, offset);
1897
1898 return nbd_co_send_iov(client, iov, 2, errp);
1899}
1900
1901static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1902 uint64_t handle,
1903 uint32_t error,
1904 const char *msg,
1905 Error **errp)
1906{
1907 NBDStructuredError chunk;
1908 int nbd_err = system_errno_to_nbd_errno(error);
1909 struct iovec iov[] = {
1910 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1911 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1912 };
1913
1914 assert(nbd_err);
1915 trace_nbd_co_send_structured_error(handle, nbd_err,
1916 nbd_err_lookup(nbd_err), msg ? msg : "");
1917 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1918 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1919 stl_be_p(&chunk.error, nbd_err);
1920 stw_be_p(&chunk.message_length, iov[1].iov_len);
1921
1922 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1923}
1924
1925
1926
1927
1928
/*
 * nbd_co_send_sparse_read
 * Answer a read of @size bytes at @offset with a sequence of structured
 * chunks: ranges that block status reports as zero are sent as OFFSET_HOLE
 * chunks, everything else is read into @data and sent as OFFSET_DATA
 * chunks.  The chunk that completes the range carries the DONE flag.
 *
 * Returns the result of sending the structured error chunk if querying
 * block status fails, a negative value (with @errp set) if reading from
 * the backend or sending a chunk fails, and otherwise the result of the
 * last send.
 */
static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
                                                uint64_t handle,
                                                uint64_t offset,
                                                uint8_t *data,
                                                size_t size,
                                                Error **errp)
{
    int ret = 0;
    NBDExport *exp = client->exp;
    size_t progress = 0;

    while (progress < size) {
        int64_t pnum;
        int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                             offset + progress,
                                             size - progress, &pnum, NULL,
                                             NULL);
        bool final;

        if (status < 0) {
            /* Report the failure to the client as a final error chunk. */
            char *msg = g_strdup_printf("unable to check for holes: %s",
                                        strerror(-status));

            ret = nbd_co_send_structured_error(client, handle, -status, msg,
                                               errp);
            g_free(msg);
            return ret;
        }
        /* pnum bytes starting at offset + progress share this status. */
        assert(pnum && pnum <= size - progress);
        final = progress + pnum == size;
        if (status & BDRV_BLOCK_ZERO) {
            NBDStructuredReadHole chunk;
            struct iovec iov[] = {
                {.iov_base = &chunk, .iov_len = sizeof(chunk)},
            };

            trace_nbd_co_send_structured_read_hole(handle, offset + progress,
                                                   pnum);
            set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
                         NBD_REPLY_TYPE_OFFSET_HOLE,
                         handle, sizeof(chunk) - sizeof(chunk.h));
            stq_be_p(&chunk.offset, offset + progress);
            stl_be_p(&chunk.length, pnum);
            ret = nbd_co_send_iov(client, iov, 1, errp);
        } else {
            ret = blk_pread(exp->common.blk, offset + progress,
                            data + progress, pnum);
            if (ret < 0) {
                /*
                 * No error chunk is sent here; the negative return value
                 * is left for the caller to act upon.
                 */
                error_setg_errno(errp, -ret, "reading from file failed");
                break;
            }
            ret = nbd_co_send_structured_read(client, handle, offset + progress,
                                              data + progress, pnum, final,
                                              errp);
        }

        if (ret < 0) {
            break;
        }
        progress += pnum;
    }
    return ret;
}
1992
/* Bounded, growable-in-place list of extents for a block status reply. */
typedef struct NBDExtentArray {
    NBDExtent *extents;         /* storage for nb_alloc entries */
    unsigned int nb_alloc;      /* capacity of extents[] */
    unsigned int count;         /* number of entries currently in use */
    uint64_t total_length;      /* sum of the lengths of all added extents */
    bool can_add;               /* false once full or converted to BE */
    bool converted_to_be;       /* extents[] already in big-endian form */
} NBDExtentArray;
2001
2002static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
2003{
2004 NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
2005
2006 ea->nb_alloc = nb_alloc;
2007 ea->extents = g_new(NBDExtent, nb_alloc);
2008 ea->can_add = true;
2009
2010 return ea;
2011}
2012
2013static void nbd_extent_array_free(NBDExtentArray *ea)
2014{
2015 g_free(ea->extents);
2016 g_free(ea);
2017}
2018G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
2019
2020
2021static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
2022{
2023 int i;
2024
2025 assert(!ea->converted_to_be);
2026 ea->can_add = false;
2027 ea->converted_to_be = true;
2028
2029 for (i = 0; i < ea->count; i++) {
2030 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2031 ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
2032 }
2033}
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045static int nbd_extent_array_add(NBDExtentArray *ea,
2046 uint32_t length, uint32_t flags)
2047{
2048 assert(ea->can_add);
2049
2050 if (!length) {
2051 return 0;
2052 }
2053
2054
2055 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2056 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
2057
2058 if (sum <= UINT32_MAX) {
2059 ea->extents[ea->count - 1].length = sum;
2060 ea->total_length += length;
2061 return 0;
2062 }
2063 }
2064
2065 if (ea->count >= ea->nb_alloc) {
2066 ea->can_add = false;
2067 return -1;
2068 }
2069
2070 ea->total_length += length;
2071 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
2072 ea->count++;
2073
2074 return 0;
2075}
2076
2077static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
2078 uint64_t bytes, NBDExtentArray *ea)
2079{
2080 while (bytes) {
2081 uint32_t flags;
2082 int64_t num;
2083 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
2084 NULL, NULL);
2085
2086 if (ret < 0) {
2087 return ret;
2088 }
2089
2090 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2091 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2092
2093 if (nbd_extent_array_add(ea, num, flags) < 0) {
2094 return 0;
2095 }
2096
2097 offset += num;
2098 bytes -= num;
2099 }
2100
2101 return 0;
2102}
2103
2104static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2105 uint64_t bytes, NBDExtentArray *ea)
2106{
2107 while (bytes) {
2108 int64_t num;
2109 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2110 &num);
2111
2112 if (ret < 0) {
2113 return ret;
2114 }
2115
2116 if (nbd_extent_array_add(ea, num, ret) < 0) {
2117 return 0;
2118 }
2119
2120 offset += num;
2121 bytes -= num;
2122 }
2123
2124 return 0;
2125}
2126
2127
2128
2129
2130
2131
2132
2133static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
2134 NBDExtentArray *ea,
2135 bool last, uint32_t context_id, Error **errp)
2136{
2137 NBDStructuredMeta chunk;
2138 struct iovec iov[] = {
2139 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2140 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
2141 };
2142
2143 nbd_extent_array_convert_to_be(ea);
2144
2145 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
2146 last);
2147 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
2148 NBD_REPLY_TYPE_BLOCK_STATUS,
2149 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
2150 stl_be_p(&chunk.context_id, context_id);
2151
2152 return nbd_co_send_iov(client, iov, 2, errp);
2153}
2154
2155
2156static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
2157 BlockDriverState *bs, uint64_t offset,
2158 uint32_t length, bool dont_fragment,
2159 bool last, uint32_t context_id,
2160 Error **errp)
2161{
2162 int ret;
2163 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2164 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2165
2166 if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2167 ret = blockstatus_to_extents(bs, offset, length, ea);
2168 } else {
2169 ret = blockalloc_to_extents(bs, offset, length, ea);
2170 }
2171 if (ret < 0) {
2172 return nbd_co_send_structured_error(
2173 client, handle, -ret, "can't get block status", errp);
2174 }
2175
2176 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2177}
2178
2179
2180static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
2181 uint64_t offset, uint64_t length,
2182 NBDExtentArray *es)
2183{
2184 int64_t start, dirty_start, dirty_count;
2185 int64_t end = offset + length;
2186 bool full = false;
2187
2188 bdrv_dirty_bitmap_lock(bitmap);
2189
2190 for (start = offset;
2191 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
2192 &dirty_start, &dirty_count);
2193 start = dirty_start + dirty_count)
2194 {
2195 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2196 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2197 {
2198 full = true;
2199 break;
2200 }
2201 }
2202
2203 if (!full) {
2204
2205 (void) nbd_extent_array_add(es, end - start, 0);
2206 }
2207
2208 bdrv_dirty_bitmap_unlock(bitmap);
2209}
2210
2211static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
2212 BdrvDirtyBitmap *bitmap, uint64_t offset,
2213 uint32_t length, bool dont_fragment, bool last,
2214 uint32_t context_id, Error **errp)
2215{
2216 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2217 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2218
2219 bitmap_to_extents(bitmap, offset, length, ea);
2220
2221 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2222}
2223
2224
2225
2226
2227
2228
2229
2230
/*
 * nbd_co_receive_request
 * Receive the next request header (and, for NBD_CMD_WRITE, its payload)
 * from the client owning @req, store the decoded header in @request and
 * validate it against the export.
 *
 * req->complete is set once everything belonging to the request has been
 * consumed from the channel; req->data is allocated for READ and WRITE.
 *
 * Returns 0 when the request can be handled.  Returns -EIO when the
 * connection has to be dropped (NBD_CMD_DISC, or the write payload could
 * not be read), or whatever negative value nbd_receive_request() reported.
 * Any other negative errno comes with @errp set; nbd_trip() answers those
 * with an error reply instead of disconnecting.
 */
static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                  Error **errp)
{
    NBDClient *client = req->client;
    int valid_flags;
    int ret;

    g_assert(qemu_in_coroutine());
    assert(client->recv_coroutine == qemu_coroutine_self());
    ret = nbd_receive_request(client, request, errp);
    if (ret < 0) {
        return ret;
    }

    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
                                             nbd_cmd_lookup(request->type));

    if (request->type != NBD_CMD_WRITE) {
        /* No payload follows the header, so nothing is left to read. */
        req->complete = true;
    }

    if (request->type == NBD_CMD_DISC) {
        /*
         * Reported as -EIO so that the caller closes the connection;
         * @errp is left unset.
         */
        return -EIO;
    }

    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
        request->type == NBD_CMD_CACHE)
    {
        if (request->len > NBD_MAX_BUFFER_SIZE) {
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       request->len, NBD_MAX_BUFFER_SIZE);
            return -EINVAL;
        }

        /* CACHE needs no data buffer; READ and WRITE do. */
        if (request->type != NBD_CMD_CACHE) {
            req->data = blk_try_blockalign(client->exp->common.blk,
                                           request->len);
            if (req->data == NULL) {
                error_setg(errp, "No memory");
                return -ENOMEM;
            }
        }
    }

    if (request->type == NBD_CMD_WRITE) {
        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
                     errp) < 0)
        {
            return -EIO;
        }
        /* Header and payload have both been consumed now. */
        req->complete = true;

        trace_nbd_co_receive_request_payload_received(request->handle,
                                                      request->len);
    }

    /* Sanity checks on the fully received request. */
    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
        (request->type == NBD_CMD_WRITE ||
         request->type == NBD_CMD_WRITE_ZEROES ||
         request->type == NBD_CMD_TRIM)) {
        error_setg(errp, "Export is read-only");
        return -EROFS;
    }
    /* Written this way so that from + len cannot overflow. */
    if (request->from > client->exp->size ||
        request->len > client->exp->size - request->from) {
        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                   ", Size: %" PRIu64, request->from, request->len,
                   client->exp->size);
        return (request->type == NBD_CMD_WRITE ||
                request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
    }
    if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
                                                client->check_align)) {
        /*
         * A misaligned request is only traced here, not rejected.
         */
        trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
                                              request->from,
                                              request->len,
                                              client->check_align);
    }
    /* FUA is accepted on every command; the rest depends on the type. */
    valid_flags = NBD_CMD_FLAG_FUA;
    if (request->type == NBD_CMD_READ && client->structured_reply) {
        valid_flags |= NBD_CMD_FLAG_DF;
    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
    }
    if (request->flags & ~valid_flags) {
        error_setg(errp, "unsupported flags for command %s (got 0x%x)",
                   nbd_cmd_lookup(request->type), request->flags);
        return -EINVAL;
    }

    return 0;
}
2333
2334
2335
2336
2337
2338static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
2339 uint64_t handle,
2340 int ret,
2341 const char *error_msg,
2342 Error **errp)
2343{
2344 if (client->structured_reply && ret < 0) {
2345 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
2346 errp);
2347 } else {
2348 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
2349 NULL, 0, errp);
2350 }
2351}
2352
2353
2354
2355
2356static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
2357 uint8_t *data, Error **errp)
2358{
2359 int ret;
2360 NBDExport *exp = client->exp;
2361
2362 assert(request->type == NBD_CMD_READ);
2363
2364
2365 if (request->flags & NBD_CMD_FLAG_FUA) {
2366 ret = blk_co_flush(exp->common.blk);
2367 if (ret < 0) {
2368 return nbd_send_generic_reply(client, request->handle, ret,
2369 "flush failed", errp);
2370 }
2371 }
2372
2373 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
2374 request->len)
2375 {
2376 return nbd_co_send_sparse_read(client, request->handle, request->from,
2377 data, request->len, errp);
2378 }
2379
2380 ret = blk_pread(exp->common.blk, request->from, data, request->len);
2381 if (ret < 0) {
2382 return nbd_send_generic_reply(client, request->handle, ret,
2383 "reading from file failed", errp);
2384 }
2385
2386 if (client->structured_reply) {
2387 if (request->len) {
2388 return nbd_co_send_structured_read(client, request->handle,
2389 request->from, data,
2390 request->len, true, errp);
2391 } else {
2392 return nbd_co_send_structured_done(client, request->handle, errp);
2393 }
2394 } else {
2395 return nbd_co_send_simple_reply(client, request->handle, 0,
2396 data, request->len, errp);
2397 }
2398}
2399
2400
2401
2402
2403
2404
2405
2406
2407static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
2408 Error **errp)
2409{
2410 int ret;
2411 NBDExport *exp = client->exp;
2412
2413 assert(request->type == NBD_CMD_CACHE);
2414
2415 ret = blk_co_preadv(exp->common.blk, request->from, request->len,
2416 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
2417
2418 return nbd_send_generic_reply(client, request->handle, ret,
2419 "caching data failed", errp);
2420}
2421
2422
2423
2424
/*
 * nbd_handle_request
 * Execute an already received and validated @request on the client's
 * export and send the reply.  @data is the request's data buffer (payload
 * for writes, destination for reads).
 *
 * Returns the result of sending the reply: a negative value means that the
 * reply could not be sent, not that the command itself failed.
 */
static coroutine_fn int nbd_handle_request(NBDClient *client,
                                           NBDRequest *request,
                                           uint8_t *data, Error **errp)
{
    int ret;
    int flags;
    NBDExport *exp = client->exp;
    char *msg;
    size_t i;

    switch (request->type) {
    case NBD_CMD_CACHE:
        return nbd_do_cmd_cache(client, request, errp);

    case NBD_CMD_READ:
        return nbd_do_cmd_read(client, request, data, errp);

    case NBD_CMD_WRITE:
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
                         flags);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_WRITE_ZEROES:
        /* Map NBD command flags onto block-layer request flags. */
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
            flags |= BDRV_REQ_NO_FALLBACK;
        }
        ret = 0;
        /*
         * Split the request into pieces of at most BDRV_REQUEST_MAX_BYTES,
         * rounded down to the client's alignment; stop at the first error.
         */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags);
            request->len -= len;
            request->from += len;
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_DISC:
        /* nbd_co_receive_request() never lets a DISC get this far. */
        abort();

    case NBD_CMD_FLUSH:
        ret = blk_co_flush(exp->common.blk);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "flush failed", errp);

    case NBD_CMD_TRIM:
        ret = 0;
        /* Same splitting scheme as for NBD_CMD_WRITE_ZEROES. */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_co_pdiscard(exp->common.blk, request->from, len);
            request->len -= len;
            request->from += len;
        }
        /* FUA on a discard is implemented as a flush afterwards. */
        if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
            ret = blk_co_flush(exp->common.blk);
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "discard failed", errp);

    case NBD_CMD_BLOCK_STATUS:
        if (!request->len) {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "need non-zero length", errp);
        }
        if (client->export_meta.count) {
            bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
            /*
             * One chunk is sent per selected meta context; the counter is
             * decremented before each send so that the chunk for the last
             * context is flagged as final.
             */
            int contexts_remaining = client->export_meta.count;

            if (client->export_meta.base_allocation) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from,
                                               request->len, dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_BASE_ALLOCATION,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            if (client->export_meta.allocation_depth) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from, request->len,
                                               dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_ALLOCATION_DEPTH,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            /* Dirty bitmap contexts use ids from NBD_META_ID_DIRTY_BITMAP. */
            for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
                if (!client->export_meta.bitmaps[i]) {
                    continue;
                }
                ret = nbd_co_send_bitmap(client, request->handle,
                                         client->exp->export_bitmaps[i],
                                         request->from, request->len,
                                         dont_fragment, !--contexts_remaining,
                                         NBD_META_ID_DIRTY_BITMAP + i, errp);
                if (ret < 0) {
                    return ret;
                }
            }

            /* export_meta.count must match the contexts actually set. */
            assert(!contexts_remaining);

            return 0;
        } else {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "CMD_BLOCK_STATUS not negotiated",
                                          errp);
        }

    default:
        msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
                              request->type);
        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
                                     errp);
        g_free(msg);
        return ret;
    }
}
2569
2570
/*
 * Coroutine entry point that receives and handles one request of the
 * client passed as @opaque.  It is created by
 * nbd_client_receive_next_request(), which takes a client reference on its
 * behalf; that reference is dropped on every exit path below.
 */
static coroutine_fn void nbd_trip(void *opaque)
{
    NBDClient *client = opaque;
    NBDRequestData *req;
    NBDRequest request = { 0 };    /* GCC doesn't see that we initialize it */
    int ret;
    Error *local_err = NULL;

    trace_nbd_trip();
    if (client->closing) {
        nbd_client_put(client);
        return;
    }

    if (client->quiescing) {
        /*
         * The export is being detached from its AioContext (see
         * nbd_aio_detach_bh()): do not start receiving, and kick whoever
         * is waiting for recv_coroutine to become NULL.
         */
        nbd_client_put(client);
        client->recv_coroutine = NULL;
        aio_wait_kick();
        return;
    }

    req = nbd_request_get(client);
    ret = nbd_co_receive_request(req, &request, &local_err);
    client->recv_coroutine = NULL;

    if (client->closing) {
        /*
         * The client was closed while the request was being received;
         * there is nobody left to reply to.
         */
        goto done;
    }

    if (ret == -EAGAIN) {
        /* nbd_read_eof() was interrupted because the client is quiescing. */
        assert(client->quiescing);
        goto done;
    }

    /* Allow the next request to be received while this one is handled. */
    nbd_client_receive_next_request(client);
    if (ret == -EIO) {
        goto disconnect;
    }

    if (ret < 0) {
        /*
         * It wasn't -EIO, so the request was rejected and the client gets
         * an error reply carrying the message from local_err.
         *
         * NOTE(review): the reply always carries -EINVAL rather than the
         * specific errno returned by nbd_co_receive_request() (e.g.
         * -EROFS, -ENOSPC, -ENOMEM) -- confirm that this is intentional.
         */
        Error *export_err = local_err;

        local_err = NULL;
        ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
                                     error_get_pretty(export_err), &local_err);
        error_free(export_err);
    } else {
        ret = nbd_handle_request(client, &request, req->data, &local_err);
    }
    if (ret < 0) {
        error_prepend(&local_err, "Failed to send reply: ");
        goto disconnect;
    }

    /*
     * If the request was not fully consumed from the channel (for example
     * a write payload that was never read), the stream position is
     * unknown and the connection cannot be kept.
     */
    if (!req->complete) {
        error_setg(&local_err, "Request handling failed in intermediate state");
        goto disconnect;
    }

done:
    nbd_request_put(req);
    nbd_client_put(client);
    return;

disconnect:
    if (local_err) {
        error_reportf_err(local_err, "Disconnect client, due to: ");
    }
    nbd_request_put(req);
    client_close(client, true);
    nbd_client_put(client);
}
2656
2657static void nbd_client_receive_next_request(NBDClient *client)
2658{
2659 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
2660 !client->quiescing) {
2661 nbd_client_get(client);
2662 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2663 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
2664 }
2665}
2666
2667static coroutine_fn void nbd_co_client_start(void *opaque)
2668{
2669 NBDClient *client = opaque;
2670 Error *local_err = NULL;
2671
2672 qemu_co_mutex_init(&client->send_lock);
2673
2674 if (nbd_negotiate(client, &local_err)) {
2675 if (local_err) {
2676 error_report_err(local_err);
2677 }
2678 client_close(client, false);
2679 return;
2680 }
2681
2682 nbd_client_receive_next_request(client);
2683}
2684
2685
2686
2687
2688
2689
2690void nbd_client_new(QIOChannelSocket *sioc,
2691 QCryptoTLSCreds *tlscreds,
2692 const char *tlsauthz,
2693 void (*close_fn)(NBDClient *, bool))
2694{
2695 NBDClient *client;
2696 Coroutine *co;
2697
2698 client = g_new0(NBDClient, 1);
2699 client->refcount = 1;
2700 client->tlscreds = tlscreds;
2701 if (tlscreds) {
2702 object_ref(OBJECT(client->tlscreds));
2703 }
2704 client->tlsauthz = g_strdup(tlsauthz);
2705 client->sioc = sioc;
2706 object_ref(OBJECT(client->sioc));
2707 client->ioc = QIO_CHANNEL(sioc);
2708 object_ref(OBJECT(client->ioc));
2709 client->close_fn = close_fn;
2710
2711 co = qemu_coroutine_create(nbd_co_client_start, client);
2712 qemu_coroutine_enter(co);
2713}
2714