1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21
22#include "block/export.h"
23#include "qapi/error.h"
24#include "qemu/queue.h"
25#include "trace.h"
26#include "nbd-internal.h"
27#include "qemu/units.h"
28
/* Context ids announced to the client for each negotiated meta context. */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
/*
 * Exported dirty bitmap number i is announced with id
 * NBD_META_ID_DIRTY_BITMAP + i, so this has to remain the highest base id.
 */
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * Limit on block status extents: MiB / 8 entries.
 * NOTE(review): presumably each extent occupies 8 bytes on the wire, making
 * this 1 MiB of extent data per reply - confirm against the user of the macro.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
41
42static int system_errno_to_nbd_errno(int err)
43{
44 switch (err) {
45 case 0:
46 return NBD_SUCCESS;
47 case EPERM:
48 case EROFS:
49 return NBD_EPERM;
50 case EIO:
51 return NBD_EIO;
52 case ENOMEM:
53 return NBD_ENOMEM;
54#ifdef EDQUOT
55 case EDQUOT:
56#endif
57 case EFBIG:
58 case ENOSPC:
59 return NBD_ENOSPC;
60 case EOVERFLOW:
61 return NBD_EOVERFLOW;
62 case ENOTSUP:
63#if ENOTSUP != EOPNOTSUPP
64 case EOPNOTSUPP:
65#endif
66 return NBD_ENOTSUP;
67 case ESHUTDOWN:
68 return NBD_ESHUTDOWN;
69 case EINVAL:
70 default:
71 return NBD_EINVAL;
72 }
73}
74
75
76
77typedef struct NBDRequestData NBDRequestData;
78
79struct NBDRequestData {
80 QSIMPLEQ_ENTRY(NBDRequestData) entry;
81 NBDClient *client;
82 uint8_t *data;
83 bool complete;
84};
85
86struct NBDExport {
87 BlockExport common;
88
89 char *name;
90 char *description;
91 uint64_t size;
92 uint16_t nbdflags;
93 QTAILQ_HEAD(, NBDClient) clients;
94 QTAILQ_ENTRY(NBDExport) next;
95
96 BlockBackend *eject_notifier_blk;
97 Notifier eject_notifier;
98
99 bool allocation_depth;
100 BdrvDirtyBitmap **export_bitmaps;
101 size_t nr_export_bitmaps;
102};
103
104static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
105
106
107
108
109typedef struct NBDExportMetaContexts {
110 NBDExport *exp;
111 size_t count;
112 bool base_allocation;
113 bool allocation_depth;
114 bool *bitmaps;
115
116
117
118} NBDExportMetaContexts;
119
120struct NBDClient {
121 int refcount;
122 void (*close_fn)(NBDClient *client, bool negotiated);
123
124 NBDExport *exp;
125 QCryptoTLSCreds *tlscreds;
126 char *tlsauthz;
127 QIOChannelSocket *sioc;
128 QIOChannel *ioc;
129
130 Coroutine *recv_coroutine;
131
132 CoMutex send_lock;
133 Coroutine *send_coroutine;
134
135 bool read_yielding;
136 bool quiescing;
137
138 QTAILQ_ENTRY(NBDClient) next;
139 int nb_requests;
140 bool closing;
141
142 uint32_t check_align;
143
144 bool structured_reply;
145 NBDExportMetaContexts export_meta;
146
147 uint32_t opt;
148 uint32_t optlen;
149
150};
151
152static void nbd_client_receive_next_request(NBDClient *client);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
182 uint32_t type, uint32_t length)
183{
184 stq_be_p(&rep->magic, NBD_REP_MAGIC);
185 stl_be_p(&rep->option, option);
186 stl_be_p(&rep->type, type);
187 stl_be_p(&rep->length, length);
188}
189
190
191
192static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
193 uint32_t len, Error **errp)
194{
195 NBDOptionReply rep;
196
197 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
198 type, nbd_rep_lookup(type), len);
199
200 assert(len < NBD_MAX_BUFFER_SIZE);
201
202 set_be_option_rep(&rep, client->opt, type, len);
203 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
204}
205
206
207
208static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
209 Error **errp)
210{
211 return nbd_negotiate_send_rep_len(client, type, 0, errp);
212}
213
214
215
216static int GCC_FMT_ATTR(4, 0)
217nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
218 Error **errp, const char *fmt, va_list va)
219{
220 ERRP_GUARD();
221 g_autofree char *msg = NULL;
222 int ret;
223 size_t len;
224
225 msg = g_strdup_vprintf(fmt, va);
226 len = strlen(msg);
227 assert(len < NBD_MAX_STRING_SIZE);
228 trace_nbd_negotiate_send_rep_err(msg);
229 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
230 if (ret < 0) {
231 return ret;
232 }
233 if (nbd_write(client->ioc, msg, len, errp) < 0) {
234 error_prepend(errp, "write failed (error message): ");
235 return -EIO;
236 }
237
238 return 0;
239}
240
241
242
243
/*
 * Return a newly allocated copy of @name that is short enough to embed in
 * an error message: names of 80 bytes or more are cut to their first 80
 * bytes followed by "...".  The caller owns (and must free) the result.
 */
static char *
nbd_sanitize_name(const char *name)
{
    if (strnlen(name, 80) >= 80) {
        return g_strdup_printf("%.80s...", name);
    }

    return g_strdup(name);
}
253
254
255
256static int GCC_FMT_ATTR(4, 5)
257nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
258 Error **errp, const char *fmt, ...)
259{
260 va_list va;
261 int ret;
262
263 va_start(va, fmt);
264 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
265 va_end(va);
266 return ret;
267}
268
269
270
271
272static int GCC_FMT_ATTR(4, 0)
273nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
274 const char *fmt, va_list va)
275{
276 int ret = nbd_drop(client->ioc, client->optlen, errp);
277
278 client->optlen = 0;
279 if (!ret) {
280 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
281 }
282 return ret;
283}
284
285static int GCC_FMT_ATTR(4, 5)
286nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
287 const char *fmt, ...)
288{
289 int ret;
290 va_list va;
291
292 va_start(va, fmt);
293 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
294 va_end(va);
295
296 return ret;
297}
298
299static int GCC_FMT_ATTR(3, 4)
300nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
301{
302 int ret;
303 va_list va;
304
305 va_start(va, fmt);
306 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
307 va_end(va);
308
309 return ret;
310}
311
312
313
314
315
316static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
317 bool check_nul, Error **errp)
318{
319 if (size > client->optlen) {
320 return nbd_opt_invalid(client, errp,
321 "Inconsistent lengths in option %s",
322 nbd_opt_lookup(client->opt));
323 }
324 client->optlen -= size;
325 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
326 return -EIO;
327 }
328
329 if (check_nul && strnlen(buffer, size) != size) {
330 return nbd_opt_invalid(client, errp,
331 "Unexpected embedded NUL in option %s",
332 nbd_opt_lookup(client->opt));
333 }
334 return 1;
335}
336
337
338
339
340static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
341{
342 if (size > client->optlen) {
343 return nbd_opt_invalid(client, errp,
344 "Inconsistent lengths in option %s",
345 nbd_opt_lookup(client->opt));
346 }
347 client->optlen -= size;
348 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
349}
350
351
352
353
354
355
356
357
358
359
360
361
362
363static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
364 Error **errp)
365{
366 int ret;
367 uint32_t len;
368 g_autofree char *local_name = NULL;
369
370 *name = NULL;
371 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
372 if (ret <= 0) {
373 return ret;
374 }
375 len = cpu_to_be32(len);
376
377 if (len > NBD_MAX_STRING_SIZE) {
378 return nbd_opt_invalid(client, errp,
379 "Invalid name length: %" PRIu32, len);
380 }
381
382 local_name = g_malloc(len + 1);
383 ret = nbd_opt_read(client, local_name, len, true, errp);
384 if (ret <= 0) {
385 return ret;
386 }
387 local_name[len] = '\0';
388
389 if (length) {
390 *length = len;
391 }
392 *name = g_steal_pointer(&local_name);
393
394 return 1;
395}
396
397
398
399static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
400 Error **errp)
401{
402 ERRP_GUARD();
403 size_t name_len, desc_len;
404 uint32_t len;
405 const char *name = exp->name ? exp->name : "";
406 const char *desc = exp->description ? exp->description : "";
407 QIOChannel *ioc = client->ioc;
408 int ret;
409
410 trace_nbd_negotiate_send_rep_list(name, desc);
411 name_len = strlen(name);
412 desc_len = strlen(desc);
413 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
414 len = name_len + desc_len + sizeof(len);
415 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
416 if (ret < 0) {
417 return ret;
418 }
419
420 len = cpu_to_be32(name_len);
421 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
422 error_prepend(errp, "write failed (name length): ");
423 return -EINVAL;
424 }
425
426 if (nbd_write(ioc, name, name_len, errp) < 0) {
427 error_prepend(errp, "write failed (name buffer): ");
428 return -EINVAL;
429 }
430
431 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
432 error_prepend(errp, "write failed (description buffer): ");
433 return -EINVAL;
434 }
435
436 return 0;
437}
438
439
440
441static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
442{
443 NBDExport *exp;
444 assert(client->opt == NBD_OPT_LIST);
445
446
447 QTAILQ_FOREACH(exp, &exports, next) {
448 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
449 return -EINVAL;
450 }
451 }
452
453 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
454}
455
456static void nbd_check_meta_export(NBDClient *client)
457{
458 if (client->exp != client->export_meta.exp) {
459 client->export_meta.count = 0;
460 }
461}
462
463
464
465static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
466 Error **errp)
467{
468 ERRP_GUARD();
469 g_autofree char *name = NULL;
470 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
471 size_t len;
472 int ret;
473 uint16_t myflags;
474
475
476
477
478
479
480
481
482 trace_nbd_negotiate_handle_export_name();
483 if (client->optlen > NBD_MAX_STRING_SIZE) {
484 error_setg(errp, "Bad length received");
485 return -EINVAL;
486 }
487 name = g_malloc(client->optlen + 1);
488 if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
489 return -EIO;
490 }
491 name[client->optlen] = '\0';
492 client->optlen = 0;
493
494 trace_nbd_negotiate_handle_export_name_request(name);
495
496 client->exp = nbd_export_find(name);
497 if (!client->exp) {
498 error_setg(errp, "export not found");
499 return -EINVAL;
500 }
501
502 myflags = client->exp->nbdflags;
503 if (client->structured_reply) {
504 myflags |= NBD_FLAG_SEND_DF;
505 }
506 trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
507 stq_be_p(buf, client->exp->size);
508 stw_be_p(buf + 8, myflags);
509 len = no_zeroes ? 10 : sizeof(buf);
510 ret = nbd_write(client->ioc, buf, len, errp);
511 if (ret < 0) {
512 error_prepend(errp, "write failed: ");
513 return ret;
514 }
515
516 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
517 blk_exp_ref(&client->exp->common);
518 nbd_check_meta_export(client);
519
520 return 0;
521}
522
523
524
525
526static int nbd_negotiate_send_info(NBDClient *client,
527 uint16_t info, uint32_t length, void *buf,
528 Error **errp)
529{
530 int rc;
531
532 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
533 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
534 sizeof(info) + length, errp);
535 if (rc < 0) {
536 return rc;
537 }
538 info = cpu_to_be16(info);
539 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
540 return -EIO;
541 }
542 if (nbd_write(client->ioc, buf, length, errp) < 0) {
543 return -EIO;
544 }
545 return 0;
546}
547
548
549
550
551
552
553
554
555static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
556{
557 int ret;
558
559 assert(client->optlen);
560 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
561 nbd_opt_lookup(client->opt));
562 if (fatal && !ret) {
563 error_setg(errp, "option '%s' has unexpected length",
564 nbd_opt_lookup(client->opt));
565 return -EINVAL;
566 }
567 return ret;
568}
569
570
571
572
573static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
574{
575 int rc;
576 g_autofree char *name = NULL;
577 NBDExport *exp;
578 uint16_t requests;
579 uint16_t request;
580 uint32_t namelen = 0;
581 bool sendname = false;
582 bool blocksize = false;
583 uint32_t sizes[3];
584 char buf[sizeof(uint64_t) + sizeof(uint16_t)];
585 uint32_t check_align = 0;
586 uint16_t myflags;
587
588
589
590
591
592
593
594 rc = nbd_opt_read_name(client, &name, &namelen, errp);
595 if (rc <= 0) {
596 return rc;
597 }
598 trace_nbd_negotiate_handle_export_name_request(name);
599
600 rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
601 if (rc <= 0) {
602 return rc;
603 }
604 requests = be16_to_cpu(requests);
605 trace_nbd_negotiate_handle_info_requests(requests);
606 while (requests--) {
607 rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
608 if (rc <= 0) {
609 return rc;
610 }
611 request = be16_to_cpu(request);
612 trace_nbd_negotiate_handle_info_request(request,
613 nbd_info_lookup(request));
614
615
616
617 switch (request) {
618 case NBD_INFO_NAME:
619 sendname = true;
620 break;
621 case NBD_INFO_BLOCK_SIZE:
622 blocksize = true;
623 break;
624 }
625 }
626 if (client->optlen) {
627 return nbd_reject_length(client, false, errp);
628 }
629
630 exp = nbd_export_find(name);
631 if (!exp) {
632 g_autofree char *sane_name = nbd_sanitize_name(name);
633
634 return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
635 errp, "export '%s' not present",
636 sane_name);
637 }
638
639
640 if (sendname) {
641 rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
642 errp);
643 if (rc < 0) {
644 return rc;
645 }
646 }
647
648
649
650 if (exp->description) {
651 size_t len = strlen(exp->description);
652
653 assert(len <= NBD_MAX_STRING_SIZE);
654 rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
655 len, exp->description, errp);
656 if (rc < 0) {
657 return rc;
658 }
659 }
660
661
662
663
664
665 if (client->opt == NBD_OPT_INFO || blocksize) {
666 check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
667 } else {
668 sizes[0] = 1;
669 }
670 assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
671
672
673 sizes[1] = MAX(4096, sizes[0]);
674
675 sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
676 trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
677 sizes[0] = cpu_to_be32(sizes[0]);
678 sizes[1] = cpu_to_be32(sizes[1]);
679 sizes[2] = cpu_to_be32(sizes[2]);
680 rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
681 sizeof(sizes), sizes, errp);
682 if (rc < 0) {
683 return rc;
684 }
685
686
687 myflags = exp->nbdflags;
688 if (client->structured_reply) {
689 myflags |= NBD_FLAG_SEND_DF;
690 }
691 trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
692 stq_be_p(buf, exp->size);
693 stw_be_p(buf + 8, myflags);
694 rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
695 sizeof(buf), buf, errp);
696 if (rc < 0) {
697 return rc;
698 }
699
700
701
702
703
704
705
706 if (client->opt == NBD_OPT_INFO && !blocksize &&
707 blk_get_request_alignment(exp->common.blk) > 1) {
708 return nbd_negotiate_send_rep_err(client,
709 NBD_REP_ERR_BLOCK_SIZE_REQD,
710 errp,
711 "request NBD_INFO_BLOCK_SIZE to "
712 "use this export");
713 }
714
715
716 rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
717 if (rc < 0) {
718 return rc;
719 }
720
721 if (client->opt == NBD_OPT_GO) {
722 client->exp = exp;
723 client->check_align = check_align;
724 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
725 blk_exp_ref(&client->exp->common);
726 nbd_check_meta_export(client);
727 rc = 1;
728 }
729 return rc;
730}
731
732
733
734
735static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
736 Error **errp)
737{
738 QIOChannel *ioc;
739 QIOChannelTLS *tioc;
740 struct NBDTLSHandshakeData data = { 0 };
741
742 assert(client->opt == NBD_OPT_STARTTLS);
743
744 trace_nbd_negotiate_handle_starttls();
745 ioc = client->ioc;
746
747 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
748 return NULL;
749 }
750
751 tioc = qio_channel_tls_new_server(ioc,
752 client->tlscreds,
753 client->tlsauthz,
754 errp);
755 if (!tioc) {
756 return NULL;
757 }
758
759 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
760 trace_nbd_negotiate_handle_starttls_handshake();
761 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
762 qio_channel_tls_handshake(tioc,
763 nbd_tls_handshake,
764 &data,
765 NULL,
766 NULL);
767
768 if (!data.complete) {
769 g_main_loop_run(data.loop);
770 }
771 g_main_loop_unref(data.loop);
772 if (data.error) {
773 object_unref(OBJECT(tioc));
774 error_propagate(errp, data.error);
775 return NULL;
776 }
777
778 return QIO_CHANNEL(tioc);
779}
780
781
782
783
784
785
786
787static int nbd_negotiate_send_meta_context(NBDClient *client,
788 const char *context,
789 uint32_t context_id,
790 Error **errp)
791{
792 NBDOptionReplyMetaContext opt;
793 struct iovec iov[] = {
794 {.iov_base = &opt, .iov_len = sizeof(opt)},
795 {.iov_base = (void *)context, .iov_len = strlen(context)}
796 };
797
798 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
799 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
800 context_id = 0;
801 }
802
803 trace_nbd_negotiate_meta_query_reply(context, context_id);
804 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
805 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
806 stl_be_p(&opt.context_id, context_id);
807
808 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
809}
810
811
812
813
814
815static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
816 const char *query)
817{
818 if (!*query) {
819 trace_nbd_negotiate_meta_query_parse("empty");
820 return client->opt == NBD_OPT_LIST_META_CONTEXT;
821 }
822 if (strcmp(query, pattern) == 0) {
823 trace_nbd_negotiate_meta_query_parse(pattern);
824 return true;
825 }
826 trace_nbd_negotiate_meta_query_skip("pattern not matched");
827 return false;
828}
829
830
831
832
/*
 * If *@str begins with @prefix, advance *@str past the prefix and return
 * true.  Otherwise leave *@str untouched and return false.
 */
static bool nbd_strshift(const char **str, const char *prefix)
{
    size_t prefix_len = strlen(prefix);

    if (strncmp(*str, prefix, prefix_len) != 0) {
        return false;
    }

    *str += prefix_len;
    return true;
}
843
844
845
846
847
848
849static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
850 const char *query)
851{
852 if (!nbd_strshift(&query, "base:")) {
853 return false;
854 }
855 trace_nbd_negotiate_meta_query_parse("base:");
856
857 if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
858 meta->base_allocation = true;
859 }
860 return true;
861}
862
863
864
865
866
867
868
869static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
870 const char *query)
871{
872 size_t i;
873
874 if (!nbd_strshift(&query, "qemu:")) {
875 return false;
876 }
877 trace_nbd_negotiate_meta_query_parse("qemu:");
878
879 if (!*query) {
880 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
881 meta->allocation_depth = meta->exp->allocation_depth;
882 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
883 }
884 trace_nbd_negotiate_meta_query_parse("empty");
885 return true;
886 }
887
888 if (strcmp(query, "allocation-depth") == 0) {
889 trace_nbd_negotiate_meta_query_parse("allocation-depth");
890 meta->allocation_depth = meta->exp->allocation_depth;
891 return true;
892 }
893
894 if (nbd_strshift(&query, "dirty-bitmap:")) {
895 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
896 if (!*query) {
897 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
898 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
899 }
900 trace_nbd_negotiate_meta_query_parse("empty");
901 return true;
902 }
903
904 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
905 const char *bm_name;
906
907 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
908 if (strcmp(bm_name, query) == 0) {
909 meta->bitmaps[i] = true;
910 trace_nbd_negotiate_meta_query_parse(query);
911 return true;
912 }
913 }
914 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
915 return true;
916 }
917
918 trace_nbd_negotiate_meta_query_skip("unknown qemu context");
919 return true;
920}
921
922
923
924
925
926
927
928
929
930
931static int nbd_negotiate_meta_query(NBDClient *client,
932 NBDExportMetaContexts *meta, Error **errp)
933{
934 int ret;
935 g_autofree char *query = NULL;
936 uint32_t len;
937
938 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
939 if (ret <= 0) {
940 return ret;
941 }
942 len = cpu_to_be32(len);
943
944 if (len > NBD_MAX_STRING_SIZE) {
945 trace_nbd_negotiate_meta_query_skip("length too long");
946 return nbd_opt_skip(client, len, errp);
947 }
948
949 query = g_malloc(len + 1);
950 ret = nbd_opt_read(client, query, len, true, errp);
951 if (ret <= 0) {
952 return ret;
953 }
954 query[len] = '\0';
955
956 if (nbd_meta_base_query(client, meta, query)) {
957 return 1;
958 }
959 if (nbd_meta_qemu_query(client, meta, query)) {
960 return 1;
961 }
962
963 trace_nbd_negotiate_meta_query_skip("unknown namespace");
964 return 1;
965}
966
967
968
969
970
971static int nbd_negotiate_meta_queries(NBDClient *client,
972 NBDExportMetaContexts *meta, Error **errp)
973{
974 int ret;
975 g_autofree char *export_name = NULL;
976
977 g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
978 NBDExportMetaContexts local_meta = {0};
979 uint32_t nb_queries;
980 size_t i;
981 size_t count = 0;
982
983 if (!client->structured_reply) {
984 return nbd_opt_invalid(client, errp,
985 "request option '%s' when structured reply "
986 "is not negotiated",
987 nbd_opt_lookup(client->opt));
988 }
989
990 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
991
992 meta = &local_meta;
993 }
994
995 g_free(meta->bitmaps);
996 memset(meta, 0, sizeof(*meta));
997
998 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
999 if (ret <= 0) {
1000 return ret;
1001 }
1002
1003 meta->exp = nbd_export_find(export_name);
1004 if (meta->exp == NULL) {
1005 g_autofree char *sane_name = nbd_sanitize_name(export_name);
1006
1007 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
1008 "export '%s' not present", sane_name);
1009 }
1010 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
1011 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1012 bitmaps = meta->bitmaps;
1013 }
1014
1015 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1016 if (ret <= 0) {
1017 return ret;
1018 }
1019 nb_queries = cpu_to_be32(nb_queries);
1020 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1021 export_name, nb_queries);
1022
1023 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1024
1025 meta->base_allocation = true;
1026 meta->allocation_depth = meta->exp->allocation_depth;
1027 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
1028 } else {
1029 for (i = 0; i < nb_queries; ++i) {
1030 ret = nbd_negotiate_meta_query(client, meta, errp);
1031 if (ret <= 0) {
1032 return ret;
1033 }
1034 }
1035 }
1036
1037 if (meta->base_allocation) {
1038 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1039 NBD_META_ID_BASE_ALLOCATION,
1040 errp);
1041 if (ret < 0) {
1042 return ret;
1043 }
1044 count++;
1045 }
1046
1047 if (meta->allocation_depth) {
1048 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
1049 NBD_META_ID_ALLOCATION_DEPTH,
1050 errp);
1051 if (ret < 0) {
1052 return ret;
1053 }
1054 count++;
1055 }
1056
1057 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
1058 const char *bm_name;
1059 g_autofree char *context = NULL;
1060
1061 if (!meta->bitmaps[i]) {
1062 continue;
1063 }
1064
1065 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
1066 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
1067
1068 ret = nbd_negotiate_send_meta_context(client, context,
1069 NBD_META_ID_DIRTY_BITMAP + i,
1070 errp);
1071 if (ret < 0) {
1072 return ret;
1073 }
1074 count++;
1075 }
1076
1077 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1078 if (ret == 0) {
1079 meta->count = count;
1080 }
1081
1082 return ret;
1083}
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094static int nbd_negotiate_options(NBDClient *client, Error **errp)
1095{
1096 uint32_t flags;
1097 bool fixedNewstyle = false;
1098 bool no_zeroes = false;
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
1116 return -EIO;
1117 }
1118 trace_nbd_negotiate_options_flags(flags);
1119 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
1120 fixedNewstyle = true;
1121 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1122 }
1123 if (flags & NBD_FLAG_C_NO_ZEROES) {
1124 no_zeroes = true;
1125 flags &= ~NBD_FLAG_C_NO_ZEROES;
1126 }
1127 if (flags != 0) {
1128 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1129 return -EINVAL;
1130 }
1131
1132 while (1) {
1133 int ret;
1134 uint32_t option, length;
1135 uint64_t magic;
1136
1137 if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1138 return -EINVAL;
1139 }
1140 trace_nbd_negotiate_options_check_magic(magic);
1141 if (magic != NBD_OPTS_MAGIC) {
1142 error_setg(errp, "Bad magic received");
1143 return -EINVAL;
1144 }
1145
1146 if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1147 return -EINVAL;
1148 }
1149 client->opt = option;
1150
1151 if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1152 return -EINVAL;
1153 }
1154 assert(!client->optlen);
1155 client->optlen = length;
1156
1157 if (length > NBD_MAX_BUFFER_SIZE) {
1158 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
1159 length, NBD_MAX_BUFFER_SIZE);
1160 return -EINVAL;
1161 }
1162
1163 trace_nbd_negotiate_options_check_option(option,
1164 nbd_opt_lookup(option));
1165 if (client->tlscreds &&
1166 client->ioc == (QIOChannel *)client->sioc) {
1167 QIOChannel *tioc;
1168 if (!fixedNewstyle) {
1169 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1170 return -EINVAL;
1171 }
1172 switch (option) {
1173 case NBD_OPT_STARTTLS:
1174 if (length) {
1175
1176
1177 return nbd_reject_length(client, true, errp);
1178 }
1179 tioc = nbd_negotiate_handle_starttls(client, errp);
1180 if (!tioc) {
1181 return -EIO;
1182 }
1183 ret = 0;
1184 object_unref(OBJECT(client->ioc));
1185 client->ioc = QIO_CHANNEL(tioc);
1186 break;
1187
1188 case NBD_OPT_EXPORT_NAME:
1189
1190 error_setg(errp, "Option 0x%x not permitted before TLS",
1191 option);
1192 return -EINVAL;
1193
1194 default:
1195
1196
1197
1198
1199
1200
1201 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
1202 option == NBD_OPT_ABORT ? NULL : errp,
1203 "Option 0x%" PRIx32
1204 " not permitted before TLS", option);
1205 if (option == NBD_OPT_ABORT) {
1206 return 1;
1207 }
1208 break;
1209 }
1210 } else if (fixedNewstyle) {
1211 switch (option) {
1212 case NBD_OPT_LIST:
1213 if (length) {
1214 ret = nbd_reject_length(client, false, errp);
1215 } else {
1216 ret = nbd_negotiate_handle_list(client, errp);
1217 }
1218 break;
1219
1220 case NBD_OPT_ABORT:
1221
1222
1223
1224 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
1225 return 1;
1226
1227 case NBD_OPT_EXPORT_NAME:
1228 return nbd_negotiate_handle_export_name(client, no_zeroes,
1229 errp);
1230
1231 case NBD_OPT_INFO:
1232 case NBD_OPT_GO:
1233 ret = nbd_negotiate_handle_info(client, errp);
1234 if (ret == 1) {
1235 assert(option == NBD_OPT_GO);
1236 return 0;
1237 }
1238 break;
1239
1240 case NBD_OPT_STARTTLS:
1241 if (length) {
1242 ret = nbd_reject_length(client, false, errp);
1243 } else if (client->tlscreds) {
1244 ret = nbd_negotiate_send_rep_err(client,
1245 NBD_REP_ERR_INVALID, errp,
1246 "TLS already enabled");
1247 } else {
1248 ret = nbd_negotiate_send_rep_err(client,
1249 NBD_REP_ERR_POLICY, errp,
1250 "TLS not configured");
1251 }
1252 break;
1253
1254 case NBD_OPT_STRUCTURED_REPLY:
1255 if (length) {
1256 ret = nbd_reject_length(client, false, errp);
1257 } else if (client->structured_reply) {
1258 ret = nbd_negotiate_send_rep_err(
1259 client, NBD_REP_ERR_INVALID, errp,
1260 "structured reply already negotiated");
1261 } else {
1262 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1263 client->structured_reply = true;
1264 }
1265 break;
1266
1267 case NBD_OPT_LIST_META_CONTEXT:
1268 case NBD_OPT_SET_META_CONTEXT:
1269 ret = nbd_negotiate_meta_queries(client, &client->export_meta,
1270 errp);
1271 break;
1272
1273 default:
1274 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
1275 "Unsupported option %" PRIu32 " (%s)",
1276 option, nbd_opt_lookup(option));
1277 break;
1278 }
1279 } else {
1280
1281
1282
1283
1284 switch (option) {
1285 case NBD_OPT_EXPORT_NAME:
1286 return nbd_negotiate_handle_export_name(client, no_zeroes,
1287 errp);
1288
1289 default:
1290 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
1291 option, nbd_opt_lookup(option));
1292 return -EINVAL;
1293 }
1294 }
1295 if (ret < 0) {
1296 return ret;
1297 }
1298 }
1299}
1300
1301
1302
1303
1304
1305
1306
1307
1308static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1309{
1310 ERRP_GUARD();
1311 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1312 int ret;
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328 qio_channel_set_blocking(client->ioc, false, NULL);
1329
1330 trace_nbd_negotiate_begin();
1331 memcpy(buf, "NBDMAGIC", 8);
1332
1333 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1334 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1335
1336 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1337 error_prepend(errp, "write failed: ");
1338 return -EINVAL;
1339 }
1340 ret = nbd_negotiate_options(client, errp);
1341 if (ret != 0) {
1342 if (ret < 0) {
1343 error_prepend(errp, "option negotiation failed: ");
1344 }
1345 return ret;
1346 }
1347
1348
1349 if (client->exp && client->exp->common.ctx) {
1350 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
1351 }
1352
1353 assert(!client->optlen);
1354 trace_nbd_negotiate_success();
1355
1356 return 0;
1357}
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367static inline int coroutine_fn
1368nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1369{
1370 bool partial = false;
1371
1372 assert(size);
1373 while (size > 0) {
1374 struct iovec iov = { .iov_base = buffer, .iov_len = size };
1375 ssize_t len;
1376
1377 len = qio_channel_readv(client->ioc, &iov, 1, errp);
1378 if (len == QIO_CHANNEL_ERR_BLOCK) {
1379 client->read_yielding = true;
1380 qio_channel_yield(client->ioc, G_IO_IN);
1381 client->read_yielding = false;
1382 if (client->quiescing) {
1383 return -EAGAIN;
1384 }
1385 continue;
1386 } else if (len < 0) {
1387 return -EIO;
1388 } else if (len == 0) {
1389 if (partial) {
1390 error_setg(errp,
1391 "Unexpected end-of-file before all bytes were read");
1392 return -EIO;
1393 } else {
1394 return 0;
1395 }
1396 }
1397
1398 partial = true;
1399 size -= len;
1400 buffer = (uint8_t *) buffer + len;
1401 }
1402 return 1;
1403}
1404
1405static int nbd_receive_request(NBDClient *client, NBDRequest *request,
1406 Error **errp)
1407{
1408 uint8_t buf[NBD_REQUEST_SIZE];
1409 uint32_t magic;
1410 int ret;
1411
1412 ret = nbd_read_eof(client, buf, sizeof(buf), errp);
1413 if (ret < 0) {
1414 return ret;
1415 }
1416 if (ret == 0) {
1417 return -EIO;
1418 }
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429 magic = ldl_be_p(buf);
1430 request->flags = lduw_be_p(buf + 4);
1431 request->type = lduw_be_p(buf + 6);
1432 request->handle = ldq_be_p(buf + 8);
1433 request->from = ldq_be_p(buf + 16);
1434 request->len = ldl_be_p(buf + 24);
1435
1436 trace_nbd_receive_request(magic, request->flags, request->type,
1437 request->from, request->len);
1438
1439 if (magic != NBD_REQUEST_MAGIC) {
1440 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1441 return -EINVAL;
1442 }
1443 return 0;
1444}
1445
/* Upper bound on the requests a client may have allocated at one time. */
#define MAX_NBD_REQUESTS 16
1447
1448void nbd_client_get(NBDClient *client)
1449{
1450 client->refcount++;
1451}
1452
1453void nbd_client_put(NBDClient *client)
1454{
1455 if (--client->refcount == 0) {
1456
1457
1458
1459 assert(client->closing);
1460
1461 qio_channel_detach_aio_context(client->ioc);
1462 object_unref(OBJECT(client->sioc));
1463 object_unref(OBJECT(client->ioc));
1464 if (client->tlscreds) {
1465 object_unref(OBJECT(client->tlscreds));
1466 }
1467 g_free(client->tlsauthz);
1468 if (client->exp) {
1469 QTAILQ_REMOVE(&client->exp->clients, client, next);
1470 blk_exp_unref(&client->exp->common);
1471 }
1472 g_free(client->export_meta.bitmaps);
1473 g_free(client);
1474 }
1475}
1476
1477static void client_close(NBDClient *client, bool negotiated)
1478{
1479 if (client->closing) {
1480 return;
1481 }
1482
1483 client->closing = true;
1484
1485
1486
1487
1488 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1489 NULL);
1490
1491
1492 if (client->close_fn) {
1493 client->close_fn(client, negotiated);
1494 }
1495}
1496
1497static NBDRequestData *nbd_request_get(NBDClient *client)
1498{
1499 NBDRequestData *req;
1500
1501 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1502 client->nb_requests++;
1503
1504 req = g_new0(NBDRequestData, 1);
1505 nbd_client_get(client);
1506 req->client = client;
1507 return req;
1508}
1509
1510static void nbd_request_put(NBDRequestData *req)
1511{
1512 NBDClient *client = req->client;
1513
1514 if (req->data) {
1515 qemu_vfree(req->data);
1516 }
1517 g_free(req);
1518
1519 client->nb_requests--;
1520
1521 if (client->quiescing && client->nb_requests == 0) {
1522 aio_wait_kick();
1523 }
1524
1525 nbd_client_receive_next_request(client);
1526
1527 nbd_client_put(client);
1528}
1529
1530static void blk_aio_attached(AioContext *ctx, void *opaque)
1531{
1532 NBDExport *exp = opaque;
1533 NBDClient *client;
1534
1535 trace_nbd_blk_aio_attached(exp->name, ctx);
1536
1537 exp->common.ctx = ctx;
1538
1539 QTAILQ_FOREACH(client, &exp->clients, next) {
1540 qio_channel_attach_aio_context(client->ioc, ctx);
1541
1542 assert(client->nb_requests == 0);
1543 assert(client->recv_coroutine == NULL);
1544 assert(client->send_coroutine == NULL);
1545 }
1546}
1547
1548static void blk_aio_detach(void *opaque)
1549{
1550 NBDExport *exp = opaque;
1551 NBDClient *client;
1552
1553 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1554
1555 QTAILQ_FOREACH(client, &exp->clients, next) {
1556 qio_channel_detach_aio_context(client->ioc);
1557 }
1558
1559 exp->common.ctx = NULL;
1560}
1561
1562static void nbd_drained_begin(void *opaque)
1563{
1564 NBDExport *exp = opaque;
1565 NBDClient *client;
1566
1567 QTAILQ_FOREACH(client, &exp->clients, next) {
1568 client->quiescing = true;
1569 }
1570}
1571
1572static void nbd_drained_end(void *opaque)
1573{
1574 NBDExport *exp = opaque;
1575 NBDClient *client;
1576
1577 QTAILQ_FOREACH(client, &exp->clients, next) {
1578 client->quiescing = false;
1579 nbd_client_receive_next_request(client);
1580 }
1581}
1582
1583static bool nbd_drained_poll(void *opaque)
1584{
1585 NBDExport *exp = opaque;
1586 NBDClient *client;
1587
1588 QTAILQ_FOREACH(client, &exp->clients, next) {
1589 if (client->nb_requests != 0) {
1590
1591
1592
1593
1594 if (client->recv_coroutine != NULL && client->read_yielding) {
1595 qemu_aio_coroutine_enter(exp->common.ctx,
1596 client->recv_coroutine);
1597 }
1598
1599 return true;
1600 }
1601 }
1602
1603 return false;
1604}
1605
1606static void nbd_eject_notifier(Notifier *n, void *data)
1607{
1608 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1609
1610 blk_exp_request_shutdown(&exp->common);
1611}
1612
1613void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
1614{
1615 NBDExport *nbd_exp = container_of(exp, NBDExport, common);
1616 assert(exp->drv == &blk_exp_nbd);
1617 assert(nbd_exp->eject_notifier_blk == NULL);
1618
1619 blk_ref(blk);
1620 nbd_exp->eject_notifier_blk = blk;
1621 nbd_exp->eject_notifier.notify = nbd_eject_notifier;
1622 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
1623}
1624
/*
 * Device callbacks installed on the export's BlockBackend by
 * nbd_export_create(); they hook the NBD clients into drained sections.
 */
static const BlockDevOps nbd_block_ops = {
    .drained_begin = nbd_drained_begin,
    .drained_end = nbd_drained_end,
    .drained_poll = nbd_drained_poll,
};
1630
/*
 * BlockExportDriver .create callback for NBD exports.
 *
 * Validates the request (server running, name/description length, unique
 * export name), determines the export size, resolves the requested dirty
 * bitmaps and finally registers the export in the global 'exports' list.
 *
 * @blk_exp:  the embedded BlockExport of a zero-initialised NBDExport
 * @exp_args: export options; must be of type BLOCK_EXPORT_TYPE_NBD
 * @errp:     error object filled in on failure
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
                             Error **errp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    BlockExportOptionsNbd *arg = &exp_args->u.nbd;
    BlockBackend *blk = blk_exp->blk;
    int64_t size;
    uint64_t perm, shared_perm;
    bool readonly = !exp_args->writable;
    bool shared = !exp_args->writable;
    strList *bitmaps;
    size_t i;
    int ret;

    assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);

    if (!nbd_server_is_running()) {
        error_setg(errp, "NBD server not running");
        return -EINVAL;
    }

    /* The export name defaults to the node name. */
    if (!arg->has_name) {
        arg->name = exp_args->node_name;
    }

    if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "export name '%s' too long", arg->name);
        return -EINVAL;
    }

    if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "description '%s' too long", arg->description);
        return -EINVAL;
    }

    if (nbd_export_find(arg->name)) {
        error_setg(errp, "NBD server already has export named '%s'", arg->name);
        return -EEXIST;
    }

    size = blk_getlength(blk);
    if (size < 0) {
        error_setg_errno(errp, -size,
                         "Failed to determine the NBD export's length");
        return size;
    }

    /*
     * Keep the current permissions but stop sharing BLK_PERM_RESIZE, so the
     * node cannot be resized by others while it is exported.
     */
    blk_get_perm(blk, &perm, &shared_perm);
    ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
    if (ret < 0) {
        return ret;
    }

    /* From here on, failures must go through the 'fail' label. */
    QTAILQ_INIT(&exp->clients);
    exp->name = g_strdup(arg->name);
    exp->description = g_strdup(arg->description);
    exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
                     NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
    if (readonly) {
        exp->nbdflags |= NBD_FLAG_READ_ONLY;
        if (shared) {
            exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
        }
    } else {
        exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
                          NBD_FLAG_SEND_FAST_ZERO);
    }
    /* The advertised size is rounded down to a whole number of sectors. */
    exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);

    /* Count the requested bitmaps, then resolve each name to a bitmap. */
    for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
        exp->nr_export_bitmaps++;
    }
    exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
    for (i = 0, bitmaps = arg->bitmaps; bitmaps;
         i++, bitmaps = bitmaps->next) {
        const char *bitmap = bitmaps->value;
        BlockDriverState *bs = blk_bs(blk);
        BdrvDirtyBitmap *bm = NULL;

        /*
         * Search the exported node first, then walk down through its
         * filter/COW children until a bitmap with this name is found.
         */
        while (bs) {
            bm = bdrv_find_dirty_bitmap(bs, bitmap);
            if (bm != NULL) {
                break;
            }

            bs = bdrv_filter_or_cow_bs(bs);
        }

        if (bm == NULL) {
            ret = -ENOENT;
            error_setg(errp, "Bitmap '%s' is not found", bitmap);
            goto fail;
        }

        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
            ret = -EINVAL;
            goto fail;
        }

        if (readonly && bdrv_is_writable(bs) &&
            bdrv_dirty_bitmap_enabled(bm)) {
            ret = -EINVAL;
            error_setg(errp,
                       "Enabled bitmap '%s' incompatible with readonly export",
                       bitmap);
            goto fail;
        }

        exp->export_bitmaps[i] = bm;
        assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
    }

    /*
     * Bitmaps are marked busy only after all of them were resolved, so the
     * 'fail' path above never has to undo the busy state.
     */
    for (i = 0; i < exp->nr_export_bitmaps; i++) {
        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
    }

    exp->allocation_depth = arg->allocation_depth;

    /* Undone in nbd_export_delete(). */
    blk_set_disable_request_queuing(blk, true);

    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);

    blk_set_dev_ops(blk, &nbd_block_ops, exp);

    QTAILQ_INSERT_TAIL(&exports, exp, next);

    return 0;

fail:
    /* Release everything allocated after the permission update. */
    g_free(exp->export_bitmaps);
    g_free(exp->name);
    g_free(exp->description);
    return ret;
}
1773
1774NBDExport *nbd_export_find(const char *name)
1775{
1776 NBDExport *exp;
1777 QTAILQ_FOREACH(exp, &exports, next) {
1778 if (strcmp(name, exp->name) == 0) {
1779 return exp;
1780 }
1781 }
1782
1783 return NULL;
1784}
1785
1786AioContext *
1787nbd_export_aio_context(NBDExport *exp)
1788{
1789 return exp->common.ctx;
1790}
1791
1792static void nbd_export_request_shutdown(BlockExport *blk_exp)
1793{
1794 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1795 NBDClient *client, *next;
1796
1797 blk_exp_ref(&exp->common);
1798
1799
1800
1801
1802
1803
1804
1805 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1806 client_close(client, true);
1807 }
1808 if (exp->name) {
1809 g_free(exp->name);
1810 exp->name = NULL;
1811 QTAILQ_REMOVE(&exports, exp, next);
1812 }
1813 blk_exp_unref(&exp->common);
1814}
1815
1816static void nbd_export_delete(BlockExport *blk_exp)
1817{
1818 size_t i;
1819 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1820
1821 assert(exp->name == NULL);
1822 assert(QTAILQ_EMPTY(&exp->clients));
1823
1824 g_free(exp->description);
1825 exp->description = NULL;
1826
1827 if (exp->common.blk) {
1828 if (exp->eject_notifier_blk) {
1829 notifier_remove(&exp->eject_notifier);
1830 blk_unref(exp->eject_notifier_blk);
1831 }
1832 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
1833 blk_aio_detach, exp);
1834 blk_set_disable_request_queuing(exp->common.blk, false);
1835 }
1836
1837 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1838 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
1839 }
1840}
1841
/* Block export driver description for exports of type NBD. */
const BlockExportDriver blk_exp_nbd = {
    .type = BLOCK_EXPORT_TYPE_NBD,
    .instance_size = sizeof(NBDExport),
    .create = nbd_export_create,
    .delete = nbd_export_delete,
    .request_shutdown = nbd_export_request_shutdown,
};
1849
1850static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1851 unsigned niov, Error **errp)
1852{
1853 int ret;
1854
1855 g_assert(qemu_in_coroutine());
1856 qemu_co_mutex_lock(&client->send_lock);
1857 client->send_coroutine = qemu_coroutine_self();
1858
1859 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1860
1861 client->send_coroutine = NULL;
1862 qemu_co_mutex_unlock(&client->send_lock);
1863
1864 return ret;
1865}
1866
1867static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1868 uint64_t handle)
1869{
1870 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1871 stl_be_p(&reply->error, error);
1872 stq_be_p(&reply->handle, handle);
1873}
1874
1875static int nbd_co_send_simple_reply(NBDClient *client,
1876 uint64_t handle,
1877 uint32_t error,
1878 void *data,
1879 size_t len,
1880 Error **errp)
1881{
1882 NBDSimpleReply reply;
1883 int nbd_err = system_errno_to_nbd_errno(error);
1884 struct iovec iov[] = {
1885 {.iov_base = &reply, .iov_len = sizeof(reply)},
1886 {.iov_base = data, .iov_len = len}
1887 };
1888
1889 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1890 len);
1891 set_be_simple_reply(&reply, nbd_err, handle);
1892
1893 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1894}
1895
1896static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1897 uint16_t type, uint64_t handle, uint32_t length)
1898{
1899 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1900 stw_be_p(&chunk->flags, flags);
1901 stw_be_p(&chunk->type, type);
1902 stq_be_p(&chunk->handle, handle);
1903 stl_be_p(&chunk->length, length);
1904}
1905
1906static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1907 uint64_t handle,
1908 Error **errp)
1909{
1910 NBDStructuredReplyChunk chunk;
1911 struct iovec iov[] = {
1912 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1913 };
1914
1915 trace_nbd_co_send_structured_done(handle);
1916 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1917
1918 return nbd_co_send_iov(client, iov, 1, errp);
1919}
1920
1921static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1922 uint64_t handle,
1923 uint64_t offset,
1924 void *data,
1925 size_t size,
1926 bool final,
1927 Error **errp)
1928{
1929 NBDStructuredReadData chunk;
1930 struct iovec iov[] = {
1931 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1932 {.iov_base = data, .iov_len = size}
1933 };
1934
1935 assert(size);
1936 trace_nbd_co_send_structured_read(handle, offset, data, size);
1937 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1938 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1939 sizeof(chunk) - sizeof(chunk.h) + size);
1940 stq_be_p(&chunk.offset, offset);
1941
1942 return nbd_co_send_iov(client, iov, 2, errp);
1943}
1944
1945static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1946 uint64_t handle,
1947 uint32_t error,
1948 const char *msg,
1949 Error **errp)
1950{
1951 NBDStructuredError chunk;
1952 int nbd_err = system_errno_to_nbd_errno(error);
1953 struct iovec iov[] = {
1954 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1955 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1956 };
1957
1958 assert(nbd_err);
1959 trace_nbd_co_send_structured_error(handle, nbd_err,
1960 nbd_err_lookup(nbd_err), msg ? msg : "");
1961 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1962 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1963 stl_be_p(&chunk.error, nbd_err);
1964 stw_be_p(&chunk.message_length, iov[1].iov_len);
1965
1966 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1967}
1968
1969
1970
1971
1972
1973static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
1974 uint64_t handle,
1975 uint64_t offset,
1976 uint8_t *data,
1977 size_t size,
1978 Error **errp)
1979{
1980 int ret = 0;
1981 NBDExport *exp = client->exp;
1982 size_t progress = 0;
1983
1984 while (progress < size) {
1985 int64_t pnum;
1986 int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
1987 offset + progress,
1988 size - progress, &pnum, NULL,
1989 NULL);
1990 bool final;
1991
1992 if (status < 0) {
1993 char *msg = g_strdup_printf("unable to check for holes: %s",
1994 strerror(-status));
1995
1996 ret = nbd_co_send_structured_error(client, handle, -status, msg,
1997 errp);
1998 g_free(msg);
1999 return ret;
2000 }
2001 assert(pnum && pnum <= size - progress);
2002 final = progress + pnum == size;
2003 if (status & BDRV_BLOCK_ZERO) {
2004 NBDStructuredReadHole chunk;
2005 struct iovec iov[] = {
2006 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2007 };
2008
2009 trace_nbd_co_send_structured_read_hole(handle, offset + progress,
2010 pnum);
2011 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
2012 NBD_REPLY_TYPE_OFFSET_HOLE,
2013 handle, sizeof(chunk) - sizeof(chunk.h));
2014 stq_be_p(&chunk.offset, offset + progress);
2015 stl_be_p(&chunk.length, pnum);
2016 ret = nbd_co_send_iov(client, iov, 1, errp);
2017 } else {
2018 ret = blk_pread(exp->common.blk, offset + progress,
2019 data + progress, pnum);
2020 if (ret < 0) {
2021 error_setg_errno(errp, -ret, "reading from file failed");
2022 break;
2023 }
2024 ret = nbd_co_send_structured_read(client, handle, offset + progress,
2025 data + progress, pnum, final,
2026 errp);
2027 }
2028
2029 if (ret < 0) {
2030 break;
2031 }
2032 progress += pnum;
2033 }
2034 return ret;
2035}
2036
/* Growing-by-append list of extents with a fixed capacity. */
typedef struct NBDExtentArray {
    NBDExtent *extents;        /* storage for nb_alloc entries */
    unsigned int nb_alloc;     /* capacity of @extents */
    unsigned int count;        /* number of entries currently in use */
    uint64_t total_length;     /* sum of all lengths added so far */
    bool can_add;              /* cleared once full or converted */
    bool converted_to_be;      /* set by nbd_extent_array_convert_to_be() */
} NBDExtentArray;
2045
2046static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
2047{
2048 NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
2049
2050 ea->nb_alloc = nb_alloc;
2051 ea->extents = g_new(NBDExtent, nb_alloc);
2052 ea->can_add = true;
2053
2054 return ea;
2055}
2056
/* Free @ea together with its extent storage. */
static void nbd_extent_array_free(NBDExtentArray *ea)
{
    g_free(ea->extents);
    g_free(ea);
}
/* Lets callers declare g_autoptr(NBDExtentArray) variables. */
G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
2063
2064
2065static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
2066{
2067 int i;
2068
2069 assert(!ea->converted_to_be);
2070 ea->can_add = false;
2071 ea->converted_to_be = true;
2072
2073 for (i = 0; i < ea->count; i++) {
2074 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2075 ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
2076 }
2077}
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089static int nbd_extent_array_add(NBDExtentArray *ea,
2090 uint32_t length, uint32_t flags)
2091{
2092 assert(ea->can_add);
2093
2094 if (!length) {
2095 return 0;
2096 }
2097
2098
2099 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2100 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
2101
2102 if (sum <= UINT32_MAX) {
2103 ea->extents[ea->count - 1].length = sum;
2104 ea->total_length += length;
2105 return 0;
2106 }
2107 }
2108
2109 if (ea->count >= ea->nb_alloc) {
2110 ea->can_add = false;
2111 return -1;
2112 }
2113
2114 ea->total_length += length;
2115 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
2116 ea->count++;
2117
2118 return 0;
2119}
2120
2121static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
2122 uint64_t bytes, NBDExtentArray *ea)
2123{
2124 while (bytes) {
2125 uint32_t flags;
2126 int64_t num;
2127 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
2128 NULL, NULL);
2129
2130 if (ret < 0) {
2131 return ret;
2132 }
2133
2134 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2135 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2136
2137 if (nbd_extent_array_add(ea, num, flags) < 0) {
2138 return 0;
2139 }
2140
2141 offset += num;
2142 bytes -= num;
2143 }
2144
2145 return 0;
2146}
2147
2148static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2149 uint64_t bytes, NBDExtentArray *ea)
2150{
2151 while (bytes) {
2152 int64_t num;
2153 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2154 &num);
2155
2156 if (ret < 0) {
2157 return ret;
2158 }
2159
2160 if (nbd_extent_array_add(ea, num, ret) < 0) {
2161 return 0;
2162 }
2163
2164 offset += num;
2165 bytes -= num;
2166 }
2167
2168 return 0;
2169}
2170
2171
2172
2173
2174
2175
2176
2177static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
2178 NBDExtentArray *ea,
2179 bool last, uint32_t context_id, Error **errp)
2180{
2181 NBDStructuredMeta chunk;
2182 struct iovec iov[] = {
2183 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2184 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
2185 };
2186
2187 nbd_extent_array_convert_to_be(ea);
2188
2189 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
2190 last);
2191 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
2192 NBD_REPLY_TYPE_BLOCK_STATUS,
2193 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
2194 stl_be_p(&chunk.context_id, context_id);
2195
2196 return nbd_co_send_iov(client, iov, 2, errp);
2197}
2198
2199
2200static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
2201 BlockDriverState *bs, uint64_t offset,
2202 uint32_t length, bool dont_fragment,
2203 bool last, uint32_t context_id,
2204 Error **errp)
2205{
2206 int ret;
2207 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2208 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2209
2210 if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2211 ret = blockstatus_to_extents(bs, offset, length, ea);
2212 } else {
2213 ret = blockalloc_to_extents(bs, offset, length, ea);
2214 }
2215 if (ret < 0) {
2216 return nbd_co_send_structured_error(
2217 client, handle, -ret, "can't get block status", errp);
2218 }
2219
2220 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2221}
2222
2223
2224static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
2225 uint64_t offset, uint64_t length,
2226 NBDExtentArray *es)
2227{
2228 int64_t start, dirty_start, dirty_count;
2229 int64_t end = offset + length;
2230 bool full = false;
2231
2232 bdrv_dirty_bitmap_lock(bitmap);
2233
2234 for (start = offset;
2235 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
2236 &dirty_start, &dirty_count);
2237 start = dirty_start + dirty_count)
2238 {
2239 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2240 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2241 {
2242 full = true;
2243 break;
2244 }
2245 }
2246
2247 if (!full) {
2248
2249 (void) nbd_extent_array_add(es, end - start, 0);
2250 }
2251
2252 bdrv_dirty_bitmap_unlock(bitmap);
2253}
2254
2255static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
2256 BdrvDirtyBitmap *bitmap, uint64_t offset,
2257 uint32_t length, bool dont_fragment, bool last,
2258 uint32_t context_id, Error **errp)
2259{
2260 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2261 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2262
2263 bitmap_to_extents(bitmap, offset, length, ea);
2264
2265 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2266}
2267
2268
2269
2270
2271
2272
2273
2274
/*
 * Receive and validate one request from the client owning @req.
 *
 * Reads the request header into @request, allocates req->data for commands
 * that carry or return data, reads the payload of NBD_CMD_WRITE, and then
 * sanity-checks the request against the export.
 *
 * req->complete is set once the whole request (header plus any payload)
 * has been consumed from the channel.
 *
 * Returns 0 if the request can be handled.  Returns -EIO when the
 * connection has to be dropped without a reply (receive failure,
 * NBD_CMD_DISC, or failure to read the write payload).  Any other negative
 * errno means the request was rejected with @errp set; nbd_trip() then
 * answers the client with an error instead of disconnecting.
 */
static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                  Error **errp)
{
    NBDClient *client = req->client;
    int valid_flags;
    int ret;

    g_assert(qemu_in_coroutine());
    assert(client->recv_coroutine == qemu_coroutine_self());
    ret = nbd_receive_request(client, request, errp);
    if (ret < 0) {
        return ret;
    }

    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
                                             nbd_cmd_lookup(request->type));

    if (request->type != NBD_CMD_WRITE) {
        /* Only writes carry a payload after the header. */
        req->complete = true;
    }

    if (request->type == NBD_CMD_DISC) {
        /* Disconnect without a reply; the other fields are not checked. */
        return -EIO;
    }

    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
        request->type == NBD_CMD_CACHE)
    {
        if (request->len > NBD_MAX_BUFFER_SIZE) {
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       request->len, NBD_MAX_BUFFER_SIZE);
            return -EINVAL;
        }

        /* NBD_CMD_CACHE transfers no data, so it needs no buffer. */
        if (request->type != NBD_CMD_CACHE) {
            req->data = blk_try_blockalign(client->exp->common.blk,
                                           request->len);
            if (req->data == NULL) {
                error_setg(errp, "No memory");
                return -ENOMEM;
            }
        }
    }

    if (request->type == NBD_CMD_WRITE) {
        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
                     errp) < 0)
        {
            return -EIO;
        }
        req->complete = true;

        trace_nbd_co_receive_request_payload_received(request->handle,
                                                      request->len);
    }

    /* Sanity checks; failures from here on are reported to the client. */
    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
        (request->type == NBD_CMD_WRITE ||
         request->type == NBD_CMD_WRITE_ZEROES ||
         request->type == NBD_CMD_TRIM)) {
        error_setg(errp, "Export is read-only");
        return -EROFS;
    }
    /* Written so that from + len cannot overflow. */
    if (request->from > client->exp->size ||
        request->len > client->exp->size - request->from) {
        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                   ", Size: %" PRIu64, request->from, request->len,
                   client->exp->size);
        return (request->type == NBD_CMD_WRITE ||
                request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
    }
    if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
                                                client->check_align)) {
        /* Misalignment is only traced; the request is still accepted. */
        trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
                                              request->from,
                                              request->len,
                                              client->check_align);
    }
    /* FUA is accepted for every command; the rest depends on the type. */
    valid_flags = NBD_CMD_FLAG_FUA;
    if (request->type == NBD_CMD_READ && client->structured_reply) {
        valid_flags |= NBD_CMD_FLAG_DF;
    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
    }
    if (request->flags & ~valid_flags) {
        error_setg(errp, "unsupported flags for command %s (got 0x%x)",
                   nbd_cmd_lookup(request->type), request->flags);
        return -EINVAL;
    }

    return 0;
}
2377
2378
2379
2380
2381
2382static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
2383 uint64_t handle,
2384 int ret,
2385 const char *error_msg,
2386 Error **errp)
2387{
2388 if (client->structured_reply && ret < 0) {
2389 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
2390 errp);
2391 } else {
2392 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
2393 NULL, 0, errp);
2394 }
2395}
2396
2397
2398
2399
2400static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
2401 uint8_t *data, Error **errp)
2402{
2403 int ret;
2404 NBDExport *exp = client->exp;
2405
2406 assert(request->type == NBD_CMD_READ);
2407
2408
2409 if (request->flags & NBD_CMD_FLAG_FUA) {
2410 ret = blk_co_flush(exp->common.blk);
2411 if (ret < 0) {
2412 return nbd_send_generic_reply(client, request->handle, ret,
2413 "flush failed", errp);
2414 }
2415 }
2416
2417 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
2418 request->len)
2419 {
2420 return nbd_co_send_sparse_read(client, request->handle, request->from,
2421 data, request->len, errp);
2422 }
2423
2424 ret = blk_pread(exp->common.blk, request->from, data, request->len);
2425 if (ret < 0) {
2426 return nbd_send_generic_reply(client, request->handle, ret,
2427 "reading from file failed", errp);
2428 }
2429
2430 if (client->structured_reply) {
2431 if (request->len) {
2432 return nbd_co_send_structured_read(client, request->handle,
2433 request->from, data,
2434 request->len, true, errp);
2435 } else {
2436 return nbd_co_send_structured_done(client, request->handle, errp);
2437 }
2438 } else {
2439 return nbd_co_send_simple_reply(client, request->handle, 0,
2440 data, request->len, errp);
2441 }
2442}
2443
2444
2445
2446
2447
2448
2449
2450
2451static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
2452 Error **errp)
2453{
2454 int ret;
2455 NBDExport *exp = client->exp;
2456
2457 assert(request->type == NBD_CMD_CACHE);
2458
2459 ret = blk_co_preadv(exp->common.blk, request->from, request->len,
2460 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
2461
2462 return nbd_send_generic_reply(client, request->handle, ret,
2463 "caching data failed", errp);
2464}
2465
2466
2467
2468
/*
 * Execute a validated request and send the reply to the client.
 *
 * @request must have passed nbd_co_receive_request(); @data is the buffer
 * allocated there (payload for writes, destination for reads).
 *
 * Failures of the block operation are reported to the client.  The return
 * value is the result of sending the reply: negative only when the reply
 * could not be delivered.
 */
static coroutine_fn int nbd_handle_request(NBDClient *client,
                                           NBDRequest *request,
                                           uint8_t *data, Error **errp)
{
    int ret;
    int flags;
    NBDExport *exp = client->exp;
    char *msg;
    size_t i;

    switch (request->type) {
    case NBD_CMD_CACHE:
        return nbd_do_cmd_cache(client, request, errp);

    case NBD_CMD_READ:
        return nbd_do_cmd_read(client, request, data, errp);

    case NBD_CMD_WRITE:
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
                         flags);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_WRITE_ZEROES:
        /* Translate NBD command flags into block-layer request flags. */
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
            flags |= BDRV_REQ_NO_FALLBACK;
        }
        ret = 0;

        /*
         * Split the request into pieces of at most BDRV_REQUEST_MAX_BYTES,
         * rounded down to the client's alignment; stop at the first error.
         */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags);
            request->len -= len;
            request->from += len;
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_DISC:
        /*
         * nbd_co_receive_request() returns -EIO for NBD_CMD_DISC, so
         * nbd_trip() never dispatches it to this function.
         */
        abort();

    case NBD_CMD_FLUSH:
        ret = blk_co_flush(exp->common.blk);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "flush failed", errp);

    case NBD_CMD_TRIM:
        ret = 0;

        /* Same splitting as for NBD_CMD_WRITE_ZEROES above. */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_co_pdiscard(exp->common.blk, request->from, len);
            request->len -= len;
            request->from += len;
        }
        /* FUA on a trim is honoured with an explicit flush afterwards. */
        if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
            ret = blk_co_flush(exp->common.blk);
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "discard failed", errp);

    case NBD_CMD_BLOCK_STATUS:
        if (!request->len) {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "need non-zero length", errp);
        }
        if (client->export_meta.count) {
            bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
            /*
             * One reply chunk is sent per negotiated context; the counter
             * reaches zero on the last one, which gets the DONE flag.
             */
            int contexts_remaining = client->export_meta.count;

            if (client->export_meta.base_allocation) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from,
                                               request->len, dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_BASE_ALLOCATION,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            if (client->export_meta.allocation_depth) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from, request->len,
                                               dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_ALLOCATION_DEPTH,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            /* Bitmap context ids start at NBD_META_ID_DIRTY_BITMAP. */
            for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
                if (!client->export_meta.bitmaps[i]) {
                    continue;
                }
                ret = nbd_co_send_bitmap(client, request->handle,
                                         client->exp->export_bitmaps[i],
                                         request->from, request->len,
                                         dont_fragment, !--contexts_remaining,
                                         NBD_META_ID_DIRTY_BITMAP + i, errp);
                if (ret < 0) {
                    return ret;
                }
            }

            assert(!contexts_remaining);

            return 0;
        } else {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "CMD_BLOCK_STATUS not negotiated",
                                          errp);
        }

    default:
        msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
                              request->type);
        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
                                     errp);
        g_free(msg);
        return ret;
    }
}
2613
2614
2615static coroutine_fn void nbd_trip(void *opaque)
2616{
2617 NBDClient *client = opaque;
2618 NBDRequestData *req;
2619 NBDRequest request = { 0 };
2620 int ret;
2621 Error *local_err = NULL;
2622
2623 trace_nbd_trip();
2624 if (client->closing) {
2625 nbd_client_put(client);
2626 return;
2627 }
2628
2629 if (client->quiescing) {
2630
2631
2632
2633
2634 nbd_client_put(client);
2635 client->recv_coroutine = NULL;
2636 aio_wait_kick();
2637 return;
2638 }
2639
2640 req = nbd_request_get(client);
2641 ret = nbd_co_receive_request(req, &request, &local_err);
2642 client->recv_coroutine = NULL;
2643
2644 if (client->closing) {
2645
2646
2647
2648
2649 goto done;
2650 }
2651
2652 if (ret == -EAGAIN) {
2653 assert(client->quiescing);
2654 goto done;
2655 }
2656
2657 nbd_client_receive_next_request(client);
2658 if (ret == -EIO) {
2659 goto disconnect;
2660 }
2661
2662 if (ret < 0) {
2663
2664
2665 Error *export_err = local_err;
2666
2667 local_err = NULL;
2668 ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
2669 error_get_pretty(export_err), &local_err);
2670 error_free(export_err);
2671 } else {
2672 ret = nbd_handle_request(client, &request, req->data, &local_err);
2673 }
2674 if (ret < 0) {
2675 error_prepend(&local_err, "Failed to send reply: ");
2676 goto disconnect;
2677 }
2678
2679
2680
2681
2682 if (!req->complete) {
2683 error_setg(&local_err, "Request handling failed in intermediate state");
2684 goto disconnect;
2685 }
2686
2687done:
2688 nbd_request_put(req);
2689 nbd_client_put(client);
2690 return;
2691
2692disconnect:
2693 if (local_err) {
2694 error_reportf_err(local_err, "Disconnect client, due to: ");
2695 }
2696 nbd_request_put(req);
2697 client_close(client, true);
2698 nbd_client_put(client);
2699}
2700
2701static void nbd_client_receive_next_request(NBDClient *client)
2702{
2703 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
2704 !client->quiescing) {
2705 nbd_client_get(client);
2706 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2707 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
2708 }
2709}
2710
2711static coroutine_fn void nbd_co_client_start(void *opaque)
2712{
2713 NBDClient *client = opaque;
2714 Error *local_err = NULL;
2715
2716 qemu_co_mutex_init(&client->send_lock);
2717
2718 if (nbd_negotiate(client, &local_err)) {
2719 if (local_err) {
2720 error_report_err(local_err);
2721 }
2722 client_close(client, false);
2723 return;
2724 }
2725
2726 nbd_client_receive_next_request(client);
2727}
2728
2729
2730
2731
2732
2733
2734void nbd_client_new(QIOChannelSocket *sioc,
2735 QCryptoTLSCreds *tlscreds,
2736 const char *tlsauthz,
2737 void (*close_fn)(NBDClient *, bool))
2738{
2739 NBDClient *client;
2740 Coroutine *co;
2741
2742 client = g_new0(NBDClient, 1);
2743 client->refcount = 1;
2744 client->tlscreds = tlscreds;
2745 if (tlscreds) {
2746 object_ref(OBJECT(client->tlscreds));
2747 }
2748 client->tlsauthz = g_strdup(tlsauthz);
2749 client->sioc = sioc;
2750 object_ref(OBJECT(client->sioc));
2751 client->ioc = QIO_CHANNEL(sioc);
2752 object_ref(OBJECT(client->ioc));
2753 client->close_fn = close_fn;
2754
2755 co = qemu_coroutine_create(nbd_co_client_start, client);
2756 qemu_coroutine_enter(co);
2757}
2758