1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21
22#include "block/export.h"
23#include "qapi/error.h"
24#include "qemu/queue.h"
25#include "trace.h"
26#include "nbd-internal.h"
27#include "qemu/units.h"
28
/*
 * Context IDs reported to the client for each meta context it selects.
 * Dirty bitmaps are numbered NBD_META_ID_DIRTY_BITMAP + index, where index
 * is the bitmap's position in NBDExport.export_bitmaps.
 */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * Extent-count limit for block status handling (1 MiB / 8 = 131072).
 * NOTE(review): its users are outside this part of the file.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
41
42static int system_errno_to_nbd_errno(int err)
43{
44 switch (err) {
45 case 0:
46 return NBD_SUCCESS;
47 case EPERM:
48 case EROFS:
49 return NBD_EPERM;
50 case EIO:
51 return NBD_EIO;
52 case ENOMEM:
53 return NBD_ENOMEM;
54#ifdef EDQUOT
55 case EDQUOT:
56#endif
57 case EFBIG:
58 case ENOSPC:
59 return NBD_ENOSPC;
60 case EOVERFLOW:
61 return NBD_EOVERFLOW;
62 case ENOTSUP:
63#if ENOTSUP != EOPNOTSUPP
64 case EOPNOTSUPP:
65#endif
66 return NBD_ENOTSUP;
67 case ESHUTDOWN:
68 return NBD_ESHUTDOWN;
69 case EINVAL:
70 default:
71 return NBD_EINVAL;
72 }
73}
74
75
76
77typedef struct NBDRequestData NBDRequestData;
78
79struct NBDRequestData {
80 QSIMPLEQ_ENTRY(NBDRequestData) entry;
81 NBDClient *client;
82 uint8_t *data;
83 bool complete;
84};
85
86struct NBDExport {
87 BlockExport common;
88
89 char *name;
90 char *description;
91 uint64_t size;
92 uint16_t nbdflags;
93 QTAILQ_HEAD(, NBDClient) clients;
94 QTAILQ_ENTRY(NBDExport) next;
95
96 BlockBackend *eject_notifier_blk;
97 Notifier eject_notifier;
98
99 bool allocation_depth;
100 BdrvDirtyBitmap **export_bitmaps;
101 size_t nr_export_bitmaps;
102};
103
104static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
105
106
107
108
109typedef struct NBDExportMetaContexts {
110 NBDExport *exp;
111 size_t count;
112 bool base_allocation;
113 bool allocation_depth;
114 bool *bitmaps;
115
116
117
118} NBDExportMetaContexts;
119
120struct NBDClient {
121 int refcount;
122 void (*close_fn)(NBDClient *client, bool negotiated);
123
124 NBDExport *exp;
125 QCryptoTLSCreds *tlscreds;
126 char *tlsauthz;
127 QIOChannelSocket *sioc;
128 QIOChannel *ioc;
129
130 Coroutine *recv_coroutine;
131
132 CoMutex send_lock;
133 Coroutine *send_coroutine;
134
135 bool read_yielding;
136 bool quiescing;
137
138 QTAILQ_ENTRY(NBDClient) next;
139 int nb_requests;
140 bool closing;
141
142 uint32_t check_align;
143
144 bool structured_reply;
145 NBDExportMetaContexts export_meta;
146
147 uint32_t opt;
148 uint32_t optlen;
149
150};
151
152static void nbd_client_receive_next_request(NBDClient *client);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
182 uint32_t type, uint32_t length)
183{
184 stq_be_p(&rep->magic, NBD_REP_MAGIC);
185 stl_be_p(&rep->option, option);
186 stl_be_p(&rep->type, type);
187 stl_be_p(&rep->length, length);
188}
189
190
191
192static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
193 uint32_t len, Error **errp)
194{
195 NBDOptionReply rep;
196
197 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
198 type, nbd_rep_lookup(type), len);
199
200 assert(len < NBD_MAX_BUFFER_SIZE);
201
202 set_be_option_rep(&rep, client->opt, type, len);
203 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
204}
205
206
207
208static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
209 Error **errp)
210{
211 return nbd_negotiate_send_rep_len(client, type, 0, errp);
212}
213
214
215
216static int GCC_FMT_ATTR(4, 0)
217nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
218 Error **errp, const char *fmt, va_list va)
219{
220 ERRP_GUARD();
221 g_autofree char *msg = NULL;
222 int ret;
223 size_t len;
224
225 msg = g_strdup_vprintf(fmt, va);
226 len = strlen(msg);
227 assert(len < NBD_MAX_STRING_SIZE);
228 trace_nbd_negotiate_send_rep_err(msg);
229 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
230 if (ret < 0) {
231 return ret;
232 }
233 if (nbd_write(client->ioc, msg, len, errp) < 0) {
234 error_prepend(errp, "write failed (error message): ");
235 return -EIO;
236 }
237
238 return 0;
239}
240
241
242
243
/*
 * Return a newly allocated copy of @name suitable for inclusion in an error
 * message: names of 80 bytes or more are cut to their first 80 bytes with
 * "..." appended.  The caller frees the result with g_free().
 */
static char *
nbd_sanitize_name(const char *name)
{
    const size_t limit = 80;

    if (strnlen(name, limit) >= limit) {
        return g_strdup_printf("%.80s...", name);
    }

    return g_strdup(name);
}
253
254
255
256static int GCC_FMT_ATTR(4, 5)
257nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
258 Error **errp, const char *fmt, ...)
259{
260 va_list va;
261 int ret;
262
263 va_start(va, fmt);
264 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
265 va_end(va);
266 return ret;
267}
268
269
270
271
272static int GCC_FMT_ATTR(4, 0)
273nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
274 const char *fmt, va_list va)
275{
276 int ret = nbd_drop(client->ioc, client->optlen, errp);
277
278 client->optlen = 0;
279 if (!ret) {
280 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
281 }
282 return ret;
283}
284
285static int GCC_FMT_ATTR(4, 5)
286nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
287 const char *fmt, ...)
288{
289 int ret;
290 va_list va;
291
292 va_start(va, fmt);
293 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
294 va_end(va);
295
296 return ret;
297}
298
299static int GCC_FMT_ATTR(3, 4)
300nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
301{
302 int ret;
303 va_list va;
304
305 va_start(va, fmt);
306 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
307 va_end(va);
308
309 return ret;
310}
311
312
313
314
315
316static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
317 bool check_nul, Error **errp)
318{
319 if (size > client->optlen) {
320 return nbd_opt_invalid(client, errp,
321 "Inconsistent lengths in option %s",
322 nbd_opt_lookup(client->opt));
323 }
324 client->optlen -= size;
325 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
326 return -EIO;
327 }
328
329 if (check_nul && strnlen(buffer, size) != size) {
330 return nbd_opt_invalid(client, errp,
331 "Unexpected embedded NUL in option %s",
332 nbd_opt_lookup(client->opt));
333 }
334 return 1;
335}
336
337
338
339
340static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
341{
342 if (size > client->optlen) {
343 return nbd_opt_invalid(client, errp,
344 "Inconsistent lengths in option %s",
345 nbd_opt_lookup(client->opt));
346 }
347 client->optlen -= size;
348 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
349}
350
351
352
353
354
355
356
357
358
359
360
361
362
363static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
364 Error **errp)
365{
366 int ret;
367 uint32_t len;
368 g_autofree char *local_name = NULL;
369
370 *name = NULL;
371 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
372 if (ret <= 0) {
373 return ret;
374 }
375 len = cpu_to_be32(len);
376
377 if (len > NBD_MAX_STRING_SIZE) {
378 return nbd_opt_invalid(client, errp,
379 "Invalid name length: %" PRIu32, len);
380 }
381
382 local_name = g_malloc(len + 1);
383 ret = nbd_opt_read(client, local_name, len, true, errp);
384 if (ret <= 0) {
385 return ret;
386 }
387 local_name[len] = '\0';
388
389 if (length) {
390 *length = len;
391 }
392 *name = g_steal_pointer(&local_name);
393
394 return 1;
395}
396
397
398
399static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
400 Error **errp)
401{
402 ERRP_GUARD();
403 size_t name_len, desc_len;
404 uint32_t len;
405 const char *name = exp->name ? exp->name : "";
406 const char *desc = exp->description ? exp->description : "";
407 QIOChannel *ioc = client->ioc;
408 int ret;
409
410 trace_nbd_negotiate_send_rep_list(name, desc);
411 name_len = strlen(name);
412 desc_len = strlen(desc);
413 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
414 len = name_len + desc_len + sizeof(len);
415 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
416 if (ret < 0) {
417 return ret;
418 }
419
420 len = cpu_to_be32(name_len);
421 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
422 error_prepend(errp, "write failed (name length): ");
423 return -EINVAL;
424 }
425
426 if (nbd_write(ioc, name, name_len, errp) < 0) {
427 error_prepend(errp, "write failed (name buffer): ");
428 return -EINVAL;
429 }
430
431 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
432 error_prepend(errp, "write failed (description buffer): ");
433 return -EINVAL;
434 }
435
436 return 0;
437}
438
439
440
441static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
442{
443 NBDExport *exp;
444 assert(client->opt == NBD_OPT_LIST);
445
446
447 QTAILQ_FOREACH(exp, &exports, next) {
448 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
449 return -EINVAL;
450 }
451 }
452
453 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
454}
455
456static void nbd_check_meta_export(NBDClient *client)
457{
458 if (client->exp != client->export_meta.exp) {
459 client->export_meta.count = 0;
460 }
461}
462
463
464
465static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
466 Error **errp)
467{
468 ERRP_GUARD();
469 g_autofree char *name = NULL;
470 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
471 size_t len;
472 int ret;
473 uint16_t myflags;
474
475
476
477
478
479
480
481
482 trace_nbd_negotiate_handle_export_name();
483 if (client->optlen > NBD_MAX_STRING_SIZE) {
484 error_setg(errp, "Bad length received");
485 return -EINVAL;
486 }
487 name = g_malloc(client->optlen + 1);
488 if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
489 return -EIO;
490 }
491 name[client->optlen] = '\0';
492 client->optlen = 0;
493
494 trace_nbd_negotiate_handle_export_name_request(name);
495
496 client->exp = nbd_export_find(name);
497 if (!client->exp) {
498 error_setg(errp, "export not found");
499 return -EINVAL;
500 }
501
502 myflags = client->exp->nbdflags;
503 if (client->structured_reply) {
504 myflags |= NBD_FLAG_SEND_DF;
505 }
506 trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
507 stq_be_p(buf, client->exp->size);
508 stw_be_p(buf + 8, myflags);
509 len = no_zeroes ? 10 : sizeof(buf);
510 ret = nbd_write(client->ioc, buf, len, errp);
511 if (ret < 0) {
512 error_prepend(errp, "write failed: ");
513 return ret;
514 }
515
516 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
517 blk_exp_ref(&client->exp->common);
518 nbd_check_meta_export(client);
519
520 return 0;
521}
522
523
524
525
526static int nbd_negotiate_send_info(NBDClient *client,
527 uint16_t info, uint32_t length, void *buf,
528 Error **errp)
529{
530 int rc;
531
532 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
533 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
534 sizeof(info) + length, errp);
535 if (rc < 0) {
536 return rc;
537 }
538 info = cpu_to_be16(info);
539 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
540 return -EIO;
541 }
542 if (nbd_write(client->ioc, buf, length, errp) < 0) {
543 return -EIO;
544 }
545 return 0;
546}
547
548
549
550
551
552
553
554
555static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
556{
557 int ret;
558
559 assert(client->optlen);
560 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
561 nbd_opt_lookup(client->opt));
562 if (fatal && !ret) {
563 error_setg(errp, "option '%s' has unexpected length",
564 nbd_opt_lookup(client->opt));
565 return -EINVAL;
566 }
567 return ret;
568}
569
570
571
572
573static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
574{
575 int rc;
576 g_autofree char *name = NULL;
577 NBDExport *exp;
578 uint16_t requests;
579 uint16_t request;
580 uint32_t namelen = 0;
581 bool sendname = false;
582 bool blocksize = false;
583 uint32_t sizes[3];
584 char buf[sizeof(uint64_t) + sizeof(uint16_t)];
585 uint32_t check_align = 0;
586 uint16_t myflags;
587
588
589
590
591
592
593
594 rc = nbd_opt_read_name(client, &name, &namelen, errp);
595 if (rc <= 0) {
596 return rc;
597 }
598 trace_nbd_negotiate_handle_export_name_request(name);
599
600 rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
601 if (rc <= 0) {
602 return rc;
603 }
604 requests = be16_to_cpu(requests);
605 trace_nbd_negotiate_handle_info_requests(requests);
606 while (requests--) {
607 rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
608 if (rc <= 0) {
609 return rc;
610 }
611 request = be16_to_cpu(request);
612 trace_nbd_negotiate_handle_info_request(request,
613 nbd_info_lookup(request));
614
615
616
617 switch (request) {
618 case NBD_INFO_NAME:
619 sendname = true;
620 break;
621 case NBD_INFO_BLOCK_SIZE:
622 blocksize = true;
623 break;
624 }
625 }
626 if (client->optlen) {
627 return nbd_reject_length(client, false, errp);
628 }
629
630 exp = nbd_export_find(name);
631 if (!exp) {
632 g_autofree char *sane_name = nbd_sanitize_name(name);
633
634 return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
635 errp, "export '%s' not present",
636 sane_name);
637 }
638
639
640 if (sendname) {
641 rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
642 errp);
643 if (rc < 0) {
644 return rc;
645 }
646 }
647
648
649
650 if (exp->description) {
651 size_t len = strlen(exp->description);
652
653 assert(len <= NBD_MAX_STRING_SIZE);
654 rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
655 len, exp->description, errp);
656 if (rc < 0) {
657 return rc;
658 }
659 }
660
661
662
663
664
665 if (client->opt == NBD_OPT_INFO || blocksize) {
666 check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
667 } else {
668 sizes[0] = 1;
669 }
670 assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
671
672
673 sizes[1] = MAX(4096, sizes[0]);
674
675 sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
676 trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
677 sizes[0] = cpu_to_be32(sizes[0]);
678 sizes[1] = cpu_to_be32(sizes[1]);
679 sizes[2] = cpu_to_be32(sizes[2]);
680 rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
681 sizeof(sizes), sizes, errp);
682 if (rc < 0) {
683 return rc;
684 }
685
686
687 myflags = exp->nbdflags;
688 if (client->structured_reply) {
689 myflags |= NBD_FLAG_SEND_DF;
690 }
691 trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
692 stq_be_p(buf, exp->size);
693 stw_be_p(buf + 8, myflags);
694 rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
695 sizeof(buf), buf, errp);
696 if (rc < 0) {
697 return rc;
698 }
699
700
701
702
703
704
705
706 if (client->opt == NBD_OPT_INFO && !blocksize &&
707 blk_get_request_alignment(exp->common.blk) > 1) {
708 return nbd_negotiate_send_rep_err(client,
709 NBD_REP_ERR_BLOCK_SIZE_REQD,
710 errp,
711 "request NBD_INFO_BLOCK_SIZE to "
712 "use this export");
713 }
714
715
716 rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
717 if (rc < 0) {
718 return rc;
719 }
720
721 if (client->opt == NBD_OPT_GO) {
722 client->exp = exp;
723 client->check_align = check_align;
724 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
725 blk_exp_ref(&client->exp->common);
726 nbd_check_meta_export(client);
727 rc = 1;
728 }
729 return rc;
730}
731
732
733
734
735static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
736 Error **errp)
737{
738 QIOChannel *ioc;
739 QIOChannelTLS *tioc;
740 struct NBDTLSHandshakeData data = { 0 };
741
742 assert(client->opt == NBD_OPT_STARTTLS);
743
744 trace_nbd_negotiate_handle_starttls();
745 ioc = client->ioc;
746
747 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
748 return NULL;
749 }
750
751 tioc = qio_channel_tls_new_server(ioc,
752 client->tlscreds,
753 client->tlsauthz,
754 errp);
755 if (!tioc) {
756 return NULL;
757 }
758
759 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
760 trace_nbd_negotiate_handle_starttls_handshake();
761 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
762 qio_channel_tls_handshake(tioc,
763 nbd_tls_handshake,
764 &data,
765 NULL,
766 NULL);
767
768 if (!data.complete) {
769 g_main_loop_run(data.loop);
770 }
771 g_main_loop_unref(data.loop);
772 if (data.error) {
773 object_unref(OBJECT(tioc));
774 error_propagate(errp, data.error);
775 return NULL;
776 }
777
778 return QIO_CHANNEL(tioc);
779}
780
781
782
783
784
785
786
787static int nbd_negotiate_send_meta_context(NBDClient *client,
788 const char *context,
789 uint32_t context_id,
790 Error **errp)
791{
792 NBDOptionReplyMetaContext opt;
793 struct iovec iov[] = {
794 {.iov_base = &opt, .iov_len = sizeof(opt)},
795 {.iov_base = (void *)context, .iov_len = strlen(context)}
796 };
797
798 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
799 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
800 context_id = 0;
801 }
802
803 trace_nbd_negotiate_meta_query_reply(context, context_id);
804 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
805 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
806 stl_be_p(&opt.context_id, context_id);
807
808 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
809}
810
811
812
813
814
815static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
816 const char *query)
817{
818 if (!*query) {
819 trace_nbd_negotiate_meta_query_parse("empty");
820 return client->opt == NBD_OPT_LIST_META_CONTEXT;
821 }
822 if (strcmp(query, pattern) == 0) {
823 trace_nbd_negotiate_meta_query_parse(pattern);
824 return true;
825 }
826 trace_nbd_negotiate_meta_query_skip("pattern not matched");
827 return false;
828}
829
830
831
832
/*
 * If *@str begins with @prefix, advance *@str past it and return true;
 * otherwise leave *@str alone and return false.
 */
static bool nbd_strshift(const char **str, const char *prefix)
{
    size_t prefix_len = strlen(prefix);

    if (strncmp(*str, prefix, prefix_len) != 0) {
        return false;
    }

    *str += prefix_len;
    return true;
}
843
844
845
846
847
848
849static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
850 const char *query)
851{
852 if (!nbd_strshift(&query, "base:")) {
853 return false;
854 }
855 trace_nbd_negotiate_meta_query_parse("base:");
856
857 if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
858 meta->base_allocation = true;
859 }
860 return true;
861}
862
863
864
865
866
867
868
869static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
870 const char *query)
871{
872 size_t i;
873
874 if (!nbd_strshift(&query, "qemu:")) {
875 return false;
876 }
877 trace_nbd_negotiate_meta_query_parse("qemu:");
878
879 if (!*query) {
880 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
881 meta->allocation_depth = meta->exp->allocation_depth;
882 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
883 }
884 trace_nbd_negotiate_meta_query_parse("empty");
885 return true;
886 }
887
888 if (strcmp(query, "allocation-depth") == 0) {
889 trace_nbd_negotiate_meta_query_parse("allocation-depth");
890 meta->allocation_depth = meta->exp->allocation_depth;
891 return true;
892 }
893
894 if (nbd_strshift(&query, "dirty-bitmap:")) {
895 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
896 if (!*query) {
897 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
898 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
899 }
900 trace_nbd_negotiate_meta_query_parse("empty");
901 return true;
902 }
903
904 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
905 const char *bm_name;
906
907 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
908 if (strcmp(bm_name, query) == 0) {
909 meta->bitmaps[i] = true;
910 trace_nbd_negotiate_meta_query_parse(query);
911 return true;
912 }
913 }
914 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
915 return true;
916 }
917
918 trace_nbd_negotiate_meta_query_skip("unknown qemu context");
919 return true;
920}
921
922
923
924
925
926
927
928
929
930
931static int nbd_negotiate_meta_query(NBDClient *client,
932 NBDExportMetaContexts *meta, Error **errp)
933{
934 int ret;
935 g_autofree char *query = NULL;
936 uint32_t len;
937
938 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
939 if (ret <= 0) {
940 return ret;
941 }
942 len = cpu_to_be32(len);
943
944 if (len > NBD_MAX_STRING_SIZE) {
945 trace_nbd_negotiate_meta_query_skip("length too long");
946 return nbd_opt_skip(client, len, errp);
947 }
948
949 query = g_malloc(len + 1);
950 ret = nbd_opt_read(client, query, len, true, errp);
951 if (ret <= 0) {
952 return ret;
953 }
954 query[len] = '\0';
955
956 if (nbd_meta_base_query(client, meta, query)) {
957 return 1;
958 }
959 if (nbd_meta_qemu_query(client, meta, query)) {
960 return 1;
961 }
962
963 trace_nbd_negotiate_meta_query_skip("unknown namespace");
964 return 1;
965}
966
967
968
969
970
971static int nbd_negotiate_meta_queries(NBDClient *client,
972 NBDExportMetaContexts *meta, Error **errp)
973{
974 int ret;
975 g_autofree char *export_name = NULL;
976
977 g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
978 NBDExportMetaContexts local_meta = {0};
979 uint32_t nb_queries;
980 size_t i;
981 size_t count = 0;
982
983 if (!client->structured_reply) {
984 return nbd_opt_invalid(client, errp,
985 "request option '%s' when structured reply "
986 "is not negotiated",
987 nbd_opt_lookup(client->opt));
988 }
989
990 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
991
992 meta = &local_meta;
993 }
994
995 g_free(meta->bitmaps);
996 memset(meta, 0, sizeof(*meta));
997
998 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
999 if (ret <= 0) {
1000 return ret;
1001 }
1002
1003 meta->exp = nbd_export_find(export_name);
1004 if (meta->exp == NULL) {
1005 g_autofree char *sane_name = nbd_sanitize_name(export_name);
1006
1007 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
1008 "export '%s' not present", sane_name);
1009 }
1010 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
1011 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1012 bitmaps = meta->bitmaps;
1013 }
1014
1015 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1016 if (ret <= 0) {
1017 return ret;
1018 }
1019 nb_queries = cpu_to_be32(nb_queries);
1020 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1021 export_name, nb_queries);
1022
1023 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1024
1025 meta->base_allocation = true;
1026 meta->allocation_depth = meta->exp->allocation_depth;
1027 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
1028 } else {
1029 for (i = 0; i < nb_queries; ++i) {
1030 ret = nbd_negotiate_meta_query(client, meta, errp);
1031 if (ret <= 0) {
1032 return ret;
1033 }
1034 }
1035 }
1036
1037 if (meta->base_allocation) {
1038 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1039 NBD_META_ID_BASE_ALLOCATION,
1040 errp);
1041 if (ret < 0) {
1042 return ret;
1043 }
1044 count++;
1045 }
1046
1047 if (meta->allocation_depth) {
1048 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
1049 NBD_META_ID_ALLOCATION_DEPTH,
1050 errp);
1051 if (ret < 0) {
1052 return ret;
1053 }
1054 count++;
1055 }
1056
1057 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
1058 const char *bm_name;
1059 g_autofree char *context = NULL;
1060
1061 if (!meta->bitmaps[i]) {
1062 continue;
1063 }
1064
1065 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
1066 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
1067
1068 ret = nbd_negotiate_send_meta_context(client, context,
1069 NBD_META_ID_DIRTY_BITMAP + i,
1070 errp);
1071 if (ret < 0) {
1072 return ret;
1073 }
1074 count++;
1075 }
1076
1077 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1078 if (ret == 0) {
1079 meta->count = count;
1080 }
1081
1082 return ret;
1083}
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094static int nbd_negotiate_options(NBDClient *client, Error **errp)
1095{
1096 uint32_t flags;
1097 bool fixedNewstyle = false;
1098 bool no_zeroes = false;
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
1116 return -EIO;
1117 }
1118 trace_nbd_negotiate_options_flags(flags);
1119 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
1120 fixedNewstyle = true;
1121 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1122 }
1123 if (flags & NBD_FLAG_C_NO_ZEROES) {
1124 no_zeroes = true;
1125 flags &= ~NBD_FLAG_C_NO_ZEROES;
1126 }
1127 if (flags != 0) {
1128 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1129 return -EINVAL;
1130 }
1131
1132 while (1) {
1133 int ret;
1134 uint32_t option, length;
1135 uint64_t magic;
1136
1137 if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1138 return -EINVAL;
1139 }
1140 trace_nbd_negotiate_options_check_magic(magic);
1141 if (magic != NBD_OPTS_MAGIC) {
1142 error_setg(errp, "Bad magic received");
1143 return -EINVAL;
1144 }
1145
1146 if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1147 return -EINVAL;
1148 }
1149 client->opt = option;
1150
1151 if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1152 return -EINVAL;
1153 }
1154 assert(!client->optlen);
1155 client->optlen = length;
1156
1157 if (length > NBD_MAX_BUFFER_SIZE) {
1158 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
1159 length, NBD_MAX_BUFFER_SIZE);
1160 return -EINVAL;
1161 }
1162
1163 trace_nbd_negotiate_options_check_option(option,
1164 nbd_opt_lookup(option));
1165 if (client->tlscreds &&
1166 client->ioc == (QIOChannel *)client->sioc) {
1167 QIOChannel *tioc;
1168 if (!fixedNewstyle) {
1169 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1170 return -EINVAL;
1171 }
1172 switch (option) {
1173 case NBD_OPT_STARTTLS:
1174 if (length) {
1175
1176
1177 return nbd_reject_length(client, true, errp);
1178 }
1179 tioc = nbd_negotiate_handle_starttls(client, errp);
1180 if (!tioc) {
1181 return -EIO;
1182 }
1183 ret = 0;
1184 object_unref(OBJECT(client->ioc));
1185 client->ioc = QIO_CHANNEL(tioc);
1186 break;
1187
1188 case NBD_OPT_EXPORT_NAME:
1189
1190 error_setg(errp, "Option 0x%x not permitted before TLS",
1191 option);
1192 return -EINVAL;
1193
1194 default:
1195
1196
1197
1198
1199
1200
1201 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
1202 option == NBD_OPT_ABORT ? NULL : errp,
1203 "Option 0x%" PRIx32
1204 " not permitted before TLS", option);
1205 if (option == NBD_OPT_ABORT) {
1206 return 1;
1207 }
1208 break;
1209 }
1210 } else if (fixedNewstyle) {
1211 switch (option) {
1212 case NBD_OPT_LIST:
1213 if (length) {
1214 ret = nbd_reject_length(client, false, errp);
1215 } else {
1216 ret = nbd_negotiate_handle_list(client, errp);
1217 }
1218 break;
1219
1220 case NBD_OPT_ABORT:
1221
1222
1223
1224 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
1225 return 1;
1226
1227 case NBD_OPT_EXPORT_NAME:
1228 return nbd_negotiate_handle_export_name(client, no_zeroes,
1229 errp);
1230
1231 case NBD_OPT_INFO:
1232 case NBD_OPT_GO:
1233 ret = nbd_negotiate_handle_info(client, errp);
1234 if (ret == 1) {
1235 assert(option == NBD_OPT_GO);
1236 return 0;
1237 }
1238 break;
1239
1240 case NBD_OPT_STARTTLS:
1241 if (length) {
1242 ret = nbd_reject_length(client, false, errp);
1243 } else if (client->tlscreds) {
1244 ret = nbd_negotiate_send_rep_err(client,
1245 NBD_REP_ERR_INVALID, errp,
1246 "TLS already enabled");
1247 } else {
1248 ret = nbd_negotiate_send_rep_err(client,
1249 NBD_REP_ERR_POLICY, errp,
1250 "TLS not configured");
1251 }
1252 break;
1253
1254 case NBD_OPT_STRUCTURED_REPLY:
1255 if (length) {
1256 ret = nbd_reject_length(client, false, errp);
1257 } else if (client->structured_reply) {
1258 ret = nbd_negotiate_send_rep_err(
1259 client, NBD_REP_ERR_INVALID, errp,
1260 "structured reply already negotiated");
1261 } else {
1262 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1263 client->structured_reply = true;
1264 }
1265 break;
1266
1267 case NBD_OPT_LIST_META_CONTEXT:
1268 case NBD_OPT_SET_META_CONTEXT:
1269 ret = nbd_negotiate_meta_queries(client, &client->export_meta,
1270 errp);
1271 break;
1272
1273 default:
1274 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
1275 "Unsupported option %" PRIu32 " (%s)",
1276 option, nbd_opt_lookup(option));
1277 break;
1278 }
1279 } else {
1280
1281
1282
1283
1284 switch (option) {
1285 case NBD_OPT_EXPORT_NAME:
1286 return nbd_negotiate_handle_export_name(client, no_zeroes,
1287 errp);
1288
1289 default:
1290 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
1291 option, nbd_opt_lookup(option));
1292 return -EINVAL;
1293 }
1294 }
1295 if (ret < 0) {
1296 return ret;
1297 }
1298 }
1299}
1300
1301
1302
1303
1304
1305
1306
1307
1308static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1309{
1310 ERRP_GUARD();
1311 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1312 int ret;
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328 qio_channel_set_blocking(client->ioc, false, NULL);
1329
1330 trace_nbd_negotiate_begin();
1331 memcpy(buf, "NBDMAGIC", 8);
1332
1333 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1334 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1335
1336 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1337 error_prepend(errp, "write failed: ");
1338 return -EINVAL;
1339 }
1340 ret = nbd_negotiate_options(client, errp);
1341 if (ret != 0) {
1342 if (ret < 0) {
1343 error_prepend(errp, "option negotiation failed: ");
1344 }
1345 return ret;
1346 }
1347
1348
1349 if (client->exp && client->exp->common.ctx) {
1350 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
1351 }
1352
1353 assert(!client->optlen);
1354 trace_nbd_negotiate_success();
1355
1356 return 0;
1357}
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367static inline int coroutine_fn
1368nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1369{
1370 bool partial = false;
1371
1372 assert(size);
1373 while (size > 0) {
1374 struct iovec iov = { .iov_base = buffer, .iov_len = size };
1375 ssize_t len;
1376
1377 len = qio_channel_readv(client->ioc, &iov, 1, errp);
1378 if (len == QIO_CHANNEL_ERR_BLOCK) {
1379 client->read_yielding = true;
1380 qio_channel_yield(client->ioc, G_IO_IN);
1381 client->read_yielding = false;
1382 if (client->quiescing) {
1383 return -EAGAIN;
1384 }
1385 continue;
1386 } else if (len < 0) {
1387 return -EIO;
1388 } else if (len == 0) {
1389 if (partial) {
1390 error_setg(errp,
1391 "Unexpected end-of-file before all bytes were read");
1392 return -EIO;
1393 } else {
1394 return 0;
1395 }
1396 }
1397
1398 partial = true;
1399 size -= len;
1400 buffer = (uint8_t *) buffer + len;
1401 }
1402 return 1;
1403}
1404
1405static int nbd_receive_request(NBDClient *client, NBDRequest *request,
1406 Error **errp)
1407{
1408 uint8_t buf[NBD_REQUEST_SIZE];
1409 uint32_t magic;
1410 int ret;
1411
1412 ret = nbd_read_eof(client, buf, sizeof(buf), errp);
1413 if (ret < 0) {
1414 return ret;
1415 }
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426 magic = ldl_be_p(buf);
1427 request->flags = lduw_be_p(buf + 4);
1428 request->type = lduw_be_p(buf + 6);
1429 request->handle = ldq_be_p(buf + 8);
1430 request->from = ldq_be_p(buf + 16);
1431 request->len = ldl_be_p(buf + 24);
1432
1433 trace_nbd_receive_request(magic, request->flags, request->type,
1434 request->from, request->len);
1435
1436 if (magic != NBD_REQUEST_MAGIC) {
1437 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1438 return -EINVAL;
1439 }
1440 return 0;
1441}
1442
/* Upper bound on client->nb_requests, asserted in nbd_request_get() */
#define MAX_NBD_REQUESTS 16
1444
1445void nbd_client_get(NBDClient *client)
1446{
1447 client->refcount++;
1448}
1449
1450void nbd_client_put(NBDClient *client)
1451{
1452 if (--client->refcount == 0) {
1453
1454
1455
1456 assert(client->closing);
1457
1458 qio_channel_detach_aio_context(client->ioc);
1459 object_unref(OBJECT(client->sioc));
1460 object_unref(OBJECT(client->ioc));
1461 if (client->tlscreds) {
1462 object_unref(OBJECT(client->tlscreds));
1463 }
1464 g_free(client->tlsauthz);
1465 if (client->exp) {
1466 QTAILQ_REMOVE(&client->exp->clients, client, next);
1467 blk_exp_unref(&client->exp->common);
1468 }
1469 g_free(client->export_meta.bitmaps);
1470 g_free(client);
1471 }
1472}
1473
1474static void client_close(NBDClient *client, bool negotiated)
1475{
1476 if (client->closing) {
1477 return;
1478 }
1479
1480 client->closing = true;
1481
1482
1483
1484
1485 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1486 NULL);
1487
1488
1489 if (client->close_fn) {
1490 client->close_fn(client, negotiated);
1491 }
1492}
1493
1494static NBDRequestData *nbd_request_get(NBDClient *client)
1495{
1496 NBDRequestData *req;
1497
1498 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1499 client->nb_requests++;
1500
1501 req = g_new0(NBDRequestData, 1);
1502 nbd_client_get(client);
1503 req->client = client;
1504 return req;
1505}
1506
1507static void nbd_request_put(NBDRequestData *req)
1508{
1509 NBDClient *client = req->client;
1510
1511 if (req->data) {
1512 qemu_vfree(req->data);
1513 }
1514 g_free(req);
1515
1516 client->nb_requests--;
1517
1518 if (client->quiescing && client->nb_requests == 0) {
1519 aio_wait_kick();
1520 }
1521
1522 nbd_client_receive_next_request(client);
1523
1524 nbd_client_put(client);
1525}
1526
1527static void blk_aio_attached(AioContext *ctx, void *opaque)
1528{
1529 NBDExport *exp = opaque;
1530 NBDClient *client;
1531
1532 trace_nbd_blk_aio_attached(exp->name, ctx);
1533
1534 exp->common.ctx = ctx;
1535
1536 QTAILQ_FOREACH(client, &exp->clients, next) {
1537 qio_channel_attach_aio_context(client->ioc, ctx);
1538
1539 assert(client->nb_requests == 0);
1540 assert(client->recv_coroutine == NULL);
1541 assert(client->send_coroutine == NULL);
1542 }
1543}
1544
1545static void blk_aio_detach(void *opaque)
1546{
1547 NBDExport *exp = opaque;
1548 NBDClient *client;
1549
1550 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1551
1552 QTAILQ_FOREACH(client, &exp->clients, next) {
1553 qio_channel_detach_aio_context(client->ioc);
1554 }
1555
1556 exp->common.ctx = NULL;
1557}
1558
1559static void nbd_drained_begin(void *opaque)
1560{
1561 NBDExport *exp = opaque;
1562 NBDClient *client;
1563
1564 QTAILQ_FOREACH(client, &exp->clients, next) {
1565 client->quiescing = true;
1566 }
1567}
1568
1569static void nbd_drained_end(void *opaque)
1570{
1571 NBDExport *exp = opaque;
1572 NBDClient *client;
1573
1574 QTAILQ_FOREACH(client, &exp->clients, next) {
1575 client->quiescing = false;
1576 nbd_client_receive_next_request(client);
1577 }
1578}
1579
1580static bool nbd_drained_poll(void *opaque)
1581{
1582 NBDExport *exp = opaque;
1583 NBDClient *client;
1584
1585 QTAILQ_FOREACH(client, &exp->clients, next) {
1586 if (client->nb_requests != 0) {
1587
1588
1589
1590
1591 if (client->recv_coroutine != NULL && client->read_yielding) {
1592 qemu_aio_coroutine_enter(exp->common.ctx,
1593 client->recv_coroutine);
1594 }
1595
1596 return true;
1597 }
1598 }
1599
1600 return false;
1601}
1602
1603static void nbd_eject_notifier(Notifier *n, void *data)
1604{
1605 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1606
1607 blk_exp_request_shutdown(&exp->common);
1608}
1609
1610void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
1611{
1612 NBDExport *nbd_exp = container_of(exp, NBDExport, common);
1613 assert(exp->drv == &blk_exp_nbd);
1614 assert(nbd_exp->eject_notifier_blk == NULL);
1615
1616 blk_ref(blk);
1617 nbd_exp->eject_notifier_blk = blk;
1618 nbd_exp->eject_notifier.notify = nbd_eject_notifier;
1619 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
1620}
1621
/*
 * Device callbacks installed on the export's BlockBackend by
 * nbd_export_create(); they hook the export's clients into drained
 * sections (begin / end / poll).
 */
static const BlockDevOps nbd_block_ops = {
    .drained_begin = nbd_drained_begin,
    .drained_end = nbd_drained_end,
    .drained_poll = nbd_drained_poll,
};
1627
/*
 * BlockExportDriver.create callback for NBD exports.
 *
 * Validates the NBD-specific options in @exp_args, initializes the
 * NBDExport embedded around @blk_exp (name, description, flags, size,
 * exported dirty bitmaps), hooks the export into its BlockBackend and
 * appends it to the global export list.
 *
 * Returns 0 on success or a negative errno with @errp set on failure.
 */
static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
                             Error **errp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    BlockExportOptionsNbd *arg = &exp_args->u.nbd;
    BlockBackend *blk = blk_exp->blk;
    int64_t size;
    uint64_t perm, shared_perm;
    bool readonly = !exp_args->writable;
    bool shared = !exp_args->writable;
    strList *bitmaps;
    size_t i;
    int ret;

    assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);

    if (!nbd_server_is_running()) {
        error_setg(errp, "NBD server not running");
        return -EINVAL;
    }

    /* The export name defaults to the node name. */
    if (!arg->has_name) {
        arg->name = exp_args->node_name;
    }

    if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "export name '%s' too long", arg->name);
        return -EINVAL;
    }

    if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "description '%s' too long", arg->description);
        return -EINVAL;
    }

    /* Export names must be unique within the server. */
    if (nbd_export_find(arg->name)) {
        error_setg(errp, "NBD server already has export named '%s'", arg->name);
        return -EEXIST;
    }

    size = blk_getlength(blk);
    if (size < 0) {
        error_setg_errno(errp, -size,
                         "Failed to determine the NBD export's length");
        return size;
    }

    /*
     * Stop sharing the RESIZE permission; presumably so the size recorded
     * in exp->size below cannot go stale while the export exists.
     */
    blk_get_perm(blk, &perm, &shared_perm);
    ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
    if (ret < 0) {
        return ret;
    }

    QTAILQ_INIT(&exp->clients);
    exp->name = g_strdup(arg->name);
    exp->description = g_strdup(arg->description);
    exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
                     NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
    if (readonly) {
        exp->nbdflags |= NBD_FLAG_READ_ONLY;
        if (shared) {
            exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
        }
    } else {
        exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
                          NBD_FLAG_SEND_FAST_ZERO);
    }
    /* The exported size is rounded down to a whole number of sectors. */
    exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);

    /* Count the requested bitmaps, then resolve each name to a bitmap. */
    for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
        exp->nr_export_bitmaps++;
    }
    exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
    for (i = 0, bitmaps = arg->bitmaps; bitmaps;
         i++, bitmaps = bitmaps->next) {
        const char *bitmap = bitmaps->value;
        BlockDriverState *bs = blk_bs(blk);
        BdrvDirtyBitmap *bm = NULL;

        /*
         * Search the exported node first, then walk down through
         * bdrv_filter_or_cow_bs() until the bitmap is found or the chain
         * ends.
         */
        while (bs) {
            bm = bdrv_find_dirty_bitmap(bs, bitmap);
            if (bm != NULL) {
                break;
            }

            bs = bdrv_filter_or_cow_bs(bs);
        }

        if (bm == NULL) {
            ret = -ENOENT;
            error_setg(errp, "Bitmap '%s' is not found", bitmap);
            goto fail;
        }

        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
            ret = -EINVAL;
            goto fail;
        }

        /*
         * Here bs is the node that owns bm.  A read-only export refuses
         * an enabled bitmap on a writable node.
         */
        if (readonly && bdrv_is_writable(bs) &&
            bdrv_dirty_bitmap_enabled(bm)) {
            ret = -EINVAL;
            error_setg(errp,
                       "Enabled bitmap '%s' incompatible with readonly export",
                       bitmap);
            goto fail;
        }

        exp->export_bitmaps[i] = bm;
        assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
    }

    /*
     * Mark the bitmaps busy only after all of them validated, so the
     * failure path above never has to undo a busy flag.
     */
    for (i = 0; i < exp->nr_export_bitmaps; i++) {
        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
    }

    exp->allocation_depth = arg->allocation_depth;

    /*
     * Disable request queuing on the backend for the lifetime of the
     * export; nbd_export_delete() turns it back on.
     */
    blk_set_disable_request_queuing(blk, true);

    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);

    blk_set_dev_ops(blk, &nbd_block_ops, exp);

    QTAILQ_INSERT_TAIL(&exports, exp, next);

    return 0;

fail:
    /* Release everything allocated after the permission change. */
    g_free(exp->export_bitmaps);
    g_free(exp->name);
    g_free(exp->description);
    return ret;
}
1770
1771NBDExport *nbd_export_find(const char *name)
1772{
1773 NBDExport *exp;
1774 QTAILQ_FOREACH(exp, &exports, next) {
1775 if (strcmp(name, exp->name) == 0) {
1776 return exp;
1777 }
1778 }
1779
1780 return NULL;
1781}
1782
1783AioContext *
1784nbd_export_aio_context(NBDExport *exp)
1785{
1786 return exp->common.ctx;
1787}
1788
1789static void nbd_export_request_shutdown(BlockExport *blk_exp)
1790{
1791 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1792 NBDClient *client, *next;
1793
1794 blk_exp_ref(&exp->common);
1795
1796
1797
1798
1799
1800
1801
1802 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1803 client_close(client, true);
1804 }
1805 if (exp->name) {
1806 g_free(exp->name);
1807 exp->name = NULL;
1808 QTAILQ_REMOVE(&exports, exp, next);
1809 }
1810 blk_exp_unref(&exp->common);
1811}
1812
1813static void nbd_export_delete(BlockExport *blk_exp)
1814{
1815 size_t i;
1816 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1817
1818 assert(exp->name == NULL);
1819 assert(QTAILQ_EMPTY(&exp->clients));
1820
1821 g_free(exp->description);
1822 exp->description = NULL;
1823
1824 if (exp->common.blk) {
1825 if (exp->eject_notifier_blk) {
1826 notifier_remove(&exp->eject_notifier);
1827 blk_unref(exp->eject_notifier_blk);
1828 }
1829 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
1830 blk_aio_detach, exp);
1831 blk_set_disable_request_queuing(exp->common.blk, false);
1832 }
1833
1834 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1835 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
1836 }
1837}
1838
/*
 * Block-export driver description for NBD: each export instance is an
 * NBDExport, driven by the create / delete / request_shutdown callbacks
 * named below.
 */
const BlockExportDriver blk_exp_nbd = {
    .type               = BLOCK_EXPORT_TYPE_NBD,
    .instance_size      = sizeof(NBDExport),
    .create             = nbd_export_create,
    .delete             = nbd_export_delete,
    .request_shutdown   = nbd_export_request_shutdown,
};
1846
1847static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1848 unsigned niov, Error **errp)
1849{
1850 int ret;
1851
1852 g_assert(qemu_in_coroutine());
1853 qemu_co_mutex_lock(&client->send_lock);
1854 client->send_coroutine = qemu_coroutine_self();
1855
1856 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1857
1858 client->send_coroutine = NULL;
1859 qemu_co_mutex_unlock(&client->send_lock);
1860
1861 return ret;
1862}
1863
1864static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1865 uint64_t handle)
1866{
1867 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1868 stl_be_p(&reply->error, error);
1869 stq_be_p(&reply->handle, handle);
1870}
1871
1872static int nbd_co_send_simple_reply(NBDClient *client,
1873 uint64_t handle,
1874 uint32_t error,
1875 void *data,
1876 size_t len,
1877 Error **errp)
1878{
1879 NBDSimpleReply reply;
1880 int nbd_err = system_errno_to_nbd_errno(error);
1881 struct iovec iov[] = {
1882 {.iov_base = &reply, .iov_len = sizeof(reply)},
1883 {.iov_base = data, .iov_len = len}
1884 };
1885
1886 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1887 len);
1888 set_be_simple_reply(&reply, nbd_err, handle);
1889
1890 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1891}
1892
1893static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1894 uint16_t type, uint64_t handle, uint32_t length)
1895{
1896 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1897 stw_be_p(&chunk->flags, flags);
1898 stw_be_p(&chunk->type, type);
1899 stq_be_p(&chunk->handle, handle);
1900 stl_be_p(&chunk->length, length);
1901}
1902
1903static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1904 uint64_t handle,
1905 Error **errp)
1906{
1907 NBDStructuredReplyChunk chunk;
1908 struct iovec iov[] = {
1909 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1910 };
1911
1912 trace_nbd_co_send_structured_done(handle);
1913 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1914
1915 return nbd_co_send_iov(client, iov, 1, errp);
1916}
1917
1918static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1919 uint64_t handle,
1920 uint64_t offset,
1921 void *data,
1922 size_t size,
1923 bool final,
1924 Error **errp)
1925{
1926 NBDStructuredReadData chunk;
1927 struct iovec iov[] = {
1928 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1929 {.iov_base = data, .iov_len = size}
1930 };
1931
1932 assert(size);
1933 trace_nbd_co_send_structured_read(handle, offset, data, size);
1934 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1935 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1936 sizeof(chunk) - sizeof(chunk.h) + size);
1937 stq_be_p(&chunk.offset, offset);
1938
1939 return nbd_co_send_iov(client, iov, 2, errp);
1940}
1941
1942static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1943 uint64_t handle,
1944 uint32_t error,
1945 const char *msg,
1946 Error **errp)
1947{
1948 NBDStructuredError chunk;
1949 int nbd_err = system_errno_to_nbd_errno(error);
1950 struct iovec iov[] = {
1951 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1952 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1953 };
1954
1955 assert(nbd_err);
1956 trace_nbd_co_send_structured_error(handle, nbd_err,
1957 nbd_err_lookup(nbd_err), msg ? msg : "");
1958 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1959 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1960 stl_be_p(&chunk.error, nbd_err);
1961 stw_be_p(&chunk.message_length, iov[1].iov_len);
1962
1963 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1964}
1965
1966
1967
1968
1969
1970static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
1971 uint64_t handle,
1972 uint64_t offset,
1973 uint8_t *data,
1974 size_t size,
1975 Error **errp)
1976{
1977 int ret = 0;
1978 NBDExport *exp = client->exp;
1979 size_t progress = 0;
1980
1981 while (progress < size) {
1982 int64_t pnum;
1983 int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
1984 offset + progress,
1985 size - progress, &pnum, NULL,
1986 NULL);
1987 bool final;
1988
1989 if (status < 0) {
1990 char *msg = g_strdup_printf("unable to check for holes: %s",
1991 strerror(-status));
1992
1993 ret = nbd_co_send_structured_error(client, handle, -status, msg,
1994 errp);
1995 g_free(msg);
1996 return ret;
1997 }
1998 assert(pnum && pnum <= size - progress);
1999 final = progress + pnum == size;
2000 if (status & BDRV_BLOCK_ZERO) {
2001 NBDStructuredReadHole chunk;
2002 struct iovec iov[] = {
2003 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2004 };
2005
2006 trace_nbd_co_send_structured_read_hole(handle, offset + progress,
2007 pnum);
2008 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
2009 NBD_REPLY_TYPE_OFFSET_HOLE,
2010 handle, sizeof(chunk) - sizeof(chunk.h));
2011 stq_be_p(&chunk.offset, offset + progress);
2012 stl_be_p(&chunk.length, pnum);
2013 ret = nbd_co_send_iov(client, iov, 1, errp);
2014 } else {
2015 ret = blk_pread(exp->common.blk, offset + progress,
2016 data + progress, pnum);
2017 if (ret < 0) {
2018 error_setg_errno(errp, -ret, "reading from file failed");
2019 break;
2020 }
2021 ret = nbd_co_send_structured_read(client, handle, offset + progress,
2022 data + progress, pnum, final,
2023 errp);
2024 }
2025
2026 if (ret < 0) {
2027 break;
2028 }
2029 progress += pnum;
2030 }
2031 return ret;
2032}
2033
/*
 * Bounded list of block-status extents built up by nbd_extent_array_add()
 * and sent by nbd_co_send_extents().
 */
typedef struct NBDExtentArray {
    NBDExtent *extents;         /* storage for nb_alloc entries */
    unsigned int nb_alloc;      /* capacity of extents[] */
    unsigned int count;         /* number of entries currently in use */
    uint64_t total_length;      /* sum of the lengths added so far */
    bool can_add;               /* false once full or converted to BE */
    bool converted_to_be;       /* entries have been byte-swapped for wire */
} NBDExtentArray;
2042
2043static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
2044{
2045 NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
2046
2047 ea->nb_alloc = nb_alloc;
2048 ea->extents = g_new(NBDExtent, nb_alloc);
2049 ea->can_add = true;
2050
2051 return ea;
2052}
2053
/* Free an extent array created by nbd_extent_array_new(). */
static void nbd_extent_array_free(NBDExtentArray *ea)
{
    g_free(ea->extents);
    g_free(ea);
}
/* Lets callers declare g_autoptr(NBDExtentArray) variables. */
G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
2060
2061
2062static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
2063{
2064 int i;
2065
2066 assert(!ea->converted_to_be);
2067 ea->can_add = false;
2068 ea->converted_to_be = true;
2069
2070 for (i = 0; i < ea->count; i++) {
2071 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2072 ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
2073 }
2074}
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086static int nbd_extent_array_add(NBDExtentArray *ea,
2087 uint32_t length, uint32_t flags)
2088{
2089 assert(ea->can_add);
2090
2091 if (!length) {
2092 return 0;
2093 }
2094
2095
2096 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2097 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
2098
2099 if (sum <= UINT32_MAX) {
2100 ea->extents[ea->count - 1].length = sum;
2101 ea->total_length += length;
2102 return 0;
2103 }
2104 }
2105
2106 if (ea->count >= ea->nb_alloc) {
2107 ea->can_add = false;
2108 return -1;
2109 }
2110
2111 ea->total_length += length;
2112 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
2113 ea->count++;
2114
2115 return 0;
2116}
2117
2118static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
2119 uint64_t bytes, NBDExtentArray *ea)
2120{
2121 while (bytes) {
2122 uint32_t flags;
2123 int64_t num;
2124 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
2125 NULL, NULL);
2126
2127 if (ret < 0) {
2128 return ret;
2129 }
2130
2131 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2132 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2133
2134 if (nbd_extent_array_add(ea, num, flags) < 0) {
2135 return 0;
2136 }
2137
2138 offset += num;
2139 bytes -= num;
2140 }
2141
2142 return 0;
2143}
2144
2145static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2146 uint64_t bytes, NBDExtentArray *ea)
2147{
2148 while (bytes) {
2149 int64_t num;
2150 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2151 &num);
2152
2153 if (ret < 0) {
2154 return ret;
2155 }
2156
2157 if (nbd_extent_array_add(ea, num, ret) < 0) {
2158 return 0;
2159 }
2160
2161 offset += num;
2162 bytes -= num;
2163 }
2164
2165 return 0;
2166}
2167
2168
2169
2170
2171
2172
2173
2174static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
2175 NBDExtentArray *ea,
2176 bool last, uint32_t context_id, Error **errp)
2177{
2178 NBDStructuredMeta chunk;
2179 struct iovec iov[] = {
2180 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2181 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
2182 };
2183
2184 nbd_extent_array_convert_to_be(ea);
2185
2186 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
2187 last);
2188 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
2189 NBD_REPLY_TYPE_BLOCK_STATUS,
2190 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
2191 stl_be_p(&chunk.context_id, context_id);
2192
2193 return nbd_co_send_iov(client, iov, 2, errp);
2194}
2195
2196
2197static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
2198 BlockDriverState *bs, uint64_t offset,
2199 uint32_t length, bool dont_fragment,
2200 bool last, uint32_t context_id,
2201 Error **errp)
2202{
2203 int ret;
2204 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2205 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2206
2207 if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2208 ret = blockstatus_to_extents(bs, offset, length, ea);
2209 } else {
2210 ret = blockalloc_to_extents(bs, offset, length, ea);
2211 }
2212 if (ret < 0) {
2213 return nbd_co_send_structured_error(
2214 client, handle, -ret, "can't get block status", errp);
2215 }
2216
2217 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2218}
2219
2220
2221static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
2222 uint64_t offset, uint64_t length,
2223 NBDExtentArray *es)
2224{
2225 int64_t start, dirty_start, dirty_count;
2226 int64_t end = offset + length;
2227 bool full = false;
2228
2229 bdrv_dirty_bitmap_lock(bitmap);
2230
2231 for (start = offset;
2232 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
2233 &dirty_start, &dirty_count);
2234 start = dirty_start + dirty_count)
2235 {
2236 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2237 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2238 {
2239 full = true;
2240 break;
2241 }
2242 }
2243
2244 if (!full) {
2245
2246 (void) nbd_extent_array_add(es, end - start, 0);
2247 }
2248
2249 bdrv_dirty_bitmap_unlock(bitmap);
2250}
2251
2252static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
2253 BdrvDirtyBitmap *bitmap, uint64_t offset,
2254 uint32_t length, bool dont_fragment, bool last,
2255 uint32_t context_id, Error **errp)
2256{
2257 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2258 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2259
2260 bitmap_to_extents(bitmap, offset, length, ea);
2261
2262 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2263}
2264
2265
2266
2267
2268
2269
2270
2271
/*
 * Receive one request for @req->client into @request, including the
 * payload of a write, and validate it.
 *
 * Returns 0 if the request looks valid.  As seen in nbd_trip(), -EIO
 * makes the caller drop the connection, -EAGAIN (from the header read)
 * means the client is quiescing, and any other negative value is
 * reported to the client as an error reply with the message in @errp.
 *
 * req->complete is set once the whole request, payload included, has
 * been read from the channel.
 */
static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                  Error **errp)
{
    NBDClient *client = req->client;
    int valid_flags;
    int ret;

    g_assert(qemu_in_coroutine());
    assert(client->recv_coroutine == qemu_coroutine_self());
    ret = nbd_receive_request(client, request, errp);
    if (ret < 0) {
        return ret;
    }

    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
                                             nbd_cmd_lookup(request->type));

    if (request->type != NBD_CMD_WRITE) {
        /* No payload follows the header, so the request is fully read. */
        req->complete = true;
    }

    if (request->type == NBD_CMD_DISC) {
        /*
         * Disconnect request: return -EIO so the caller closes the
         * connection without replying, before any other field is checked.
         */
        return -EIO;
    }

    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
        request->type == NBD_CMD_CACHE)
    {
        if (request->len > NBD_MAX_BUFFER_SIZE) {
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       request->len, NBD_MAX_BUFFER_SIZE);
            return -EINVAL;
        }

        /* CACHE transfers no data, so it needs no buffer. */
        if (request->type != NBD_CMD_CACHE) {
            req->data = blk_try_blockalign(client->exp->common.blk,
                                           request->len);
            if (req->data == NULL) {
                error_setg(errp, "No memory");
                return -ENOMEM;
            }
        }
    }

    if (request->type == NBD_CMD_WRITE) {
        /*
         * Read the payload before the checks below, so that a rejected
         * write still leaves the stream positioned at the next request.
         */
        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
                     errp) < 0)
        {
            return -EIO;
        }
        req->complete = true;

        trace_nbd_co_receive_request_payload_received(request->handle,
                                                      request->len);
    }

    /* Sanity checks. */
    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
        (request->type == NBD_CMD_WRITE ||
         request->type == NBD_CMD_WRITE_ZEROES ||
         request->type == NBD_CMD_TRIM)) {
        error_setg(errp, "Export is read-only");
        return -EROFS;
    }
    /* Written so that from + len cannot overflow. */
    if (request->from > client->exp->size ||
        request->len > client->exp->size - request->from) {
        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                   ", Size: %" PRIu64, request->from, request->len,
                   client->exp->size);
        return (request->type == NBD_CMD_WRITE ||
                request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
    }
    if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
                                                client->check_align)) {
        /*
         * Misaligned requests are only traced here, not rejected.
         */
        trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
                                              request->from,
                                              request->len,
                                              client->check_align);
    }
    /* FUA is accepted everywhere; other flags depend on the command. */
    valid_flags = NBD_CMD_FLAG_FUA;
    if (request->type == NBD_CMD_READ && client->structured_reply) {
        valid_flags |= NBD_CMD_FLAG_DF;
    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
    }
    if (request->flags & ~valid_flags) {
        error_setg(errp, "unsupported flags for command %s (got 0x%x)",
                   nbd_cmd_lookup(request->type), request->flags);
        return -EINVAL;
    }

    return 0;
}
2374
2375
2376
2377
2378
2379static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
2380 uint64_t handle,
2381 int ret,
2382 const char *error_msg,
2383 Error **errp)
2384{
2385 if (client->structured_reply && ret < 0) {
2386 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
2387 errp);
2388 } else {
2389 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
2390 NULL, 0, errp);
2391 }
2392}
2393
2394
2395
2396
2397static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
2398 uint8_t *data, Error **errp)
2399{
2400 int ret;
2401 NBDExport *exp = client->exp;
2402
2403 assert(request->type == NBD_CMD_READ);
2404
2405
2406 if (request->flags & NBD_CMD_FLAG_FUA) {
2407 ret = blk_co_flush(exp->common.blk);
2408 if (ret < 0) {
2409 return nbd_send_generic_reply(client, request->handle, ret,
2410 "flush failed", errp);
2411 }
2412 }
2413
2414 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
2415 request->len)
2416 {
2417 return nbd_co_send_sparse_read(client, request->handle, request->from,
2418 data, request->len, errp);
2419 }
2420
2421 ret = blk_pread(exp->common.blk, request->from, data, request->len);
2422 if (ret < 0) {
2423 return nbd_send_generic_reply(client, request->handle, ret,
2424 "reading from file failed", errp);
2425 }
2426
2427 if (client->structured_reply) {
2428 if (request->len) {
2429 return nbd_co_send_structured_read(client, request->handle,
2430 request->from, data,
2431 request->len, true, errp);
2432 } else {
2433 return nbd_co_send_structured_done(client, request->handle, errp);
2434 }
2435 } else {
2436 return nbd_co_send_simple_reply(client, request->handle, 0,
2437 data, request->len, errp);
2438 }
2439}
2440
2441
2442
2443
2444
2445
2446
2447
2448static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
2449 Error **errp)
2450{
2451 int ret;
2452 NBDExport *exp = client->exp;
2453
2454 assert(request->type == NBD_CMD_CACHE);
2455
2456 ret = blk_co_preadv(exp->common.blk, request->from, request->len,
2457 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
2458
2459 return nbd_send_generic_reply(client, request->handle, ret,
2460 "caching data failed", errp);
2461}
2462
2463
2464
2465
/*
 * Execute an already validated @request for @client and send the reply.
 * @data is the request buffer (holding the payload for writes).
 *
 * Returns the result of sending the reply; a backend failure that was
 * successfully reported to the client does not make this negative.
 */
static coroutine_fn int nbd_handle_request(NBDClient *client,
                                           NBDRequest *request,
                                           uint8_t *data, Error **errp)
{
    int ret;
    int flags;
    NBDExport *exp = client->exp;
    char *msg;
    size_t i;

    switch (request->type) {
    case NBD_CMD_CACHE:
        return nbd_do_cmd_cache(client, request, errp);

    case NBD_CMD_READ:
        return nbd_do_cmd_read(client, request, data, errp);

    case NBD_CMD_WRITE:
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
                         flags);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_WRITE_ZEROES:
        /* Translate NBD command flags into block-layer request flags. */
        flags = 0;
        if (request->flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
            flags |= BDRV_REQ_NO_FALLBACK;
        }
        ret = 0;
        /*
         * Split the range into pieces of at most BDRV_REQUEST_MAX_BYTES,
         * rounded down to the client's alignment.
         */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags);
            request->len -= len;
            request->from += len;
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "writing to file failed", errp);

    case NBD_CMD_DISC:
        /* Not reachable: nbd_co_receive_request() returns -EIO for DISC. */
        abort();

    case NBD_CMD_FLUSH:
        ret = blk_co_flush(exp->common.blk);
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "flush failed", errp);

    case NBD_CMD_TRIM:
        ret = 0;
        /* Same chunking as for NBD_CMD_WRITE_ZEROES. */
        while (ret >= 0 && request->len) {
            int align = client->check_align ?: 1;
            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
                                                        align));
            ret = blk_co_pdiscard(exp->common.blk, request->from, len);
            request->len -= len;
            request->from += len;
        }
        /* FUA on a trim is honoured with a flush after the discards. */
        if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
            ret = blk_co_flush(exp->common.blk);
        }
        return nbd_send_generic_reply(client, request->handle, ret,
                                      "discard failed", errp);

    case NBD_CMD_BLOCK_STATUS:
        if (!request->len) {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "need non-zero length", errp);
        }
        if (client->export_meta.count) {
            bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
            /*
             * One chunk is sent per selected context; the countdown
             * reaches zero on the last one, which gets the DONE flag.
             */
            int contexts_remaining = client->export_meta.count;

            if (client->export_meta.base_allocation) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from,
                                               request->len, dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_BASE_ALLOCATION,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            if (client->export_meta.allocation_depth) {
                ret = nbd_co_send_block_status(client, request->handle,
                                               blk_bs(exp->common.blk),
                                               request->from, request->len,
                                               dont_fragment,
                                               !--contexts_remaining,
                                               NBD_META_ID_ALLOCATION_DEPTH,
                                               errp);
                if (ret < 0) {
                    return ret;
                }
            }

            /* Bitmap context ids start at NBD_META_ID_DIRTY_BITMAP. */
            for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
                if (!client->export_meta.bitmaps[i]) {
                    continue;
                }
                ret = nbd_co_send_bitmap(client, request->handle,
                                         client->exp->export_bitmaps[i],
                                         request->from, request->len,
                                         dont_fragment, !--contexts_remaining,
                                         NBD_META_ID_DIRTY_BITMAP + i, errp);
                if (ret < 0) {
                    return ret;
                }
            }

            assert(!contexts_remaining);

            return 0;
        } else {
            return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                          "CMD_BLOCK_STATUS not negotiated",
                                          errp);
        }

    default:
        msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
                              request->type);
        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
                                     errp);
        g_free(msg);
        return ret;
    }
}
2610
2611
2612static coroutine_fn void nbd_trip(void *opaque)
2613{
2614 NBDClient *client = opaque;
2615 NBDRequestData *req;
2616 NBDRequest request = { 0 };
2617 int ret;
2618 Error *local_err = NULL;
2619
2620 trace_nbd_trip();
2621 if (client->closing) {
2622 nbd_client_put(client);
2623 return;
2624 }
2625
2626 if (client->quiescing) {
2627
2628
2629
2630
2631 nbd_client_put(client);
2632 client->recv_coroutine = NULL;
2633 aio_wait_kick();
2634 return;
2635 }
2636
2637 req = nbd_request_get(client);
2638 ret = nbd_co_receive_request(req, &request, &local_err);
2639 client->recv_coroutine = NULL;
2640
2641 if (client->closing) {
2642
2643
2644
2645
2646 goto done;
2647 }
2648
2649 if (ret == -EAGAIN) {
2650 assert(client->quiescing);
2651 goto done;
2652 }
2653
2654 nbd_client_receive_next_request(client);
2655 if (ret == -EIO) {
2656 goto disconnect;
2657 }
2658
2659 if (ret < 0) {
2660
2661
2662 Error *export_err = local_err;
2663
2664 local_err = NULL;
2665 ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
2666 error_get_pretty(export_err), &local_err);
2667 error_free(export_err);
2668 } else {
2669 ret = nbd_handle_request(client, &request, req->data, &local_err);
2670 }
2671 if (ret < 0) {
2672 error_prepend(&local_err, "Failed to send reply: ");
2673 goto disconnect;
2674 }
2675
2676
2677
2678
2679 if (!req->complete) {
2680 error_setg(&local_err, "Request handling failed in intermediate state");
2681 goto disconnect;
2682 }
2683
2684done:
2685 nbd_request_put(req);
2686 nbd_client_put(client);
2687 return;
2688
2689disconnect:
2690 if (local_err) {
2691 error_reportf_err(local_err, "Disconnect client, due to: ");
2692 }
2693 nbd_request_put(req);
2694 client_close(client, true);
2695 nbd_client_put(client);
2696}
2697
2698static void nbd_client_receive_next_request(NBDClient *client)
2699{
2700 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
2701 !client->quiescing) {
2702 nbd_client_get(client);
2703 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2704 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
2705 }
2706}
2707
2708static coroutine_fn void nbd_co_client_start(void *opaque)
2709{
2710 NBDClient *client = opaque;
2711 Error *local_err = NULL;
2712
2713 qemu_co_mutex_init(&client->send_lock);
2714
2715 if (nbd_negotiate(client, &local_err)) {
2716 if (local_err) {
2717 error_report_err(local_err);
2718 }
2719 client_close(client, false);
2720 return;
2721 }
2722
2723 nbd_client_receive_next_request(client);
2724}
2725
2726
2727
2728
2729
2730
2731void nbd_client_new(QIOChannelSocket *sioc,
2732 QCryptoTLSCreds *tlscreds,
2733 const char *tlsauthz,
2734 void (*close_fn)(NBDClient *, bool))
2735{
2736 NBDClient *client;
2737 Coroutine *co;
2738
2739 client = g_new0(NBDClient, 1);
2740 client->refcount = 1;
2741 client->tlscreds = tlscreds;
2742 if (tlscreds) {
2743 object_ref(OBJECT(client->tlscreds));
2744 }
2745 client->tlsauthz = g_strdup(tlsauthz);
2746 client->sioc = sioc;
2747 object_ref(OBJECT(client->sioc));
2748 client->ioc = QIO_CHANNEL(sioc);
2749 object_ref(OBJECT(client->ioc));
2750 client->close_fn = close_fn;
2751
2752 co = qemu_coroutine_create(nbd_co_client_start, client);
2753 qemu_coroutine_enter(co);
2754}
2755