1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21
22#include "block/export.h"
23#include "qapi/error.h"
24#include "qemu/queue.h"
25#include "trace.h"
26#include "nbd-internal.h"
27#include "qemu/units.h"
28
/* Context ids reported to the client for the two fixed meta contexts. */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1

/*
 * First id used for dirty-bitmap contexts: exported bitmap i is advertised
 * as NBD_META_ID_DIRTY_BITMAP + i, so this must stay the highest fixed id.
 */
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * NOTE(review): presumably the maximum number of extents in one block-status
 * reply, i.e. 1 MiB of reply data at 8 bytes per extent - confirm against the
 * code that consumes this limit.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
41
42static int system_errno_to_nbd_errno(int err)
43{
44 switch (err) {
45 case 0:
46 return NBD_SUCCESS;
47 case EPERM:
48 case EROFS:
49 return NBD_EPERM;
50 case EIO:
51 return NBD_EIO;
52 case ENOMEM:
53 return NBD_ENOMEM;
54#ifdef EDQUOT
55 case EDQUOT:
56#endif
57 case EFBIG:
58 case ENOSPC:
59 return NBD_ENOSPC;
60 case EOVERFLOW:
61 return NBD_EOVERFLOW;
62 case ENOTSUP:
63#if ENOTSUP != EOPNOTSUPP
64 case EOPNOTSUPP:
65#endif
66 return NBD_ENOTSUP;
67 case ESHUTDOWN:
68 return NBD_ESHUTDOWN;
69 case EINVAL:
70 default:
71 return NBD_EINVAL;
72 }
73}
74
75
76
typedef struct NBDRequestData NBDRequestData;

/* Per-request bookkeeping allocated by nbd_request_get(). */
struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry; /* queue linkage */
    NBDClient *client;  /* owning client; a reference is held per request */
    uint8_t *data;      /* optional payload buffer, released with qemu_vfree() */
    bool complete;      /* NOTE(review): set outside this excerpt - confirm use */
};
85
/* One block device exported over NBD. */
struct NBDExport {
    BlockExport common;     /* generic export state (blk, ctx, refcount) */

    char *name;             /* export name advertised by NBD_OPT_LIST */
    char *description;      /* optional description; may be NULL */
    uint64_t size;          /* export size sent to the client */
    uint16_t nbdflags;      /* transmission flags sent to the client */
    QTAILQ_HEAD(, NBDClient) clients;   /* clients that selected this export */
    QTAILQ_ENTRY(NBDExport) next;       /* linkage in the global 'exports' list */

    /* NOTE(review): eject handling is outside this excerpt - confirm use */
    BlockBackend *eject_notifier_blk;
    Notifier eject_notifier;

    bool allocation_depth;  /* whether "qemu:allocation-depth" is offered */
    BdrvDirtyBitmap **export_bitmaps;   /* bitmaps offered as meta contexts */
    size_t nr_export_bitmaps;           /* number of entries in export_bitmaps */
};

/* All NBD exports known to the server, walked when answering NBD_OPT_LIST. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
105
106
107
108
/*
 * Meta contexts selected through NBD_OPT_SET_META_CONTEXT (or gathered
 * temporarily while answering NBD_OPT_LIST_META_CONTEXT).
 */
typedef struct NBDExportMetaContexts {
    NBDExport *exp;         /* export the selection was made against */
    size_t count;           /* number of contexts successfully negotiated */
    bool base_allocation;   /* "base:allocation" selected */
    bool allocation_depth;  /* "qemu:allocation-depth" selected */
    bool *bitmaps;          /*
                             * one flag per exp->export_bitmaps[] entry;
                             * heap array of exp->nr_export_bitmaps elements
                             */
} NBDExportMetaContexts;
119
/* State of one connection to the NBD server. */
struct NBDClient {
    int refcount;   /* managed by nbd_client_get()/nbd_client_put() */
    void (*close_fn)(NBDClient *client, bool negotiated); /* run by client_close() */

    NBDExport *exp;             /* export chosen during negotiation, or NULL */
    QCryptoTLSCreds *tlscreds;  /* TLS credentials; NULL when TLS is not configured */
    char *tlsauthz;             /* authorization id passed to the TLS channel */
    QIOChannelSocket *sioc;     /* the underlying socket channel */
    QIOChannel *ioc;            /* channel in use; differs from sioc once TLS started */

    Coroutine *recv_coroutine;  /* NOTE(review): managed outside this excerpt */

    CoMutex send_lock;          /* NOTE(review): presumably serializes replies - confirm */
    Coroutine *send_coroutine;  /* NOTE(review): managed outside this excerpt */

    bool read_yielding;         /* true while nbd_read_eof() is parked in a yield */
    bool quiescing;             /* makes nbd_read_eof() bail out with -EAGAIN */

    QTAILQ_ENTRY(NBDClient) next;   /* linkage in exp->clients */
    int nb_requests;            /* requests currently allocated for this client */
    bool closing;               /* set once client_close() has run */

    uint32_t check_align;       /* alignment recorded at NBD_OPT_GO; 0 if none */

    bool structured_reply;      /* NBD_OPT_STRUCTURED_REPLY was negotiated */
    NBDExportMetaContexts export_meta;  /* contexts chosen via SET_META_CONTEXT */

    uint32_t opt;       /* option currently being handled */
    uint32_t optlen;    /* bytes of that option's payload not yet consumed */

};

static void nbd_client_receive_next_request(NBDClient *client);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
182 uint32_t type, uint32_t length)
183{
184 stq_be_p(&rep->magic, NBD_REP_MAGIC);
185 stl_be_p(&rep->option, option);
186 stl_be_p(&rep->type, type);
187 stl_be_p(&rep->length, length);
188}
189
190
191
192static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
193 uint32_t len, Error **errp)
194{
195 NBDOptionReply rep;
196
197 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
198 type, nbd_rep_lookup(type), len);
199
200 assert(len < NBD_MAX_BUFFER_SIZE);
201
202 set_be_option_rep(&rep, client->opt, type, len);
203 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
204}
205
206
207
208static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
209 Error **errp)
210{
211 return nbd_negotiate_send_rep_len(client, type, 0, errp);
212}
213
214
215
216static int GCC_FMT_ATTR(4, 0)
217nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
218 Error **errp, const char *fmt, va_list va)
219{
220 ERRP_GUARD();
221 g_autofree char *msg = NULL;
222 int ret;
223 size_t len;
224
225 msg = g_strdup_vprintf(fmt, va);
226 len = strlen(msg);
227 assert(len < NBD_MAX_STRING_SIZE);
228 trace_nbd_negotiate_send_rep_err(msg);
229 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
230 if (ret < 0) {
231 return ret;
232 }
233 if (nbd_write(client->ioc, msg, len, errp) < 0) {
234 error_prepend(errp, "write failed (error message): ");
235 return -EIO;
236 }
237
238 return 0;
239}
240
241
242
243
/*
 * Produce a copy of @name suitable for inclusion in an error message:
 * names of 80 bytes or more are cut to their first 80 bytes followed by
 * "...".  The caller frees the result with g_free().
 */
static char *
nbd_sanitize_name(const char *name)
{
    const size_t limit = 80;

    if (strnlen(name, limit) >= limit) {
        return g_strdup_printf("%.80s...", name);
    }

    return g_strdup(name);
}
253
254
255
256static int GCC_FMT_ATTR(4, 5)
257nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
258 Error **errp, const char *fmt, ...)
259{
260 va_list va;
261 int ret;
262
263 va_start(va, fmt);
264 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
265 va_end(va);
266 return ret;
267}
268
269
270
271
272static int GCC_FMT_ATTR(4, 0)
273nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
274 const char *fmt, va_list va)
275{
276 int ret = nbd_drop(client->ioc, client->optlen, errp);
277
278 client->optlen = 0;
279 if (!ret) {
280 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
281 }
282 return ret;
283}
284
285static int GCC_FMT_ATTR(4, 5)
286nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
287 const char *fmt, ...)
288{
289 int ret;
290 va_list va;
291
292 va_start(va, fmt);
293 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
294 va_end(va);
295
296 return ret;
297}
298
299static int GCC_FMT_ATTR(3, 4)
300nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
301{
302 int ret;
303 va_list va;
304
305 va_start(va, fmt);
306 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
307 va_end(va);
308
309 return ret;
310}
311
312
313
314
315
316static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
317 bool check_nul, Error **errp)
318{
319 if (size > client->optlen) {
320 return nbd_opt_invalid(client, errp,
321 "Inconsistent lengths in option %s",
322 nbd_opt_lookup(client->opt));
323 }
324 client->optlen -= size;
325 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
326 return -EIO;
327 }
328
329 if (check_nul && strnlen(buffer, size) != size) {
330 return nbd_opt_invalid(client, errp,
331 "Unexpected embedded NUL in option %s",
332 nbd_opt_lookup(client->opt));
333 }
334 return 1;
335}
336
337
338
339
340static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
341{
342 if (size > client->optlen) {
343 return nbd_opt_invalid(client, errp,
344 "Inconsistent lengths in option %s",
345 nbd_opt_lookup(client->opt));
346 }
347 client->optlen -= size;
348 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
349}
350
351
352
353
354
355
356
357
358
359
360
361
362
363static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
364 Error **errp)
365{
366 int ret;
367 uint32_t len;
368 g_autofree char *local_name = NULL;
369
370 *name = NULL;
371 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
372 if (ret <= 0) {
373 return ret;
374 }
375 len = cpu_to_be32(len);
376
377 if (len > NBD_MAX_STRING_SIZE) {
378 return nbd_opt_invalid(client, errp,
379 "Invalid name length: %" PRIu32, len);
380 }
381
382 local_name = g_malloc(len + 1);
383 ret = nbd_opt_read(client, local_name, len, true, errp);
384 if (ret <= 0) {
385 return ret;
386 }
387 local_name[len] = '\0';
388
389 if (length) {
390 *length = len;
391 }
392 *name = g_steal_pointer(&local_name);
393
394 return 1;
395}
396
397
398
399static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
400 Error **errp)
401{
402 ERRP_GUARD();
403 size_t name_len, desc_len;
404 uint32_t len;
405 const char *name = exp->name ? exp->name : "";
406 const char *desc = exp->description ? exp->description : "";
407 QIOChannel *ioc = client->ioc;
408 int ret;
409
410 trace_nbd_negotiate_send_rep_list(name, desc);
411 name_len = strlen(name);
412 desc_len = strlen(desc);
413 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
414 len = name_len + desc_len + sizeof(len);
415 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
416 if (ret < 0) {
417 return ret;
418 }
419
420 len = cpu_to_be32(name_len);
421 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
422 error_prepend(errp, "write failed (name length): ");
423 return -EINVAL;
424 }
425
426 if (nbd_write(ioc, name, name_len, errp) < 0) {
427 error_prepend(errp, "write failed (name buffer): ");
428 return -EINVAL;
429 }
430
431 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
432 error_prepend(errp, "write failed (description buffer): ");
433 return -EINVAL;
434 }
435
436 return 0;
437}
438
439
440
441static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
442{
443 NBDExport *exp;
444 assert(client->opt == NBD_OPT_LIST);
445
446
447 QTAILQ_FOREACH(exp, &exports, next) {
448 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
449 return -EINVAL;
450 }
451 }
452
453 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
454}
455
456static void nbd_check_meta_export(NBDClient *client)
457{
458 if (client->exp != client->export_meta.exp) {
459 client->export_meta.count = 0;
460 }
461}
462
463
464
465static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
466 Error **errp)
467{
468 ERRP_GUARD();
469 g_autofree char *name = NULL;
470 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
471 size_t len;
472 int ret;
473 uint16_t myflags;
474
475
476
477
478
479
480
481
482 trace_nbd_negotiate_handle_export_name();
483 if (client->optlen > NBD_MAX_STRING_SIZE) {
484 error_setg(errp, "Bad length received");
485 return -EINVAL;
486 }
487 name = g_malloc(client->optlen + 1);
488 if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
489 return -EIO;
490 }
491 name[client->optlen] = '\0';
492 client->optlen = 0;
493
494 trace_nbd_negotiate_handle_export_name_request(name);
495
496 client->exp = nbd_export_find(name);
497 if (!client->exp) {
498 error_setg(errp, "export not found");
499 return -EINVAL;
500 }
501
502 myflags = client->exp->nbdflags;
503 if (client->structured_reply) {
504 myflags |= NBD_FLAG_SEND_DF;
505 }
506 trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
507 stq_be_p(buf, client->exp->size);
508 stw_be_p(buf + 8, myflags);
509 len = no_zeroes ? 10 : sizeof(buf);
510 ret = nbd_write(client->ioc, buf, len, errp);
511 if (ret < 0) {
512 error_prepend(errp, "write failed: ");
513 return ret;
514 }
515
516 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
517 blk_exp_ref(&client->exp->common);
518 nbd_check_meta_export(client);
519
520 return 0;
521}
522
523
524
525
526static int nbd_negotiate_send_info(NBDClient *client,
527 uint16_t info, uint32_t length, void *buf,
528 Error **errp)
529{
530 int rc;
531
532 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
533 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
534 sizeof(info) + length, errp);
535 if (rc < 0) {
536 return rc;
537 }
538 info = cpu_to_be16(info);
539 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
540 return -EIO;
541 }
542 if (nbd_write(client->ioc, buf, length, errp) < 0) {
543 return -EIO;
544 }
545 return 0;
546}
547
548
549
550
551
552
553
554
555static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
556{
557 int ret;
558
559 assert(client->optlen);
560 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
561 nbd_opt_lookup(client->opt));
562 if (fatal && !ret) {
563 error_setg(errp, "option '%s' has unexpected length",
564 nbd_opt_lookup(client->opt));
565 return -EINVAL;
566 }
567 return ret;
568}
569
570
571
572
/*
 * Handle NBD_OPT_INFO and NBD_OPT_GO.
 *
 * Option payload: length-prefixed export name, 16-bit count of information
 * requests, then that many 16-bit request codes.
 *
 * Returns a negative value on a fatal error, 0 when the option was answered
 * (successfully for INFO, or with an error reply) and negotiation should
 * continue, and 1 when NBD_OPT_GO succeeded and the client is attached to
 * the export.
 */
static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
{
    int rc;
    g_autofree char *name = NULL;
    NBDExport *exp;
    uint16_t requests;
    uint16_t request;
    uint32_t namelen = 0;
    bool sendname = false;
    bool blocksize = false;
    uint32_t sizes[3];
    char buf[sizeof(uint64_t) + sizeof(uint16_t)];
    uint32_t check_align = 0;
    uint16_t myflags;

    /* Parse the export name. */
    rc = nbd_opt_read_name(client, &name, &namelen, errp);
    if (rc <= 0) {
        return rc;
    }
    trace_nbd_negotiate_handle_export_name_request(name);

    /* Parse the list of requested information items. */
    rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
    if (rc <= 0) {
        return rc;
    }
    requests = be16_to_cpu(requests);
    trace_nbd_negotiate_handle_info_requests(requests);
    while (requests--) {
        rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
        if (rc <= 0) {
            return rc;
        }
        request = be16_to_cpu(request);
        trace_nbd_negotiate_handle_info_request(request,
                                                nbd_info_lookup(request));
        /*
         * Only these two requests influence the reply; any other code is
         * silently ignored.
         */
        switch (request) {
        case NBD_INFO_NAME:
            sendname = true;
            break;
        case NBD_INFO_BLOCK_SIZE:
            blocksize = true;
            break;
        }
    }
    /* Anything left over means the option was longer than its contents. */
    if (client->optlen) {
        return nbd_reject_length(client, false, errp);
    }

    exp = nbd_export_find(name);
    if (!exp) {
        g_autofree char *sane_name = nbd_sanitize_name(name);

        return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
                                          errp, "export '%s' not present",
                                          sane_name);
    }

    /* NBD_INFO_NAME is sent only when the client asked for it. */
    if (sendname) {
        rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
                                     errp);
        if (rc < 0) {
            return rc;
        }
    }

    /* NBD_INFO_DESCRIPTION is sent whenever the export has a description. */
    if (exp->description) {
        size_t len = strlen(exp->description);

        assert(len <= NBD_MAX_STRING_SIZE);
        rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
                                     len, exp->description, errp);
        if (rc < 0) {
            return rc;
        }
    }

    /*
     * NBD_INFO_BLOCK_SIZE is always sent.  sizes[0] is the minimum block
     * size: the real request alignment for NBD_OPT_INFO or when the client
     * requested block sizes (in which case it is also remembered in
     * check_align), otherwise 1.
     */
    if (client->opt == NBD_OPT_INFO || blocksize) {
        check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
    } else {
        sizes[0] = 1;
    }
    assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);

    /* sizes[1]: at least 4096, raised to the minimum if that is larger. */
    sizes[1] = MAX(4096, sizes[0]);
    /* sizes[2]: the backend's maximum transfer, capped at the buffer size. */
    sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
    trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
    sizes[0] = cpu_to_be32(sizes[0]);
    sizes[1] = cpu_to_be32(sizes[1]);
    sizes[2] = cpu_to_be32(sizes[2]);
    rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
                                 sizeof(sizes), sizes, errp);
    if (rc < 0) {
        return rc;
    }

    /* NBD_INFO_EXPORT: 8-byte export size followed by 2-byte flags. */
    myflags = exp->nbdflags;
    if (client->structured_reply) {
        myflags |= NBD_FLAG_SEND_DF;
    }
    trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
    stq_be_p(buf, exp->size);
    stw_be_p(buf + 8, myflags);
    rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
                                 sizeof(buf), buf, errp);
    if (rc < 0) {
        return rc;
    }

    /*
     * For NBD_OPT_INFO without a block-size request, an export whose
     * alignment exceeds 1 is answered with NBD_REP_ERR_BLOCK_SIZE_REQD
     * instead of an acknowledgement.
     */
    if (client->opt == NBD_OPT_INFO && !blocksize &&
        blk_get_request_alignment(exp->common.blk) > 1) {
        return nbd_negotiate_send_rep_err(client,
                                          NBD_REP_ERR_BLOCK_SIZE_REQD,
                                          errp,
                                          "request NBD_INFO_BLOCK_SIZE to "
                                          "use this export");
    }

    /* Final acknowledgement. */
    rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
    if (rc < 0) {
        return rc;
    }

    /* Only NBD_OPT_GO attaches the client to the export. */
    if (client->opt == NBD_OPT_GO) {
        client->exp = exp;
        client->check_align = check_align;
        QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
        blk_exp_ref(&client->exp->common);
        nbd_check_meta_export(client);
        rc = 1;
    }
    return rc;
}
731
732
733
734
735static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
736 Error **errp)
737{
738 QIOChannel *ioc;
739 QIOChannelTLS *tioc;
740 struct NBDTLSHandshakeData data = { 0 };
741
742 assert(client->opt == NBD_OPT_STARTTLS);
743
744 trace_nbd_negotiate_handle_starttls();
745 ioc = client->ioc;
746
747 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
748 return NULL;
749 }
750
751 tioc = qio_channel_tls_new_server(ioc,
752 client->tlscreds,
753 client->tlsauthz,
754 errp);
755 if (!tioc) {
756 return NULL;
757 }
758
759 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
760 trace_nbd_negotiate_handle_starttls_handshake();
761 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
762 qio_channel_tls_handshake(tioc,
763 nbd_tls_handshake,
764 &data,
765 NULL,
766 NULL);
767
768 if (!data.complete) {
769 g_main_loop_run(data.loop);
770 }
771 g_main_loop_unref(data.loop);
772 if (data.error) {
773 object_unref(OBJECT(tioc));
774 error_propagate(errp, data.error);
775 return NULL;
776 }
777
778 return QIO_CHANNEL(tioc);
779}
780
781
782
783
784
785
786
787static int nbd_negotiate_send_meta_context(NBDClient *client,
788 const char *context,
789 uint32_t context_id,
790 Error **errp)
791{
792 NBDOptionReplyMetaContext opt;
793 struct iovec iov[] = {
794 {.iov_base = &opt, .iov_len = sizeof(opt)},
795 {.iov_base = (void *)context, .iov_len = strlen(context)}
796 };
797
798 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
799 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
800 context_id = 0;
801 }
802
803 trace_nbd_negotiate_meta_query_reply(context, context_id);
804 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
805 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
806 stl_be_p(&opt.context_id, context_id);
807
808 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
809}
810
811
812
813
814
815static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
816 const char *query)
817{
818 if (!*query) {
819 trace_nbd_negotiate_meta_query_parse("empty");
820 return client->opt == NBD_OPT_LIST_META_CONTEXT;
821 }
822 if (strcmp(query, pattern) == 0) {
823 trace_nbd_negotiate_meta_query_parse(pattern);
824 return true;
825 }
826 trace_nbd_negotiate_meta_query_skip("pattern not matched");
827 return false;
828}
829
830
831
832
/*
 * If *@str begins with @prefix, advance *@str past the prefix and return
 * true; otherwise leave *@str untouched and return false.
 */
static bool nbd_strshift(const char **str, const char *prefix)
{
    const size_t prefix_len = strlen(prefix);
    const bool matched = strncmp(*str, prefix, prefix_len) == 0;

    if (matched) {
        *str += prefix_len;
    }
    return matched;
}
843
844
845
846
847
848
849static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
850 const char *query)
851{
852 if (!nbd_strshift(&query, "base:")) {
853 return false;
854 }
855 trace_nbd_negotiate_meta_query_parse("base:");
856
857 if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
858 meta->base_allocation = true;
859 }
860 return true;
861}
862
863
864
865
866
867
868
869static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
870 const char *query)
871{
872 size_t i;
873
874 if (!nbd_strshift(&query, "qemu:")) {
875 return false;
876 }
877 trace_nbd_negotiate_meta_query_parse("qemu:");
878
879 if (!*query) {
880 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
881 meta->allocation_depth = meta->exp->allocation_depth;
882 if (meta->exp->nr_export_bitmaps) {
883 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
884 }
885 }
886 trace_nbd_negotiate_meta_query_parse("empty");
887 return true;
888 }
889
890 if (strcmp(query, "allocation-depth") == 0) {
891 trace_nbd_negotiate_meta_query_parse("allocation-depth");
892 meta->allocation_depth = meta->exp->allocation_depth;
893 return true;
894 }
895
896 if (nbd_strshift(&query, "dirty-bitmap:")) {
897 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
898 if (!*query) {
899 if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
900 meta->exp->nr_export_bitmaps) {
901 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
902 }
903 trace_nbd_negotiate_meta_query_parse("empty");
904 return true;
905 }
906
907 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
908 const char *bm_name;
909
910 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
911 if (strcmp(bm_name, query) == 0) {
912 meta->bitmaps[i] = true;
913 trace_nbd_negotiate_meta_query_parse(query);
914 return true;
915 }
916 }
917 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
918 return true;
919 }
920
921 trace_nbd_negotiate_meta_query_skip("unknown qemu context");
922 return true;
923}
924
925
926
927
928
929
930
931
932
933
934static int nbd_negotiate_meta_query(NBDClient *client,
935 NBDExportMetaContexts *meta, Error **errp)
936{
937 int ret;
938 g_autofree char *query = NULL;
939 uint32_t len;
940
941 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
942 if (ret <= 0) {
943 return ret;
944 }
945 len = cpu_to_be32(len);
946
947 if (len > NBD_MAX_STRING_SIZE) {
948 trace_nbd_negotiate_meta_query_skip("length too long");
949 return nbd_opt_skip(client, len, errp);
950 }
951
952 query = g_malloc(len + 1);
953 ret = nbd_opt_read(client, query, len, true, errp);
954 if (ret <= 0) {
955 return ret;
956 }
957 query[len] = '\0';
958
959 if (nbd_meta_base_query(client, meta, query)) {
960 return 1;
961 }
962 if (nbd_meta_qemu_query(client, meta, query)) {
963 return 1;
964 }
965
966 trace_nbd_negotiate_meta_query_skip("unknown namespace");
967 return 1;
968}
969
970
971
972
973
974static int nbd_negotiate_meta_queries(NBDClient *client,
975 NBDExportMetaContexts *meta, Error **errp)
976{
977 int ret;
978 g_autofree char *export_name = NULL;
979
980 g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
981 NBDExportMetaContexts local_meta = {0};
982 uint32_t nb_queries;
983 size_t i;
984 size_t count = 0;
985
986 if (client->opt == NBD_OPT_SET_META_CONTEXT && !client->structured_reply) {
987 return nbd_opt_invalid(client, errp,
988 "request option '%s' when structured reply "
989 "is not negotiated",
990 nbd_opt_lookup(client->opt));
991 }
992
993 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
994
995 meta = &local_meta;
996 }
997
998 g_free(meta->bitmaps);
999 memset(meta, 0, sizeof(*meta));
1000
1001 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
1002 if (ret <= 0) {
1003 return ret;
1004 }
1005
1006 meta->exp = nbd_export_find(export_name);
1007 if (meta->exp == NULL) {
1008 g_autofree char *sane_name = nbd_sanitize_name(export_name);
1009
1010 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
1011 "export '%s' not present", sane_name);
1012 }
1013 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
1014 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1015 bitmaps = meta->bitmaps;
1016 }
1017
1018 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1019 if (ret <= 0) {
1020 return ret;
1021 }
1022 nb_queries = cpu_to_be32(nb_queries);
1023 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1024 export_name, nb_queries);
1025
1026 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1027
1028 meta->base_allocation = true;
1029 meta->allocation_depth = meta->exp->allocation_depth;
1030 if (meta->exp->nr_export_bitmaps) {
1031 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
1032 }
1033 } else {
1034 for (i = 0; i < nb_queries; ++i) {
1035 ret = nbd_negotiate_meta_query(client, meta, errp);
1036 if (ret <= 0) {
1037 return ret;
1038 }
1039 }
1040 }
1041
1042 if (meta->base_allocation) {
1043 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1044 NBD_META_ID_BASE_ALLOCATION,
1045 errp);
1046 if (ret < 0) {
1047 return ret;
1048 }
1049 count++;
1050 }
1051
1052 if (meta->allocation_depth) {
1053 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
1054 NBD_META_ID_ALLOCATION_DEPTH,
1055 errp);
1056 if (ret < 0) {
1057 return ret;
1058 }
1059 count++;
1060 }
1061
1062 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
1063 const char *bm_name;
1064 g_autofree char *context = NULL;
1065
1066 if (!meta->bitmaps[i]) {
1067 continue;
1068 }
1069
1070 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
1071 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
1072
1073 ret = nbd_negotiate_send_meta_context(client, context,
1074 NBD_META_ID_DIRTY_BITMAP + i,
1075 errp);
1076 if (ret < 0) {
1077 return ret;
1078 }
1079 count++;
1080 }
1081
1082 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1083 if (ret == 0) {
1084 meta->count = count;
1085 }
1086
1087 return ret;
1088}
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
/*
 * Run the option phase of negotiation: read the client flags, then process
 * options one at a time until one of them ends negotiation.
 *
 * Returns:
 *   a negative errno on a fatal error (with @errp set where the failing
 *     helper provides a message),
 *   0 when an export was selected (NBD_OPT_EXPORT_NAME or NBD_OPT_GO),
 *   1 when the client sent NBD_OPT_ABORT.
 */
static int nbd_negotiate_options(NBDClient *client, Error **errp)
{
    uint32_t flags;
    bool fixedNewstyle = false;
    bool no_zeroes = false;

    /* Client flags: strip the known bits, then reject anything left over. */
    if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
        return -EIO;
    }
    trace_nbd_negotiate_options_flags(flags);
    if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
        fixedNewstyle = true;
        flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
    }
    if (flags & NBD_FLAG_C_NO_ZEROES) {
        no_zeroes = true;
        flags &= ~NBD_FLAG_C_NO_ZEROES;
    }
    if (flags != 0) {
        error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
        return -EINVAL;
    }

    while (1) {
        int ret;
        uint32_t option, length;
        uint64_t magic;

        /* Option header: 64-bit magic, 32-bit option, 32-bit length. */
        if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
            return -EINVAL;
        }
        trace_nbd_negotiate_options_check_magic(magic);
        if (magic != NBD_OPTS_MAGIC) {
            error_setg(errp, "Bad magic received");
            return -EINVAL;
        }

        if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
            return -EINVAL;
        }
        client->opt = option;

        if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
            return -EINVAL;
        }
        /* The previous option must have consumed its whole payload. */
        assert(!client->optlen);
        client->optlen = length;

        if (length > NBD_MAX_BUFFER_SIZE) {
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       length, NBD_MAX_BUFFER_SIZE);
            return -EINVAL;
        }

        trace_nbd_negotiate_options_check_option(option,
                                                 nbd_opt_lookup(option));
        /*
         * TLS is configured but the channel is still the raw socket: only
         * NBD_OPT_STARTTLS is accepted.
         */
        if (client->tlscreds &&
            client->ioc == (QIOChannel *)client->sioc) {
            QIOChannel *tioc;
            if (!fixedNewstyle) {
                error_setg(errp, "Unsupported option 0x%" PRIx32, option);
                return -EINVAL;
            }
            switch (option) {
            case NBD_OPT_STARTTLS:
                if (length) {
                    /* A payload on STARTTLS is rejected and is fatal. */
                    return nbd_reject_length(client, true, errp);
                }
                tioc = nbd_negotiate_handle_starttls(client, errp);
                if (!tioc) {
                    return -EIO;
                }
                ret = 0;
                /* Swap the client's channel for the TLS one. */
                object_unref(OBJECT(client->ioc));
                client->ioc = QIO_CHANNEL(tioc);
                break;

            case NBD_OPT_EXPORT_NAME:
                /* No error reply is sent for this option; just fail. */
                error_setg(errp, "Option 0x%x not permitted before TLS",
                           option);
                return -EINVAL;

            default:
                /*
                 * Reply NBD_REP_ERR_TLS_REQD and keep going, unless the
                 * client asked to abort; in that case a failure to deliver
                 * the reply is not recorded in errp.
                 */
                ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
                                   option == NBD_OPT_ABORT ? NULL : errp,
                                   "Option 0x%" PRIx32
                                   " not permitted before TLS", option);
                if (option == NBD_OPT_ABORT) {
                    return 1;
                }
                break;
            }
        } else if (fixedNewstyle) {
            switch (option) {
            case NBD_OPT_LIST:
                if (length) {
                    ret = nbd_reject_length(client, false, errp);
                } else {
                    ret = nbd_negotiate_handle_list(client, errp);
                }
                break;

            case NBD_OPT_ABORT:
                /* Best-effort ACK; its result is deliberately ignored. */
                nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
                return 1;

            case NBD_OPT_EXPORT_NAME:
                return nbd_negotiate_handle_export_name(client, no_zeroes,
                                                        errp);

            case NBD_OPT_INFO:
            case NBD_OPT_GO:
                ret = nbd_negotiate_handle_info(client, errp);
                if (ret == 1) {
                    /* 1 is only returned for a successful NBD_OPT_GO. */
                    assert(option == NBD_OPT_GO);
                    return 0;
                }
                break;

            case NBD_OPT_STARTTLS:
                /* Reached only when TLS is already active or unconfigured. */
                if (length) {
                    ret = nbd_reject_length(client, false, errp);
                } else if (client->tlscreds) {
                    ret = nbd_negotiate_send_rep_err(client,
                                                     NBD_REP_ERR_INVALID, errp,
                                                     "TLS already enabled");
                } else {
                    ret = nbd_negotiate_send_rep_err(client,
                                                     NBD_REP_ERR_POLICY, errp,
                                                     "TLS not configured");
                }
                break;

            case NBD_OPT_STRUCTURED_REPLY:
                if (length) {
                    ret = nbd_reject_length(client, false, errp);
                } else if (client->structured_reply) {
                    ret = nbd_negotiate_send_rep_err(
                        client, NBD_REP_ERR_INVALID, errp,
                        "structured reply already negotiated");
                } else {
                    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
                    client->structured_reply = true;
                }
                break;

            case NBD_OPT_LIST_META_CONTEXT:
            case NBD_OPT_SET_META_CONTEXT:
                ret = nbd_negotiate_meta_queries(client, &client->export_meta,
                                                 errp);
                break;

            default:
                ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
                                   "Unsupported option %" PRIu32 " (%s)",
                                   option, nbd_opt_lookup(option));
                break;
            }
        } else {
            /*
             * The client did not set NBD_FLAG_C_FIXED_NEWSTYLE: only
             * NBD_OPT_EXPORT_NAME is accepted.
             */
            switch (option) {
            case NBD_OPT_EXPORT_NAME:
                return nbd_negotiate_handle_export_name(client, no_zeroes,
                                                        errp);

            default:
                error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
                           option, nbd_opt_lookup(option));
                return -EINVAL;
            }
        }
        /* Non-negative results mean "keep processing options". */
        if (ret < 0) {
            return ret;
        }
    }
}
1305
1306
1307
1308
1309
1310
1311
1312
1313static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1314{
1315 ERRP_GUARD();
1316 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1317 int ret;
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333 qio_channel_set_blocking(client->ioc, false, NULL);
1334
1335 trace_nbd_negotiate_begin();
1336 memcpy(buf, "NBDMAGIC", 8);
1337
1338 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1339 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1340
1341 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1342 error_prepend(errp, "write failed: ");
1343 return -EINVAL;
1344 }
1345 ret = nbd_negotiate_options(client, errp);
1346 if (ret != 0) {
1347 if (ret < 0) {
1348 error_prepend(errp, "option negotiation failed: ");
1349 }
1350 return ret;
1351 }
1352
1353
1354 if (client->exp && client->exp->common.ctx) {
1355 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
1356 }
1357
1358 assert(!client->optlen);
1359 trace_nbd_negotiate_success();
1360
1361 return 0;
1362}
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372static inline int coroutine_fn
1373nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1374{
1375 bool partial = false;
1376
1377 assert(size);
1378 while (size > 0) {
1379 struct iovec iov = { .iov_base = buffer, .iov_len = size };
1380 ssize_t len;
1381
1382 len = qio_channel_readv(client->ioc, &iov, 1, errp);
1383 if (len == QIO_CHANNEL_ERR_BLOCK) {
1384 client->read_yielding = true;
1385 qio_channel_yield(client->ioc, G_IO_IN);
1386 client->read_yielding = false;
1387 if (client->quiescing) {
1388 return -EAGAIN;
1389 }
1390 continue;
1391 } else if (len < 0) {
1392 return -EIO;
1393 } else if (len == 0) {
1394 if (partial) {
1395 error_setg(errp,
1396 "Unexpected end-of-file before all bytes were read");
1397 return -EIO;
1398 } else {
1399 return 0;
1400 }
1401 }
1402
1403 partial = true;
1404 size -= len;
1405 buffer = (uint8_t *) buffer + len;
1406 }
1407 return 1;
1408}
1409
1410static int nbd_receive_request(NBDClient *client, NBDRequest *request,
1411 Error **errp)
1412{
1413 uint8_t buf[NBD_REQUEST_SIZE];
1414 uint32_t magic;
1415 int ret;
1416
1417 ret = nbd_read_eof(client, buf, sizeof(buf), errp);
1418 if (ret < 0) {
1419 return ret;
1420 }
1421 if (ret == 0) {
1422 return -EIO;
1423 }
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434 magic = ldl_be_p(buf);
1435 request->flags = lduw_be_p(buf + 4);
1436 request->type = lduw_be_p(buf + 6);
1437 request->handle = ldq_be_p(buf + 8);
1438 request->from = ldq_be_p(buf + 16);
1439 request->len = ldl_be_p(buf + 24);
1440
1441 trace_nbd_receive_request(magic, request->flags, request->type,
1442 request->from, request->len);
1443
1444 if (magic != NBD_REQUEST_MAGIC) {
1445 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1446 return -EINVAL;
1447 }
1448 return 0;
1449}
1450
/* Upper bound on a client's nb_requests, asserted in nbd_request_get(). */
#define MAX_NBD_REQUESTS 16
1452
1453void nbd_client_get(NBDClient *client)
1454{
1455 client->refcount++;
1456}
1457
1458void nbd_client_put(NBDClient *client)
1459{
1460 if (--client->refcount == 0) {
1461
1462
1463
1464 assert(client->closing);
1465
1466 qio_channel_detach_aio_context(client->ioc);
1467 object_unref(OBJECT(client->sioc));
1468 object_unref(OBJECT(client->ioc));
1469 if (client->tlscreds) {
1470 object_unref(OBJECT(client->tlscreds));
1471 }
1472 g_free(client->tlsauthz);
1473 if (client->exp) {
1474 QTAILQ_REMOVE(&client->exp->clients, client, next);
1475 blk_exp_unref(&client->exp->common);
1476 }
1477 g_free(client->export_meta.bitmaps);
1478 g_free(client);
1479 }
1480}
1481
1482static void client_close(NBDClient *client, bool negotiated)
1483{
1484 if (client->closing) {
1485 return;
1486 }
1487
1488 client->closing = true;
1489
1490
1491
1492
1493 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1494 NULL);
1495
1496
1497 if (client->close_fn) {
1498 client->close_fn(client, negotiated);
1499 }
1500}
1501
1502static NBDRequestData *nbd_request_get(NBDClient *client)
1503{
1504 NBDRequestData *req;
1505
1506 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1507 client->nb_requests++;
1508
1509 req = g_new0(NBDRequestData, 1);
1510 nbd_client_get(client);
1511 req->client = client;
1512 return req;
1513}
1514
1515static void nbd_request_put(NBDRequestData *req)
1516{
1517 NBDClient *client = req->client;
1518
1519 if (req->data) {
1520 qemu_vfree(req->data);
1521 }
1522 g_free(req);
1523
1524 client->nb_requests--;
1525
1526 if (client->quiescing && client->nb_requests == 0) {
1527 aio_wait_kick();
1528 }
1529
1530 nbd_client_receive_next_request(client);
1531
1532 nbd_client_put(client);
1533}
1534
1535static void blk_aio_attached(AioContext *ctx, void *opaque)
1536{
1537 NBDExport *exp = opaque;
1538 NBDClient *client;
1539
1540 trace_nbd_blk_aio_attached(exp->name, ctx);
1541
1542 exp->common.ctx = ctx;
1543
1544 QTAILQ_FOREACH(client, &exp->clients, next) {
1545 qio_channel_attach_aio_context(client->ioc, ctx);
1546
1547 assert(client->nb_requests == 0);
1548 assert(client->recv_coroutine == NULL);
1549 assert(client->send_coroutine == NULL);
1550 }
1551}
1552
1553static void blk_aio_detach(void *opaque)
1554{
1555 NBDExport *exp = opaque;
1556 NBDClient *client;
1557
1558 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1559
1560 QTAILQ_FOREACH(client, &exp->clients, next) {
1561 qio_channel_detach_aio_context(client->ioc);
1562 }
1563
1564 exp->common.ctx = NULL;
1565}
1566
1567static void nbd_drained_begin(void *opaque)
1568{
1569 NBDExport *exp = opaque;
1570 NBDClient *client;
1571
1572 QTAILQ_FOREACH(client, &exp->clients, next) {
1573 client->quiescing = true;
1574 }
1575}
1576
1577static void nbd_drained_end(void *opaque)
1578{
1579 NBDExport *exp = opaque;
1580 NBDClient *client;
1581
1582 QTAILQ_FOREACH(client, &exp->clients, next) {
1583 client->quiescing = false;
1584 nbd_client_receive_next_request(client);
1585 }
1586}
1587
1588static bool nbd_drained_poll(void *opaque)
1589{
1590 NBDExport *exp = opaque;
1591 NBDClient *client;
1592
1593 QTAILQ_FOREACH(client, &exp->clients, next) {
1594 if (client->nb_requests != 0) {
1595
1596
1597
1598
1599 if (client->recv_coroutine != NULL && client->read_yielding) {
1600 qemu_aio_coroutine_enter(exp->common.ctx,
1601 client->recv_coroutine);
1602 }
1603
1604 return true;
1605 }
1606 }
1607
1608 return false;
1609}
1610
1611static void nbd_eject_notifier(Notifier *n, void *data)
1612{
1613 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1614
1615 blk_exp_request_shutdown(&exp->common);
1616}
1617
1618void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
1619{
1620 NBDExport *nbd_exp = container_of(exp, NBDExport, common);
1621 assert(exp->drv == &blk_exp_nbd);
1622 assert(nbd_exp->eject_notifier_blk == NULL);
1623
1624 blk_ref(blk);
1625 nbd_exp->eject_notifier_blk = blk;
1626 nbd_exp->eject_notifier.notify = nbd_eject_notifier;
1627 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
1628}
1629
1630static const BlockDevOps nbd_block_ops = {
1631 .drained_begin = nbd_drained_begin,
1632 .drained_end = nbd_drained_end,
1633 .drained_poll = nbd_drained_poll,
1634};
1635
1636static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
1637 Error **errp)
1638{
1639 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1640 BlockExportOptionsNbd *arg = &exp_args->u.nbd;
1641 BlockBackend *blk = blk_exp->blk;
1642 int64_t size;
1643 uint64_t perm, shared_perm;
1644 bool readonly = !exp_args->writable;
1645 bool shared = !exp_args->writable;
1646 strList *bitmaps;
1647 size_t i;
1648 int ret;
1649
1650 assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
1651
1652 if (!nbd_server_is_running()) {
1653 error_setg(errp, "NBD server not running");
1654 return -EINVAL;
1655 }
1656
1657 if (!arg->has_name) {
1658 arg->name = exp_args->node_name;
1659 }
1660
1661 if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
1662 error_setg(errp, "export name '%s' too long", arg->name);
1663 return -EINVAL;
1664 }
1665
1666 if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
1667 error_setg(errp, "description '%s' too long", arg->description);
1668 return -EINVAL;
1669 }
1670
1671 if (nbd_export_find(arg->name)) {
1672 error_setg(errp, "NBD server already has export named '%s'", arg->name);
1673 return -EEXIST;
1674 }
1675
1676 size = blk_getlength(blk);
1677 if (size < 0) {
1678 error_setg_errno(errp, -size,
1679 "Failed to determine the NBD export's length");
1680 return size;
1681 }
1682
1683
1684
1685 blk_get_perm(blk, &perm, &shared_perm);
1686 ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
1687 if (ret < 0) {
1688 return ret;
1689 }
1690
1691 QTAILQ_INIT(&exp->clients);
1692 exp->name = g_strdup(arg->name);
1693 exp->description = g_strdup(arg->description);
1694 exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
1695 NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
1696 if (readonly) {
1697 exp->nbdflags |= NBD_FLAG_READ_ONLY;
1698 if (shared) {
1699 exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
1700 }
1701 } else {
1702 exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
1703 NBD_FLAG_SEND_FAST_ZERO);
1704 }
1705 exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
1706
1707 for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
1708 exp->nr_export_bitmaps++;
1709 }
1710 exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
1711 for (i = 0, bitmaps = arg->bitmaps; bitmaps;
1712 i++, bitmaps = bitmaps->next) {
1713 const char *bitmap = bitmaps->value;
1714 BlockDriverState *bs = blk_bs(blk);
1715 BdrvDirtyBitmap *bm = NULL;
1716
1717 while (bs) {
1718 bm = bdrv_find_dirty_bitmap(bs, bitmap);
1719 if (bm != NULL) {
1720 break;
1721 }
1722
1723 bs = bdrv_filter_or_cow_bs(bs);
1724 }
1725
1726 if (bm == NULL) {
1727 ret = -ENOENT;
1728 error_setg(errp, "Bitmap '%s' is not found", bitmap);
1729 goto fail;
1730 }
1731
1732 if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
1733 ret = -EINVAL;
1734 goto fail;
1735 }
1736
1737 if (readonly && bdrv_is_writable(bs) &&
1738 bdrv_dirty_bitmap_enabled(bm)) {
1739 ret = -EINVAL;
1740 error_setg(errp,
1741 "Enabled bitmap '%s' incompatible with readonly export",
1742 bitmap);
1743 goto fail;
1744 }
1745
1746 exp->export_bitmaps[i] = bm;
1747 assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
1748 }
1749
1750
1751 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1752 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
1753 }
1754
1755 exp->allocation_depth = arg->allocation_depth;
1756
1757
1758
1759
1760
1761
1762 blk_set_disable_request_queuing(blk, true);
1763
1764 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1765
1766 blk_set_dev_ops(blk, &nbd_block_ops, exp);
1767
1768 QTAILQ_INSERT_TAIL(&exports, exp, next);
1769
1770 return 0;
1771
1772fail:
1773 g_free(exp->export_bitmaps);
1774 g_free(exp->name);
1775 g_free(exp->description);
1776 return ret;
1777}
1778
1779NBDExport *nbd_export_find(const char *name)
1780{
1781 NBDExport *exp;
1782 QTAILQ_FOREACH(exp, &exports, next) {
1783 if (strcmp(name, exp->name) == 0) {
1784 return exp;
1785 }
1786 }
1787
1788 return NULL;
1789}
1790
1791AioContext *
1792nbd_export_aio_context(NBDExport *exp)
1793{
1794 return exp->common.ctx;
1795}
1796
1797static void nbd_export_request_shutdown(BlockExport *blk_exp)
1798{
1799 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1800 NBDClient *client, *next;
1801
1802 blk_exp_ref(&exp->common);
1803
1804
1805
1806
1807
1808
1809
1810 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1811 client_close(client, true);
1812 }
1813 if (exp->name) {
1814 g_free(exp->name);
1815 exp->name = NULL;
1816 QTAILQ_REMOVE(&exports, exp, next);
1817 }
1818 blk_exp_unref(&exp->common);
1819}
1820
1821static void nbd_export_delete(BlockExport *blk_exp)
1822{
1823 size_t i;
1824 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1825
1826 assert(exp->name == NULL);
1827 assert(QTAILQ_EMPTY(&exp->clients));
1828
1829 g_free(exp->description);
1830 exp->description = NULL;
1831
1832 if (exp->common.blk) {
1833 if (exp->eject_notifier_blk) {
1834 notifier_remove(&exp->eject_notifier);
1835 blk_unref(exp->eject_notifier_blk);
1836 }
1837 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
1838 blk_aio_detach, exp);
1839 blk_set_disable_request_queuing(exp->common.blk, false);
1840 }
1841
1842 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1843 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
1844 }
1845}
1846
1847const BlockExportDriver blk_exp_nbd = {
1848 .type = BLOCK_EXPORT_TYPE_NBD,
1849 .instance_size = sizeof(NBDExport),
1850 .create = nbd_export_create,
1851 .delete = nbd_export_delete,
1852 .request_shutdown = nbd_export_request_shutdown,
1853};
1854
1855static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1856 unsigned niov, Error **errp)
1857{
1858 int ret;
1859
1860 g_assert(qemu_in_coroutine());
1861 qemu_co_mutex_lock(&client->send_lock);
1862 client->send_coroutine = qemu_coroutine_self();
1863
1864 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1865
1866 client->send_coroutine = NULL;
1867 qemu_co_mutex_unlock(&client->send_lock);
1868
1869 return ret;
1870}
1871
1872static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1873 uint64_t handle)
1874{
1875 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1876 stl_be_p(&reply->error, error);
1877 stq_be_p(&reply->handle, handle);
1878}
1879
1880static int nbd_co_send_simple_reply(NBDClient *client,
1881 uint64_t handle,
1882 uint32_t error,
1883 void *data,
1884 size_t len,
1885 Error **errp)
1886{
1887 NBDSimpleReply reply;
1888 int nbd_err = system_errno_to_nbd_errno(error);
1889 struct iovec iov[] = {
1890 {.iov_base = &reply, .iov_len = sizeof(reply)},
1891 {.iov_base = data, .iov_len = len}
1892 };
1893
1894 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1895 len);
1896 set_be_simple_reply(&reply, nbd_err, handle);
1897
1898 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1899}
1900
1901static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1902 uint16_t type, uint64_t handle, uint32_t length)
1903{
1904 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1905 stw_be_p(&chunk->flags, flags);
1906 stw_be_p(&chunk->type, type);
1907 stq_be_p(&chunk->handle, handle);
1908 stl_be_p(&chunk->length, length);
1909}
1910
1911static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1912 uint64_t handle,
1913 Error **errp)
1914{
1915 NBDStructuredReplyChunk chunk;
1916 struct iovec iov[] = {
1917 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1918 };
1919
1920 trace_nbd_co_send_structured_done(handle);
1921 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1922
1923 return nbd_co_send_iov(client, iov, 1, errp);
1924}
1925
1926static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1927 uint64_t handle,
1928 uint64_t offset,
1929 void *data,
1930 size_t size,
1931 bool final,
1932 Error **errp)
1933{
1934 NBDStructuredReadData chunk;
1935 struct iovec iov[] = {
1936 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1937 {.iov_base = data, .iov_len = size}
1938 };
1939
1940 assert(size);
1941 trace_nbd_co_send_structured_read(handle, offset, data, size);
1942 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1943 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1944 sizeof(chunk) - sizeof(chunk.h) + size);
1945 stq_be_p(&chunk.offset, offset);
1946
1947 return nbd_co_send_iov(client, iov, 2, errp);
1948}
1949
1950static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1951 uint64_t handle,
1952 uint32_t error,
1953 const char *msg,
1954 Error **errp)
1955{
1956 NBDStructuredError chunk;
1957 int nbd_err = system_errno_to_nbd_errno(error);
1958 struct iovec iov[] = {
1959 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1960 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1961 };
1962
1963 assert(nbd_err);
1964 trace_nbd_co_send_structured_error(handle, nbd_err,
1965 nbd_err_lookup(nbd_err), msg ? msg : "");
1966 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1967 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1968 stl_be_p(&chunk.error, nbd_err);
1969 stw_be_p(&chunk.message_length, iov[1].iov_len);
1970
1971 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1972}
1973
1974
1975
1976
1977
1978static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
1979 uint64_t handle,
1980 uint64_t offset,
1981 uint8_t *data,
1982 size_t size,
1983 Error **errp)
1984{
1985 int ret = 0;
1986 NBDExport *exp = client->exp;
1987 size_t progress = 0;
1988
1989 while (progress < size) {
1990 int64_t pnum;
1991 int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
1992 offset + progress,
1993 size - progress, &pnum, NULL,
1994 NULL);
1995 bool final;
1996
1997 if (status < 0) {
1998 char *msg = g_strdup_printf("unable to check for holes: %s",
1999 strerror(-status));
2000
2001 ret = nbd_co_send_structured_error(client, handle, -status, msg,
2002 errp);
2003 g_free(msg);
2004 return ret;
2005 }
2006 assert(pnum && pnum <= size - progress);
2007 final = progress + pnum == size;
2008 if (status & BDRV_BLOCK_ZERO) {
2009 NBDStructuredReadHole chunk;
2010 struct iovec iov[] = {
2011 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2012 };
2013
2014 trace_nbd_co_send_structured_read_hole(handle, offset + progress,
2015 pnum);
2016 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
2017 NBD_REPLY_TYPE_OFFSET_HOLE,
2018 handle, sizeof(chunk) - sizeof(chunk.h));
2019 stq_be_p(&chunk.offset, offset + progress);
2020 stl_be_p(&chunk.length, pnum);
2021 ret = nbd_co_send_iov(client, iov, 1, errp);
2022 } else {
2023 ret = blk_pread(exp->common.blk, offset + progress,
2024 data + progress, pnum);
2025 if (ret < 0) {
2026 error_setg_errno(errp, -ret, "reading from file failed");
2027 break;
2028 }
2029 ret = nbd_co_send_structured_read(client, handle, offset + progress,
2030 data + progress, pnum, final,
2031 errp);
2032 }
2033
2034 if (ret < 0) {
2035 break;
2036 }
2037 progress += pnum;
2038 }
2039 return ret;
2040}
2041
/* Growable-up-to-a-limit list of extents collected for one block status reply. */
typedef struct NBDExtentArray {
    NBDExtent *extents;         /* array with room for nb_alloc entries */
    unsigned int nb_alloc;      /* capacity of extents[] */
    unsigned int count;         /* number of entries currently in use */
    uint64_t total_length;      /* sum of the lengths added so far */
    bool can_add;               /* cleared once full or converted */
    bool converted_to_be;       /* entries have been byte-swapped for the wire */
} NBDExtentArray;
2050
2051static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
2052{
2053 NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
2054
2055 ea->nb_alloc = nb_alloc;
2056 ea->extents = g_new(NBDExtent, nb_alloc);
2057 ea->can_add = true;
2058
2059 return ea;
2060}
2061
2062static void nbd_extent_array_free(NBDExtentArray *ea)
2063{
2064 g_free(ea->extents);
2065 g_free(ea);
2066}
2067G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
2068
2069
2070static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
2071{
2072 int i;
2073
2074 assert(!ea->converted_to_be);
2075 ea->can_add = false;
2076 ea->converted_to_be = true;
2077
2078 for (i = 0; i < ea->count; i++) {
2079 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2080 ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
2081 }
2082}
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094static int nbd_extent_array_add(NBDExtentArray *ea,
2095 uint32_t length, uint32_t flags)
2096{
2097 assert(ea->can_add);
2098
2099 if (!length) {
2100 return 0;
2101 }
2102
2103
2104 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2105 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
2106
2107 if (sum <= UINT32_MAX) {
2108 ea->extents[ea->count - 1].length = sum;
2109 ea->total_length += length;
2110 return 0;
2111 }
2112 }
2113
2114 if (ea->count >= ea->nb_alloc) {
2115 ea->can_add = false;
2116 return -1;
2117 }
2118
2119 ea->total_length += length;
2120 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
2121 ea->count++;
2122
2123 return 0;
2124}
2125
2126static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
2127 uint64_t bytes, NBDExtentArray *ea)
2128{
2129 while (bytes) {
2130 uint32_t flags;
2131 int64_t num;
2132 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
2133 NULL, NULL);
2134
2135 if (ret < 0) {
2136 return ret;
2137 }
2138
2139 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2140 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2141
2142 if (nbd_extent_array_add(ea, num, flags) < 0) {
2143 return 0;
2144 }
2145
2146 offset += num;
2147 bytes -= num;
2148 }
2149
2150 return 0;
2151}
2152
2153static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2154 uint64_t bytes, NBDExtentArray *ea)
2155{
2156 while (bytes) {
2157 int64_t num;
2158 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2159 &num);
2160
2161 if (ret < 0) {
2162 return ret;
2163 }
2164
2165 if (nbd_extent_array_add(ea, num, ret) < 0) {
2166 return 0;
2167 }
2168
2169 offset += num;
2170 bytes -= num;
2171 }
2172
2173 return 0;
2174}
2175
2176
2177
2178
2179
2180
2181
2182static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
2183 NBDExtentArray *ea,
2184 bool last, uint32_t context_id, Error **errp)
2185{
2186 NBDStructuredMeta chunk;
2187 struct iovec iov[] = {
2188 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2189 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
2190 };
2191
2192 nbd_extent_array_convert_to_be(ea);
2193
2194 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
2195 last);
2196 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
2197 NBD_REPLY_TYPE_BLOCK_STATUS,
2198 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
2199 stl_be_p(&chunk.context_id, context_id);
2200
2201 return nbd_co_send_iov(client, iov, 2, errp);
2202}
2203
2204
2205static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
2206 BlockDriverState *bs, uint64_t offset,
2207 uint32_t length, bool dont_fragment,
2208 bool last, uint32_t context_id,
2209 Error **errp)
2210{
2211 int ret;
2212 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2213 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2214
2215 if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2216 ret = blockstatus_to_extents(bs, offset, length, ea);
2217 } else {
2218 ret = blockalloc_to_extents(bs, offset, length, ea);
2219 }
2220 if (ret < 0) {
2221 return nbd_co_send_structured_error(
2222 client, handle, -ret, "can't get block status", errp);
2223 }
2224
2225 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2226}
2227
2228
2229static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
2230 uint64_t offset, uint64_t length,
2231 NBDExtentArray *es)
2232{
2233 int64_t start, dirty_start, dirty_count;
2234 int64_t end = offset + length;
2235 bool full = false;
2236
2237 bdrv_dirty_bitmap_lock(bitmap);
2238
2239 for (start = offset;
2240 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
2241 &dirty_start, &dirty_count);
2242 start = dirty_start + dirty_count)
2243 {
2244 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2245 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2246 {
2247 full = true;
2248 break;
2249 }
2250 }
2251
2252 if (!full) {
2253
2254 (void) nbd_extent_array_add(es, end - start, 0);
2255 }
2256
2257 bdrv_dirty_bitmap_unlock(bitmap);
2258}
2259
2260static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
2261 BdrvDirtyBitmap *bitmap, uint64_t offset,
2262 uint32_t length, bool dont_fragment, bool last,
2263 uint32_t context_id, Error **errp)
2264{
2265 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2266 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2267
2268 bitmap_to_extents(bitmap, offset, length, ea);
2269
2270 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2271}
2272
2273
2274
2275
2276
2277
2278
2279
2280static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
2281 Error **errp)
2282{
2283 NBDClient *client = req->client;
2284 int valid_flags;
2285 int ret;
2286
2287 g_assert(qemu_in_coroutine());
2288 assert(client->recv_coroutine == qemu_coroutine_self());
2289 ret = nbd_receive_request(client, request, errp);
2290 if (ret < 0) {
2291 return ret;
2292 }
2293
2294 trace_nbd_co_receive_request_decode_type(request->handle, request->type,
2295 nbd_cmd_lookup(request->type));
2296
2297 if (request->type != NBD_CMD_WRITE) {
2298
2299 req->complete = true;
2300 }
2301
2302 if (request->type == NBD_CMD_DISC) {
2303
2304
2305 return -EIO;
2306 }
2307
2308 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
2309 request->type == NBD_CMD_CACHE)
2310 {
2311 if (request->len > NBD_MAX_BUFFER_SIZE) {
2312 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
2313 request->len, NBD_MAX_BUFFER_SIZE);
2314 return -EINVAL;
2315 }
2316
2317 if (request->type != NBD_CMD_CACHE) {
2318 req->data = blk_try_blockalign(client->exp->common.blk,
2319 request->len);
2320 if (req->data == NULL) {
2321 error_setg(errp, "No memory");
2322 return -ENOMEM;
2323 }
2324 }
2325 }
2326
2327 if (request->type == NBD_CMD_WRITE) {
2328 if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
2329 errp) < 0)
2330 {
2331 return -EIO;
2332 }
2333 req->complete = true;
2334
2335 trace_nbd_co_receive_request_payload_received(request->handle,
2336 request->len);
2337 }
2338
2339
2340 if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
2341 (request->type == NBD_CMD_WRITE ||
2342 request->type == NBD_CMD_WRITE_ZEROES ||
2343 request->type == NBD_CMD_TRIM)) {
2344 error_setg(errp, "Export is read-only");
2345 return -EROFS;
2346 }
2347 if (request->from > client->exp->size ||
2348 request->len > client->exp->size - request->from) {
2349 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
2350 ", Size: %" PRIu64, request->from, request->len,
2351 client->exp->size);
2352 return (request->type == NBD_CMD_WRITE ||
2353 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
2354 }
2355 if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
2356 client->check_align)) {
2357
2358
2359
2360
2361 trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
2362 request->from,
2363 request->len,
2364 client->check_align);
2365 }
2366 valid_flags = NBD_CMD_FLAG_FUA;
2367 if (request->type == NBD_CMD_READ && client->structured_reply) {
2368 valid_flags |= NBD_CMD_FLAG_DF;
2369 } else if (request->type == NBD_CMD_WRITE_ZEROES) {
2370 valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
2371 } else if (request->type == NBD_CMD_BLOCK_STATUS) {
2372 valid_flags |= NBD_CMD_FLAG_REQ_ONE;
2373 }
2374 if (request->flags & ~valid_flags) {
2375 error_setg(errp, "unsupported flags for command %s (got 0x%x)",
2376 nbd_cmd_lookup(request->type), request->flags);
2377 return -EINVAL;
2378 }
2379
2380 return 0;
2381}
2382
2383
2384
2385
2386
2387static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
2388 uint64_t handle,
2389 int ret,
2390 const char *error_msg,
2391 Error **errp)
2392{
2393 if (client->structured_reply && ret < 0) {
2394 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
2395 errp);
2396 } else {
2397 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
2398 NULL, 0, errp);
2399 }
2400}
2401
2402
2403
2404
2405static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
2406 uint8_t *data, Error **errp)
2407{
2408 int ret;
2409 NBDExport *exp = client->exp;
2410
2411 assert(request->type == NBD_CMD_READ);
2412
2413
2414 if (request->flags & NBD_CMD_FLAG_FUA) {
2415 ret = blk_co_flush(exp->common.blk);
2416 if (ret < 0) {
2417 return nbd_send_generic_reply(client, request->handle, ret,
2418 "flush failed", errp);
2419 }
2420 }
2421
2422 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
2423 request->len)
2424 {
2425 return nbd_co_send_sparse_read(client, request->handle, request->from,
2426 data, request->len, errp);
2427 }
2428
2429 ret = blk_pread(exp->common.blk, request->from, data, request->len);
2430 if (ret < 0) {
2431 return nbd_send_generic_reply(client, request->handle, ret,
2432 "reading from file failed", errp);
2433 }
2434
2435 if (client->structured_reply) {
2436 if (request->len) {
2437 return nbd_co_send_structured_read(client, request->handle,
2438 request->from, data,
2439 request->len, true, errp);
2440 } else {
2441 return nbd_co_send_structured_done(client, request->handle, errp);
2442 }
2443 } else {
2444 return nbd_co_send_simple_reply(client, request->handle, 0,
2445 data, request->len, errp);
2446 }
2447}
2448
2449
2450
2451
2452
2453
2454
2455
2456static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
2457 Error **errp)
2458{
2459 int ret;
2460 NBDExport *exp = client->exp;
2461
2462 assert(request->type == NBD_CMD_CACHE);
2463
2464 ret = blk_co_preadv(exp->common.blk, request->from, request->len,
2465 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
2466
2467 return nbd_send_generic_reply(client, request->handle, ret,
2468 "caching data failed", errp);
2469}
2470
2471
2472
2473
2474static coroutine_fn int nbd_handle_request(NBDClient *client,
2475 NBDRequest *request,
2476 uint8_t *data, Error **errp)
2477{
2478 int ret;
2479 int flags;
2480 NBDExport *exp = client->exp;
2481 char *msg;
2482 size_t i;
2483
2484 switch (request->type) {
2485 case NBD_CMD_CACHE:
2486 return nbd_do_cmd_cache(client, request, errp);
2487
2488 case NBD_CMD_READ:
2489 return nbd_do_cmd_read(client, request, data, errp);
2490
2491 case NBD_CMD_WRITE:
2492 flags = 0;
2493 if (request->flags & NBD_CMD_FLAG_FUA) {
2494 flags |= BDRV_REQ_FUA;
2495 }
2496 ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
2497 flags);
2498 return nbd_send_generic_reply(client, request->handle, ret,
2499 "writing to file failed", errp);
2500
2501 case NBD_CMD_WRITE_ZEROES:
2502 flags = 0;
2503 if (request->flags & NBD_CMD_FLAG_FUA) {
2504 flags |= BDRV_REQ_FUA;
2505 }
2506 if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
2507 flags |= BDRV_REQ_MAY_UNMAP;
2508 }
2509 if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
2510 flags |= BDRV_REQ_NO_FALLBACK;
2511 }
2512 ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len,
2513 flags);
2514 return nbd_send_generic_reply(client, request->handle, ret,
2515 "writing to file failed", errp);
2516
2517 case NBD_CMD_DISC:
2518
2519 abort();
2520
2521 case NBD_CMD_FLUSH:
2522 ret = blk_co_flush(exp->common.blk);
2523 return nbd_send_generic_reply(client, request->handle, ret,
2524 "flush failed", errp);
2525
2526 case NBD_CMD_TRIM:
2527 ret = blk_co_pdiscard(exp->common.blk, request->from, request->len);
2528 if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
2529 ret = blk_co_flush(exp->common.blk);
2530 }
2531 return nbd_send_generic_reply(client, request->handle, ret,
2532 "discard failed", errp);
2533
2534 case NBD_CMD_BLOCK_STATUS:
2535 if (!request->len) {
2536 return nbd_send_generic_reply(client, request->handle, -EINVAL,
2537 "need non-zero length", errp);
2538 }
2539 if (client->export_meta.count) {
2540 bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
2541 int contexts_remaining = client->export_meta.count;
2542
2543 if (client->export_meta.base_allocation) {
2544 ret = nbd_co_send_block_status(client, request->handle,
2545 blk_bs(exp->common.blk),
2546 request->from,
2547 request->len, dont_fragment,
2548 !--contexts_remaining,
2549 NBD_META_ID_BASE_ALLOCATION,
2550 errp);
2551 if (ret < 0) {
2552 return ret;
2553 }
2554 }
2555
2556 if (client->export_meta.allocation_depth) {
2557 ret = nbd_co_send_block_status(client, request->handle,
2558 blk_bs(exp->common.blk),
2559 request->from, request->len,
2560 dont_fragment,
2561 !--contexts_remaining,
2562 NBD_META_ID_ALLOCATION_DEPTH,
2563 errp);
2564 if (ret < 0) {
2565 return ret;
2566 }
2567 }
2568
2569 for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
2570 if (!client->export_meta.bitmaps[i]) {
2571 continue;
2572 }
2573 ret = nbd_co_send_bitmap(client, request->handle,
2574 client->exp->export_bitmaps[i],
2575 request->from, request->len,
2576 dont_fragment, !--contexts_remaining,
2577 NBD_META_ID_DIRTY_BITMAP + i, errp);
2578 if (ret < 0) {
2579 return ret;
2580 }
2581 }
2582
2583 assert(!contexts_remaining);
2584
2585 return 0;
2586 } else {
2587 return nbd_send_generic_reply(client, request->handle, -EINVAL,
2588 "CMD_BLOCK_STATUS not negotiated",
2589 errp);
2590 }
2591
2592 default:
2593 msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
2594 request->type);
2595 ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
2596 errp);
2597 g_free(msg);
2598 return ret;
2599 }
2600}
2601
2602
2603static coroutine_fn void nbd_trip(void *opaque)
2604{
2605 NBDClient *client = opaque;
2606 NBDRequestData *req;
2607 NBDRequest request = { 0 };
2608 int ret;
2609 Error *local_err = NULL;
2610
2611 trace_nbd_trip();
2612 if (client->closing) {
2613 nbd_client_put(client);
2614 return;
2615 }
2616
2617 if (client->quiescing) {
2618
2619
2620
2621
2622 nbd_client_put(client);
2623 client->recv_coroutine = NULL;
2624 aio_wait_kick();
2625 return;
2626 }
2627
2628 req = nbd_request_get(client);
2629 ret = nbd_co_receive_request(req, &request, &local_err);
2630 client->recv_coroutine = NULL;
2631
2632 if (client->closing) {
2633
2634
2635
2636
2637 goto done;
2638 }
2639
2640 if (ret == -EAGAIN) {
2641 assert(client->quiescing);
2642 goto done;
2643 }
2644
2645 nbd_client_receive_next_request(client);
2646 if (ret == -EIO) {
2647 goto disconnect;
2648 }
2649
2650 if (ret < 0) {
2651
2652
2653 Error *export_err = local_err;
2654
2655 local_err = NULL;
2656 ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
2657 error_get_pretty(export_err), &local_err);
2658 error_free(export_err);
2659 } else {
2660 ret = nbd_handle_request(client, &request, req->data, &local_err);
2661 }
2662 if (ret < 0) {
2663 error_prepend(&local_err, "Failed to send reply: ");
2664 goto disconnect;
2665 }
2666
2667
2668
2669
2670 if (!req->complete) {
2671 error_setg(&local_err, "Request handling failed in intermediate state");
2672 goto disconnect;
2673 }
2674
2675done:
2676 nbd_request_put(req);
2677 nbd_client_put(client);
2678 return;
2679
2680disconnect:
2681 if (local_err) {
2682 error_reportf_err(local_err, "Disconnect client, due to: ");
2683 }
2684 nbd_request_put(req);
2685 client_close(client, true);
2686 nbd_client_put(client);
2687}
2688
2689static void nbd_client_receive_next_request(NBDClient *client)
2690{
2691 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
2692 !client->quiescing) {
2693 nbd_client_get(client);
2694 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2695 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
2696 }
2697}
2698
2699static coroutine_fn void nbd_co_client_start(void *opaque)
2700{
2701 NBDClient *client = opaque;
2702 Error *local_err = NULL;
2703
2704 qemu_co_mutex_init(&client->send_lock);
2705
2706 if (nbd_negotiate(client, &local_err)) {
2707 if (local_err) {
2708 error_report_err(local_err);
2709 }
2710 client_close(client, false);
2711 return;
2712 }
2713
2714 nbd_client_receive_next_request(client);
2715}
2716
2717
2718
2719
2720
2721
2722void nbd_client_new(QIOChannelSocket *sioc,
2723 QCryptoTLSCreds *tlscreds,
2724 const char *tlsauthz,
2725 void (*close_fn)(NBDClient *, bool))
2726{
2727 NBDClient *client;
2728 Coroutine *co;
2729
2730 client = g_new0(NBDClient, 1);
2731 client->refcount = 1;
2732 client->tlscreds = tlscreds;
2733 if (tlscreds) {
2734 object_ref(OBJECT(client->tlscreds));
2735 }
2736 client->tlsauthz = g_strdup(tlsauthz);
2737 client->sioc = sioc;
2738 object_ref(OBJECT(client->sioc));
2739 client->ioc = QIO_CHANNEL(sioc);
2740 object_ref(OBJECT(client->ioc));
2741 client->close_fn = close_fn;
2742
2743 co = qemu_coroutine_create(nbd_co_client_start, client);
2744 qemu_coroutine_enter(co);
2745}
2746