1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21
22#include "block/export.h"
23#include "qapi/error.h"
24#include "qemu/queue.h"
25#include "trace.h"
26#include "nbd-internal.h"
27#include "qemu/units.h"
28#include "qemu/memalign.h"
29
/*
 * Meta-context ids sent to the client.  Dirty bitmap number i of an export
 * is advertised as NBD_META_ID_DIRTY_BITMAP + i, so this id must remain the
 * largest of the fixed ids.
 */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
#define NBD_META_ID_DIRTY_BITMAP 2

/* Limit on block-status extents: 1 MiB divided by 8. */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
42
43static int system_errno_to_nbd_errno(int err)
44{
45 switch (err) {
46 case 0:
47 return NBD_SUCCESS;
48 case EPERM:
49 case EROFS:
50 return NBD_EPERM;
51 case EIO:
52 return NBD_EIO;
53 case ENOMEM:
54 return NBD_ENOMEM;
55#ifdef EDQUOT
56 case EDQUOT:
57#endif
58 case EFBIG:
59 case ENOSPC:
60 return NBD_ENOSPC;
61 case EOVERFLOW:
62 return NBD_EOVERFLOW;
63 case ENOTSUP:
64#if ENOTSUP != EOPNOTSUPP
65 case EOPNOTSUPP:
66#endif
67 return NBD_ENOTSUP;
68 case ESHUTDOWN:
69 return NBD_ESHUTDOWN;
70 case EINVAL:
71 default:
72 return NBD_EINVAL;
73 }
74}
75
76
77
78typedef struct NBDRequestData NBDRequestData;
79
80struct NBDRequestData {
81 NBDClient *client;
82 uint8_t *data;
83 bool complete;
84};
85
86struct NBDExport {
87 BlockExport common;
88
89 char *name;
90 char *description;
91 uint64_t size;
92 uint16_t nbdflags;
93 QTAILQ_HEAD(, NBDClient) clients;
94 QTAILQ_ENTRY(NBDExport) next;
95
96 BlockBackend *eject_notifier_blk;
97 Notifier eject_notifier;
98
99 bool allocation_depth;
100 BdrvDirtyBitmap **export_bitmaps;
101 size_t nr_export_bitmaps;
102};
103
104static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
105
106
107
108
109typedef struct NBDExportMetaContexts {
110 NBDExport *exp;
111 size_t count;
112 bool base_allocation;
113 bool allocation_depth;
114 bool *bitmaps;
115
116
117
118} NBDExportMetaContexts;
119
120struct NBDClient {
121 int refcount;
122 void (*close_fn)(NBDClient *client, bool negotiated);
123
124 NBDExport *exp;
125 QCryptoTLSCreds *tlscreds;
126 char *tlsauthz;
127 QIOChannelSocket *sioc;
128 QIOChannel *ioc;
129
130 Coroutine *recv_coroutine;
131
132 CoMutex send_lock;
133 Coroutine *send_coroutine;
134
135 bool read_yielding;
136 bool quiescing;
137
138 QTAILQ_ENTRY(NBDClient) next;
139 int nb_requests;
140 bool closing;
141
142 uint32_t check_align;
143
144 bool structured_reply;
145 NBDExportMetaContexts export_meta;
146
147 uint32_t opt;
148 uint32_t optlen;
149
150};
151
152static void nbd_client_receive_next_request(NBDClient *client);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
182 uint32_t type, uint32_t length)
183{
184 stq_be_p(&rep->magic, NBD_REP_MAGIC);
185 stl_be_p(&rep->option, option);
186 stl_be_p(&rep->type, type);
187 stl_be_p(&rep->length, length);
188}
189
190
191
192static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
193 uint32_t len, Error **errp)
194{
195 NBDOptionReply rep;
196
197 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
198 type, nbd_rep_lookup(type), len);
199
200 assert(len < NBD_MAX_BUFFER_SIZE);
201
202 set_be_option_rep(&rep, client->opt, type, len);
203 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
204}
205
206
207
208static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
209 Error **errp)
210{
211 return nbd_negotiate_send_rep_len(client, type, 0, errp);
212}
213
214
215
216static int G_GNUC_PRINTF(4, 0)
217nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
218 Error **errp, const char *fmt, va_list va)
219{
220 ERRP_GUARD();
221 g_autofree char *msg = NULL;
222 int ret;
223 size_t len;
224
225 msg = g_strdup_vprintf(fmt, va);
226 len = strlen(msg);
227 assert(len < NBD_MAX_STRING_SIZE);
228 trace_nbd_negotiate_send_rep_err(msg);
229 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
230 if (ret < 0) {
231 return ret;
232 }
233 if (nbd_write(client->ioc, msg, len, errp) < 0) {
234 error_prepend(errp, "write failed (error message): ");
235 return -EIO;
236 }
237
238 return 0;
239}
240
241
242
243
/*
 * Return a newly allocated copy of @name suitable for an error message.
 * Names of 80 bytes or more are cut to 80 bytes followed by "...".
 * The caller frees the result with g_free().
 */
static char *
nbd_sanitize_name(const char *name)
{
    const size_t limit = 80;

    if (strnlen(name, limit) >= limit) {
        return g_strdup_printf("%.80s...", name);
    }

    return g_strdup(name);
}
253
254
255
256static int G_GNUC_PRINTF(4, 5)
257nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
258 Error **errp, const char *fmt, ...)
259{
260 va_list va;
261 int ret;
262
263 va_start(va, fmt);
264 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
265 va_end(va);
266 return ret;
267}
268
269
270
271
272static int G_GNUC_PRINTF(4, 0)
273nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
274 const char *fmt, va_list va)
275{
276 int ret = nbd_drop(client->ioc, client->optlen, errp);
277
278 client->optlen = 0;
279 if (!ret) {
280 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
281 }
282 return ret;
283}
284
285static int G_GNUC_PRINTF(4, 5)
286nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
287 const char *fmt, ...)
288{
289 int ret;
290 va_list va;
291
292 va_start(va, fmt);
293 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
294 va_end(va);
295
296 return ret;
297}
298
299static int G_GNUC_PRINTF(3, 4)
300nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
301{
302 int ret;
303 va_list va;
304
305 va_start(va, fmt);
306 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
307 va_end(va);
308
309 return ret;
310}
311
312
313
314
315
316static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
317 bool check_nul, Error **errp)
318{
319 if (size > client->optlen) {
320 return nbd_opt_invalid(client, errp,
321 "Inconsistent lengths in option %s",
322 nbd_opt_lookup(client->opt));
323 }
324 client->optlen -= size;
325 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
326 return -EIO;
327 }
328
329 if (check_nul && strnlen(buffer, size) != size) {
330 return nbd_opt_invalid(client, errp,
331 "Unexpected embedded NUL in option %s",
332 nbd_opt_lookup(client->opt));
333 }
334 return 1;
335}
336
337
338
339
340static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
341{
342 if (size > client->optlen) {
343 return nbd_opt_invalid(client, errp,
344 "Inconsistent lengths in option %s",
345 nbd_opt_lookup(client->opt));
346 }
347 client->optlen -= size;
348 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
349}
350
351
352
353
354
355
356
357
358
359
360
361
362
363static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
364 Error **errp)
365{
366 int ret;
367 uint32_t len;
368 g_autofree char *local_name = NULL;
369
370 *name = NULL;
371 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
372 if (ret <= 0) {
373 return ret;
374 }
375 len = cpu_to_be32(len);
376
377 if (len > NBD_MAX_STRING_SIZE) {
378 return nbd_opt_invalid(client, errp,
379 "Invalid name length: %" PRIu32, len);
380 }
381
382 local_name = g_malloc(len + 1);
383 ret = nbd_opt_read(client, local_name, len, true, errp);
384 if (ret <= 0) {
385 return ret;
386 }
387 local_name[len] = '\0';
388
389 if (length) {
390 *length = len;
391 }
392 *name = g_steal_pointer(&local_name);
393
394 return 1;
395}
396
397
398
399static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
400 Error **errp)
401{
402 ERRP_GUARD();
403 size_t name_len, desc_len;
404 uint32_t len;
405 const char *name = exp->name ? exp->name : "";
406 const char *desc = exp->description ? exp->description : "";
407 QIOChannel *ioc = client->ioc;
408 int ret;
409
410 trace_nbd_negotiate_send_rep_list(name, desc);
411 name_len = strlen(name);
412 desc_len = strlen(desc);
413 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
414 len = name_len + desc_len + sizeof(len);
415 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
416 if (ret < 0) {
417 return ret;
418 }
419
420 len = cpu_to_be32(name_len);
421 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
422 error_prepend(errp, "write failed (name length): ");
423 return -EINVAL;
424 }
425
426 if (nbd_write(ioc, name, name_len, errp) < 0) {
427 error_prepend(errp, "write failed (name buffer): ");
428 return -EINVAL;
429 }
430
431 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
432 error_prepend(errp, "write failed (description buffer): ");
433 return -EINVAL;
434 }
435
436 return 0;
437}
438
439
440
441static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
442{
443 NBDExport *exp;
444 assert(client->opt == NBD_OPT_LIST);
445
446
447 QTAILQ_FOREACH(exp, &exports, next) {
448 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
449 return -EINVAL;
450 }
451 }
452
453 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
454}
455
456static void nbd_check_meta_export(NBDClient *client)
457{
458 if (client->exp != client->export_meta.exp) {
459 client->export_meta.count = 0;
460 }
461}
462
463
464
465static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
466 Error **errp)
467{
468 ERRP_GUARD();
469 g_autofree char *name = NULL;
470 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
471 size_t len;
472 int ret;
473 uint16_t myflags;
474
475
476
477
478
479
480
481
482 trace_nbd_negotiate_handle_export_name();
483 if (client->optlen > NBD_MAX_STRING_SIZE) {
484 error_setg(errp, "Bad length received");
485 return -EINVAL;
486 }
487 name = g_malloc(client->optlen + 1);
488 if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
489 return -EIO;
490 }
491 name[client->optlen] = '\0';
492 client->optlen = 0;
493
494 trace_nbd_negotiate_handle_export_name_request(name);
495
496 client->exp = nbd_export_find(name);
497 if (!client->exp) {
498 error_setg(errp, "export not found");
499 return -EINVAL;
500 }
501
502 myflags = client->exp->nbdflags;
503 if (client->structured_reply) {
504 myflags |= NBD_FLAG_SEND_DF;
505 }
506 trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
507 stq_be_p(buf, client->exp->size);
508 stw_be_p(buf + 8, myflags);
509 len = no_zeroes ? 10 : sizeof(buf);
510 ret = nbd_write(client->ioc, buf, len, errp);
511 if (ret < 0) {
512 error_prepend(errp, "write failed: ");
513 return ret;
514 }
515
516 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
517 blk_exp_ref(&client->exp->common);
518 nbd_check_meta_export(client);
519
520 return 0;
521}
522
523
524
525
526static int nbd_negotiate_send_info(NBDClient *client,
527 uint16_t info, uint32_t length, void *buf,
528 Error **errp)
529{
530 int rc;
531
532 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
533 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
534 sizeof(info) + length, errp);
535 if (rc < 0) {
536 return rc;
537 }
538 info = cpu_to_be16(info);
539 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
540 return -EIO;
541 }
542 if (nbd_write(client->ioc, buf, length, errp) < 0) {
543 return -EIO;
544 }
545 return 0;
546}
547
548
549
550
551
552
553
554
555static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
556{
557 int ret;
558
559 assert(client->optlen);
560 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
561 nbd_opt_lookup(client->opt));
562 if (fatal && !ret) {
563 error_setg(errp, "option '%s' has unexpected length",
564 nbd_opt_lookup(client->opt));
565 return -EINVAL;
566 }
567 return ret;
568}
569
570
571
572
573static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
574{
575 int rc;
576 g_autofree char *name = NULL;
577 NBDExport *exp;
578 uint16_t requests;
579 uint16_t request;
580 uint32_t namelen = 0;
581 bool sendname = false;
582 bool blocksize = false;
583 uint32_t sizes[3];
584 char buf[sizeof(uint64_t) + sizeof(uint16_t)];
585 uint32_t check_align = 0;
586 uint16_t myflags;
587
588
589
590
591
592
593
594 rc = nbd_opt_read_name(client, &name, &namelen, errp);
595 if (rc <= 0) {
596 return rc;
597 }
598 trace_nbd_negotiate_handle_export_name_request(name);
599
600 rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
601 if (rc <= 0) {
602 return rc;
603 }
604 requests = be16_to_cpu(requests);
605 trace_nbd_negotiate_handle_info_requests(requests);
606 while (requests--) {
607 rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
608 if (rc <= 0) {
609 return rc;
610 }
611 request = be16_to_cpu(request);
612 trace_nbd_negotiate_handle_info_request(request,
613 nbd_info_lookup(request));
614
615
616
617 switch (request) {
618 case NBD_INFO_NAME:
619 sendname = true;
620 break;
621 case NBD_INFO_BLOCK_SIZE:
622 blocksize = true;
623 break;
624 }
625 }
626 if (client->optlen) {
627 return nbd_reject_length(client, false, errp);
628 }
629
630 exp = nbd_export_find(name);
631 if (!exp) {
632 g_autofree char *sane_name = nbd_sanitize_name(name);
633
634 return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
635 errp, "export '%s' not present",
636 sane_name);
637 }
638
639
640 if (sendname) {
641 rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
642 errp);
643 if (rc < 0) {
644 return rc;
645 }
646 }
647
648
649
650 if (exp->description) {
651 size_t len = strlen(exp->description);
652
653 assert(len <= NBD_MAX_STRING_SIZE);
654 rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
655 len, exp->description, errp);
656 if (rc < 0) {
657 return rc;
658 }
659 }
660
661
662
663
664
665 if (client->opt == NBD_OPT_INFO || blocksize) {
666 check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
667 } else {
668 sizes[0] = 1;
669 }
670 assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
671
672
673 sizes[1] = MAX(4096, sizes[0]);
674
675 sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
676 trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
677 sizes[0] = cpu_to_be32(sizes[0]);
678 sizes[1] = cpu_to_be32(sizes[1]);
679 sizes[2] = cpu_to_be32(sizes[2]);
680 rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
681 sizeof(sizes), sizes, errp);
682 if (rc < 0) {
683 return rc;
684 }
685
686
687 myflags = exp->nbdflags;
688 if (client->structured_reply) {
689 myflags |= NBD_FLAG_SEND_DF;
690 }
691 trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
692 stq_be_p(buf, exp->size);
693 stw_be_p(buf + 8, myflags);
694 rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
695 sizeof(buf), buf, errp);
696 if (rc < 0) {
697 return rc;
698 }
699
700
701
702
703
704
705
706 if (client->opt == NBD_OPT_INFO && !blocksize &&
707 blk_get_request_alignment(exp->common.blk) > 1) {
708 return nbd_negotiate_send_rep_err(client,
709 NBD_REP_ERR_BLOCK_SIZE_REQD,
710 errp,
711 "request NBD_INFO_BLOCK_SIZE to "
712 "use this export");
713 }
714
715
716 rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
717 if (rc < 0) {
718 return rc;
719 }
720
721 if (client->opt == NBD_OPT_GO) {
722 client->exp = exp;
723 client->check_align = check_align;
724 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
725 blk_exp_ref(&client->exp->common);
726 nbd_check_meta_export(client);
727 rc = 1;
728 }
729 return rc;
730}
731
732
733
734
735static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
736 Error **errp)
737{
738 QIOChannel *ioc;
739 QIOChannelTLS *tioc;
740 struct NBDTLSHandshakeData data = { 0 };
741
742 assert(client->opt == NBD_OPT_STARTTLS);
743
744 trace_nbd_negotiate_handle_starttls();
745 ioc = client->ioc;
746
747 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
748 return NULL;
749 }
750
751 tioc = qio_channel_tls_new_server(ioc,
752 client->tlscreds,
753 client->tlsauthz,
754 errp);
755 if (!tioc) {
756 return NULL;
757 }
758
759 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
760 trace_nbd_negotiate_handle_starttls_handshake();
761 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
762 qio_channel_tls_handshake(tioc,
763 nbd_tls_handshake,
764 &data,
765 NULL,
766 NULL);
767
768 if (!data.complete) {
769 g_main_loop_run(data.loop);
770 }
771 g_main_loop_unref(data.loop);
772 if (data.error) {
773 object_unref(OBJECT(tioc));
774 error_propagate(errp, data.error);
775 return NULL;
776 }
777
778 return QIO_CHANNEL(tioc);
779}
780
781
782
783
784
785
786
787static int nbd_negotiate_send_meta_context(NBDClient *client,
788 const char *context,
789 uint32_t context_id,
790 Error **errp)
791{
792 NBDOptionReplyMetaContext opt;
793 struct iovec iov[] = {
794 {.iov_base = &opt, .iov_len = sizeof(opt)},
795 {.iov_base = (void *)context, .iov_len = strlen(context)}
796 };
797
798 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
799 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
800 context_id = 0;
801 }
802
803 trace_nbd_negotiate_meta_query_reply(context, context_id);
804 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
805 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
806 stl_be_p(&opt.context_id, context_id);
807
808 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
809}
810
811
812
813
814
815static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
816 const char *query)
817{
818 if (!*query) {
819 trace_nbd_negotiate_meta_query_parse("empty");
820 return client->opt == NBD_OPT_LIST_META_CONTEXT;
821 }
822 if (strcmp(query, pattern) == 0) {
823 trace_nbd_negotiate_meta_query_parse(pattern);
824 return true;
825 }
826 trace_nbd_negotiate_meta_query_skip("pattern not matched");
827 return false;
828}
829
830
831
832
/*
 * If *@str begins with @prefix, advance *@str past it and return true.
 * Otherwise leave *@str untouched and return false.
 */
static bool nbd_strshift(const char **str, const char *prefix)
{
    const size_t prefix_len = strlen(prefix);

    if (strncmp(*str, prefix, prefix_len) != 0) {
        return false;
    }

    *str += prefix_len;
    return true;
}
843
844
845
846
847
848
849static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
850 const char *query)
851{
852 if (!nbd_strshift(&query, "base:")) {
853 return false;
854 }
855 trace_nbd_negotiate_meta_query_parse("base:");
856
857 if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
858 meta->base_allocation = true;
859 }
860 return true;
861}
862
863
864
865
866
867
868
869static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
870 const char *query)
871{
872 size_t i;
873
874 if (!nbd_strshift(&query, "qemu:")) {
875 return false;
876 }
877 trace_nbd_negotiate_meta_query_parse("qemu:");
878
879 if (!*query) {
880 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
881 meta->allocation_depth = meta->exp->allocation_depth;
882 if (meta->exp->nr_export_bitmaps) {
883 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
884 }
885 }
886 trace_nbd_negotiate_meta_query_parse("empty");
887 return true;
888 }
889
890 if (strcmp(query, "allocation-depth") == 0) {
891 trace_nbd_negotiate_meta_query_parse("allocation-depth");
892 meta->allocation_depth = meta->exp->allocation_depth;
893 return true;
894 }
895
896 if (nbd_strshift(&query, "dirty-bitmap:")) {
897 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
898 if (!*query) {
899 if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
900 meta->exp->nr_export_bitmaps) {
901 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
902 }
903 trace_nbd_negotiate_meta_query_parse("empty");
904 return true;
905 }
906
907 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
908 const char *bm_name;
909
910 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
911 if (strcmp(bm_name, query) == 0) {
912 meta->bitmaps[i] = true;
913 trace_nbd_negotiate_meta_query_parse(query);
914 return true;
915 }
916 }
917 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
918 return true;
919 }
920
921 trace_nbd_negotiate_meta_query_skip("unknown qemu context");
922 return true;
923}
924
925
926
927
928
929
930
931
932
933
934static int nbd_negotiate_meta_query(NBDClient *client,
935 NBDExportMetaContexts *meta, Error **errp)
936{
937 int ret;
938 g_autofree char *query = NULL;
939 uint32_t len;
940
941 ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
942 if (ret <= 0) {
943 return ret;
944 }
945 len = cpu_to_be32(len);
946
947 if (len > NBD_MAX_STRING_SIZE) {
948 trace_nbd_negotiate_meta_query_skip("length too long");
949 return nbd_opt_skip(client, len, errp);
950 }
951
952 query = g_malloc(len + 1);
953 ret = nbd_opt_read(client, query, len, true, errp);
954 if (ret <= 0) {
955 return ret;
956 }
957 query[len] = '\0';
958
959 if (nbd_meta_base_query(client, meta, query)) {
960 return 1;
961 }
962 if (nbd_meta_qemu_query(client, meta, query)) {
963 return 1;
964 }
965
966 trace_nbd_negotiate_meta_query_skip("unknown namespace");
967 return 1;
968}
969
970
971
972
973
974static int nbd_negotiate_meta_queries(NBDClient *client,
975 NBDExportMetaContexts *meta, Error **errp)
976{
977 int ret;
978 g_autofree char *export_name = NULL;
979
980 g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
981 NBDExportMetaContexts local_meta = {0};
982 uint32_t nb_queries;
983 size_t i;
984 size_t count = 0;
985
986 if (client->opt == NBD_OPT_SET_META_CONTEXT && !client->structured_reply) {
987 return nbd_opt_invalid(client, errp,
988 "request option '%s' when structured reply "
989 "is not negotiated",
990 nbd_opt_lookup(client->opt));
991 }
992
993 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
994
995 meta = &local_meta;
996 }
997
998 g_free(meta->bitmaps);
999 memset(meta, 0, sizeof(*meta));
1000
1001 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
1002 if (ret <= 0) {
1003 return ret;
1004 }
1005
1006 meta->exp = nbd_export_find(export_name);
1007 if (meta->exp == NULL) {
1008 g_autofree char *sane_name = nbd_sanitize_name(export_name);
1009
1010 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
1011 "export '%s' not present", sane_name);
1012 }
1013 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
1014 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1015 bitmaps = meta->bitmaps;
1016 }
1017
1018 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1019 if (ret <= 0) {
1020 return ret;
1021 }
1022 nb_queries = cpu_to_be32(nb_queries);
1023 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1024 export_name, nb_queries);
1025
1026 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1027
1028 meta->base_allocation = true;
1029 meta->allocation_depth = meta->exp->allocation_depth;
1030 if (meta->exp->nr_export_bitmaps) {
1031 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
1032 }
1033 } else {
1034 for (i = 0; i < nb_queries; ++i) {
1035 ret = nbd_negotiate_meta_query(client, meta, errp);
1036 if (ret <= 0) {
1037 return ret;
1038 }
1039 }
1040 }
1041
1042 if (meta->base_allocation) {
1043 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1044 NBD_META_ID_BASE_ALLOCATION,
1045 errp);
1046 if (ret < 0) {
1047 return ret;
1048 }
1049 count++;
1050 }
1051
1052 if (meta->allocation_depth) {
1053 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
1054 NBD_META_ID_ALLOCATION_DEPTH,
1055 errp);
1056 if (ret < 0) {
1057 return ret;
1058 }
1059 count++;
1060 }
1061
1062 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
1063 const char *bm_name;
1064 g_autofree char *context = NULL;
1065
1066 if (!meta->bitmaps[i]) {
1067 continue;
1068 }
1069
1070 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
1071 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
1072
1073 ret = nbd_negotiate_send_meta_context(client, context,
1074 NBD_META_ID_DIRTY_BITMAP + i,
1075 errp);
1076 if (ret < 0) {
1077 return ret;
1078 }
1079 count++;
1080 }
1081
1082 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1083 if (ret == 0) {
1084 meta->count = count;
1085 }
1086
1087 return ret;
1088}
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099static int nbd_negotiate_options(NBDClient *client, Error **errp)
1100{
1101 uint32_t flags;
1102 bool fixedNewstyle = false;
1103 bool no_zeroes = false;
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120 if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
1121 return -EIO;
1122 }
1123 trace_nbd_negotiate_options_flags(flags);
1124 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
1125 fixedNewstyle = true;
1126 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1127 }
1128 if (flags & NBD_FLAG_C_NO_ZEROES) {
1129 no_zeroes = true;
1130 flags &= ~NBD_FLAG_C_NO_ZEROES;
1131 }
1132 if (flags != 0) {
1133 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1134 return -EINVAL;
1135 }
1136
1137 while (1) {
1138 int ret;
1139 uint32_t option, length;
1140 uint64_t magic;
1141
1142 if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1143 return -EINVAL;
1144 }
1145 trace_nbd_negotiate_options_check_magic(magic);
1146 if (magic != NBD_OPTS_MAGIC) {
1147 error_setg(errp, "Bad magic received");
1148 return -EINVAL;
1149 }
1150
1151 if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1152 return -EINVAL;
1153 }
1154 client->opt = option;
1155
1156 if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1157 return -EINVAL;
1158 }
1159 assert(!client->optlen);
1160 client->optlen = length;
1161
1162 if (length > NBD_MAX_BUFFER_SIZE) {
1163 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
1164 length, NBD_MAX_BUFFER_SIZE);
1165 return -EINVAL;
1166 }
1167
1168 trace_nbd_negotiate_options_check_option(option,
1169 nbd_opt_lookup(option));
1170 if (client->tlscreds &&
1171 client->ioc == (QIOChannel *)client->sioc) {
1172 QIOChannel *tioc;
1173 if (!fixedNewstyle) {
1174 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1175 return -EINVAL;
1176 }
1177 switch (option) {
1178 case NBD_OPT_STARTTLS:
1179 if (length) {
1180
1181
1182 return nbd_reject_length(client, true, errp);
1183 }
1184 tioc = nbd_negotiate_handle_starttls(client, errp);
1185 if (!tioc) {
1186 return -EIO;
1187 }
1188 ret = 0;
1189 object_unref(OBJECT(client->ioc));
1190 client->ioc = QIO_CHANNEL(tioc);
1191 break;
1192
1193 case NBD_OPT_EXPORT_NAME:
1194
1195 error_setg(errp, "Option 0x%x not permitted before TLS",
1196 option);
1197 return -EINVAL;
1198
1199 default:
1200
1201
1202
1203
1204
1205
1206 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
1207 option == NBD_OPT_ABORT ? NULL : errp,
1208 "Option 0x%" PRIx32
1209 " not permitted before TLS", option);
1210 if (option == NBD_OPT_ABORT) {
1211 return 1;
1212 }
1213 break;
1214 }
1215 } else if (fixedNewstyle) {
1216 switch (option) {
1217 case NBD_OPT_LIST:
1218 if (length) {
1219 ret = nbd_reject_length(client, false, errp);
1220 } else {
1221 ret = nbd_negotiate_handle_list(client, errp);
1222 }
1223 break;
1224
1225 case NBD_OPT_ABORT:
1226
1227
1228
1229 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
1230 return 1;
1231
1232 case NBD_OPT_EXPORT_NAME:
1233 return nbd_negotiate_handle_export_name(client, no_zeroes,
1234 errp);
1235
1236 case NBD_OPT_INFO:
1237 case NBD_OPT_GO:
1238 ret = nbd_negotiate_handle_info(client, errp);
1239 if (ret == 1) {
1240 assert(option == NBD_OPT_GO);
1241 return 0;
1242 }
1243 break;
1244
1245 case NBD_OPT_STARTTLS:
1246 if (length) {
1247 ret = nbd_reject_length(client, false, errp);
1248 } else if (client->tlscreds) {
1249 ret = nbd_negotiate_send_rep_err(client,
1250 NBD_REP_ERR_INVALID, errp,
1251 "TLS already enabled");
1252 } else {
1253 ret = nbd_negotiate_send_rep_err(client,
1254 NBD_REP_ERR_POLICY, errp,
1255 "TLS not configured");
1256 }
1257 break;
1258
1259 case NBD_OPT_STRUCTURED_REPLY:
1260 if (length) {
1261 ret = nbd_reject_length(client, false, errp);
1262 } else if (client->structured_reply) {
1263 ret = nbd_negotiate_send_rep_err(
1264 client, NBD_REP_ERR_INVALID, errp,
1265 "structured reply already negotiated");
1266 } else {
1267 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1268 client->structured_reply = true;
1269 }
1270 break;
1271
1272 case NBD_OPT_LIST_META_CONTEXT:
1273 case NBD_OPT_SET_META_CONTEXT:
1274 ret = nbd_negotiate_meta_queries(client, &client->export_meta,
1275 errp);
1276 break;
1277
1278 default:
1279 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
1280 "Unsupported option %" PRIu32 " (%s)",
1281 option, nbd_opt_lookup(option));
1282 break;
1283 }
1284 } else {
1285
1286
1287
1288
1289 switch (option) {
1290 case NBD_OPT_EXPORT_NAME:
1291 return nbd_negotiate_handle_export_name(client, no_zeroes,
1292 errp);
1293
1294 default:
1295 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
1296 option, nbd_opt_lookup(option));
1297 return -EINVAL;
1298 }
1299 }
1300 if (ret < 0) {
1301 return ret;
1302 }
1303 }
1304}
1305
1306
1307
1308
1309
1310
1311
1312
1313static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1314{
1315 ERRP_GUARD();
1316 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1317 int ret;
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333 qio_channel_set_blocking(client->ioc, false, NULL);
1334
1335 trace_nbd_negotiate_begin();
1336 memcpy(buf, "NBDMAGIC", 8);
1337
1338 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1339 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1340
1341 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1342 error_prepend(errp, "write failed: ");
1343 return -EINVAL;
1344 }
1345 ret = nbd_negotiate_options(client, errp);
1346 if (ret != 0) {
1347 if (ret < 0) {
1348 error_prepend(errp, "option negotiation failed: ");
1349 }
1350 return ret;
1351 }
1352
1353
1354 if (client->exp && client->exp->common.ctx) {
1355 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
1356 }
1357
1358 assert(!client->optlen);
1359 trace_nbd_negotiate_success();
1360
1361 return 0;
1362}
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372static inline int coroutine_fn
1373nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1374{
1375 bool partial = false;
1376
1377 assert(size);
1378 while (size > 0) {
1379 struct iovec iov = { .iov_base = buffer, .iov_len = size };
1380 ssize_t len;
1381
1382 len = qio_channel_readv(client->ioc, &iov, 1, errp);
1383 if (len == QIO_CHANNEL_ERR_BLOCK) {
1384 client->read_yielding = true;
1385 qio_channel_yield(client->ioc, G_IO_IN);
1386 client->read_yielding = false;
1387 if (client->quiescing) {
1388 return -EAGAIN;
1389 }
1390 continue;
1391 } else if (len < 0) {
1392 return -EIO;
1393 } else if (len == 0) {
1394 if (partial) {
1395 error_setg(errp,
1396 "Unexpected end-of-file before all bytes were read");
1397 return -EIO;
1398 } else {
1399 return 0;
1400 }
1401 }
1402
1403 partial = true;
1404 size -= len;
1405 buffer = (uint8_t *) buffer + len;
1406 }
1407 return 1;
1408}
1409
1410static int nbd_receive_request(NBDClient *client, NBDRequest *request,
1411 Error **errp)
1412{
1413 uint8_t buf[NBD_REQUEST_SIZE];
1414 uint32_t magic;
1415 int ret;
1416
1417 ret = nbd_read_eof(client, buf, sizeof(buf), errp);
1418 if (ret < 0) {
1419 return ret;
1420 }
1421 if (ret == 0) {
1422 return -EIO;
1423 }
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434 magic = ldl_be_p(buf);
1435 request->flags = lduw_be_p(buf + 4);
1436 request->type = lduw_be_p(buf + 6);
1437 request->handle = ldq_be_p(buf + 8);
1438 request->from = ldq_be_p(buf + 16);
1439 request->len = ldl_be_p(buf + 24);
1440
1441 trace_nbd_receive_request(magic, request->flags, request->type,
1442 request->from, request->len);
1443
1444 if (magic != NBD_REQUEST_MAGIC) {
1445 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1446 return -EINVAL;
1447 }
1448 return 0;
1449}
1450
1451#define MAX_NBD_REQUESTS 16
1452
1453void nbd_client_get(NBDClient *client)
1454{
1455 client->refcount++;
1456}
1457
1458void nbd_client_put(NBDClient *client)
1459{
1460 if (--client->refcount == 0) {
1461
1462
1463
1464 assert(client->closing);
1465
1466 qio_channel_detach_aio_context(client->ioc);
1467 object_unref(OBJECT(client->sioc));
1468 object_unref(OBJECT(client->ioc));
1469 if (client->tlscreds) {
1470 object_unref(OBJECT(client->tlscreds));
1471 }
1472 g_free(client->tlsauthz);
1473 if (client->exp) {
1474 QTAILQ_REMOVE(&client->exp->clients, client, next);
1475 blk_exp_unref(&client->exp->common);
1476 }
1477 g_free(client->export_meta.bitmaps);
1478 g_free(client);
1479 }
1480}
1481
1482static void client_close(NBDClient *client, bool negotiated)
1483{
1484 if (client->closing) {
1485 return;
1486 }
1487
1488 client->closing = true;
1489
1490
1491
1492
1493 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1494 NULL);
1495
1496
1497 if (client->close_fn) {
1498 client->close_fn(client, negotiated);
1499 }
1500}
1501
1502static NBDRequestData *nbd_request_get(NBDClient *client)
1503{
1504 NBDRequestData *req;
1505
1506 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1507 client->nb_requests++;
1508
1509 req = g_new0(NBDRequestData, 1);
1510 nbd_client_get(client);
1511 req->client = client;
1512 return req;
1513}
1514
1515static void nbd_request_put(NBDRequestData *req)
1516{
1517 NBDClient *client = req->client;
1518
1519 if (req->data) {
1520 qemu_vfree(req->data);
1521 }
1522 g_free(req);
1523
1524 client->nb_requests--;
1525
1526 if (client->quiescing && client->nb_requests == 0) {
1527 aio_wait_kick();
1528 }
1529
1530 nbd_client_receive_next_request(client);
1531
1532 nbd_client_put(client);
1533}
1534
1535static void blk_aio_attached(AioContext *ctx, void *opaque)
1536{
1537 NBDExport *exp = opaque;
1538 NBDClient *client;
1539
1540 trace_nbd_blk_aio_attached(exp->name, ctx);
1541
1542 exp->common.ctx = ctx;
1543
1544 QTAILQ_FOREACH(client, &exp->clients, next) {
1545 qio_channel_attach_aio_context(client->ioc, ctx);
1546
1547 assert(client->nb_requests == 0);
1548 assert(client->recv_coroutine == NULL);
1549 assert(client->send_coroutine == NULL);
1550 }
1551}
1552
1553static void blk_aio_detach(void *opaque)
1554{
1555 NBDExport *exp = opaque;
1556 NBDClient *client;
1557
1558 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1559
1560 QTAILQ_FOREACH(client, &exp->clients, next) {
1561 qio_channel_detach_aio_context(client->ioc);
1562 }
1563
1564 exp->common.ctx = NULL;
1565}
1566
1567static void nbd_drained_begin(void *opaque)
1568{
1569 NBDExport *exp = opaque;
1570 NBDClient *client;
1571
1572 QTAILQ_FOREACH(client, &exp->clients, next) {
1573 client->quiescing = true;
1574 }
1575}
1576
1577static void nbd_drained_end(void *opaque)
1578{
1579 NBDExport *exp = opaque;
1580 NBDClient *client;
1581
1582 QTAILQ_FOREACH(client, &exp->clients, next) {
1583 client->quiescing = false;
1584 nbd_client_receive_next_request(client);
1585 }
1586}
1587
1588static bool nbd_drained_poll(void *opaque)
1589{
1590 NBDExport *exp = opaque;
1591 NBDClient *client;
1592
1593 QTAILQ_FOREACH(client, &exp->clients, next) {
1594 if (client->nb_requests != 0) {
1595
1596
1597
1598
1599 if (client->recv_coroutine != NULL && client->read_yielding) {
1600 qemu_aio_coroutine_enter(exp->common.ctx,
1601 client->recv_coroutine);
1602 }
1603
1604 return true;
1605 }
1606 }
1607
1608 return false;
1609}
1610
1611static void nbd_eject_notifier(Notifier *n, void *data)
1612{
1613 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1614
1615 blk_exp_request_shutdown(&exp->common);
1616}
1617
1618void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
1619{
1620 NBDExport *nbd_exp = container_of(exp, NBDExport, common);
1621 assert(exp->drv == &blk_exp_nbd);
1622 assert(nbd_exp->eject_notifier_blk == NULL);
1623
1624 blk_ref(blk);
1625 nbd_exp->eject_notifier_blk = blk;
1626 nbd_exp->eject_notifier.notify = nbd_eject_notifier;
1627 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
1628}
1629
/*
 * Device callbacks installed on the export's BlockBackend by
 * nbd_export_create(); they hook the NBD server into drained sections.
 */
static const BlockDevOps nbd_block_ops = {
    .drained_begin = nbd_drained_begin,
    .drained_end = nbd_drained_end,
    .drained_poll = nbd_drained_poll,
};
1635
1636static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
1637 Error **errp)
1638{
1639 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1640 BlockExportOptionsNbd *arg = &exp_args->u.nbd;
1641 BlockBackend *blk = blk_exp->blk;
1642 int64_t size;
1643 uint64_t perm, shared_perm;
1644 bool readonly = !exp_args->writable;
1645 bool shared = !exp_args->writable;
1646 strList *bitmaps;
1647 size_t i;
1648 int ret;
1649
1650 assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
1651
1652 if (!nbd_server_is_running()) {
1653 error_setg(errp, "NBD server not running");
1654 return -EINVAL;
1655 }
1656
1657 if (!arg->has_name) {
1658 arg->name = exp_args->node_name;
1659 }
1660
1661 if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
1662 error_setg(errp, "export name '%s' too long", arg->name);
1663 return -EINVAL;
1664 }
1665
1666 if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
1667 error_setg(errp, "description '%s' too long", arg->description);
1668 return -EINVAL;
1669 }
1670
1671 if (nbd_export_find(arg->name)) {
1672 error_setg(errp, "NBD server already has export named '%s'", arg->name);
1673 return -EEXIST;
1674 }
1675
1676 size = blk_getlength(blk);
1677 if (size < 0) {
1678 error_setg_errno(errp, -size,
1679 "Failed to determine the NBD export's length");
1680 return size;
1681 }
1682
1683
1684
1685 blk_get_perm(blk, &perm, &shared_perm);
1686 ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
1687 if (ret < 0) {
1688 return ret;
1689 }
1690
1691 QTAILQ_INIT(&exp->clients);
1692 exp->name = g_strdup(arg->name);
1693 exp->description = g_strdup(arg->description);
1694 exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
1695 NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
1696 if (readonly) {
1697 exp->nbdflags |= NBD_FLAG_READ_ONLY;
1698 if (shared) {
1699 exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
1700 }
1701 } else {
1702 exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
1703 NBD_FLAG_SEND_FAST_ZERO);
1704 }
1705 exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
1706
1707 for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
1708 exp->nr_export_bitmaps++;
1709 }
1710 exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
1711 for (i = 0, bitmaps = arg->bitmaps; bitmaps;
1712 i++, bitmaps = bitmaps->next) {
1713 const char *bitmap = bitmaps->value;
1714 BlockDriverState *bs = blk_bs(blk);
1715 BdrvDirtyBitmap *bm = NULL;
1716
1717 while (bs) {
1718 bm = bdrv_find_dirty_bitmap(bs, bitmap);
1719 if (bm != NULL) {
1720 break;
1721 }
1722
1723 bs = bdrv_filter_or_cow_bs(bs);
1724 }
1725
1726 if (bm == NULL) {
1727 ret = -ENOENT;
1728 error_setg(errp, "Bitmap '%s' is not found", bitmap);
1729 goto fail;
1730 }
1731
1732 if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
1733 ret = -EINVAL;
1734 goto fail;
1735 }
1736
1737 if (readonly && bdrv_is_writable(bs) &&
1738 bdrv_dirty_bitmap_enabled(bm)) {
1739 ret = -EINVAL;
1740 error_setg(errp,
1741 "Enabled bitmap '%s' incompatible with readonly export",
1742 bitmap);
1743 goto fail;
1744 }
1745
1746 exp->export_bitmaps[i] = bm;
1747 assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
1748 }
1749
1750
1751 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1752 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
1753 }
1754
1755 exp->allocation_depth = arg->allocation_depth;
1756
1757
1758
1759
1760
1761
1762 blk_set_disable_request_queuing(blk, true);
1763
1764 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1765
1766 blk_set_dev_ops(blk, &nbd_block_ops, exp);
1767
1768 QTAILQ_INSERT_TAIL(&exports, exp, next);
1769
1770 return 0;
1771
1772fail:
1773 g_free(exp->export_bitmaps);
1774 g_free(exp->name);
1775 g_free(exp->description);
1776 return ret;
1777}
1778
1779NBDExport *nbd_export_find(const char *name)
1780{
1781 NBDExport *exp;
1782 QTAILQ_FOREACH(exp, &exports, next) {
1783 if (strcmp(name, exp->name) == 0) {
1784 return exp;
1785 }
1786 }
1787
1788 return NULL;
1789}
1790
1791AioContext *
1792nbd_export_aio_context(NBDExport *exp)
1793{
1794 return exp->common.ctx;
1795}
1796
1797static void nbd_export_request_shutdown(BlockExport *blk_exp)
1798{
1799 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1800 NBDClient *client, *next;
1801
1802 blk_exp_ref(&exp->common);
1803
1804
1805
1806
1807
1808
1809
1810 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1811 client_close(client, true);
1812 }
1813 if (exp->name) {
1814 g_free(exp->name);
1815 exp->name = NULL;
1816 QTAILQ_REMOVE(&exports, exp, next);
1817 }
1818 blk_exp_unref(&exp->common);
1819}
1820
1821static void nbd_export_delete(BlockExport *blk_exp)
1822{
1823 size_t i;
1824 NBDExport *exp = container_of(blk_exp, NBDExport, common);
1825
1826 assert(exp->name == NULL);
1827 assert(QTAILQ_EMPTY(&exp->clients));
1828
1829 g_free(exp->description);
1830 exp->description = NULL;
1831
1832 if (exp->common.blk) {
1833 if (exp->eject_notifier_blk) {
1834 notifier_remove(&exp->eject_notifier);
1835 blk_unref(exp->eject_notifier_blk);
1836 }
1837 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
1838 blk_aio_detach, exp);
1839 blk_set_disable_request_queuing(exp->common.blk, false);
1840 }
1841
1842 for (i = 0; i < exp->nr_export_bitmaps; i++) {
1843 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
1844 }
1845}
1846
/*
 * Block export driver description for NBD: ties the NBD export type to the
 * NBDExport instance size and to the create/delete/request_shutdown
 * callbacks defined in this file.
 */
const BlockExportDriver blk_exp_nbd = {
    .type = BLOCK_EXPORT_TYPE_NBD,
    .instance_size = sizeof(NBDExport),
    .create = nbd_export_create,
    .delete = nbd_export_delete,
    .request_shutdown = nbd_export_request_shutdown,
};
1854
1855static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1856 unsigned niov, Error **errp)
1857{
1858 int ret;
1859
1860 g_assert(qemu_in_coroutine());
1861 qemu_co_mutex_lock(&client->send_lock);
1862 client->send_coroutine = qemu_coroutine_self();
1863
1864 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1865
1866 client->send_coroutine = NULL;
1867 qemu_co_mutex_unlock(&client->send_lock);
1868
1869 return ret;
1870}
1871
1872static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1873 uint64_t handle)
1874{
1875 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1876 stl_be_p(&reply->error, error);
1877 stq_be_p(&reply->handle, handle);
1878}
1879
1880static int nbd_co_send_simple_reply(NBDClient *client,
1881 uint64_t handle,
1882 uint32_t error,
1883 void *data,
1884 size_t len,
1885 Error **errp)
1886{
1887 NBDSimpleReply reply;
1888 int nbd_err = system_errno_to_nbd_errno(error);
1889 struct iovec iov[] = {
1890 {.iov_base = &reply, .iov_len = sizeof(reply)},
1891 {.iov_base = data, .iov_len = len}
1892 };
1893
1894 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1895 len);
1896 set_be_simple_reply(&reply, nbd_err, handle);
1897
1898 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1899}
1900
1901static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1902 uint16_t type, uint64_t handle, uint32_t length)
1903{
1904 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1905 stw_be_p(&chunk->flags, flags);
1906 stw_be_p(&chunk->type, type);
1907 stq_be_p(&chunk->handle, handle);
1908 stl_be_p(&chunk->length, length);
1909}
1910
1911static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1912 uint64_t handle,
1913 Error **errp)
1914{
1915 NBDStructuredReplyChunk chunk;
1916 struct iovec iov[] = {
1917 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1918 };
1919
1920 trace_nbd_co_send_structured_done(handle);
1921 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1922
1923 return nbd_co_send_iov(client, iov, 1, errp);
1924}
1925
1926static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1927 uint64_t handle,
1928 uint64_t offset,
1929 void *data,
1930 size_t size,
1931 bool final,
1932 Error **errp)
1933{
1934 NBDStructuredReadData chunk;
1935 struct iovec iov[] = {
1936 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1937 {.iov_base = data, .iov_len = size}
1938 };
1939
1940 assert(size);
1941 trace_nbd_co_send_structured_read(handle, offset, data, size);
1942 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1943 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1944 sizeof(chunk) - sizeof(chunk.h) + size);
1945 stq_be_p(&chunk.offset, offset);
1946
1947 return nbd_co_send_iov(client, iov, 2, errp);
1948}
1949
1950static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1951 uint64_t handle,
1952 uint32_t error,
1953 const char *msg,
1954 Error **errp)
1955{
1956 NBDStructuredError chunk;
1957 int nbd_err = system_errno_to_nbd_errno(error);
1958 struct iovec iov[] = {
1959 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1960 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1961 };
1962
1963 assert(nbd_err);
1964 trace_nbd_co_send_structured_error(handle, nbd_err,
1965 nbd_err_lookup(nbd_err), msg ? msg : "");
1966 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1967 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1968 stl_be_p(&chunk.error, nbd_err);
1969 stw_be_p(&chunk.message_length, iov[1].iov_len);
1970
1971 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1972}
1973
1974
1975
1976
1977
1978static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
1979 uint64_t handle,
1980 uint64_t offset,
1981 uint8_t *data,
1982 size_t size,
1983 Error **errp)
1984{
1985 int ret = 0;
1986 NBDExport *exp = client->exp;
1987 size_t progress = 0;
1988
1989 while (progress < size) {
1990 int64_t pnum;
1991 int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
1992 offset + progress,
1993 size - progress, &pnum, NULL,
1994 NULL);
1995 bool final;
1996
1997 if (status < 0) {
1998 char *msg = g_strdup_printf("unable to check for holes: %s",
1999 strerror(-status));
2000
2001 ret = nbd_co_send_structured_error(client, handle, -status, msg,
2002 errp);
2003 g_free(msg);
2004 return ret;
2005 }
2006 assert(pnum && pnum <= size - progress);
2007 final = progress + pnum == size;
2008 if (status & BDRV_BLOCK_ZERO) {
2009 NBDStructuredReadHole chunk;
2010 struct iovec iov[] = {
2011 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2012 };
2013
2014 trace_nbd_co_send_structured_read_hole(handle, offset + progress,
2015 pnum);
2016 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
2017 NBD_REPLY_TYPE_OFFSET_HOLE,
2018 handle, sizeof(chunk) - sizeof(chunk.h));
2019 stq_be_p(&chunk.offset, offset + progress);
2020 stl_be_p(&chunk.length, pnum);
2021 ret = nbd_co_send_iov(client, iov, 1, errp);
2022 } else {
2023 ret = blk_pread(exp->common.blk, offset + progress,
2024 data + progress, pnum);
2025 if (ret < 0) {
2026 error_setg_errno(errp, -ret, "reading from file failed");
2027 break;
2028 }
2029 ret = nbd_co_send_structured_read(client, handle, offset + progress,
2030 data + progress, pnum, final,
2031 errp);
2032 }
2033
2034 if (ret < 0) {
2035 break;
2036 }
2037 progress += pnum;
2038 }
2039 return ret;
2040}
2041
/*
 * Growable-up-to-a-limit list of block status extents, built in host byte
 * order and converted to big-endian right before being sent.
 */
typedef struct NBDExtentArray {
    NBDExtent *extents;         /* storage for nb_alloc extents */
    unsigned int nb_alloc;      /* capacity of @extents */
    unsigned int count;         /* number of extents currently in use */
    uint64_t total_length;      /* sum of the lengths added so far */
    bool can_add;               /* false once full or converted to BE */
    bool converted_to_be;       /* set by nbd_extent_array_convert_to_be() */
} NBDExtentArray;
2050
2051static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
2052{
2053 NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
2054
2055 ea->nb_alloc = nb_alloc;
2056 ea->extents = g_new(NBDExtent, nb_alloc);
2057 ea->can_add = true;
2058
2059 return ea;
2060}
2061
2062static void nbd_extent_array_free(NBDExtentArray *ea)
2063{
2064 g_free(ea->extents);
2065 g_free(ea);
2066}
2067G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
2068
2069
2070static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
2071{
2072 int i;
2073
2074 assert(!ea->converted_to_be);
2075 ea->can_add = false;
2076 ea->converted_to_be = true;
2077
2078 for (i = 0; i < ea->count; i++) {
2079 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2080 ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
2081 }
2082}
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093static int nbd_extent_array_add(NBDExtentArray *ea,
2094 uint32_t length, uint32_t flags)
2095{
2096 assert(ea->can_add);
2097
2098 if (!length) {
2099 return 0;
2100 }
2101
2102
2103 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2104 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
2105
2106 if (sum <= UINT32_MAX) {
2107 ea->extents[ea->count - 1].length = sum;
2108 ea->total_length += length;
2109 return 0;
2110 }
2111 }
2112
2113 if (ea->count >= ea->nb_alloc) {
2114 ea->can_add = false;
2115 return -1;
2116 }
2117
2118 ea->total_length += length;
2119 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
2120 ea->count++;
2121
2122 return 0;
2123}
2124
2125static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
2126 uint64_t bytes, NBDExtentArray *ea)
2127{
2128 while (bytes) {
2129 uint32_t flags;
2130 int64_t num;
2131 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
2132 NULL, NULL);
2133
2134 if (ret < 0) {
2135 return ret;
2136 }
2137
2138 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2139 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2140
2141 if (nbd_extent_array_add(ea, num, flags) < 0) {
2142 return 0;
2143 }
2144
2145 offset += num;
2146 bytes -= num;
2147 }
2148
2149 return 0;
2150}
2151
2152static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2153 uint64_t bytes, NBDExtentArray *ea)
2154{
2155 while (bytes) {
2156 int64_t num;
2157 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2158 &num);
2159
2160 if (ret < 0) {
2161 return ret;
2162 }
2163
2164 if (nbd_extent_array_add(ea, num, ret) < 0) {
2165 return 0;
2166 }
2167
2168 offset += num;
2169 bytes -= num;
2170 }
2171
2172 return 0;
2173}
2174
2175
2176
2177
2178
2179
2180
2181static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
2182 NBDExtentArray *ea,
2183 bool last, uint32_t context_id, Error **errp)
2184{
2185 NBDStructuredMeta chunk;
2186 struct iovec iov[] = {
2187 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2188 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
2189 };
2190
2191 nbd_extent_array_convert_to_be(ea);
2192
2193 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
2194 last);
2195 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
2196 NBD_REPLY_TYPE_BLOCK_STATUS,
2197 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
2198 stl_be_p(&chunk.context_id, context_id);
2199
2200 return nbd_co_send_iov(client, iov, 2, errp);
2201}
2202
2203
2204static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
2205 BlockDriverState *bs, uint64_t offset,
2206 uint32_t length, bool dont_fragment,
2207 bool last, uint32_t context_id,
2208 Error **errp)
2209{
2210 int ret;
2211 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2212 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2213
2214 if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2215 ret = blockstatus_to_extents(bs, offset, length, ea);
2216 } else {
2217 ret = blockalloc_to_extents(bs, offset, length, ea);
2218 }
2219 if (ret < 0) {
2220 return nbd_co_send_structured_error(
2221 client, handle, -ret, "can't get block status", errp);
2222 }
2223
2224 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2225}
2226
2227
2228static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
2229 uint64_t offset, uint64_t length,
2230 NBDExtentArray *es)
2231{
2232 int64_t start, dirty_start, dirty_count;
2233 int64_t end = offset + length;
2234 bool full = false;
2235
2236 bdrv_dirty_bitmap_lock(bitmap);
2237
2238 for (start = offset;
2239 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
2240 &dirty_start, &dirty_count);
2241 start = dirty_start + dirty_count)
2242 {
2243 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2244 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2245 {
2246 full = true;
2247 break;
2248 }
2249 }
2250
2251 if (!full) {
2252
2253 (void) nbd_extent_array_add(es, end - start, 0);
2254 }
2255
2256 bdrv_dirty_bitmap_unlock(bitmap);
2257}
2258
2259static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
2260 BdrvDirtyBitmap *bitmap, uint64_t offset,
2261 uint32_t length, bool dont_fragment, bool last,
2262 uint32_t context_id, Error **errp)
2263{
2264 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2265 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
2266
2267 bitmap_to_extents(bitmap, offset, length, ea);
2268
2269 return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
2270}
2271
2272
2273
2274
2275
2276
2277
2278
2279static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
2280 Error **errp)
2281{
2282 NBDClient *client = req->client;
2283 int valid_flags;
2284 int ret;
2285
2286 g_assert(qemu_in_coroutine());
2287 assert(client->recv_coroutine == qemu_coroutine_self());
2288 ret = nbd_receive_request(client, request, errp);
2289 if (ret < 0) {
2290 return ret;
2291 }
2292
2293 trace_nbd_co_receive_request_decode_type(request->handle, request->type,
2294 nbd_cmd_lookup(request->type));
2295
2296 if (request->type != NBD_CMD_WRITE) {
2297
2298 req->complete = true;
2299 }
2300
2301 if (request->type == NBD_CMD_DISC) {
2302
2303
2304 return -EIO;
2305 }
2306
2307 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
2308 request->type == NBD_CMD_CACHE)
2309 {
2310 if (request->len > NBD_MAX_BUFFER_SIZE) {
2311 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
2312 request->len, NBD_MAX_BUFFER_SIZE);
2313 return -EINVAL;
2314 }
2315
2316 if (request->type != NBD_CMD_CACHE) {
2317 req->data = blk_try_blockalign(client->exp->common.blk,
2318 request->len);
2319 if (req->data == NULL) {
2320 error_setg(errp, "No memory");
2321 return -ENOMEM;
2322 }
2323 }
2324 }
2325
2326 if (request->type == NBD_CMD_WRITE) {
2327 if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
2328 errp) < 0)
2329 {
2330 return -EIO;
2331 }
2332 req->complete = true;
2333
2334 trace_nbd_co_receive_request_payload_received(request->handle,
2335 request->len);
2336 }
2337
2338
2339 if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
2340 (request->type == NBD_CMD_WRITE ||
2341 request->type == NBD_CMD_WRITE_ZEROES ||
2342 request->type == NBD_CMD_TRIM)) {
2343 error_setg(errp, "Export is read-only");
2344 return -EROFS;
2345 }
2346 if (request->from > client->exp->size ||
2347 request->len > client->exp->size - request->from) {
2348 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
2349 ", Size: %" PRIu64, request->from, request->len,
2350 client->exp->size);
2351 return (request->type == NBD_CMD_WRITE ||
2352 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
2353 }
2354 if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
2355 client->check_align)) {
2356
2357
2358
2359
2360 trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
2361 request->from,
2362 request->len,
2363 client->check_align);
2364 }
2365 valid_flags = NBD_CMD_FLAG_FUA;
2366 if (request->type == NBD_CMD_READ && client->structured_reply) {
2367 valid_flags |= NBD_CMD_FLAG_DF;
2368 } else if (request->type == NBD_CMD_WRITE_ZEROES) {
2369 valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
2370 } else if (request->type == NBD_CMD_BLOCK_STATUS) {
2371 valid_flags |= NBD_CMD_FLAG_REQ_ONE;
2372 }
2373 if (request->flags & ~valid_flags) {
2374 error_setg(errp, "unsupported flags for command %s (got 0x%x)",
2375 nbd_cmd_lookup(request->type), request->flags);
2376 return -EINVAL;
2377 }
2378
2379 return 0;
2380}
2381
2382
2383
2384
2385
2386static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
2387 uint64_t handle,
2388 int ret,
2389 const char *error_msg,
2390 Error **errp)
2391{
2392 if (client->structured_reply && ret < 0) {
2393 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
2394 errp);
2395 } else {
2396 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
2397 NULL, 0, errp);
2398 }
2399}
2400
2401
2402
2403
2404static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
2405 uint8_t *data, Error **errp)
2406{
2407 int ret;
2408 NBDExport *exp = client->exp;
2409
2410 assert(request->type == NBD_CMD_READ);
2411
2412
2413 if (request->flags & NBD_CMD_FLAG_FUA) {
2414 ret = blk_co_flush(exp->common.blk);
2415 if (ret < 0) {
2416 return nbd_send_generic_reply(client, request->handle, ret,
2417 "flush failed", errp);
2418 }
2419 }
2420
2421 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
2422 request->len)
2423 {
2424 return nbd_co_send_sparse_read(client, request->handle, request->from,
2425 data, request->len, errp);
2426 }
2427
2428 ret = blk_pread(exp->common.blk, request->from, data, request->len);
2429 if (ret < 0) {
2430 return nbd_send_generic_reply(client, request->handle, ret,
2431 "reading from file failed", errp);
2432 }
2433
2434 if (client->structured_reply) {
2435 if (request->len) {
2436 return nbd_co_send_structured_read(client, request->handle,
2437 request->from, data,
2438 request->len, true, errp);
2439 } else {
2440 return nbd_co_send_structured_done(client, request->handle, errp);
2441 }
2442 } else {
2443 return nbd_co_send_simple_reply(client, request->handle, 0,
2444 data, request->len, errp);
2445 }
2446}
2447
2448
2449
2450
2451
2452
2453
2454
2455static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
2456 Error **errp)
2457{
2458 int ret;
2459 NBDExport *exp = client->exp;
2460
2461 assert(request->type == NBD_CMD_CACHE);
2462
2463 ret = blk_co_preadv(exp->common.blk, request->from, request->len,
2464 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
2465
2466 return nbd_send_generic_reply(client, request->handle, ret,
2467 "caching data failed", errp);
2468}
2469
2470
2471
2472
2473static coroutine_fn int nbd_handle_request(NBDClient *client,
2474 NBDRequest *request,
2475 uint8_t *data, Error **errp)
2476{
2477 int ret;
2478 int flags;
2479 NBDExport *exp = client->exp;
2480 char *msg;
2481 size_t i;
2482
2483 switch (request->type) {
2484 case NBD_CMD_CACHE:
2485 return nbd_do_cmd_cache(client, request, errp);
2486
2487 case NBD_CMD_READ:
2488 return nbd_do_cmd_read(client, request, data, errp);
2489
2490 case NBD_CMD_WRITE:
2491 flags = 0;
2492 if (request->flags & NBD_CMD_FLAG_FUA) {
2493 flags |= BDRV_REQ_FUA;
2494 }
2495 ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
2496 flags);
2497 return nbd_send_generic_reply(client, request->handle, ret,
2498 "writing to file failed", errp);
2499
2500 case NBD_CMD_WRITE_ZEROES:
2501 flags = 0;
2502 if (request->flags & NBD_CMD_FLAG_FUA) {
2503 flags |= BDRV_REQ_FUA;
2504 }
2505 if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
2506 flags |= BDRV_REQ_MAY_UNMAP;
2507 }
2508 if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
2509 flags |= BDRV_REQ_NO_FALLBACK;
2510 }
2511 ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len,
2512 flags);
2513 return nbd_send_generic_reply(client, request->handle, ret,
2514 "writing to file failed", errp);
2515
2516 case NBD_CMD_DISC:
2517
2518 abort();
2519
2520 case NBD_CMD_FLUSH:
2521 ret = blk_co_flush(exp->common.blk);
2522 return nbd_send_generic_reply(client, request->handle, ret,
2523 "flush failed", errp);
2524
2525 case NBD_CMD_TRIM:
2526 ret = blk_co_pdiscard(exp->common.blk, request->from, request->len);
2527 if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
2528 ret = blk_co_flush(exp->common.blk);
2529 }
2530 return nbd_send_generic_reply(client, request->handle, ret,
2531 "discard failed", errp);
2532
2533 case NBD_CMD_BLOCK_STATUS:
2534 if (!request->len) {
2535 return nbd_send_generic_reply(client, request->handle, -EINVAL,
2536 "need non-zero length", errp);
2537 }
2538 if (client->export_meta.count) {
2539 bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
2540 int contexts_remaining = client->export_meta.count;
2541
2542 if (client->export_meta.base_allocation) {
2543 ret = nbd_co_send_block_status(client, request->handle,
2544 blk_bs(exp->common.blk),
2545 request->from,
2546 request->len, dont_fragment,
2547 !--contexts_remaining,
2548 NBD_META_ID_BASE_ALLOCATION,
2549 errp);
2550 if (ret < 0) {
2551 return ret;
2552 }
2553 }
2554
2555 if (client->export_meta.allocation_depth) {
2556 ret = nbd_co_send_block_status(client, request->handle,
2557 blk_bs(exp->common.blk),
2558 request->from, request->len,
2559 dont_fragment,
2560 !--contexts_remaining,
2561 NBD_META_ID_ALLOCATION_DEPTH,
2562 errp);
2563 if (ret < 0) {
2564 return ret;
2565 }
2566 }
2567
2568 for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
2569 if (!client->export_meta.bitmaps[i]) {
2570 continue;
2571 }
2572 ret = nbd_co_send_bitmap(client, request->handle,
2573 client->exp->export_bitmaps[i],
2574 request->from, request->len,
2575 dont_fragment, !--contexts_remaining,
2576 NBD_META_ID_DIRTY_BITMAP + i, errp);
2577 if (ret < 0) {
2578 return ret;
2579 }
2580 }
2581
2582 assert(!contexts_remaining);
2583
2584 return 0;
2585 } else {
2586 return nbd_send_generic_reply(client, request->handle, -EINVAL,
2587 "CMD_BLOCK_STATUS not negotiated",
2588 errp);
2589 }
2590
2591 default:
2592 msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
2593 request->type);
2594 ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
2595 errp);
2596 g_free(msg);
2597 return ret;
2598 }
2599}
2600
2601
2602static coroutine_fn void nbd_trip(void *opaque)
2603{
2604 NBDClient *client = opaque;
2605 NBDRequestData *req;
2606 NBDRequest request = { 0 };
2607 int ret;
2608 Error *local_err = NULL;
2609
2610 trace_nbd_trip();
2611 if (client->closing) {
2612 nbd_client_put(client);
2613 return;
2614 }
2615
2616 if (client->quiescing) {
2617
2618
2619
2620
2621 nbd_client_put(client);
2622 client->recv_coroutine = NULL;
2623 aio_wait_kick();
2624 return;
2625 }
2626
2627 req = nbd_request_get(client);
2628 ret = nbd_co_receive_request(req, &request, &local_err);
2629 client->recv_coroutine = NULL;
2630
2631 if (client->closing) {
2632
2633
2634
2635
2636 goto done;
2637 }
2638
2639 if (ret == -EAGAIN) {
2640 assert(client->quiescing);
2641 goto done;
2642 }
2643
2644 nbd_client_receive_next_request(client);
2645 if (ret == -EIO) {
2646 goto disconnect;
2647 }
2648
2649 if (ret < 0) {
2650
2651
2652 Error *export_err = local_err;
2653
2654 local_err = NULL;
2655 ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
2656 error_get_pretty(export_err), &local_err);
2657 error_free(export_err);
2658 } else {
2659 ret = nbd_handle_request(client, &request, req->data, &local_err);
2660 }
2661 if (ret < 0) {
2662 error_prepend(&local_err, "Failed to send reply: ");
2663 goto disconnect;
2664 }
2665
2666
2667
2668
2669 if (!req->complete) {
2670 error_setg(&local_err, "Request handling failed in intermediate state");
2671 goto disconnect;
2672 }
2673
2674done:
2675 nbd_request_put(req);
2676 nbd_client_put(client);
2677 return;
2678
2679disconnect:
2680 if (local_err) {
2681 error_reportf_err(local_err, "Disconnect client, due to: ");
2682 }
2683 nbd_request_put(req);
2684 client_close(client, true);
2685 nbd_client_put(client);
2686}
2687
2688static void nbd_client_receive_next_request(NBDClient *client)
2689{
2690 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
2691 !client->quiescing) {
2692 nbd_client_get(client);
2693 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2694 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
2695 }
2696}
2697
2698static coroutine_fn void nbd_co_client_start(void *opaque)
2699{
2700 NBDClient *client = opaque;
2701 Error *local_err = NULL;
2702
2703 qemu_co_mutex_init(&client->send_lock);
2704
2705 if (nbd_negotiate(client, &local_err)) {
2706 if (local_err) {
2707 error_report_err(local_err);
2708 }
2709 client_close(client, false);
2710 return;
2711 }
2712
2713 nbd_client_receive_next_request(client);
2714}
2715
2716
2717
2718
2719
2720
2721void nbd_client_new(QIOChannelSocket *sioc,
2722 QCryptoTLSCreds *tlscreds,
2723 const char *tlsauthz,
2724 void (*close_fn)(NBDClient *, bool))
2725{
2726 NBDClient *client;
2727 Coroutine *co;
2728
2729 client = g_new0(NBDClient, 1);
2730 client->refcount = 1;
2731 client->tlscreds = tlscreds;
2732 if (tlscreds) {
2733 object_ref(OBJECT(client->tlscreds));
2734 }
2735 client->tlsauthz = g_strdup(tlsauthz);
2736 client->sioc = sioc;
2737 object_ref(OBJECT(client->sioc));
2738 client->ioc = QIO_CHANNEL(sioc);
2739 object_ref(OBJECT(client->ioc));
2740 client->close_fn = close_fn;
2741
2742 co = qemu_coroutine_create(nbd_co_client_start, client);
2743 qemu_coroutine_enter(co);
2744}
2745