1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "qapi/error.h"
22#include "trace.h"
23#include "nbd-internal.h"
24
/* Context id this server reports for the "base:allocation" meta context. */
#define NBD_META_ID_BASE_ALLOCATION 0
26
27static int system_errno_to_nbd_errno(int err)
28{
29 switch (err) {
30 case 0:
31 return NBD_SUCCESS;
32 case EPERM:
33 case EROFS:
34 return NBD_EPERM;
35 case EIO:
36 return NBD_EIO;
37 case ENOMEM:
38 return NBD_ENOMEM;
39#ifdef EDQUOT
40 case EDQUOT:
41#endif
42 case EFBIG:
43 case ENOSPC:
44 return NBD_ENOSPC;
45 case EOVERFLOW:
46 return NBD_EOVERFLOW;
47 case ESHUTDOWN:
48 return NBD_ESHUTDOWN;
49 case EINVAL:
50 default:
51 return NBD_EINVAL;
52 }
53}
54
55
56
/* Book-keeping for one client request that is currently being handled. */
typedef struct NBDRequestData NBDRequestData;

struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
    /* Client the request belongs to; nbd_request_get() takes a reference. */
    NBDClient *client;
    /* Optional buffer; released with qemu_vfree() in nbd_request_put(). */
    uint8_t *data;
    bool complete;
};
65
/* One exported block device, reference counted via nbd_export_get/put(). */
struct NBDExport {
    int refcount;
    /* Optional callback run by nbd_export_put() when the last reference goes. */
    void (*close)(NBDExport *exp);

    BlockBackend *blk;      /* backend created in nbd_export_new() */
    char *name;             /* lookup key for nbd_export_find(); NULL when unnamed */
    char *description;      /* optional free-form text, may be NULL */
    off_t dev_offset;
    off_t size;             /* rounded down to a BDRV_SECTOR_SIZE multiple */
    uint16_t nbdflags;
    QTAILQ_HEAD(, NBDClient) clients;   /* clients attached to this export */
    QTAILQ_ENTRY(NBDExport) next;       /* linkage in the global exports list */

    /* Current AioContext; updated by blk_aio_attached()/blk_aio_detach(). */
    AioContext *ctx;

    /* When set, removal of this backend's node closes the export. */
    BlockBackend *eject_notifier_blk;
    Notifier eject_notifier;
};
84
/* All named exports; entries are linked/unlinked by nbd_export_set_name(). */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
86
87
88
89
/* Meta contexts a client selected during option negotiation. */
typedef struct NBDExportMetaContexts {
    /* Export name the client supplied with the meta context option. */
    char export_name[NBD_MAX_NAME_SIZE + 1];
    /*
     * Set once the option was acknowledged; cleared again by
     * nbd_check_meta_export_name() if the client ends up on another export.
     */
    bool valid;

    bool base_allocation;   /* "base:allocation" was selected */
} NBDExportMetaContexts;
96
/* Per-connection state, reference counted via nbd_client_get/put(). */
struct NBDClient {
    int refcount;
    /* Optional callback invoked by client_close(). */
    void (*close_fn)(NBDClient *client, bool negotiated);

    NBDExport *exp;             /* export this client is attached to, if any */
    QCryptoTLSCreds *tlscreds;  /* non-NULL when TLS is configured */
    char *tlsaclname;
    QIOChannelSocket *sioc;     /* underlying socket channel */
    QIOChannel *ioc;            /* channel in use; replaced by the TLS channel
                                 * after a successful NBD_OPT_STARTTLS */

    /* Rescheduled by blk_aio_attached() when the AioContext changes. */
    Coroutine *recv_coroutine;

    CoMutex send_lock;          /* held while nbd_co_send_iov() writes */
    Coroutine *send_coroutine;  /* coroutine currently inside nbd_co_send_iov() */

    QTAILQ_ENTRY(NBDClient) next;   /* linkage in exp->clients */
    int nb_requests;            /* requests in flight, see MAX_NBD_REQUESTS */
    bool closing;               /* set once by client_close() */

    bool structured_reply;      /* NBD_OPT_STRUCTURED_REPLY was negotiated */
    NBDExportMetaContexts export_meta;

    uint32_t opt;       /* option currently being processed */
    uint32_t optlen;    /* payload bytes of that option not yet consumed */

};
123
/* Forward declaration; nbd_request_put() calls this. */
static void nbd_client_receive_next_request(NBDClient *client);
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
154 uint32_t type, uint32_t length)
155{
156 stq_be_p(&rep->magic, NBD_REP_MAGIC);
157 stl_be_p(&rep->option, option);
158 stl_be_p(&rep->type, type);
159 stl_be_p(&rep->length, length);
160}
161
162
163
164static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
165 uint32_t len, Error **errp)
166{
167 NBDOptionReply rep;
168
169 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
170 type, nbd_rep_lookup(type), len);
171
172 assert(len < NBD_MAX_BUFFER_SIZE);
173
174 set_be_option_rep(&rep, client->opt, type, len);
175 return nbd_write(client->ioc, &rep, sizeof(rep), errp);
176}
177
178
179
180static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
181 Error **errp)
182{
183 return nbd_negotiate_send_rep_len(client, type, 0, errp);
184}
185
186
187
188static int GCC_FMT_ATTR(4, 0)
189nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
190 Error **errp, const char *fmt, va_list va)
191{
192 char *msg;
193 int ret;
194 size_t len;
195
196 msg = g_strdup_vprintf(fmt, va);
197 len = strlen(msg);
198 assert(len < 4096);
199 trace_nbd_negotiate_send_rep_err(msg);
200 ret = nbd_negotiate_send_rep_len(client, type, len, errp);
201 if (ret < 0) {
202 goto out;
203 }
204 if (nbd_write(client->ioc, msg, len, errp) < 0) {
205 error_prepend(errp, "write failed (error message): ");
206 ret = -EIO;
207 } else {
208 ret = 0;
209 }
210
211out:
212 g_free(msg);
213 return ret;
214}
215
216
217
218static int GCC_FMT_ATTR(4, 5)
219nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
220 Error **errp, const char *fmt, ...)
221{
222 va_list va;
223 int ret;
224
225 va_start(va, fmt);
226 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
227 va_end(va);
228 return ret;
229}
230
231
232
233
234static int GCC_FMT_ATTR(4, 0)
235nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
236 const char *fmt, va_list va)
237{
238 int ret = nbd_drop(client->ioc, client->optlen, errp);
239
240 client->optlen = 0;
241 if (!ret) {
242 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
243 }
244 return ret;
245}
246
247static int GCC_FMT_ATTR(4, 5)
248nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
249 const char *fmt, ...)
250{
251 int ret;
252 va_list va;
253
254 va_start(va, fmt);
255 ret = nbd_opt_vdrop(client, type, errp, fmt, va);
256 va_end(va);
257
258 return ret;
259}
260
261static int GCC_FMT_ATTR(3, 4)
262nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
263{
264 int ret;
265 va_list va;
266
267 va_start(va, fmt);
268 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
269 va_end(va);
270
271 return ret;
272}
273
274
275
276
277static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
278 Error **errp)
279{
280 if (size > client->optlen) {
281 return nbd_opt_invalid(client, errp,
282 "Inconsistent lengths in option %s",
283 nbd_opt_lookup(client->opt));
284 }
285 client->optlen -= size;
286 return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1;
287}
288
289
290
291
292static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
293{
294 if (size > client->optlen) {
295 return nbd_opt_invalid(client, errp,
296 "Inconsistent lengths in option %s",
297 nbd_opt_lookup(client->opt));
298 }
299 client->optlen -= size;
300 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length,
316 Error **errp)
317{
318 int ret;
319 uint32_t len;
320
321 ret = nbd_opt_read(client, &len, sizeof(len), errp);
322 if (ret <= 0) {
323 return ret;
324 }
325 cpu_to_be32s(&len);
326
327 if (len > NBD_MAX_NAME_SIZE) {
328 return nbd_opt_invalid(client, errp,
329 "Invalid name length: %" PRIu32, len);
330 }
331
332 ret = nbd_opt_read(client, name, len, errp);
333 if (ret <= 0) {
334 return ret;
335 }
336 name[len] = '\0';
337
338 if (length) {
339 *length = len;
340 }
341
342 return 1;
343}
344
345
346
347static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
348 Error **errp)
349{
350 size_t name_len, desc_len;
351 uint32_t len;
352 const char *name = exp->name ? exp->name : "";
353 const char *desc = exp->description ? exp->description : "";
354 QIOChannel *ioc = client->ioc;
355 int ret;
356
357 trace_nbd_negotiate_send_rep_list(name, desc);
358 name_len = strlen(name);
359 desc_len = strlen(desc);
360 len = name_len + desc_len + sizeof(len);
361 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
362 if (ret < 0) {
363 return ret;
364 }
365
366 len = cpu_to_be32(name_len);
367 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
368 error_prepend(errp, "write failed (name length): ");
369 return -EINVAL;
370 }
371
372 if (nbd_write(ioc, name, name_len, errp) < 0) {
373 error_prepend(errp, "write failed (name buffer): ");
374 return -EINVAL;
375 }
376
377 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
378 error_prepend(errp, "write failed (description buffer): ");
379 return -EINVAL;
380 }
381
382 return 0;
383}
384
385
386
387static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
388{
389 NBDExport *exp;
390 assert(client->opt == NBD_OPT_LIST);
391
392
393 QTAILQ_FOREACH(exp, &exports, next) {
394 if (nbd_negotiate_send_rep_list(client, exp, errp)) {
395 return -EINVAL;
396 }
397 }
398
399 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
400}
401
402static void nbd_check_meta_export_name(NBDClient *client)
403{
404 client->export_meta.valid &= !strcmp(client->exp->name,
405 client->export_meta.export_name);
406}
407
408
409
410static int nbd_negotiate_handle_export_name(NBDClient *client,
411 uint16_t myflags, bool no_zeroes,
412 Error **errp)
413{
414 char name[NBD_MAX_NAME_SIZE + 1];
415 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
416 size_t len;
417 int ret;
418
419
420
421
422
423
424
425
426 trace_nbd_negotiate_handle_export_name();
427 if (client->optlen >= sizeof(name)) {
428 error_setg(errp, "Bad length received");
429 return -EINVAL;
430 }
431 if (nbd_read(client->ioc, name, client->optlen, errp) < 0) {
432 error_prepend(errp, "read failed: ");
433 return -EIO;
434 }
435 name[client->optlen] = '\0';
436 client->optlen = 0;
437
438 trace_nbd_negotiate_handle_export_name_request(name);
439
440 client->exp = nbd_export_find(name);
441 if (!client->exp) {
442 error_setg(errp, "export not found");
443 return -EINVAL;
444 }
445
446 trace_nbd_negotiate_new_style_size_flags(client->exp->size,
447 client->exp->nbdflags | myflags);
448 stq_be_p(buf, client->exp->size);
449 stw_be_p(buf + 8, client->exp->nbdflags | myflags);
450 len = no_zeroes ? 10 : sizeof(buf);
451 ret = nbd_write(client->ioc, buf, len, errp);
452 if (ret < 0) {
453 error_prepend(errp, "write failed: ");
454 return ret;
455 }
456
457 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
458 nbd_export_get(client->exp);
459 nbd_check_meta_export_name(client);
460
461 return 0;
462}
463
464
465
466
467static int nbd_negotiate_send_info(NBDClient *client,
468 uint16_t info, uint32_t length, void *buf,
469 Error **errp)
470{
471 int rc;
472
473 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
474 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
475 sizeof(info) + length, errp);
476 if (rc < 0) {
477 return rc;
478 }
479 cpu_to_be16s(&info);
480 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
481 return -EIO;
482 }
483 if (nbd_write(client->ioc, buf, length, errp) < 0) {
484 return -EIO;
485 }
486 return 0;
487}
488
489
490
491
492
493
494
495
496static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
497{
498 int ret;
499
500 assert(client->optlen);
501 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
502 nbd_opt_lookup(client->opt));
503 if (fatal && !ret) {
504 error_setg(errp, "option '%s' has unexpected length",
505 nbd_opt_lookup(client->opt));
506 return -EINVAL;
507 }
508 return ret;
509}
510
511
512
513
/*
 * Handle NBD_OPT_INFO and NBD_OPT_GO.
 *
 * Returns a negative errno on I/O error, 0 if a reply (possibly an error
 * reply) was sent and negotiation continues, and 1 if NBD_OPT_GO succeeded
 * and the client is now attached to the export.
 */
static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
                                     Error **errp)
{
    int rc;
    char name[NBD_MAX_NAME_SIZE + 1];
    NBDExport *exp;
    uint16_t requests;
    uint16_t request;
    uint32_t namelen;
    bool sendname = false;
    bool blocksize = false;
    uint32_t sizes[3];
    char buf[sizeof(uint64_t) + sizeof(uint16_t)];

    /*
     * Payload layout as consumed below: length-prefixed export name,
     * 16-bit count of information requests, then that many 16-bit
     * request types.
     */
    rc = nbd_opt_read_name(client, name, &namelen, errp);
    if (rc <= 0) {
        return rc;
    }
    trace_nbd_negotiate_handle_export_name_request(name);

    rc = nbd_opt_read(client, &requests, sizeof(requests), errp);
    if (rc <= 0) {
        return rc;
    }
    be16_to_cpus(&requests);
    trace_nbd_negotiate_handle_info_requests(requests);
    while (requests--) {
        rc = nbd_opt_read(client, &request, sizeof(request), errp);
        if (rc <= 0) {
            return rc;
        }
        be16_to_cpus(&request);
        trace_nbd_negotiate_handle_info_request(request,
                                                nbd_info_lookup(request));
        /*
         * Only NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE influence the reply;
         * any other request type is ignored.
         */
        switch (request) {
        case NBD_INFO_NAME:
            sendname = true;
            break;
        case NBD_INFO_BLOCK_SIZE:
            blocksize = true;
            break;
        }
    }
    /* Anything left over means the client's lengths were inconsistent. */
    if (client->optlen) {
        return nbd_reject_length(client, false, errp);
    }

    exp = nbd_export_find(name);
    if (!exp) {
        return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
                                          errp, "export '%s' not present",
                                          name);
    }

    /* NBD_INFO_NAME is only sent when the client asked for it. */
    if (sendname) {
        rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
                                     errp);
        if (rc < 0) {
            return rc;
        }
    }

    /* NBD_INFO_DESCRIPTION is sent whenever the export has one. */
    if (exp->description) {
        size_t len = strlen(exp->description);

        rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
                                     len, exp->description, errp);
        if (rc < 0) {
            return rc;
        }
    }

    /*
     * NBD_INFO_BLOCK_SIZE is always sent.  sizes[0] is the minimum block
     * size: BDRV_SECTOR_SIZE for NBD_OPT_INFO or when the client asked for
     * block sizes, otherwise 1.
     */
    sizes[0] =
        (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
    /* sizes[1] is hard-coded to 4096. */
    sizes[1] = 4096;
    /* sizes[2]: the backend's max transfer, capped at NBD_MAX_BUFFER_SIZE. */
    sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
    trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
    cpu_to_be32s(&sizes[0]);
    cpu_to_be32s(&sizes[1]);
    cpu_to_be32s(&sizes[2]);
    rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
                                 sizeof(sizes), sizes, errp);
    if (rc < 0) {
        return rc;
    }

    /* NBD_INFO_EXPORT is always sent: 64-bit size, then 16-bit flags. */
    trace_nbd_negotiate_new_style_size_flags(exp->size,
                                             exp->nbdflags | myflags);
    stq_be_p(buf, exp->size);
    stw_be_p(buf + 8, exp->nbdflags | myflags);
    rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
                                 sizeof(buf), buf, errp);
    if (rc < 0) {
        return rc;
    }

    /*
     * NBD_OPT_INFO without an NBD_INFO_BLOCK_SIZE request ends with an
     * error reply instead of the ACK.
     */
    if (client->opt == NBD_OPT_INFO && !blocksize) {
        return nbd_negotiate_send_rep_err(client,
                                          NBD_REP_ERR_BLOCK_SIZE_REQD,
                                          errp,
                                          "request NBD_INFO_BLOCK_SIZE to "
                                          "use this export");
    }

    /* Final reply */
    rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
    if (rc < 0) {
        return rc;
    }

    /* Only NBD_OPT_GO attaches the client to the export. */
    if (client->opt == NBD_OPT_GO) {
        client->exp = exp;
        QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
        nbd_export_get(client->exp);
        nbd_check_meta_export_name(client);
        rc = 1;
    }
    return rc;
}
658
659
660
661
662static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
663 Error **errp)
664{
665 QIOChannel *ioc;
666 QIOChannelTLS *tioc;
667 struct NBDTLSHandshakeData data = { 0 };
668
669 assert(client->opt == NBD_OPT_STARTTLS);
670
671 trace_nbd_negotiate_handle_starttls();
672 ioc = client->ioc;
673
674 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
675 return NULL;
676 }
677
678 tioc = qio_channel_tls_new_server(ioc,
679 client->tlscreds,
680 client->tlsaclname,
681 errp);
682 if (!tioc) {
683 return NULL;
684 }
685
686 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
687 trace_nbd_negotiate_handle_starttls_handshake();
688 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
689 qio_channel_tls_handshake(tioc,
690 nbd_tls_handshake,
691 &data,
692 NULL,
693 NULL);
694
695 if (!data.complete) {
696 g_main_loop_run(data.loop);
697 }
698 g_main_loop_unref(data.loop);
699 if (data.error) {
700 object_unref(OBJECT(tioc));
701 error_propagate(errp, data.error);
702 return NULL;
703 }
704
705 return QIO_CHANNEL(tioc);
706}
707
708
709
710
711
712
713
714static int nbd_negotiate_send_meta_context(NBDClient *client,
715 const char *context,
716 uint32_t context_id,
717 Error **errp)
718{
719 NBDOptionReplyMetaContext opt;
720 struct iovec iov[] = {
721 {.iov_base = &opt, .iov_len = sizeof(opt)},
722 {.iov_base = (void *)context, .iov_len = strlen(context)}
723 };
724
725 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
726 context_id = 0;
727 }
728
729 trace_nbd_negotiate_meta_query_reply(context, context_id);
730 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
731 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
732 stl_be_p(&opt.context_id, context_id);
733
734 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
735}
736
737
738
739
740
741
742
743
744
745static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
746 uint32_t len, Error **errp)
747{
748 int ret;
749 char query[sizeof("allocation") - 1];
750 size_t alen = strlen("allocation");
751
752 if (len == 0) {
753 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
754 meta->base_allocation = true;
755 }
756 trace_nbd_negotiate_meta_query_parse("base:");
757 return 1;
758 }
759
760 if (len != alen) {
761 trace_nbd_negotiate_meta_query_skip("not base:allocation");
762 return nbd_opt_skip(client, len, errp);
763 }
764
765 ret = nbd_opt_read(client, query, len, errp);
766 if (ret <= 0) {
767 return ret;
768 }
769
770 if (strncmp(query, "allocation", alen) == 0) {
771 meta->base_allocation = true;
772 }
773
774 trace_nbd_negotiate_meta_query_parse("base:allocation");
775 return 1;
776}
777
778
779
780
781
782
783
784
785
786
787
788
789
790static int nbd_negotiate_meta_query(NBDClient *client,
791 NBDExportMetaContexts *meta, Error **errp)
792{
793 int ret;
794 char query[sizeof("base:") - 1];
795 size_t baselen = strlen("base:");
796 uint32_t len;
797
798 ret = nbd_opt_read(client, &len, sizeof(len), errp);
799 if (ret <= 0) {
800 return ret;
801 }
802 cpu_to_be32s(&len);
803
804
805
806 if (len < baselen) {
807 trace_nbd_negotiate_meta_query_skip("length too short");
808 return nbd_opt_skip(client, len, errp);
809 }
810
811 len -= baselen;
812 ret = nbd_opt_read(client, query, baselen, errp);
813 if (ret <= 0) {
814 return ret;
815 }
816 if (strncmp(query, "base:", baselen) != 0) {
817 trace_nbd_negotiate_meta_query_skip("not for base: namespace");
818 return nbd_opt_skip(client, len, errp);
819 }
820
821 return nbd_meta_base_query(client, meta, len, errp);
822}
823
824
825
826
827
828static int nbd_negotiate_meta_queries(NBDClient *client,
829 NBDExportMetaContexts *meta, Error **errp)
830{
831 int ret;
832 NBDExport *exp;
833 NBDExportMetaContexts local_meta;
834 uint32_t nb_queries;
835 int i;
836
837 if (!client->structured_reply) {
838 return nbd_opt_invalid(client, errp,
839 "request option '%s' when structured reply "
840 "is not negotiated",
841 nbd_opt_lookup(client->opt));
842 }
843
844 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
845
846 meta = &local_meta;
847 }
848
849 memset(meta, 0, sizeof(*meta));
850
851 ret = nbd_opt_read_name(client, meta->export_name, NULL, errp);
852 if (ret <= 0) {
853 return ret;
854 }
855
856 exp = nbd_export_find(meta->export_name);
857 if (exp == NULL) {
858 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
859 "export '%s' not present", meta->export_name);
860 }
861
862 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp);
863 if (ret <= 0) {
864 return ret;
865 }
866 cpu_to_be32s(&nb_queries);
867 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
868 meta->export_name, nb_queries);
869
870 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
871
872 meta->base_allocation = true;
873 } else {
874 for (i = 0; i < nb_queries; ++i) {
875 ret = nbd_negotiate_meta_query(client, meta, errp);
876 if (ret <= 0) {
877 return ret;
878 }
879 }
880 }
881
882 if (meta->base_allocation) {
883 ret = nbd_negotiate_send_meta_context(client, "base:allocation",
884 NBD_META_ID_BASE_ALLOCATION,
885 errp);
886 if (ret < 0) {
887 return ret;
888 }
889 }
890
891 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
892 if (ret == 0) {
893 meta->valid = true;
894 }
895
896 return ret;
897}
898
899
900
901
902
903
904
905
906
907
908static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
909 Error **errp)
910{
911 uint32_t flags;
912 bool fixedNewstyle = false;
913 bool no_zeroes = false;
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930 if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
931 error_prepend(errp, "read failed: ");
932 return -EIO;
933 }
934 be32_to_cpus(&flags);
935 trace_nbd_negotiate_options_flags(flags);
936 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
937 fixedNewstyle = true;
938 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
939 }
940 if (flags & NBD_FLAG_C_NO_ZEROES) {
941 no_zeroes = true;
942 flags &= ~NBD_FLAG_C_NO_ZEROES;
943 }
944 if (flags != 0) {
945 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
946 return -EINVAL;
947 }
948
949 while (1) {
950 int ret;
951 uint32_t option, length;
952 uint64_t magic;
953
954 if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
955 error_prepend(errp, "read failed: ");
956 return -EINVAL;
957 }
958 magic = be64_to_cpu(magic);
959 trace_nbd_negotiate_options_check_magic(magic);
960 if (magic != NBD_OPTS_MAGIC) {
961 error_setg(errp, "Bad magic received");
962 return -EINVAL;
963 }
964
965 if (nbd_read(client->ioc, &option,
966 sizeof(option), errp) < 0) {
967 error_prepend(errp, "read failed: ");
968 return -EINVAL;
969 }
970 option = be32_to_cpu(option);
971 client->opt = option;
972
973 if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
974 error_prepend(errp, "read failed: ");
975 return -EINVAL;
976 }
977 length = be32_to_cpu(length);
978 assert(!client->optlen);
979 client->optlen = length;
980
981 if (length > NBD_MAX_BUFFER_SIZE) {
982 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
983 length, NBD_MAX_BUFFER_SIZE);
984 return -EINVAL;
985 }
986
987 trace_nbd_negotiate_options_check_option(option,
988 nbd_opt_lookup(option));
989 if (client->tlscreds &&
990 client->ioc == (QIOChannel *)client->sioc) {
991 QIOChannel *tioc;
992 if (!fixedNewstyle) {
993 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
994 return -EINVAL;
995 }
996 switch (option) {
997 case NBD_OPT_STARTTLS:
998 if (length) {
999
1000
1001 return nbd_reject_length(client, true, errp);
1002 }
1003 tioc = nbd_negotiate_handle_starttls(client, errp);
1004 if (!tioc) {
1005 return -EIO;
1006 }
1007 ret = 0;
1008 object_unref(OBJECT(client->ioc));
1009 client->ioc = QIO_CHANNEL(tioc);
1010 break;
1011
1012 case NBD_OPT_EXPORT_NAME:
1013
1014 error_setg(errp, "Option 0x%x not permitted before TLS",
1015 option);
1016 return -EINVAL;
1017
1018 default:
1019 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD, errp,
1020 "Option 0x%" PRIx32
1021 "not permitted before TLS", option);
1022
1023
1024
1025 if (option == NBD_OPT_ABORT) {
1026 return 1;
1027 }
1028 break;
1029 }
1030 } else if (fixedNewstyle) {
1031 switch (option) {
1032 case NBD_OPT_LIST:
1033 if (length) {
1034 ret = nbd_reject_length(client, false, errp);
1035 } else {
1036 ret = nbd_negotiate_handle_list(client, errp);
1037 }
1038 break;
1039
1040 case NBD_OPT_ABORT:
1041
1042
1043
1044 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
1045 return 1;
1046
1047 case NBD_OPT_EXPORT_NAME:
1048 return nbd_negotiate_handle_export_name(client,
1049 myflags, no_zeroes,
1050 errp);
1051
1052 case NBD_OPT_INFO:
1053 case NBD_OPT_GO:
1054 ret = nbd_negotiate_handle_info(client, myflags, errp);
1055 if (ret == 1) {
1056 assert(option == NBD_OPT_GO);
1057 return 0;
1058 }
1059 break;
1060
1061 case NBD_OPT_STARTTLS:
1062 if (length) {
1063 ret = nbd_reject_length(client, false, errp);
1064 } else if (client->tlscreds) {
1065 ret = nbd_negotiate_send_rep_err(client,
1066 NBD_REP_ERR_INVALID, errp,
1067 "TLS already enabled");
1068 } else {
1069 ret = nbd_negotiate_send_rep_err(client,
1070 NBD_REP_ERR_POLICY, errp,
1071 "TLS not configured");
1072 }
1073 break;
1074
1075 case NBD_OPT_STRUCTURED_REPLY:
1076 if (length) {
1077 ret = nbd_reject_length(client, false, errp);
1078 } else if (client->structured_reply) {
1079 ret = nbd_negotiate_send_rep_err(
1080 client, NBD_REP_ERR_INVALID, errp,
1081 "structured reply already negotiated");
1082 } else {
1083 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1084 client->structured_reply = true;
1085 myflags |= NBD_FLAG_SEND_DF;
1086 }
1087 break;
1088
1089 case NBD_OPT_LIST_META_CONTEXT:
1090 case NBD_OPT_SET_META_CONTEXT:
1091 ret = nbd_negotiate_meta_queries(client, &client->export_meta,
1092 errp);
1093 break;
1094
1095 default:
1096 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
1097 "Unsupported option %" PRIu32 " (%s)",
1098 option, nbd_opt_lookup(option));
1099 break;
1100 }
1101 } else {
1102
1103
1104
1105
1106 switch (option) {
1107 case NBD_OPT_EXPORT_NAME:
1108 return nbd_negotiate_handle_export_name(client,
1109 myflags, no_zeroes,
1110 errp);
1111
1112 default:
1113 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
1114 option, nbd_opt_lookup(option));
1115 return -EINVAL;
1116 }
1117 }
1118 if (ret < 0) {
1119 return ret;
1120 }
1121 }
1122}
1123
1124
1125
1126
1127
1128
1129
1130
1131static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1132{
1133 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
1134 int ret;
1135 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
1136 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
1137 NBD_FLAG_SEND_WRITE_ZEROES);
1138 bool oldStyle;
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154 qio_channel_set_blocking(client->ioc, false, NULL);
1155
1156 trace_nbd_negotiate_begin();
1157 memcpy(buf, "NBDMAGIC", 8);
1158
1159 oldStyle = client->exp != NULL && !client->tlscreds;
1160 if (oldStyle) {
1161 trace_nbd_negotiate_old_style(client->exp->size,
1162 client->exp->nbdflags | myflags);
1163 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
1164 stq_be_p(buf + 16, client->exp->size);
1165 stl_be_p(buf + 24, client->exp->nbdflags | myflags);
1166
1167 if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) {
1168 error_prepend(errp, "write failed: ");
1169 return -EINVAL;
1170 }
1171 } else {
1172 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
1173 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
1174
1175 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
1176 error_prepend(errp, "write failed: ");
1177 return -EINVAL;
1178 }
1179 ret = nbd_negotiate_options(client, myflags, errp);
1180 if (ret != 0) {
1181 if (ret < 0) {
1182 error_prepend(errp, "option negotiation failed: ");
1183 }
1184 return ret;
1185 }
1186 }
1187
1188 assert(!client->optlen);
1189 trace_nbd_negotiate_success();
1190
1191 return 0;
1192}
1193
1194static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
1195 Error **errp)
1196{
1197 uint8_t buf[NBD_REQUEST_SIZE];
1198 uint32_t magic;
1199 int ret;
1200
1201 ret = nbd_read(ioc, buf, sizeof(buf), errp);
1202 if (ret < 0) {
1203 return ret;
1204 }
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215 magic = ldl_be_p(buf);
1216 request->flags = lduw_be_p(buf + 4);
1217 request->type = lduw_be_p(buf + 6);
1218 request->handle = ldq_be_p(buf + 8);
1219 request->from = ldq_be_p(buf + 16);
1220 request->len = ldl_be_p(buf + 24);
1221
1222 trace_nbd_receive_request(magic, request->flags, request->type,
1223 request->from, request->len);
1224
1225 if (magic != NBD_REQUEST_MAGIC) {
1226 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
1227 return -EINVAL;
1228 }
1229 return 0;
1230}
1231
/* Upper bound on a client's nb_requests, asserted in nbd_request_get(). */
#define MAX_NBD_REQUESTS 16
1233
1234void nbd_client_get(NBDClient *client)
1235{
1236 client->refcount++;
1237}
1238
1239void nbd_client_put(NBDClient *client)
1240{
1241 if (--client->refcount == 0) {
1242
1243
1244
1245 assert(client->closing);
1246
1247 qio_channel_detach_aio_context(client->ioc);
1248 object_unref(OBJECT(client->sioc));
1249 object_unref(OBJECT(client->ioc));
1250 if (client->tlscreds) {
1251 object_unref(OBJECT(client->tlscreds));
1252 }
1253 g_free(client->tlsaclname);
1254 if (client->exp) {
1255 QTAILQ_REMOVE(&client->exp->clients, client, next);
1256 nbd_export_put(client->exp);
1257 }
1258 g_free(client);
1259 }
1260}
1261
1262static void client_close(NBDClient *client, bool negotiated)
1263{
1264 if (client->closing) {
1265 return;
1266 }
1267
1268 client->closing = true;
1269
1270
1271
1272
1273 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
1274 NULL);
1275
1276
1277 if (client->close_fn) {
1278 client->close_fn(client, negotiated);
1279 }
1280}
1281
1282static NBDRequestData *nbd_request_get(NBDClient *client)
1283{
1284 NBDRequestData *req;
1285
1286 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1287 client->nb_requests++;
1288
1289 req = g_new0(NBDRequestData, 1);
1290 nbd_client_get(client);
1291 req->client = client;
1292 return req;
1293}
1294
1295static void nbd_request_put(NBDRequestData *req)
1296{
1297 NBDClient *client = req->client;
1298
1299 if (req->data) {
1300 qemu_vfree(req->data);
1301 }
1302 g_free(req);
1303
1304 client->nb_requests--;
1305 nbd_client_receive_next_request(client);
1306
1307 nbd_client_put(client);
1308}
1309
1310static void blk_aio_attached(AioContext *ctx, void *opaque)
1311{
1312 NBDExport *exp = opaque;
1313 NBDClient *client;
1314
1315 trace_nbd_blk_aio_attached(exp->name, ctx);
1316
1317 exp->ctx = ctx;
1318
1319 QTAILQ_FOREACH(client, &exp->clients, next) {
1320 qio_channel_attach_aio_context(client->ioc, ctx);
1321 if (client->recv_coroutine) {
1322 aio_co_schedule(ctx, client->recv_coroutine);
1323 }
1324 if (client->send_coroutine) {
1325 aio_co_schedule(ctx, client->send_coroutine);
1326 }
1327 }
1328}
1329
1330static void blk_aio_detach(void *opaque)
1331{
1332 NBDExport *exp = opaque;
1333 NBDClient *client;
1334
1335 trace_nbd_blk_aio_detach(exp->name, exp->ctx);
1336
1337 QTAILQ_FOREACH(client, &exp->clients, next) {
1338 qio_channel_detach_aio_context(client->ioc);
1339 }
1340
1341 exp->ctx = NULL;
1342}
1343
1344static void nbd_eject_notifier(Notifier *n, void *data)
1345{
1346 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1347 nbd_export_close(exp);
1348}
1349
/*
 * Create a new, unnamed export on top of @bs.
 *
 * @dev_offset:   stored in the export as given.
 * @size:         export size; a negative value means "use the backend's
 *                length".  The final size is rounded down to a multiple of
 *                BDRV_SECTOR_SIZE.
 * @nbdflags:     export flags; without NBD_FLAG_READ_ONLY the backend also
 *                requests write permission.
 * @close:        optional callback stored in the export.
 * @writethrough: when true, the backend's write cache is disabled.
 * @on_eject_blk: optional backend; when a node is removed from it, the
 *                export is closed.
 *
 * Returns the export with a reference count of 1, or NULL with @errp set.
 */
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
                          uint16_t nbdflags, void (*close)(NBDExport *),
                          bool writethrough, BlockBackend *on_eject_blk,
                          Error **errp)
{
    AioContext *ctx;
    BlockBackend *blk;
    NBDExport *exp = g_new0(NBDExport, 1);
    uint64_t perm;
    int ret;

    /*
     * Invalidate the node's cache under its AioContext lock before the
     * backend is created; errors from this call are not collected.
     */
    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    bdrv_invalidate_cache(bs, NULL);
    aio_context_release(ctx);

    /*
     * Required permissions: consistent read, plus write unless the export
     * is read-only.  The shared set below does not include resize.
     */
    perm = BLK_PERM_CONSISTENT_READ;
    if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
        perm |= BLK_PERM_WRITE;
    }
    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        goto fail;
    }
    blk_set_enable_write_cache(blk, !writethrough);

    exp->refcount = 1;
    QTAILQ_INIT(&exp->clients);
    exp->blk = blk;
    exp->dev_offset = dev_offset;
    exp->nbdflags = nbdflags;
    /* a negative blk_getlength() result is a negative errno */
    exp->size = size < 0 ? blk_getlength(blk) : size;
    if (exp->size < 0) {
        error_setg_errno(errp, -exp->size,
                         "Failed to determine the NBD export's length");
        goto fail;
    }
    /* round down to a whole number of sectors */
    exp->size -= exp->size % BDRV_SECTOR_SIZE;

    exp->close = close;
    exp->ctx = blk_get_aio_context(blk);
    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);

    if (on_eject_blk) {
        blk_ref(on_eject_blk);
        exp->eject_notifier_blk = on_eject_blk;
        exp->eject_notifier.notify = nbd_eject_notifier;
        blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
    }
    return exp;

fail:
    /* no notifiers have been registered yet on either failure path */
    blk_unref(blk);
    g_free(exp);
    return NULL;
}
1415
1416NBDExport *nbd_export_find(const char *name)
1417{
1418 NBDExport *exp;
1419 QTAILQ_FOREACH(exp, &exports, next) {
1420 if (strcmp(name, exp->name) == 0) {
1421 return exp;
1422 }
1423 }
1424
1425 return NULL;
1426}
1427
1428void nbd_export_set_name(NBDExport *exp, const char *name)
1429{
1430 if (exp->name == name) {
1431 return;
1432 }
1433
1434 nbd_export_get(exp);
1435 if (exp->name != NULL) {
1436 g_free(exp->name);
1437 exp->name = NULL;
1438 QTAILQ_REMOVE(&exports, exp, next);
1439 nbd_export_put(exp);
1440 }
1441 if (name != NULL) {
1442 nbd_export_get(exp);
1443 exp->name = g_strdup(name);
1444 QTAILQ_INSERT_TAIL(&exports, exp, next);
1445 }
1446 nbd_export_put(exp);
1447}
1448
1449void nbd_export_set_description(NBDExport *exp, const char *description)
1450{
1451 g_free(exp->description);
1452 exp->description = g_strdup(description);
1453}
1454
1455void nbd_export_close(NBDExport *exp)
1456{
1457 NBDClient *client, *next;
1458
1459 nbd_export_get(exp);
1460 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1461 client_close(client, true);
1462 }
1463 nbd_export_set_name(exp, NULL);
1464 nbd_export_set_description(exp, NULL);
1465 nbd_export_put(exp);
1466}
1467
1468void nbd_export_remove(NBDExport *exp, NbdServerRemoveMode mode, Error **errp)
1469{
1470 if (mode == NBD_SERVER_REMOVE_MODE_HARD || QTAILQ_EMPTY(&exp->clients)) {
1471 nbd_export_close(exp);
1472 return;
1473 }
1474
1475 assert(mode == NBD_SERVER_REMOVE_MODE_SAFE);
1476
1477 error_setg(errp, "export '%s' still in use", exp->name);
1478 error_append_hint(errp, "Use mode='hard' to force client disconnect\n");
1479}
1480
1481void nbd_export_get(NBDExport *exp)
1482{
1483 assert(exp->refcount > 0);
1484 exp->refcount++;
1485}
1486
1487void nbd_export_put(NBDExport *exp)
1488{
1489 assert(exp->refcount > 0);
1490 if (exp->refcount == 1) {
1491 nbd_export_close(exp);
1492 }
1493
1494
1495
1496
1497
1498
1499 assert(exp->refcount > 0);
1500 if (--exp->refcount == 0) {
1501 assert(exp->name == NULL);
1502 assert(exp->description == NULL);
1503
1504 if (exp->close) {
1505 exp->close(exp);
1506 }
1507
1508 if (exp->blk) {
1509 if (exp->eject_notifier_blk) {
1510 notifier_remove(&exp->eject_notifier);
1511 blk_unref(exp->eject_notifier_blk);
1512 }
1513 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
1514 blk_aio_detach, exp);
1515 blk_unref(exp->blk);
1516 exp->blk = NULL;
1517 }
1518
1519 g_free(exp);
1520 }
1521}
1522
1523BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
1524{
1525 return exp->blk;
1526}
1527
1528void nbd_export_close_all(void)
1529{
1530 NBDExport *exp, *next;
1531
1532 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
1533 nbd_export_close(exp);
1534 }
1535}
1536
1537static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
1538 unsigned niov, Error **errp)
1539{
1540 int ret;
1541
1542 g_assert(qemu_in_coroutine());
1543 qemu_co_mutex_lock(&client->send_lock);
1544 client->send_coroutine = qemu_coroutine_self();
1545
1546 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
1547
1548 client->send_coroutine = NULL;
1549 qemu_co_mutex_unlock(&client->send_lock);
1550
1551 return ret;
1552}
1553
1554static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
1555 uint64_t handle)
1556{
1557 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
1558 stl_be_p(&reply->error, error);
1559 stq_be_p(&reply->handle, handle);
1560}
1561
1562static int nbd_co_send_simple_reply(NBDClient *client,
1563 uint64_t handle,
1564 uint32_t error,
1565 void *data,
1566 size_t len,
1567 Error **errp)
1568{
1569 NBDSimpleReply reply;
1570 int nbd_err = system_errno_to_nbd_errno(error);
1571 struct iovec iov[] = {
1572 {.iov_base = &reply, .iov_len = sizeof(reply)},
1573 {.iov_base = data, .iov_len = len}
1574 };
1575
1576 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
1577 len);
1578 set_be_simple_reply(&reply, nbd_err, handle);
1579
1580 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
1581}
1582
1583static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
1584 uint16_t type, uint64_t handle, uint32_t length)
1585{
1586 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
1587 stw_be_p(&chunk->flags, flags);
1588 stw_be_p(&chunk->type, type);
1589 stq_be_p(&chunk->handle, handle);
1590 stl_be_p(&chunk->length, length);
1591}
1592
1593static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
1594 uint64_t handle,
1595 Error **errp)
1596{
1597 NBDStructuredReplyChunk chunk;
1598 struct iovec iov[] = {
1599 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1600 };
1601
1602 trace_nbd_co_send_structured_done(handle);
1603 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
1604
1605 return nbd_co_send_iov(client, iov, 1, errp);
1606}
1607
1608static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
1609 uint64_t handle,
1610 uint64_t offset,
1611 void *data,
1612 size_t size,
1613 bool final,
1614 Error **errp)
1615{
1616 NBDStructuredReadData chunk;
1617 struct iovec iov[] = {
1618 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1619 {.iov_base = data, .iov_len = size}
1620 };
1621
1622 assert(size);
1623 trace_nbd_co_send_structured_read(handle, offset, data, size);
1624 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1625 NBD_REPLY_TYPE_OFFSET_DATA, handle,
1626 sizeof(chunk) - sizeof(chunk.h) + size);
1627 stq_be_p(&chunk.offset, offset);
1628
1629 return nbd_co_send_iov(client, iov, 2, errp);
1630}
1631
1632static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
1633 uint64_t handle,
1634 uint32_t error,
1635 const char *msg,
1636 Error **errp)
1637{
1638 NBDStructuredError chunk;
1639 int nbd_err = system_errno_to_nbd_errno(error);
1640 struct iovec iov[] = {
1641 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1642 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
1643 };
1644
1645 assert(nbd_err);
1646 trace_nbd_co_send_structured_error(handle, nbd_err,
1647 nbd_err_lookup(nbd_err), msg ? msg : "");
1648 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
1649 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1650 stl_be_p(&chunk.error, nbd_err);
1651 stw_be_p(&chunk.message_length, iov[1].iov_len);
1652
1653 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
1654}
1655
1656
1657
1658
1659
1660static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
1661 uint64_t handle,
1662 uint64_t offset,
1663 uint8_t *data,
1664 size_t size,
1665 Error **errp)
1666{
1667 int ret = 0;
1668 NBDExport *exp = client->exp;
1669 size_t progress = 0;
1670
1671 while (progress < size) {
1672 int64_t pnum;
1673 int status = bdrv_block_status_above(blk_bs(exp->blk), NULL,
1674 offset + progress,
1675 size - progress, &pnum, NULL,
1676 NULL);
1677 bool final;
1678
1679 if (status < 0) {
1680 char *msg = g_strdup_printf("unable to check for holes: %s",
1681 strerror(-status));
1682
1683 ret = nbd_co_send_structured_error(client, handle, -status, msg,
1684 errp);
1685 g_free(msg);
1686 return ret;
1687 }
1688 assert(pnum && pnum <= size - progress);
1689 final = progress + pnum == size;
1690 if (status & BDRV_BLOCK_ZERO) {
1691 NBDStructuredReadHole chunk;
1692 struct iovec iov[] = {
1693 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1694 };
1695
1696 trace_nbd_co_send_structured_read_hole(handle, offset + progress,
1697 pnum);
1698 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
1699 NBD_REPLY_TYPE_OFFSET_HOLE,
1700 handle, sizeof(chunk) - sizeof(chunk.h));
1701 stq_be_p(&chunk.offset, offset + progress);
1702 stl_be_p(&chunk.length, pnum);
1703 ret = nbd_co_send_iov(client, iov, 1, errp);
1704 } else {
1705 ret = blk_pread(exp->blk, offset + progress + exp->dev_offset,
1706 data + progress, pnum);
1707 if (ret < 0) {
1708 error_setg_errno(errp, -ret, "reading from file failed");
1709 break;
1710 }
1711 ret = nbd_co_send_structured_read(client, handle, offset + progress,
1712 data + progress, pnum, final,
1713 errp);
1714 }
1715
1716 if (ret < 0) {
1717 break;
1718 }
1719 progress += pnum;
1720 }
1721 return ret;
1722}
1723
1724static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset,
1725 uint64_t bytes, NBDExtent *extent)
1726{
1727 uint64_t remaining_bytes = bytes;
1728
1729 while (remaining_bytes) {
1730 uint32_t flags;
1731 int64_t num;
1732 int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes,
1733 &num, NULL, NULL);
1734 if (ret < 0) {
1735 return ret;
1736 }
1737
1738 flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
1739 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
1740
1741 if (remaining_bytes == bytes) {
1742 extent->flags = flags;
1743 }
1744
1745 if (flags != extent->flags) {
1746 break;
1747 }
1748
1749 offset += num;
1750 remaining_bytes -= num;
1751 }
1752
1753 cpu_to_be32s(&extent->flags);
1754 extent->length = cpu_to_be32(bytes - remaining_bytes);
1755
1756 return 0;
1757}
1758
1759
1760
1761static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
1762 NBDExtent *extents, unsigned nb_extents,
1763 uint32_t context_id, Error **errp)
1764{
1765 NBDStructuredMeta chunk;
1766
1767 struct iovec iov[] = {
1768 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
1769 {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])}
1770 };
1771
1772 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS,
1773 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
1774 stl_be_p(&chunk.context_id, context_id);
1775
1776 return nbd_co_send_iov(client, iov, 2, errp);
1777}
1778
1779
1780static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
1781 BlockDriverState *bs, uint64_t offset,
1782 uint64_t length, uint32_t context_id,
1783 Error **errp)
1784{
1785 int ret;
1786 NBDExtent extent;
1787
1788 ret = blockstatus_to_extent_be(bs, offset, length, &extent);
1789 if (ret < 0) {
1790 return nbd_co_send_structured_error(
1791 client, handle, -ret, "can't get block status", errp);
1792 }
1793
1794 return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp);
1795}
1796
1797
1798
1799
1800
1801
1802
1803static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
1804 Error **errp)
1805{
1806 NBDClient *client = req->client;
1807 int valid_flags;
1808
1809 g_assert(qemu_in_coroutine());
1810 assert(client->recv_coroutine == qemu_coroutine_self());
1811 if (nbd_receive_request(client->ioc, request, errp) < 0) {
1812 return -EIO;
1813 }
1814
1815 trace_nbd_co_receive_request_decode_type(request->handle, request->type,
1816 nbd_cmd_lookup(request->type));
1817
1818 if (request->type != NBD_CMD_WRITE) {
1819
1820 req->complete = true;
1821 }
1822
1823 if (request->type == NBD_CMD_DISC) {
1824
1825
1826 return -EIO;
1827 }
1828
1829 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
1830 if (request->len > NBD_MAX_BUFFER_SIZE) {
1831 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
1832 request->len, NBD_MAX_BUFFER_SIZE);
1833 return -EINVAL;
1834 }
1835
1836 req->data = blk_try_blockalign(client->exp->blk, request->len);
1837 if (req->data == NULL) {
1838 error_setg(errp, "No memory");
1839 return -ENOMEM;
1840 }
1841 }
1842 if (request->type == NBD_CMD_WRITE) {
1843 if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
1844 error_prepend(errp, "reading from socket failed: ");
1845 return -EIO;
1846 }
1847 req->complete = true;
1848
1849 trace_nbd_co_receive_request_payload_received(request->handle,
1850 request->len);
1851 }
1852
1853
1854 if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
1855 (request->type == NBD_CMD_WRITE ||
1856 request->type == NBD_CMD_WRITE_ZEROES ||
1857 request->type == NBD_CMD_TRIM)) {
1858 error_setg(errp, "Export is read-only");
1859 return -EROFS;
1860 }
1861 if (request->from > client->exp->size ||
1862 request->from + request->len > client->exp->size) {
1863 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
1864 ", Size: %" PRIu64, request->from, request->len,
1865 (uint64_t)client->exp->size);
1866 return (request->type == NBD_CMD_WRITE ||
1867 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
1868 }
1869 valid_flags = NBD_CMD_FLAG_FUA;
1870 if (request->type == NBD_CMD_READ && client->structured_reply) {
1871 valid_flags |= NBD_CMD_FLAG_DF;
1872 } else if (request->type == NBD_CMD_WRITE_ZEROES) {
1873 valid_flags |= NBD_CMD_FLAG_NO_HOLE;
1874 } else if (request->type == NBD_CMD_BLOCK_STATUS) {
1875 valid_flags |= NBD_CMD_FLAG_REQ_ONE;
1876 }
1877 if (request->flags & ~valid_flags) {
1878 error_setg(errp, "unsupported flags for command %s (got 0x%x)",
1879 nbd_cmd_lookup(request->type), request->flags);
1880 return -EINVAL;
1881 }
1882
1883 return 0;
1884}
1885
1886
1887
1888
1889
1890static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
1891 uint64_t handle,
1892 int ret,
1893 const char *error_msg,
1894 Error **errp)
1895{
1896 if (client->structured_reply && ret < 0) {
1897 return nbd_co_send_structured_error(client, handle, -ret, error_msg,
1898 errp);
1899 } else {
1900 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
1901 NULL, 0, errp);
1902 }
1903}
1904
1905
1906
1907
1908static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
1909 uint8_t *data, Error **errp)
1910{
1911 int ret;
1912 NBDExport *exp = client->exp;
1913
1914 assert(request->type == NBD_CMD_READ);
1915
1916
1917 if (request->flags & NBD_CMD_FLAG_FUA) {
1918 ret = blk_co_flush(exp->blk);
1919 if (ret < 0) {
1920 return nbd_send_generic_reply(client, request->handle, ret,
1921 "flush failed", errp);
1922 }
1923 }
1924
1925 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
1926 request->len) {
1927 return nbd_co_send_sparse_read(client, request->handle, request->from,
1928 data, request->len, errp);
1929 }
1930
1931 ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
1932 request->len);
1933 if (ret < 0) {
1934 return nbd_send_generic_reply(client, request->handle, ret,
1935 "reading from file failed", errp);
1936 }
1937
1938 if (client->structured_reply) {
1939 if (request->len) {
1940 return nbd_co_send_structured_read(client, request->handle,
1941 request->from, data,
1942 request->len, true, errp);
1943 } else {
1944 return nbd_co_send_structured_done(client, request->handle, errp);
1945 }
1946 } else {
1947 return nbd_co_send_simple_reply(client, request->handle, 0,
1948 data, request->len, errp);
1949 }
1950}
1951
1952
1953
1954
1955static coroutine_fn int nbd_handle_request(NBDClient *client,
1956 NBDRequest *request,
1957 uint8_t *data, Error **errp)
1958{
1959 int ret;
1960 int flags;
1961 NBDExport *exp = client->exp;
1962 char *msg;
1963
1964 switch (request->type) {
1965 case NBD_CMD_READ:
1966 return nbd_do_cmd_read(client, request, data, errp);
1967
1968 case NBD_CMD_WRITE:
1969 flags = 0;
1970 if (request->flags & NBD_CMD_FLAG_FUA) {
1971 flags |= BDRV_REQ_FUA;
1972 }
1973 ret = blk_pwrite(exp->blk, request->from + exp->dev_offset,
1974 data, request->len, flags);
1975 return nbd_send_generic_reply(client, request->handle, ret,
1976 "writing to file failed", errp);
1977
1978 case NBD_CMD_WRITE_ZEROES:
1979 flags = 0;
1980 if (request->flags & NBD_CMD_FLAG_FUA) {
1981 flags |= BDRV_REQ_FUA;
1982 }
1983 if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
1984 flags |= BDRV_REQ_MAY_UNMAP;
1985 }
1986 ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
1987 request->len, flags);
1988 return nbd_send_generic_reply(client, request->handle, ret,
1989 "writing to file failed", errp);
1990
1991 case NBD_CMD_DISC:
1992
1993 abort();
1994
1995 case NBD_CMD_FLUSH:
1996 ret = blk_co_flush(exp->blk);
1997 return nbd_send_generic_reply(client, request->handle, ret,
1998 "flush failed", errp);
1999
2000 case NBD_CMD_TRIM:
2001 ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
2002 request->len);
2003 if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
2004 ret = blk_co_flush(exp->blk);
2005 }
2006 return nbd_send_generic_reply(client, request->handle, ret,
2007 "discard failed", errp);
2008
2009 case NBD_CMD_BLOCK_STATUS:
2010 if (client->export_meta.valid && client->export_meta.base_allocation) {
2011 return nbd_co_send_block_status(client, request->handle,
2012 blk_bs(exp->blk), request->from,
2013 request->len,
2014 NBD_META_ID_BASE_ALLOCATION, errp);
2015 } else {
2016 return nbd_send_generic_reply(client, request->handle, -EINVAL,
2017 "CMD_BLOCK_STATUS not negotiated",
2018 errp);
2019 }
2020
2021 default:
2022 msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
2023 request->type);
2024 ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
2025 errp);
2026 g_free(msg);
2027 return ret;
2028 }
2029}
2030
2031
2032static coroutine_fn void nbd_trip(void *opaque)
2033{
2034 NBDClient *client = opaque;
2035 NBDRequestData *req;
2036 NBDRequest request = { 0 };
2037 int ret;
2038 Error *local_err = NULL;
2039
2040 trace_nbd_trip();
2041 if (client->closing) {
2042 nbd_client_put(client);
2043 return;
2044 }
2045
2046 req = nbd_request_get(client);
2047 ret = nbd_co_receive_request(req, &request, &local_err);
2048 client->recv_coroutine = NULL;
2049
2050 if (client->closing) {
2051
2052
2053
2054
2055 goto done;
2056 }
2057
2058 nbd_client_receive_next_request(client);
2059 if (ret == -EIO) {
2060 goto disconnect;
2061 }
2062
2063 if (ret < 0) {
2064
2065
2066 Error *export_err = local_err;
2067
2068 local_err = NULL;
2069 ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
2070 error_get_pretty(export_err), &local_err);
2071 error_free(export_err);
2072 } else {
2073 ret = nbd_handle_request(client, &request, req->data, &local_err);
2074 }
2075 if (ret < 0) {
2076 error_prepend(&local_err, "Failed to send reply: ");
2077 goto disconnect;
2078 }
2079
2080
2081
2082
2083 if (!req->complete) {
2084 error_setg(&local_err, "Request handling failed in intermediate state");
2085 goto disconnect;
2086 }
2087
2088done:
2089 nbd_request_put(req);
2090 nbd_client_put(client);
2091 return;
2092
2093disconnect:
2094 if (local_err) {
2095 error_reportf_err(local_err, "Disconnect client, due to: ");
2096 }
2097 nbd_request_put(req);
2098 client_close(client, true);
2099 nbd_client_put(client);
2100}
2101
2102static void nbd_client_receive_next_request(NBDClient *client)
2103{
2104 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
2105 nbd_client_get(client);
2106 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
2107 aio_co_schedule(client->exp->ctx, client->recv_coroutine);
2108 }
2109}
2110
2111static coroutine_fn void nbd_co_client_start(void *opaque)
2112{
2113 NBDClient *client = opaque;
2114 NBDExport *exp = client->exp;
2115 Error *local_err = NULL;
2116
2117 if (exp) {
2118 nbd_export_get(exp);
2119 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
2120 }
2121 qemu_co_mutex_init(&client->send_lock);
2122
2123 if (nbd_negotiate(client, &local_err)) {
2124 if (local_err) {
2125 error_report_err(local_err);
2126 }
2127 client_close(client, false);
2128 return;
2129 }
2130
2131 nbd_client_receive_next_request(client);
2132}
2133
2134
2135
2136
2137
2138
2139
2140void nbd_client_new(NBDExport *exp,
2141 QIOChannelSocket *sioc,
2142 QCryptoTLSCreds *tlscreds,
2143 const char *tlsaclname,
2144 void (*close_fn)(NBDClient *, bool))
2145{
2146 NBDClient *client;
2147 Coroutine *co;
2148
2149 client = g_new0(NBDClient, 1);
2150 client->refcount = 1;
2151 client->exp = exp;
2152 client->tlscreds = tlscreds;
2153 if (tlscreds) {
2154 object_ref(OBJECT(client->tlscreds));
2155 }
2156 client->tlsaclname = g_strdup(tlsaclname);
2157 client->sioc = sioc;
2158 object_ref(OBJECT(client->sioc));
2159 client->ioc = QIO_CHANNEL(sioc);
2160 object_ref(OBJECT(client->ioc));
2161 client->close_fn = close_fn;
2162
2163 co = qemu_coroutine_create(nbd_co_client_start, client);
2164 qemu_coroutine_enter(co);
2165}
2166