1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "qapi/error.h"
22#include "trace.h"
23#include "nbd-internal.h"
24
25static int system_errno_to_nbd_errno(int err)
26{
27 switch (err) {
28 case 0:
29 return NBD_SUCCESS;
30 case EPERM:
31 case EROFS:
32 return NBD_EPERM;
33 case EIO:
34 return NBD_EIO;
35 case ENOMEM:
36 return NBD_ENOMEM;
37#ifdef EDQUOT
38 case EDQUOT:
39#endif
40 case EFBIG:
41 case ENOSPC:
42 return NBD_ENOSPC;
43 case ESHUTDOWN:
44 return NBD_ESHUTDOWN;
45 case EINVAL:
46 default:
47 return NBD_EINVAL;
48 }
49}
50
51
52
/* Server-side bookkeeping for one request that is being processed. */
typedef struct NBDRequestData NBDRequestData;

struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
    /* Client the request belongs to; nbd_request_get() takes a reference. */
    NBDClient *client;
    /*
     * Payload buffer, allocated with blk_try_blockalign() for read and
     * write requests and released with qemu_vfree(); NULL otherwise.
     */
    uint8_t *data;
    /*
     * Set once the request, including any write payload, has been read
     * from the socket in full.
     */
    bool complete;
};
61
/* One exported block device, shared by every client connected to it. */
struct NBDExport {
    /* Managed by nbd_export_get() / nbd_export_put(). */
    int refcount;
    /* Optional callback run by nbd_export_put() when the last reference goes. */
    void (*close)(NBDExport *exp);

    /* Backend all client I/O is issued against. */
    BlockBackend *blk;
    /* Heap-allocated name; non-NULL exactly while the export is on 'exports'. */
    char *name;
    /* Optional heap-allocated description, see nbd_export_set_description(). */
    char *description;
    /* Added to every client-supplied offset before touching 'blk'. */
    off_t dev_offset;
    /* Size advertised to clients; rounded down to a BDRV_SECTOR_SIZE multiple. */
    off_t size;
    /* Export flags; OR-ed with the server's own flags during negotiation. */
    uint16_t nbdflags;
    /* Clients currently attached to this export. */
    QTAILQ_HEAD(, NBDClient) clients;
    /* Linkage in the global 'exports' list. */
    QTAILQ_ENTRY(NBDExport) next;

    /* Updated by blk_aio_attached() / blk_aio_detach(). */
    AioContext *ctx;

    /* Backend whose removal closes the export; NULL if none was given. */
    BlockBackend *eject_notifier_blk;
    Notifier eject_notifier;
};
80
/*
 * All exports that currently have a name.  Entries are inserted and removed
 * by nbd_export_set_name() and looked up by nbd_export_find().
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
82
/* State of one client connection. */
struct NBDClient {
    /* Managed by nbd_client_get() / nbd_client_put(). */
    int refcount;
    /* Optional callback invoked once from client_close(). */
    void (*close_fn)(NBDClient *client, bool negotiated);

    /* Export the client is attached to; may be NULL until negotiation picks one. */
    NBDExport *exp;
    /* TLS credentials, or NULL when TLS is not configured for this client. */
    QCryptoTLSCreds *tlscreds;
    char *tlsaclname;
    /* The underlying socket channel. */
    QIOChannelSocket *sioc;
    /* Channel used for all I/O: 'sioc' itself, or the TLS channel after STARTTLS. */
    QIOChannel *ioc;

    /* Coroutine currently reading a request, or NULL. */
    Coroutine *recv_coroutine;

    /* Serializes replies so that header and payload are not interleaved. */
    CoMutex send_lock;
    /* Coroutine currently holding send_lock to send a reply, or NULL. */
    Coroutine *send_coroutine;

    /* Linkage in NBDExport.clients. */
    QTAILQ_ENTRY(NBDClient) next;
    /* Number of requests in flight; never exceeds MAX_NBD_REQUESTS. */
    int nb_requests;
    /* Set by client_close(); no new requests are processed afterwards. */
    bool closing;
};
102
103
104
/* Defined further down; declared here because nbd_request_put() calls it. */
static void nbd_client_receive_next_request(NBDClient *client);
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
137 uint32_t opt, uint32_t len, Error **errp)
138{
139 uint64_t magic;
140
141 trace_nbd_negotiate_send_rep_len(opt, nbd_opt_lookup(opt),
142 type, nbd_rep_lookup(type), len);
143
144 assert(len < NBD_MAX_BUFFER_SIZE);
145 magic = cpu_to_be64(NBD_REP_MAGIC);
146 if (nbd_write(ioc, &magic, sizeof(magic), errp) < 0) {
147 error_prepend(errp, "write failed (rep magic): ");
148 return -EINVAL;
149 }
150
151 opt = cpu_to_be32(opt);
152 if (nbd_write(ioc, &opt, sizeof(opt), errp) < 0) {
153 error_prepend(errp, "write failed (rep opt): ");
154 return -EINVAL;
155 }
156
157 type = cpu_to_be32(type);
158 if (nbd_write(ioc, &type, sizeof(type), errp) < 0) {
159 error_prepend(errp, "write failed (rep type): ");
160 return -EINVAL;
161 }
162
163 len = cpu_to_be32(len);
164 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
165 error_prepend(errp, "write failed (rep data length): ");
166 return -EINVAL;
167 }
168 return 0;
169}
170
171
172
173static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt,
174 Error **errp)
175{
176 return nbd_negotiate_send_rep_len(ioc, type, opt, 0, errp);
177}
178
179
180
181static int GCC_FMT_ATTR(5, 6)
182nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
183 uint32_t opt, Error **errp, const char *fmt, ...)
184{
185 va_list va;
186 char *msg;
187 int ret;
188 size_t len;
189
190 va_start(va, fmt);
191 msg = g_strdup_vprintf(fmt, va);
192 va_end(va);
193 len = strlen(msg);
194 assert(len < 4096);
195 trace_nbd_negotiate_send_rep_err(msg);
196 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len, errp);
197 if (ret < 0) {
198 goto out;
199 }
200 if (nbd_write(ioc, msg, len, errp) < 0) {
201 error_prepend(errp, "write failed (error message): ");
202 ret = -EIO;
203 } else {
204 ret = 0;
205 }
206
207out:
208 g_free(msg);
209 return ret;
210}
211
212
213
214static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp,
215 Error **errp)
216{
217 size_t name_len, desc_len;
218 uint32_t len;
219 const char *name = exp->name ? exp->name : "";
220 const char *desc = exp->description ? exp->description : "";
221 int ret;
222
223 trace_nbd_negotiate_send_rep_list(name, desc);
224 name_len = strlen(name);
225 desc_len = strlen(desc);
226 len = name_len + desc_len + sizeof(len);
227 ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len,
228 errp);
229 if (ret < 0) {
230 return ret;
231 }
232
233 len = cpu_to_be32(name_len);
234 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
235 error_prepend(errp, "write failed (name length): ");
236 return -EINVAL;
237 }
238
239 if (nbd_write(ioc, name, name_len, errp) < 0) {
240 error_prepend(errp, "write failed (name buffer): ");
241 return -EINVAL;
242 }
243
244 if (nbd_write(ioc, desc, desc_len, errp) < 0) {
245 error_prepend(errp, "write failed (description buffer): ");
246 return -EINVAL;
247 }
248
249 return 0;
250}
251
252
253
254static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
255 Error **errp)
256{
257 NBDExport *exp;
258
259 if (length) {
260 if (nbd_drop(client->ioc, length, errp) < 0) {
261 return -EIO;
262 }
263 return nbd_negotiate_send_rep_err(client->ioc,
264 NBD_REP_ERR_INVALID, NBD_OPT_LIST,
265 errp,
266 "OPT_LIST should not have length");
267 }
268
269
270 QTAILQ_FOREACH(exp, &exports, next) {
271 if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
272 return -EINVAL;
273 }
274 }
275
276 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST, errp);
277}
278
279
280
281static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length,
282 uint16_t myflags, bool no_zeroes,
283 Error **errp)
284{
285 char name[NBD_MAX_NAME_SIZE + 1];
286 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
287 size_t len;
288 int ret;
289
290
291
292
293
294
295
296
297 trace_nbd_negotiate_handle_export_name();
298 if (length >= sizeof(name)) {
299 error_setg(errp, "Bad length received");
300 return -EINVAL;
301 }
302 if (nbd_read(client->ioc, name, length, errp) < 0) {
303 error_prepend(errp, "read failed: ");
304 return -EINVAL;
305 }
306 name[length] = '\0';
307
308 trace_nbd_negotiate_handle_export_name_request(name);
309
310 client->exp = nbd_export_find(name);
311 if (!client->exp) {
312 error_setg(errp, "export not found");
313 return -EINVAL;
314 }
315
316 trace_nbd_negotiate_new_style_size_flags(client->exp->size,
317 client->exp->nbdflags | myflags);
318 stq_be_p(buf, client->exp->size);
319 stw_be_p(buf + 8, client->exp->nbdflags | myflags);
320 len = no_zeroes ? 10 : sizeof(buf);
321 ret = nbd_write(client->ioc, buf, len, errp);
322 if (ret < 0) {
323 error_prepend(errp, "write failed: ");
324 return ret;
325 }
326
327 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
328 nbd_export_get(client->exp);
329
330 return 0;
331}
332
333
334
335
336static int nbd_negotiate_send_info(NBDClient *client, uint32_t opt,
337 uint16_t info, uint32_t length, void *buf,
338 Error **errp)
339{
340 int rc;
341
342 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
343 rc = nbd_negotiate_send_rep_len(client->ioc, NBD_REP_INFO, opt,
344 sizeof(info) + length, errp);
345 if (rc < 0) {
346 return rc;
347 }
348 cpu_to_be16s(&info);
349 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
350 return -EIO;
351 }
352 if (nbd_write(client->ioc, buf, length, errp) < 0) {
353 return -EIO;
354 }
355 return 0;
356}
357
358
359
360
/*
 * Handle NBD_OPT_INFO and NBD_OPT_GO (selected by @opt).
 *
 * The @length-byte option payload is parsed as:
 *   32 bits            length of the export name
 *   <name>             the export name (no NUL terminator on the wire)
 *   16 bits            number of information requests
 *   16 bits / request  the information type being requested
 *
 * Returns a negative value on failure, 0 if a final reply (possibly an
 * error reply) was sent and negotiation should continue, or 1 if
 * NBD_OPT_GO succeeded and @client is now attached to the export.
 */
static int nbd_negotiate_handle_info(NBDClient *client, uint32_t length,
                                     uint32_t opt, uint16_t myflags,
                                     Error **errp)
{
    int rc;
    char name[NBD_MAX_NAME_SIZE + 1];
    NBDExport *exp;
    uint16_t requests;
    uint16_t request;
    uint32_t namelen;
    bool sendname = false;
    bool blocksize = false;
    uint32_t sizes[3];
    char buf[sizeof(uint64_t) + sizeof(uint16_t)];
    const char *msg;

    /*
     * Validate the payload piece by piece.  'length' always tracks the
     * number of payload bytes not yet consumed, so that the 'invalid' path
     * can discard exactly the remainder.
     */
    if (length < sizeof(namelen) + sizeof(requests)) {
        msg = "overall request too short";
        goto invalid;
    }
    if (nbd_read(client->ioc, &namelen, sizeof(namelen), errp) < 0) {
        return -EIO;
    }
    be32_to_cpus(&namelen);
    length -= sizeof(namelen);
    /*
     * The name must leave room for the request count, and what follows the
     * name must be a whole number of 16-bit fields.
     */
    if (namelen > length - sizeof(requests) || (length - namelen) % 2) {
        msg = "name length is incorrect";
        goto invalid;
    }
    if (namelen >= sizeof(name)) {
        msg = "name too long for qemu";
        goto invalid;
    }
    if (nbd_read(client->ioc, name, namelen, errp) < 0) {
        return -EIO;
    }
    name[namelen] = '\0';
    length -= namelen;
    trace_nbd_negotiate_handle_export_name_request(name);

    if (nbd_read(client->ioc, &requests, sizeof(requests), errp) < 0) {
        return -EIO;
    }
    be16_to_cpus(&requests);
    length -= sizeof(requests);
    trace_nbd_negotiate_handle_info_requests(requests);
    if (requests != length / sizeof(request)) {
        msg = "incorrect number of requests for overall length";
        goto invalid;
    }
    while (requests--) {
        if (nbd_read(client->ioc, &request, sizeof(request), errp) < 0) {
            return -EIO;
        }
        be16_to_cpus(&request);
        length -= sizeof(request);
        trace_nbd_negotiate_handle_info_request(request,
                                                nbd_info_lookup(request));
        /*
         * Only the name and block size requests change what is done below;
         * every other request type is read and otherwise ignored.
         */
        switch (request) {
        case NBD_INFO_NAME:
            sendname = true;
            break;
        case NBD_INFO_BLOCK_SIZE:
            blocksize = true;
            break;
        }
    }
    /* The checks above guarantee the payload has been consumed exactly. */
    assert(length == 0);

    exp = nbd_export_find(name);
    if (!exp) {
        return nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_UNKNOWN,
                                          opt, errp, "export '%s' not present",
                                          name);
    }

    /* The name is echoed back only when the client asked for it. */
    if (sendname) {
        rc = nbd_negotiate_send_info(client, opt, NBD_INFO_NAME, namelen, name,
                                     errp);
        if (rc < 0) {
            return rc;
        }
    }

    /* A description is sent whenever the export has one, requested or not. */
    if (exp->description) {
        size_t len = strlen(exp->description);

        rc = nbd_negotiate_send_info(client, opt, NBD_INFO_DESCRIPTION,
                                     len, exp->description, errp);
        if (rc < 0) {
            return rc;
        }
    }

    /*
     * Block size information is always sent.  The first value is
     * BDRV_SECTOR_SIZE for NBD_OPT_INFO or when the client requested
     * NBD_INFO_BLOCK_SIZE, and 1 otherwise.
     * NOTE(review): the three values are presumably the minimum, preferred
     * and maximum block size of the NBD protocol - confirm against the spec.
     */
    sizes[0] = (opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;

    sizes[1] = 4096;
    /* Capped so that it never exceeds the largest buffer the server accepts. */
    sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
    trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
    cpu_to_be32s(&sizes[0]);
    cpu_to_be32s(&sizes[1]);
    cpu_to_be32s(&sizes[2]);
    rc = nbd_negotiate_send_info(client, opt, NBD_INFO_BLOCK_SIZE,
                                 sizeof(sizes), sizes, errp);
    if (rc < 0) {
        return rc;
    }

    /* NBD_INFO_EXPORT: 64-bit export size followed by 16-bit flags. */
    trace_nbd_negotiate_new_style_size_flags(exp->size,
                                             exp->nbdflags | myflags);
    stq_be_p(buf, exp->size);
    stw_be_p(buf + 8, exp->nbdflags | myflags);
    rc = nbd_negotiate_send_info(client, opt, NBD_INFO_EXPORT,
                                 sizeof(buf), buf, errp);
    if (rc < 0) {
        return rc;
    }

    /*
     * For NBD_OPT_INFO without a block size request, finish with an error
     * reply telling the client that it must request NBD_INFO_BLOCK_SIZE.
     * The information replies above have already been sent at this point.
     */
    if (opt == NBD_OPT_INFO && !blocksize) {
        return nbd_negotiate_send_rep_err(client->ioc,
                                          NBD_REP_ERR_BLOCK_SIZE_REQD, opt,
                                          errp,
                                          "request NBD_INFO_BLOCK_SIZE to "
                                          "use this export");
    }

    /* Final reply */
    rc = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, opt, errp);
    if (rc < 0) {
        return rc;
    }

    /* NBD_OPT_GO additionally attaches the client to the export. */
    if (opt == NBD_OPT_GO) {
        client->exp = exp;
        QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
        nbd_export_get(client->exp);
        rc = 1;
    }
    return rc;

 invalid:
    /* Discard whatever is left of the payload, then report the problem. */
    if (nbd_drop(client->ioc, length, errp) < 0) {
        return -EIO;
    }
    return nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, opt,
                                      errp, "%s", msg);
}
532
533
534
535
536static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
537 uint32_t length,
538 Error **errp)
539{
540 QIOChannel *ioc;
541 QIOChannelTLS *tioc;
542 struct NBDTLSHandshakeData data = { 0 };
543
544 trace_nbd_negotiate_handle_starttls();
545 ioc = client->ioc;
546 if (length) {
547 if (nbd_drop(ioc, length, errp) < 0) {
548 return NULL;
549 }
550 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
551 errp,
552 "OPT_STARTTLS should not have length");
553 return NULL;
554 }
555
556 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
557 NBD_OPT_STARTTLS, errp) < 0) {
558 return NULL;
559 }
560
561 tioc = qio_channel_tls_new_server(ioc,
562 client->tlscreds,
563 client->tlsaclname,
564 errp);
565 if (!tioc) {
566 return NULL;
567 }
568
569 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
570 trace_nbd_negotiate_handle_starttls_handshake();
571 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
572 qio_channel_tls_handshake(tioc,
573 nbd_tls_handshake,
574 &data,
575 NULL);
576
577 if (!data.complete) {
578 g_main_loop_run(data.loop);
579 }
580 g_main_loop_unref(data.loop);
581 if (data.error) {
582 object_unref(OBJECT(tioc));
583 error_propagate(errp, data.error);
584 return NULL;
585 }
586
587 return QIO_CHANNEL(tioc);
588}
589
590
591
592
593
594
595
596
597
598
599static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
600 Error **errp)
601{
602 uint32_t flags;
603 bool fixedNewstyle = false;
604 bool no_zeroes = false;
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621 if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
622 error_prepend(errp, "read failed: ");
623 return -EIO;
624 }
625 be32_to_cpus(&flags);
626 trace_nbd_negotiate_options_flags(flags);
627 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
628 fixedNewstyle = true;
629 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
630 }
631 if (flags & NBD_FLAG_C_NO_ZEROES) {
632 no_zeroes = true;
633 flags &= ~NBD_FLAG_C_NO_ZEROES;
634 }
635 if (flags != 0) {
636 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
637 return -EINVAL;
638 }
639
640 while (1) {
641 int ret;
642 uint32_t option, length;
643 uint64_t magic;
644
645 if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
646 error_prepend(errp, "read failed: ");
647 return -EINVAL;
648 }
649 magic = be64_to_cpu(magic);
650 trace_nbd_negotiate_options_check_magic(magic);
651 if (magic != NBD_OPTS_MAGIC) {
652 error_setg(errp, "Bad magic received");
653 return -EINVAL;
654 }
655
656 if (nbd_read(client->ioc, &option,
657 sizeof(option), errp) < 0) {
658 error_prepend(errp, "read failed: ");
659 return -EINVAL;
660 }
661 option = be32_to_cpu(option);
662
663 if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
664 error_prepend(errp, "read failed: ");
665 return -EINVAL;
666 }
667 length = be32_to_cpu(length);
668
669 if (length > NBD_MAX_BUFFER_SIZE) {
670 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
671 length, NBD_MAX_BUFFER_SIZE);
672 return -EINVAL;
673 }
674
675 trace_nbd_negotiate_options_check_option(option,
676 nbd_opt_lookup(option));
677 if (client->tlscreds &&
678 client->ioc == (QIOChannel *)client->sioc) {
679 QIOChannel *tioc;
680 if (!fixedNewstyle) {
681 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
682 return -EINVAL;
683 }
684 switch (option) {
685 case NBD_OPT_STARTTLS:
686 tioc = nbd_negotiate_handle_starttls(client, length, errp);
687 if (!tioc) {
688 return -EIO;
689 }
690 object_unref(OBJECT(client->ioc));
691 client->ioc = QIO_CHANNEL(tioc);
692 break;
693
694 case NBD_OPT_EXPORT_NAME:
695
696 error_setg(errp, "Option 0x%x not permitted before TLS",
697 option);
698 return -EINVAL;
699
700 default:
701 if (nbd_drop(client->ioc, length, errp) < 0) {
702 return -EIO;
703 }
704 ret = nbd_negotiate_send_rep_err(client->ioc,
705 NBD_REP_ERR_TLS_REQD,
706 option, errp,
707 "Option 0x%" PRIx32
708 "not permitted before TLS",
709 option);
710 if (ret < 0) {
711 return ret;
712 }
713
714
715
716 if (option == NBD_OPT_ABORT) {
717 return 1;
718 }
719 break;
720 }
721 } else if (fixedNewstyle) {
722 switch (option) {
723 case NBD_OPT_LIST:
724 ret = nbd_negotiate_handle_list(client, length, errp);
725 if (ret < 0) {
726 return ret;
727 }
728 break;
729
730 case NBD_OPT_ABORT:
731
732
733
734 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, option, NULL);
735 return 1;
736
737 case NBD_OPT_EXPORT_NAME:
738 return nbd_negotiate_handle_export_name(client, length,
739 myflags, no_zeroes,
740 errp);
741
742 case NBD_OPT_INFO:
743 case NBD_OPT_GO:
744 ret = nbd_negotiate_handle_info(client, length, option,
745 myflags, errp);
746 if (ret == 1) {
747 assert(option == NBD_OPT_GO);
748 return 0;
749 }
750 if (ret) {
751 return ret;
752 }
753 break;
754
755 case NBD_OPT_STARTTLS:
756 if (nbd_drop(client->ioc, length, errp) < 0) {
757 return -EIO;
758 }
759 if (client->tlscreds) {
760 ret = nbd_negotiate_send_rep_err(client->ioc,
761 NBD_REP_ERR_INVALID,
762 option, errp,
763 "TLS already enabled");
764 } else {
765 ret = nbd_negotiate_send_rep_err(client->ioc,
766 NBD_REP_ERR_POLICY,
767 option, errp,
768 "TLS not configured");
769 }
770 if (ret < 0) {
771 return ret;
772 }
773 break;
774 default:
775 if (nbd_drop(client->ioc, length, errp) < 0) {
776 return -EIO;
777 }
778 ret = nbd_negotiate_send_rep_err(client->ioc,
779 NBD_REP_ERR_UNSUP,
780 option, errp,
781 "Unsupported option 0x%"
782 PRIx32 " (%s)", option,
783 nbd_opt_lookup(option));
784 if (ret < 0) {
785 return ret;
786 }
787 break;
788 }
789 } else {
790
791
792
793
794 switch (option) {
795 case NBD_OPT_EXPORT_NAME:
796 return nbd_negotiate_handle_export_name(client, length,
797 myflags, no_zeroes,
798 errp);
799
800 default:
801 error_setg(errp, "Unsupported option 0x%" PRIx32 " (%s)",
802 option, nbd_opt_lookup(option));
803 return -EINVAL;
804 }
805 }
806 }
807}
808
809
810
811
812
813
814
815
816static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
817{
818 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
819 int ret;
820 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
821 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
822 NBD_FLAG_SEND_WRITE_ZEROES);
823 bool oldStyle;
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839 qio_channel_set_blocking(client->ioc, false, NULL);
840
841 trace_nbd_negotiate_begin();
842 memcpy(buf, "NBDMAGIC", 8);
843
844 oldStyle = client->exp != NULL && !client->tlscreds;
845 if (oldStyle) {
846 trace_nbd_negotiate_old_style(client->exp->size,
847 client->exp->nbdflags | myflags);
848 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
849 stq_be_p(buf + 16, client->exp->size);
850 stl_be_p(buf + 24, client->exp->nbdflags | myflags);
851
852 if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) {
853 error_prepend(errp, "write failed: ");
854 return -EINVAL;
855 }
856 } else {
857 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
858 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
859
860 if (nbd_write(client->ioc, buf, 18, errp) < 0) {
861 error_prepend(errp, "write failed: ");
862 return -EINVAL;
863 }
864 ret = nbd_negotiate_options(client, myflags, errp);
865 if (ret != 0) {
866 if (ret < 0) {
867 error_prepend(errp, "option negotiation failed: ");
868 }
869 return ret;
870 }
871 }
872
873 trace_nbd_negotiate_success();
874
875 return 0;
876}
877
878static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
879 Error **errp)
880{
881 uint8_t buf[NBD_REQUEST_SIZE];
882 uint32_t magic;
883 int ret;
884
885 ret = nbd_read(ioc, buf, sizeof(buf), errp);
886 if (ret < 0) {
887 return ret;
888 }
889
890
891
892
893
894
895
896
897
898
899 magic = ldl_be_p(buf);
900 request->flags = lduw_be_p(buf + 4);
901 request->type = lduw_be_p(buf + 6);
902 request->handle = ldq_be_p(buf + 8);
903 request->from = ldq_be_p(buf + 16);
904 request->len = ldl_be_p(buf + 24);
905
906 trace_nbd_receive_request(magic, request->flags, request->type,
907 request->from, request->len);
908
909 if (magic != NBD_REQUEST_MAGIC) {
910 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
911 return -EINVAL;
912 }
913 return 0;
914}
915
916static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
917{
918 uint8_t buf[NBD_REPLY_SIZE];
919
920 reply->error = system_errno_to_nbd_errno(reply->error);
921
922 trace_nbd_send_reply(reply->error, reply->handle);
923
924
925
926
927
928
929 stl_be_p(buf, NBD_REPLY_MAGIC);
930 stl_be_p(buf + 4, reply->error);
931 stq_be_p(buf + 8, reply->handle);
932
933 return nbd_write(ioc, buf, sizeof(buf), errp);
934}
935
/*
 * Upper bound on the number of requests a single client may have in flight;
 * asserted by nbd_request_get() and honoured by
 * nbd_client_receive_next_request().
 */
#define MAX_NBD_REQUESTS 16
937
938void nbd_client_get(NBDClient *client)
939{
940 client->refcount++;
941}
942
943void nbd_client_put(NBDClient *client)
944{
945 if (--client->refcount == 0) {
946
947
948
949 assert(client->closing);
950
951 qio_channel_detach_aio_context(client->ioc);
952 object_unref(OBJECT(client->sioc));
953 object_unref(OBJECT(client->ioc));
954 if (client->tlscreds) {
955 object_unref(OBJECT(client->tlscreds));
956 }
957 g_free(client->tlsaclname);
958 if (client->exp) {
959 QTAILQ_REMOVE(&client->exp->clients, client, next);
960 nbd_export_put(client->exp);
961 }
962 g_free(client);
963 }
964}
965
966static void client_close(NBDClient *client, bool negotiated)
967{
968 if (client->closing) {
969 return;
970 }
971
972 client->closing = true;
973
974
975
976
977 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
978 NULL);
979
980
981 if (client->close_fn) {
982 client->close_fn(client, negotiated);
983 }
984}
985
986static NBDRequestData *nbd_request_get(NBDClient *client)
987{
988 NBDRequestData *req;
989
990 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
991 client->nb_requests++;
992
993 req = g_new0(NBDRequestData, 1);
994 nbd_client_get(client);
995 req->client = client;
996 return req;
997}
998
999static void nbd_request_put(NBDRequestData *req)
1000{
1001 NBDClient *client = req->client;
1002
1003 if (req->data) {
1004 qemu_vfree(req->data);
1005 }
1006 g_free(req);
1007
1008 client->nb_requests--;
1009 nbd_client_receive_next_request(client);
1010
1011 nbd_client_put(client);
1012}
1013
1014static void blk_aio_attached(AioContext *ctx, void *opaque)
1015{
1016 NBDExport *exp = opaque;
1017 NBDClient *client;
1018
1019 trace_nbd_blk_aio_attached(exp->name, ctx);
1020
1021 exp->ctx = ctx;
1022
1023 QTAILQ_FOREACH(client, &exp->clients, next) {
1024 qio_channel_attach_aio_context(client->ioc, ctx);
1025 if (client->recv_coroutine) {
1026 aio_co_schedule(ctx, client->recv_coroutine);
1027 }
1028 if (client->send_coroutine) {
1029 aio_co_schedule(ctx, client->send_coroutine);
1030 }
1031 }
1032}
1033
1034static void blk_aio_detach(void *opaque)
1035{
1036 NBDExport *exp = opaque;
1037 NBDClient *client;
1038
1039 trace_nbd_blk_aio_detach(exp->name, exp->ctx);
1040
1041 QTAILQ_FOREACH(client, &exp->clients, next) {
1042 qio_channel_detach_aio_context(client->ioc);
1043 }
1044
1045 exp->ctx = NULL;
1046}
1047
1048static void nbd_eject_notifier(Notifier *n, void *data)
1049{
1050 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
1051 nbd_export_close(exp);
1052}
1053
1054NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
1055 uint16_t nbdflags, void (*close)(NBDExport *),
1056 bool writethrough, BlockBackend *on_eject_blk,
1057 Error **errp)
1058{
1059 AioContext *ctx;
1060 BlockBackend *blk;
1061 NBDExport *exp = g_malloc0(sizeof(NBDExport));
1062 uint64_t perm;
1063 int ret;
1064
1065
1066
1067
1068
1069
1070 ctx = bdrv_get_aio_context(bs);
1071 aio_context_acquire(ctx);
1072 bdrv_invalidate_cache(bs, NULL);
1073 aio_context_release(ctx);
1074
1075
1076
1077 perm = BLK_PERM_CONSISTENT_READ;
1078 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
1079 perm |= BLK_PERM_WRITE;
1080 }
1081 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
1082 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
1083 ret = blk_insert_bs(blk, bs, errp);
1084 if (ret < 0) {
1085 goto fail;
1086 }
1087 blk_set_enable_write_cache(blk, !writethrough);
1088
1089 exp->refcount = 1;
1090 QTAILQ_INIT(&exp->clients);
1091 exp->blk = blk;
1092 exp->dev_offset = dev_offset;
1093 exp->nbdflags = nbdflags;
1094 exp->size = size < 0 ? blk_getlength(blk) : size;
1095 if (exp->size < 0) {
1096 error_setg_errno(errp, -exp->size,
1097 "Failed to determine the NBD export's length");
1098 goto fail;
1099 }
1100 exp->size -= exp->size % BDRV_SECTOR_SIZE;
1101
1102 exp->close = close;
1103 exp->ctx = blk_get_aio_context(blk);
1104 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1105
1106 if (on_eject_blk) {
1107 blk_ref(on_eject_blk);
1108 exp->eject_notifier_blk = on_eject_blk;
1109 exp->eject_notifier.notify = nbd_eject_notifier;
1110 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
1111 }
1112 return exp;
1113
1114fail:
1115 blk_unref(blk);
1116 g_free(exp);
1117 return NULL;
1118}
1119
1120NBDExport *nbd_export_find(const char *name)
1121{
1122 NBDExport *exp;
1123 QTAILQ_FOREACH(exp, &exports, next) {
1124 if (strcmp(name, exp->name) == 0) {
1125 return exp;
1126 }
1127 }
1128
1129 return NULL;
1130}
1131
1132void nbd_export_set_name(NBDExport *exp, const char *name)
1133{
1134 if (exp->name == name) {
1135 return;
1136 }
1137
1138 nbd_export_get(exp);
1139 if (exp->name != NULL) {
1140 g_free(exp->name);
1141 exp->name = NULL;
1142 QTAILQ_REMOVE(&exports, exp, next);
1143 nbd_export_put(exp);
1144 }
1145 if (name != NULL) {
1146 nbd_export_get(exp);
1147 exp->name = g_strdup(name);
1148 QTAILQ_INSERT_TAIL(&exports, exp, next);
1149 }
1150 nbd_export_put(exp);
1151}
1152
1153void nbd_export_set_description(NBDExport *exp, const char *description)
1154{
1155 g_free(exp->description);
1156 exp->description = g_strdup(description);
1157}
1158
1159void nbd_export_close(NBDExport *exp)
1160{
1161 NBDClient *client, *next;
1162
1163 nbd_export_get(exp);
1164 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
1165 client_close(client, true);
1166 }
1167 nbd_export_set_name(exp, NULL);
1168 nbd_export_set_description(exp, NULL);
1169 nbd_export_put(exp);
1170}
1171
1172void nbd_export_get(NBDExport *exp)
1173{
1174 assert(exp->refcount > 0);
1175 exp->refcount++;
1176}
1177
1178void nbd_export_put(NBDExport *exp)
1179{
1180 assert(exp->refcount > 0);
1181 if (exp->refcount == 1) {
1182 nbd_export_close(exp);
1183 }
1184
1185 if (--exp->refcount == 0) {
1186 assert(exp->name == NULL);
1187 assert(exp->description == NULL);
1188
1189 if (exp->close) {
1190 exp->close(exp);
1191 }
1192
1193 if (exp->blk) {
1194 if (exp->eject_notifier_blk) {
1195 notifier_remove(&exp->eject_notifier);
1196 blk_unref(exp->eject_notifier_blk);
1197 }
1198 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
1199 blk_aio_detach, exp);
1200 blk_unref(exp->blk);
1201 exp->blk = NULL;
1202 }
1203
1204 g_free(exp);
1205 }
1206}
1207
1208BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
1209{
1210 return exp->blk;
1211}
1212
1213void nbd_export_close_all(void)
1214{
1215 NBDExport *exp, *next;
1216
1217 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
1218 nbd_export_close(exp);
1219 }
1220}
1221
1222static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len,
1223 Error **errp)
1224{
1225 NBDClient *client = req->client;
1226 int ret;
1227
1228 g_assert(qemu_in_coroutine());
1229
1230 trace_nbd_co_send_reply(reply->handle, reply->error, len);
1231
1232 qemu_co_mutex_lock(&client->send_lock);
1233 client->send_coroutine = qemu_coroutine_self();
1234
1235 if (!len) {
1236 ret = nbd_send_reply(client->ioc, reply, errp);
1237 } else {
1238 qio_channel_set_cork(client->ioc, true);
1239 ret = nbd_send_reply(client->ioc, reply, errp);
1240 if (ret == 0) {
1241 ret = nbd_write(client->ioc, req->data, len, errp);
1242 if (ret < 0) {
1243 ret = -EIO;
1244 }
1245 }
1246 qio_channel_set_cork(client->ioc, false);
1247 }
1248
1249 client->send_coroutine = NULL;
1250 qemu_co_mutex_unlock(&client->send_lock);
1251 return ret;
1252}
1253
1254
1255
1256
1257
1258
1259
/*
 * Receive and validate one request from the client, including the payload
 * of an NBD_CMD_WRITE.  Must run in the client's receive coroutine.
 *
 * Return value:
 *   0        the request is valid and can be executed
 *   -EIO     the header or payload could not be read, or the client sent
 *            NBD_CMD_DISC; nbd_trip() disconnects without replying
 *   other    negative errno (-EINVAL, -ENOMEM, -ENOSPC) that nbd_trip()
 *            reports to the client in an error reply
 *
 * req->complete is set once everything belonging to the request has been
 * read from the socket; nbd_trip() uses it to decide whether the
 * connection can be kept after an error.
 */
static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                  Error **errp)
{
    NBDClient *client = req->client;

    g_assert(qemu_in_coroutine());
    assert(client->recv_coroutine == qemu_coroutine_self());
    if (nbd_receive_request(client->ioc, request, errp) < 0) {
        return -EIO;
    }

    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
                                             nbd_cmd_lookup(request->type));

    if (request->type != NBD_CMD_WRITE) {
        /* No payload follows, so the request has been read in full. */
        req->complete = true;
    }

    if (request->type == NBD_CMD_DISC) {
        /*
         * A disconnect request is reported as -EIO so that the caller
         * closes the connection; errp is left unset.
         */
        return -EIO;
    }

    /*
     * First round of checks: only what must hold before a buffer is
     * allocated.  The remaining checks run after any write payload has
     * been read.
     */
    if ((request->from + request->len) < request->from) {
        error_setg(errp,
                   "integer overflow detected, you're probably being attacked");
        return -EINVAL;
    }

    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
        if (request->len > NBD_MAX_BUFFER_SIZE) {
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       request->len, NBD_MAX_BUFFER_SIZE);
            return -EINVAL;
        }

        req->data = blk_try_blockalign(client->exp->blk, request->len);
        if (req->data == NULL) {
            error_setg(errp, "No memory");
            return -ENOMEM;
        }
    }
    if (request->type == NBD_CMD_WRITE) {
        if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
            error_prepend(errp, "reading from socket failed: ");
            return -EIO;
        }
        /* The payload has been read, so the request is now complete. */
        req->complete = true;

        trace_nbd_co_receive_request_payload_received(request->handle,
                                                      request->len);
    }

    /* Second round of checks, with the request fully read. */
    if (request->from + request->len > client->exp->size) {
        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                   ", Size: %" PRIu64, request->from, request->len,
                   (uint64_t)client->exp->size);
        return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
    }
    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
        error_setg(errp, "unsupported flags (got 0x%x)", request->flags);
        return -EINVAL;
    }
    /* NBD_CMD_FLAG_NO_HOLE is only accepted on NBD_CMD_WRITE_ZEROES. */
    if (request->type != NBD_CMD_WRITE_ZEROES &&
        (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
        error_setg(errp, "unexpected flags (got 0x%x)", request->flags);
        return -EINVAL;
    }

    return 0;
}
1337
1338
/*
 * Coroutine entry point that services exactly one request: receive it,
 * execute it against the export's backend, and send the reply.
 *
 * The coroutine owns the client reference taken by
 * nbd_client_receive_next_request(); every exit path drops it with
 * nbd_client_put().
 */
static coroutine_fn void nbd_trip(void *opaque)
{
    NBDClient *client = opaque;
    NBDExport *exp = client->exp;
    NBDRequestData *req;
    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
    NBDReply reply;
    int ret;
    int flags;
    int reply_data_len = 0;
    Error *local_err = NULL;

    trace_nbd_trip();
    if (client->closing) {
        /* Nothing to do any more; just drop our reference. */
        nbd_client_put(client);
        return;
    }

    req = nbd_request_get(client);
    ret = nbd_co_receive_request(req, &request, &local_err);
    /*
     * Receiving is finished, so clear the receive coroutine and let the
     * next request be picked up while this one is being processed.
     */
    client->recv_coroutine = NULL;
    nbd_client_receive_next_request(client);
    if (ret == -EIO) {
        /* Read failure or NBD_CMD_DISC: close without sending a reply. */
        goto disconnect;
    }

    reply.handle = request.handle;
    reply.error = 0;

    if (ret < 0) {
        /* The request was rejected; report the error to the client. */
        reply.error = -ret;
        goto reply;
    }

    if (client->closing) {
        /*
         * The client was closed while the request was being received;
         * skip execution and the reply.
         */
        goto done;
    }

    switch (request.type) {
    case NBD_CMD_READ:
        /* A read carrying the FUA flag is preceded by a flush. */
        if (request.flags & NBD_CMD_FLAG_FUA) {
            ret = blk_co_flush(exp->blk);
            if (ret < 0) {
                error_setg_errno(&local_err, -ret, "flush failed");
                reply.error = -ret;
                break;
            }
        }

        ret = blk_pread(exp->blk, request.from + exp->dev_offset,
                        req->data, request.len);
        if (ret < 0) {
            error_setg_errno(&local_err, -ret, "reading from file failed");
            reply.error = -ret;
            break;
        }

        /* Only a successful read sends data back with the reply. */
        reply_data_len = request.len;

        break;
    case NBD_CMD_WRITE:
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
            reply.error = EROFS;
            break;
        }

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
                         req->data, request.len, flags);
        if (ret < 0) {
            error_setg_errno(&local_err, -ret, "writing to file failed");
            reply.error = -ret;
        }

        break;
    case NBD_CMD_WRITE_ZEROES:
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
            error_setg(&local_err, "Server is read-only, return error");
            reply.error = EROFS;
            break;
        }

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        /* Unmapping is allowed unless the client set NBD_CMD_FLAG_NO_HOLE. */
        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
                                request.len, flags);
        if (ret < 0) {
            error_setg_errno(&local_err, -ret, "writing to file failed");
            reply.error = -ret;
        }

        break;
    case NBD_CMD_DISC:
        /* Unreachable: nbd_co_receive_request() returns -EIO for it. */
        abort();

    case NBD_CMD_FLUSH:
        ret = blk_co_flush(exp->blk);
        if (ret < 0) {
            error_setg_errno(&local_err, -ret, "flush failed");
            reply.error = -ret;
        }

        break;
    case NBD_CMD_TRIM:
        ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
                              request.len);
        if (ret < 0) {
            error_setg_errno(&local_err, -ret, "discard failed");
            reply.error = -ret;
        }

        break;
    default:
        error_setg(&local_err, "invalid request type (%" PRIu32 ") received",
                   request.type);
        reply.error = EINVAL;
    }

reply:
    if (local_err) {
        /*
         * The error is reported locally and the connection kept; the
         * client learns about the failure through reply.error.
         */
        error_report_err(local_err);
        local_err = NULL;
    }

    if (nbd_co_send_reply(req, &reply, reply_data_len, &local_err) < 0) {
        error_prepend(&local_err, "Failed to send reply: ");
        goto disconnect;
    }

    /*
     * If the request was rejected before all of it was read, unread bytes
     * are still pending on the socket, so the connection cannot be kept.
     */
    if (!req->complete) {
        error_setg(&local_err, "Request handling failed in intermediate state");
        goto disconnect;
    }

done:
    nbd_request_put(req);
    nbd_client_put(client);
    return;

disconnect:
    if (local_err) {
        error_reportf_err(local_err, "Disconnect client, due to: ");
    }
    nbd_request_put(req);
    client_close(client, true);
    nbd_client_put(client);
}
1505
1506static void nbd_client_receive_next_request(NBDClient *client)
1507{
1508 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
1509 nbd_client_get(client);
1510 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
1511 aio_co_schedule(client->exp->ctx, client->recv_coroutine);
1512 }
1513}
1514
1515static coroutine_fn void nbd_co_client_start(void *opaque)
1516{
1517 NBDClient *client = opaque;
1518 NBDExport *exp = client->exp;
1519 Error *local_err = NULL;
1520
1521 if (exp) {
1522 nbd_export_get(exp);
1523 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1524 }
1525 qemu_co_mutex_init(&client->send_lock);
1526
1527 if (nbd_negotiate(client, &local_err)) {
1528 if (local_err) {
1529 error_report_err(local_err);
1530 }
1531 client_close(client, false);
1532 return;
1533 }
1534
1535 nbd_client_receive_next_request(client);
1536}
1537
1538
1539
1540
1541
1542
1543
1544void nbd_client_new(NBDExport *exp,
1545 QIOChannelSocket *sioc,
1546 QCryptoTLSCreds *tlscreds,
1547 const char *tlsaclname,
1548 void (*close_fn)(NBDClient *, bool))
1549{
1550 NBDClient *client;
1551 Coroutine *co;
1552
1553 client = g_malloc0(sizeof(NBDClient));
1554 client->refcount = 1;
1555 client->exp = exp;
1556 client->tlscreds = tlscreds;
1557 if (tlscreds) {
1558 object_ref(OBJECT(client->tlscreds));
1559 }
1560 client->tlsaclname = g_strdup(tlsaclname);
1561 client->sioc = sioc;
1562 object_ref(OBJECT(client->sioc));
1563 client->ioc = QIO_CHANNEL(sioc);
1564 object_ref(OBJECT(client->ioc));
1565 client->close_fn = close_fn;
1566
1567 co = qemu_coroutine_create(nbd_co_client_start, client);
1568 qemu_coroutine_enter(co);
1569}
1570