1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "qapi/error.h"
22#include "nbd-internal.h"
23
24static int system_errno_to_nbd_errno(int err)
25{
26 switch (err) {
27 case 0:
28 return NBD_SUCCESS;
29 case EPERM:
30 case EROFS:
31 return NBD_EPERM;
32 case EIO:
33 return NBD_EIO;
34 case ENOMEM:
35 return NBD_ENOMEM;
36#ifdef EDQUOT
37 case EDQUOT:
38#endif
39 case EFBIG:
40 case ENOSPC:
41 return NBD_ENOSPC;
42 case ESHUTDOWN:
43 return NBD_ESHUTDOWN;
44 case EINVAL:
45 default:
46 return NBD_EINVAL;
47 }
48}
49
50
51
typedef struct NBDRequestData NBDRequestData;

/*
 * State of one in-flight client request.  Instances are created by
 * nbd_request_get() and destroyed by nbd_request_put().
 */
struct NBDRequestData {
    /* Queue linkage; not used by the code visible in this part of the file. */
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
    /* Owning client; nbd_request_get() takes a client reference for it. */
    NBDClient *client;
    /* Payload buffer from blk_try_blockalign(), released with qemu_vfree(). */
    uint8_t *data;
    /*
     * Set once the request, including any write payload, has been read
     * entirely from the channel (see nbd_co_receive_request()).
     */
    bool complete;
};
60
/* A block device made available to NBD clients. */
struct NBDExport {
    /* Reference count, managed by nbd_export_get()/nbd_export_put(). */
    int refcount;
    /* Optional callback run by nbd_export_put() when the last ref is dropped. */
    void (*close)(NBDExport *exp);

    /* Backend that services the I/O requests of this export. */
    BlockBackend *blk;
    /* Name used for lookups; NULL while the export is not in 'exports'. */
    char *name;
    /* Optional description advertised in NBD_OPT_LIST replies. */
    char *description;
    /* Added to the offset of every client request before accessing blk. */
    off_t dev_offset;
    /* Advertised size; nbd_export_new() rounds it down to BDRV_SECTOR_SIZE. */
    off_t size;
    /* Export flags, ORed into the transmission flags sent to clients. */
    uint16_t nbdflags;
    /* Clients currently attached to this export. */
    QTAILQ_HEAD(, NBDClient) clients;
    /* Linkage in the global 'exports' list. */
    QTAILQ_ENTRY(NBDExport) next;

    /* AioContext used for client fd handlers; NULL while detached. */
    AioContext *ctx;

    /* Backend whose medium removal closes the export, plus its notifier. */
    BlockBackend *eject_notifier_blk;
    Notifier eject_notifier;
};
79
/* All exports that currently have a name; see nbd_export_set_name(). */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
81
/* One client connection to the NBD server. */
struct NBDClient {
    /* Reference count, managed by nbd_client_get()/nbd_client_put(). */
    int refcount;
    /* Optional callback invoked once from client_close(). */
    void (*close)(NBDClient *client);

    /* Client sent NBD_FLAG_C_NO_ZEROES: skip the zero padding after the handshake. */
    bool no_zeroes;
    /* Export served to this client; may be NULL until negotiation selects one. */
    NBDExport *exp;
    /* TLS credentials and ACL name; tlscreds is NULL when TLS is not configured. */
    QCryptoTLSCreds *tlscreds;
    char *tlsaclname;
    /* Underlying socket channel; its fd is what the AIO handlers watch. */
    QIOChannelSocket *sioc;
    /* Channel used for I/O: the socket itself until a TLS channel replaces it. */
    QIOChannel *ioc;

    /* Coroutine currently inside nbd_co_receive_request(), or NULL. */
    Coroutine *recv_coroutine;

    /* Serialises replies; send_coroutine is the coroutine currently sending. */
    CoMutex send_lock;
    Coroutine *send_coroutine;

    /* Whether the read handler is installed; see nbd_update_can_read(). */
    bool can_read;

    /* Linkage in NBDExport.clients. */
    QTAILQ_ENTRY(NBDClient) next;
    /* Number of requests in flight, bounded by MAX_NBD_REQUESTS. */
    int nb_requests;
    /* Set by client_close(); no new requests are processed afterwards. */
    bool closing;
};
104
105
106
/* Forward declarations: the definitions are near the end of this file. */
static void nbd_set_handlers(NBDClient *client);
static void nbd_unset_handlers(NBDClient *client);
static void nbd_update_can_read(NBDClient *client);
110
111static gboolean nbd_negotiate_continue(QIOChannel *ioc,
112 GIOCondition condition,
113 void *opaque)
114{
115 qemu_coroutine_enter(opaque);
116 return TRUE;
117}
118
119static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
120{
121 ssize_t ret;
122 guint watch;
123
124 assert(qemu_in_coroutine());
125
126 watch = qio_channel_add_watch(ioc,
127 G_IO_IN,
128 nbd_negotiate_continue,
129 qemu_coroutine_self(),
130 NULL);
131 ret = read_sync(ioc, buffer, size);
132 g_source_remove(watch);
133 return ret;
134
135}
136
137static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer,
138 size_t size)
139{
140 ssize_t ret;
141 guint watch;
142
143 assert(qemu_in_coroutine());
144
145 watch = qio_channel_add_watch(ioc,
146 G_IO_OUT,
147 nbd_negotiate_continue,
148 qemu_coroutine_self(),
149 NULL);
150 ret = write_sync(ioc, buffer, size);
151 g_source_remove(watch);
152 return ret;
153}
154
155static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
156{
157 ssize_t ret, dropped = size;
158 uint8_t *buffer = g_malloc(MIN(65536, size));
159
160 while (size > 0) {
161 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size));
162 if (ret < 0) {
163 g_free(buffer);
164 return ret;
165 }
166
167 assert(ret <= size);
168 size -= ret;
169 }
170
171 g_free(buffer);
172 return dropped;
173}
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
205 uint32_t opt, uint32_t len)
206{
207 uint64_t magic;
208
209 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
210 type, opt, len);
211
212 magic = cpu_to_be64(NBD_REP_MAGIC);
213 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
214 LOG("write failed (rep magic)");
215 return -EINVAL;
216 }
217 opt = cpu_to_be32(opt);
218 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
219 LOG("write failed (rep opt)");
220 return -EINVAL;
221 }
222 type = cpu_to_be32(type);
223 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
224 LOG("write failed (rep type)");
225 return -EINVAL;
226 }
227 len = cpu_to_be32(len);
228 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
229 LOG("write failed (rep data length)");
230 return -EINVAL;
231 }
232 return 0;
233}
234
235
236
237static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
238{
239 return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
240}
241
242
243
244static int GCC_FMT_ATTR(4, 5)
245nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
246 uint32_t opt, const char *fmt, ...)
247{
248 va_list va;
249 char *msg;
250 int ret;
251 size_t len;
252
253 va_start(va, fmt);
254 msg = g_strdup_vprintf(fmt, va);
255 va_end(va);
256 len = strlen(msg);
257 assert(len < 4096);
258 TRACE("sending error message \"%s\"", msg);
259 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
260 if (ret < 0) {
261 goto out;
262 }
263 if (nbd_negotiate_write(ioc, msg, len) != len) {
264 LOG("write failed (error message)");
265 ret = -EIO;
266 } else {
267 ret = 0;
268 }
269out:
270 g_free(msg);
271 return ret;
272}
273
274
275
276static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
277{
278 size_t name_len, desc_len;
279 uint32_t len;
280 const char *name = exp->name ? exp->name : "";
281 const char *desc = exp->description ? exp->description : "";
282 int rc;
283
284 TRACE("Advertising export name '%s' description '%s'", name, desc);
285 name_len = strlen(name);
286 desc_len = strlen(desc);
287 len = name_len + desc_len + sizeof(len);
288 rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
289 if (rc < 0) {
290 return rc;
291 }
292
293 len = cpu_to_be32(name_len);
294 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
295 LOG("write failed (name length)");
296 return -EINVAL;
297 }
298 if (nbd_negotiate_write(ioc, name, name_len) != name_len) {
299 LOG("write failed (name buffer)");
300 return -EINVAL;
301 }
302 if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) {
303 LOG("write failed (description buffer)");
304 return -EINVAL;
305 }
306 return 0;
307}
308
309
310
311static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
312{
313 NBDExport *exp;
314
315 if (length) {
316 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
317 return -EIO;
318 }
319 return nbd_negotiate_send_rep_err(client->ioc,
320 NBD_REP_ERR_INVALID, NBD_OPT_LIST,
321 "OPT_LIST should not have length");
322 }
323
324
325 QTAILQ_FOREACH(exp, &exports, next) {
326 if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
327 return -EINVAL;
328 }
329 }
330
331 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
332}
333
334static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
335{
336 int rc = -EINVAL;
337 char name[NBD_MAX_NAME_SIZE + 1];
338
339
340
341
342 TRACE("Checking length");
343 if (length >= sizeof(name)) {
344 LOG("Bad length received");
345 goto fail;
346 }
347 if (nbd_negotiate_read(client->ioc, name, length) != length) {
348 LOG("read failed");
349 goto fail;
350 }
351 name[length] = '\0';
352
353 TRACE("Client requested export '%s'", name);
354
355 client->exp = nbd_export_find(name);
356 if (!client->exp) {
357 LOG("export not found");
358 goto fail;
359 }
360
361 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
362 nbd_export_get(client->exp);
363 rc = 0;
364fail:
365 return rc;
366}
367
368
369
370static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
371 uint32_t length)
372{
373 QIOChannel *ioc;
374 QIOChannelTLS *tioc;
375 struct NBDTLSHandshakeData data = { 0 };
376
377 TRACE("Setting up TLS");
378 ioc = client->ioc;
379 if (length) {
380 if (nbd_negotiate_drop_sync(ioc, length) != length) {
381 return NULL;
382 }
383 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
384 "OPT_STARTTLS should not have length");
385 return NULL;
386 }
387
388 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
389 NBD_OPT_STARTTLS) < 0) {
390 return NULL;
391 }
392
393 tioc = qio_channel_tls_new_server(ioc,
394 client->tlscreds,
395 client->tlsaclname,
396 NULL);
397 if (!tioc) {
398 return NULL;
399 }
400
401 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
402 TRACE("Starting TLS handshake");
403 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
404 qio_channel_tls_handshake(tioc,
405 nbd_tls_handshake,
406 &data,
407 NULL);
408
409 if (!data.complete) {
410 g_main_loop_run(data.loop);
411 }
412 g_main_loop_unref(data.loop);
413 if (data.error) {
414 object_unref(OBJECT(tioc));
415 error_free(data.error);
416 return NULL;
417 }
418
419 return QIO_CHANNEL(tioc);
420}
421
422
423
424
425static int nbd_negotiate_options(NBDClient *client)
426{
427 uint32_t flags;
428 bool fixedNewstyle = false;
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) !=
445 sizeof(flags)) {
446 LOG("read failed");
447 return -EIO;
448 }
449 TRACE("Checking client flags");
450 be32_to_cpus(&flags);
451 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
452 TRACE("Client supports fixed newstyle handshake");
453 fixedNewstyle = true;
454 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
455 }
456 if (flags & NBD_FLAG_C_NO_ZEROES) {
457 TRACE("Client supports no zeroes at handshake end");
458 client->no_zeroes = true;
459 flags &= ~NBD_FLAG_C_NO_ZEROES;
460 }
461 if (flags != 0) {
462 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
463 return -EIO;
464 }
465
466 while (1) {
467 int ret;
468 uint32_t clientflags, length;
469 uint64_t magic;
470
471 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) !=
472 sizeof(magic)) {
473 LOG("read failed");
474 return -EINVAL;
475 }
476 TRACE("Checking opts magic");
477 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
478 LOG("Bad magic received");
479 return -EINVAL;
480 }
481
482 if (nbd_negotiate_read(client->ioc, &clientflags,
483 sizeof(clientflags)) != sizeof(clientflags)) {
484 LOG("read failed");
485 return -EINVAL;
486 }
487 clientflags = be32_to_cpu(clientflags);
488
489 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) !=
490 sizeof(length)) {
491 LOG("read failed");
492 return -EINVAL;
493 }
494 length = be32_to_cpu(length);
495
496 TRACE("Checking option 0x%" PRIx32, clientflags);
497 if (client->tlscreds &&
498 client->ioc == (QIOChannel *)client->sioc) {
499 QIOChannel *tioc;
500 if (!fixedNewstyle) {
501 TRACE("Unsupported option 0x%" PRIx32, clientflags);
502 return -EINVAL;
503 }
504 switch (clientflags) {
505 case NBD_OPT_STARTTLS:
506 tioc = nbd_negotiate_handle_starttls(client, length);
507 if (!tioc) {
508 return -EIO;
509 }
510 object_unref(OBJECT(client->ioc));
511 client->ioc = QIO_CHANNEL(tioc);
512 break;
513
514 case NBD_OPT_EXPORT_NAME:
515
516 TRACE("Option 0x%x not permitted before TLS", clientflags);
517 return -EINVAL;
518
519 default:
520 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
521 return -EIO;
522 }
523 ret = nbd_negotiate_send_rep_err(client->ioc,
524 NBD_REP_ERR_TLS_REQD,
525 clientflags,
526 "Option 0x%" PRIx32
527 "not permitted before TLS",
528 clientflags);
529 if (ret < 0) {
530 return ret;
531 }
532
533 if (clientflags == NBD_OPT_ABORT) {
534 return -EINVAL;
535 }
536 break;
537 }
538 } else if (fixedNewstyle) {
539 switch (clientflags) {
540 case NBD_OPT_LIST:
541 ret = nbd_negotiate_handle_list(client, length);
542 if (ret < 0) {
543 return ret;
544 }
545 break;
546
547 case NBD_OPT_ABORT:
548
549
550
551 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
552 return -EINVAL;
553
554 case NBD_OPT_EXPORT_NAME:
555 return nbd_negotiate_handle_export_name(client, length);
556
557 case NBD_OPT_STARTTLS:
558 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
559 return -EIO;
560 }
561 if (client->tlscreds) {
562 ret = nbd_negotiate_send_rep_err(client->ioc,
563 NBD_REP_ERR_INVALID,
564 clientflags,
565 "TLS already enabled");
566 } else {
567 ret = nbd_negotiate_send_rep_err(client->ioc,
568 NBD_REP_ERR_POLICY,
569 clientflags,
570 "TLS not configured");
571 }
572 if (ret < 0) {
573 return ret;
574 }
575 break;
576 default:
577 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
578 return -EIO;
579 }
580 ret = nbd_negotiate_send_rep_err(client->ioc,
581 NBD_REP_ERR_UNSUP,
582 clientflags,
583 "Unsupported option 0x%"
584 PRIx32,
585 clientflags);
586 if (ret < 0) {
587 return ret;
588 }
589 break;
590 }
591 } else {
592
593
594
595
596 switch (clientflags) {
597 case NBD_OPT_EXPORT_NAME:
598 return nbd_negotiate_handle_export_name(client, length);
599
600 default:
601 TRACE("Unsupported option 0x%" PRIx32, clientflags);
602 return -EINVAL;
603 }
604 }
605 }
606}
607
/*
 * Arguments handed to the nbd_co_client_start() coroutine.  Allocated by
 * nbd_client_new() and freed by nbd_co_client_start().
 */
typedef struct {
    NBDClient *client;  /* client being set up */
    Coroutine *co;      /* the coroutine created to run nbd_co_client_start() */
} NBDClientNewData;
612
613static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
614{
615 NBDClient *client = data->client;
616 char buf[8 + 8 + 8 + 128];
617 int rc;
618 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
619 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
620 NBD_FLAG_SEND_WRITE_ZEROES);
621 bool oldStyle;
622 size_t len;
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642 qio_channel_set_blocking(client->ioc, false, NULL);
643 rc = -EINVAL;
644
645 TRACE("Beginning negotiation.");
646 memset(buf, 0, sizeof(buf));
647 memcpy(buf, "NBDMAGIC", 8);
648
649 oldStyle = client->exp != NULL && !client->tlscreds;
650 if (oldStyle) {
651 TRACE("advertising size %" PRIu64 " and flags %x",
652 client->exp->size, client->exp->nbdflags | myflags);
653 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
654 stq_be_p(buf + 16, client->exp->size);
655 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
656 } else {
657 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
658 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
659 }
660
661 if (oldStyle) {
662 if (client->tlscreds) {
663 TRACE("TLS cannot be enabled with oldstyle protocol");
664 goto fail;
665 }
666 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) {
667 LOG("write failed");
668 goto fail;
669 }
670 } else {
671 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) {
672 LOG("write failed");
673 goto fail;
674 }
675 rc = nbd_negotiate_options(client);
676 if (rc != 0) {
677 LOG("option negotiation failed");
678 goto fail;
679 }
680
681 TRACE("advertising size %" PRIu64 " and flags %x",
682 client->exp->size, client->exp->nbdflags | myflags);
683 stq_be_p(buf + 18, client->exp->size);
684 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
685 len = client->no_zeroes ? 10 : sizeof(buf) - 18;
686 if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) {
687 LOG("write failed");
688 goto fail;
689 }
690 }
691
692 TRACE("Negotiation succeeded.");
693 rc = 0;
694fail:
695 return rc;
696}
697
698static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
699{
700 uint8_t buf[NBD_REQUEST_SIZE];
701 uint32_t magic;
702 ssize_t ret;
703
704 ret = read_sync(ioc, buf, sizeof(buf));
705 if (ret < 0) {
706 return ret;
707 }
708
709 if (ret != sizeof(buf)) {
710 LOG("read failed");
711 return -EINVAL;
712 }
713
714
715
716
717
718
719
720
721
722
723 magic = ldl_be_p(buf);
724 request->flags = lduw_be_p(buf + 4);
725 request->type = lduw_be_p(buf + 6);
726 request->handle = ldq_be_p(buf + 8);
727 request->from = ldq_be_p(buf + 16);
728 request->len = ldl_be_p(buf + 24);
729
730 TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
731 ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
732 magic, request->flags, request->type, request->from, request->len);
733
734 if (magic != NBD_REQUEST_MAGIC) {
735 LOG("invalid magic (got 0x%" PRIx32 ")", magic);
736 return -EINVAL;
737 }
738 return 0;
739}
740
741static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
742{
743 uint8_t buf[NBD_REPLY_SIZE];
744 ssize_t ret;
745
746 reply->error = system_errno_to_nbd_errno(reply->error);
747
748 TRACE("Sending response to client: { .error = %" PRId32
749 ", handle = %" PRIu64 " }",
750 reply->error, reply->handle);
751
752
753
754
755
756
757 stl_be_p(buf, NBD_REPLY_MAGIC);
758 stl_be_p(buf + 4, reply->error);
759 stq_be_p(buf + 8, reply->handle);
760
761 ret = write_sync(ioc, buf, sizeof(buf));
762 if (ret < 0) {
763 return ret;
764 }
765
766 if (ret != sizeof(buf)) {
767 LOG("writing to socket failed");
768 return -EINVAL;
769 }
770 return 0;
771}
772
/* Upper bound on the number of requests one client may have in flight. */
#define MAX_NBD_REQUESTS 16
774
775void nbd_client_get(NBDClient *client)
776{
777 client->refcount++;
778}
779
780void nbd_client_put(NBDClient *client)
781{
782 if (--client->refcount == 0) {
783
784
785
786 assert(client->closing);
787
788 nbd_unset_handlers(client);
789 object_unref(OBJECT(client->sioc));
790 object_unref(OBJECT(client->ioc));
791 if (client->tlscreds) {
792 object_unref(OBJECT(client->tlscreds));
793 }
794 g_free(client->tlsaclname);
795 if (client->exp) {
796 QTAILQ_REMOVE(&client->exp->clients, client, next);
797 nbd_export_put(client->exp);
798 }
799 g_free(client);
800 }
801}
802
803static void client_close(NBDClient *client)
804{
805 if (client->closing) {
806 return;
807 }
808
809 client->closing = true;
810
811
812
813
814 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
815 NULL);
816
817
818 if (client->close) {
819 client->close(client);
820 }
821}
822
823static NBDRequestData *nbd_request_get(NBDClient *client)
824{
825 NBDRequestData *req;
826
827 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
828 client->nb_requests++;
829 nbd_update_can_read(client);
830
831 req = g_new0(NBDRequestData, 1);
832 nbd_client_get(client);
833 req->client = client;
834 return req;
835}
836
837static void nbd_request_put(NBDRequestData *req)
838{
839 NBDClient *client = req->client;
840
841 if (req->data) {
842 qemu_vfree(req->data);
843 }
844 g_free(req);
845
846 client->nb_requests--;
847 nbd_update_can_read(client);
848 nbd_client_put(client);
849}
850
851static void blk_aio_attached(AioContext *ctx, void *opaque)
852{
853 NBDExport *exp = opaque;
854 NBDClient *client;
855
856 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
857
858 exp->ctx = ctx;
859
860 QTAILQ_FOREACH(client, &exp->clients, next) {
861 nbd_set_handlers(client);
862 }
863}
864
865static void blk_aio_detach(void *opaque)
866{
867 NBDExport *exp = opaque;
868 NBDClient *client;
869
870 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
871
872 QTAILQ_FOREACH(client, &exp->clients, next) {
873 nbd_unset_handlers(client);
874 }
875
876 exp->ctx = NULL;
877}
878
879static void nbd_eject_notifier(Notifier *n, void *data)
880{
881 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
882 nbd_export_close(exp);
883}
884
885NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
886 uint16_t nbdflags, void (*close)(NBDExport *),
887 bool writethrough, BlockBackend *on_eject_blk,
888 Error **errp)
889{
890 BlockBackend *blk;
891 NBDExport *exp = g_malloc0(sizeof(NBDExport));
892
893 blk = blk_new();
894 blk_insert_bs(blk, bs);
895 blk_set_enable_write_cache(blk, !writethrough);
896
897 exp->refcount = 1;
898 QTAILQ_INIT(&exp->clients);
899 exp->blk = blk;
900 exp->dev_offset = dev_offset;
901 exp->nbdflags = nbdflags;
902 exp->size = size < 0 ? blk_getlength(blk) : size;
903 if (exp->size < 0) {
904 error_setg_errno(errp, -exp->size,
905 "Failed to determine the NBD export's length");
906 goto fail;
907 }
908 exp->size -= exp->size % BDRV_SECTOR_SIZE;
909
910 exp->close = close;
911 exp->ctx = blk_get_aio_context(blk);
912 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
913
914 if (on_eject_blk) {
915 blk_ref(on_eject_blk);
916 exp->eject_notifier_blk = on_eject_blk;
917 exp->eject_notifier.notify = nbd_eject_notifier;
918 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
919 }
920
921
922
923
924
925
926 aio_context_acquire(exp->ctx);
927 blk_invalidate_cache(blk, NULL);
928 aio_context_release(exp->ctx);
929 return exp;
930
931fail:
932 blk_unref(blk);
933 g_free(exp);
934 return NULL;
935}
936
937NBDExport *nbd_export_find(const char *name)
938{
939 NBDExport *exp;
940 QTAILQ_FOREACH(exp, &exports, next) {
941 if (strcmp(name, exp->name) == 0) {
942 return exp;
943 }
944 }
945
946 return NULL;
947}
948
949void nbd_export_set_name(NBDExport *exp, const char *name)
950{
951 if (exp->name == name) {
952 return;
953 }
954
955 nbd_export_get(exp);
956 if (exp->name != NULL) {
957 g_free(exp->name);
958 exp->name = NULL;
959 QTAILQ_REMOVE(&exports, exp, next);
960 nbd_export_put(exp);
961 }
962 if (name != NULL) {
963 nbd_export_get(exp);
964 exp->name = g_strdup(name);
965 QTAILQ_INSERT_TAIL(&exports, exp, next);
966 }
967 nbd_export_put(exp);
968}
969
970void nbd_export_set_description(NBDExport *exp, const char *description)
971{
972 g_free(exp->description);
973 exp->description = g_strdup(description);
974}
975
976void nbd_export_close(NBDExport *exp)
977{
978 NBDClient *client, *next;
979
980 nbd_export_get(exp);
981 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
982 client_close(client);
983 }
984 nbd_export_set_name(exp, NULL);
985 nbd_export_set_description(exp, NULL);
986 nbd_export_put(exp);
987}
988
989void nbd_export_get(NBDExport *exp)
990{
991 assert(exp->refcount > 0);
992 exp->refcount++;
993}
994
995void nbd_export_put(NBDExport *exp)
996{
997 assert(exp->refcount > 0);
998 if (exp->refcount == 1) {
999 nbd_export_close(exp);
1000 }
1001
1002 if (--exp->refcount == 0) {
1003 assert(exp->name == NULL);
1004 assert(exp->description == NULL);
1005
1006 if (exp->close) {
1007 exp->close(exp);
1008 }
1009
1010 if (exp->blk) {
1011 if (exp->eject_notifier_blk) {
1012 notifier_remove(&exp->eject_notifier);
1013 blk_unref(exp->eject_notifier_blk);
1014 }
1015 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
1016 blk_aio_detach, exp);
1017 blk_unref(exp->blk);
1018 exp->blk = NULL;
1019 }
1020
1021 g_free(exp);
1022 }
1023}
1024
1025BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
1026{
1027 return exp->blk;
1028}
1029
1030void nbd_export_close_all(void)
1031{
1032 NBDExport *exp, *next;
1033
1034 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
1035 nbd_export_close(exp);
1036 }
1037}
1038
1039static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
1040 int len)
1041{
1042 NBDClient *client = req->client;
1043 ssize_t rc, ret;
1044
1045 g_assert(qemu_in_coroutine());
1046 qemu_co_mutex_lock(&client->send_lock);
1047 client->send_coroutine = qemu_coroutine_self();
1048 nbd_set_handlers(client);
1049
1050 if (!len) {
1051 rc = nbd_send_reply(client->ioc, reply);
1052 } else {
1053 qio_channel_set_cork(client->ioc, true);
1054 rc = nbd_send_reply(client->ioc, reply);
1055 if (rc >= 0) {
1056 ret = write_sync(client->ioc, req->data, len);
1057 if (ret != len) {
1058 rc = -EIO;
1059 }
1060 }
1061 qio_channel_set_cork(client->ioc, false);
1062 }
1063
1064 client->send_coroutine = NULL;
1065 nbd_set_handlers(client);
1066 qemu_co_mutex_unlock(&client->send_lock);
1067 return rc;
1068}
1069
1070
1071
1072
1073
1074
/*
 * Receive and validate the next request of req->client.
 *
 * Must run in a coroutine; this coroutine is registered as the client's
 * receive coroutine for the duration of the call.
 *
 * Return values, as interpreted by nbd_trip():
 *   0        the request is valid; for reads and writes req->data is
 *            allocated, and for writes it holds the payload
 *   -EAGAIN  passed through from nbd_receive_request()
 *   -EIO     the header or payload could not be read, or the client asked
 *            to disconnect; the connection is to be closed
 *   other    negative errno to report to the client in an error reply
 *
 * req->complete is set once the whole request, including a write payload,
 * has been consumed from the channel.
 */
static ssize_t nbd_co_receive_request(NBDRequestData *req,
                                      NBDRequest *request)
{
    NBDClient *client = req->client;
    ssize_t rc;

    g_assert(qemu_in_coroutine());
    client->recv_coroutine = qemu_coroutine_self();
    nbd_update_can_read(client);

    rc = nbd_receive_request(client->ioc, request);
    if (rc < 0) {
        /* Every failure other than -EAGAIN is reported as -EIO. */
        if (rc != -EAGAIN) {
            rc = -EIO;
        }
        goto out;
    }

    TRACE("Decoding type");

    if (request->type != NBD_CMD_WRITE) {
        /* Only writes carry a payload after the header. */
        req->complete = true;
    }

    if (request->type == NBD_CMD_DISC) {
        /* -EIO makes the caller close the connection without replying. */
        TRACE("Request type is DISCONNECT");
        rc = -EIO;
        goto out;
    }

    /* Reject requests whose end offset wraps around. */
    if ((request->from + request->len) < request->from) {
        LOG("integer overflow detected, you're probably being attacked");
        rc = -EINVAL;
        goto out;
    }

    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
        if (request->len > NBD_MAX_BUFFER_SIZE) {
            LOG("len (%" PRIu32" ) is larger than max len (%u)",
                request->len, NBD_MAX_BUFFER_SIZE);
            rc = -EINVAL;
            goto out;
        }

        req->data = blk_try_blockalign(client->exp->blk, request->len);
        if (req->data == NULL) {
            rc = -ENOMEM;
            goto out;
        }
    }
    if (request->type == NBD_CMD_WRITE) {
        TRACE("Reading %" PRIu32 " byte(s)", request->len);

        if (read_sync(client->ioc, req->data, request->len) != request->len) {
            LOG("reading from socket failed");
            rc = -EIO;
            goto out;
        }
        req->complete = true;
    }

    /*
     * The checks below run after the payload has been consumed, so a
     * rejected write still leaves the channel at the next request header.
     */
    if (request->from + request->len > client->exp->size) {
        LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
            ", Size: %" PRIu64, request->from, request->len,
            (uint64_t)client->exp->size);
        rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
        goto out;
    }
    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
        LOG("unsupported flags (got 0x%x)", request->flags);
        rc = -EINVAL;
        goto out;
    }
    /* NBD_CMD_FLAG_NO_HOLE is only accepted on NBD_CMD_WRITE_ZEROES. */
    if (request->type != NBD_CMD_WRITE_ZEROES &&
        (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
        LOG("unexpected flags (got 0x%x)", request->flags);
        rc = -EINVAL;
        goto out;
    }

    rc = 0;

out:
    client->recv_coroutine = NULL;
    nbd_update_can_read(client);

    return rc;
}
1170
/*
 * Coroutine entry point that serves one request of the client passed as
 * @opaque: receive the request, execute it against the export's backend and
 * send the reply.
 *
 * Exit paths:
 *   done  release the request and keep the connection open
 *   out   release the request and close the connection
 */
static void nbd_trip(void *opaque)
{
    NBDClient *client = opaque;
    NBDExport *exp = client->exp;
    NBDRequestData *req;
    NBDRequest request;
    NBDReply reply;
    ssize_t ret;
    int flags;

    TRACE("Reading request.");
    if (client->closing) {
        return;
    }

    req = nbd_request_get(client);
    ret = nbd_co_receive_request(req, &request);
    if (ret == -EAGAIN) {
        goto done;
    }
    if (ret == -EIO) {
        goto out;
    }

    reply.handle = request.handle;
    reply.error = 0;

    /* Any other failure is reported to the client. */
    if (ret < 0) {
        reply.error = -ret;
        goto error_reply;
    }

    if (client->closing) {
        /* The client started closing while the request was being received. */
        goto done;
    }

    switch (request.type) {
    case NBD_CMD_READ:
        TRACE("Request type is READ");

        /* With FUA set, the backend is flushed before the read. */
        if (request.flags & NBD_CMD_FLAG_FUA) {
            ret = blk_co_flush(exp->blk);
            if (ret < 0) {
                LOG("flush failed");
                reply.error = -ret;
                goto error_reply;
            }
        }

        ret = blk_pread(exp->blk, request.from + exp->dev_offset,
                        req->data, request.len);
        if (ret < 0) {
            LOG("reading from file failed");
            reply.error = -ret;
            goto error_reply;
        }

        TRACE("Read %" PRIu32" byte(s)", request.len);
        if (nbd_co_send_reply(req, &reply, request.len) < 0)
            goto out;
        break;
    case NBD_CMD_WRITE:
        TRACE("Request type is WRITE");

        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
            TRACE("Server is read-only, return error");
            reply.error = EROFS;
            goto error_reply;
        }

        TRACE("Writing to device");

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
                         req->data, request.len, flags);
        if (ret < 0) {
            LOG("writing to file failed");
            reply.error = -ret;
            goto error_reply;
        }

        if (nbd_co_send_reply(req, &reply, 0) < 0) {
            goto out;
        }
        break;

    case NBD_CMD_WRITE_ZEROES:
        TRACE("Request type is WRITE_ZEROES");

        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
            TRACE("Server is read-only, return error");
            reply.error = EROFS;
            goto error_reply;
        }

        TRACE("Writing to device");

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        /* Unmapping is allowed unless the client set NO_HOLE. */
        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
                                request.len, flags);
        if (ret < 0) {
            LOG("writing to file failed");
            reply.error = -ret;
            goto error_reply;
        }

        if (nbd_co_send_reply(req, &reply, 0) < 0) {
            goto out;
        }
        break;

    case NBD_CMD_DISC:
        /* Unreachable: nbd_co_receive_request() returns -EIO for DISC. */
        abort();

    case NBD_CMD_FLUSH:
        TRACE("Request type is FLUSH");

        ret = blk_co_flush(exp->blk);
        if (ret < 0) {
            LOG("flush failed");
            reply.error = -ret;
        }
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
            goto out;
        }
        break;
    case NBD_CMD_TRIM:
        TRACE("Request type is TRIM");
        ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
                                       request.len);
        if (ret < 0) {
            LOG("discard failed");
            reply.error = -ret;
        }
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
            goto out;
        }
        break;
    default:
        LOG("invalid request type (%" PRIu32 ") received", request.type);
        reply.error = EINVAL;
    error_reply:
        /*
         * Shared error path, also reached by goto from above.  The
         * connection is closed if the reply cannot be sent or if the
         * request was not read completely from the channel.
         */
        if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
            goto out;
        }
        break;
    }

    TRACE("Request/Reply complete");

done:
    nbd_request_put(req);
    return;

out:
    nbd_request_put(req);
    client_close(client);
}
1347
1348static void nbd_read(void *opaque)
1349{
1350 NBDClient *client = opaque;
1351
1352 if (client->recv_coroutine) {
1353 qemu_coroutine_enter(client->recv_coroutine);
1354 } else {
1355 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client));
1356 }
1357}
1358
1359static void nbd_restart_write(void *opaque)
1360{
1361 NBDClient *client = opaque;
1362
1363 qemu_coroutine_enter(client->send_coroutine);
1364}
1365
1366static void nbd_set_handlers(NBDClient *client)
1367{
1368 if (client->exp && client->exp->ctx) {
1369 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1370 true,
1371 client->can_read ? nbd_read : NULL,
1372 client->send_coroutine ? nbd_restart_write : NULL,
1373 client);
1374 }
1375}
1376
1377static void nbd_unset_handlers(NBDClient *client)
1378{
1379 if (client->exp && client->exp->ctx) {
1380 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1381 true, NULL, NULL, NULL);
1382 }
1383}
1384
1385static void nbd_update_can_read(NBDClient *client)
1386{
1387 bool can_read = client->recv_coroutine ||
1388 client->nb_requests < MAX_NBD_REQUESTS;
1389
1390 if (can_read != client->can_read) {
1391 client->can_read = can_read;
1392 nbd_set_handlers(client);
1393
1394
1395
1396 }
1397}
1398
1399static coroutine_fn void nbd_co_client_start(void *opaque)
1400{
1401 NBDClientNewData *data = opaque;
1402 NBDClient *client = data->client;
1403 NBDExport *exp = client->exp;
1404
1405 if (exp) {
1406 nbd_export_get(exp);
1407 }
1408 if (nbd_negotiate(data)) {
1409 client_close(client);
1410 goto out;
1411 }
1412 qemu_co_mutex_init(&client->send_lock);
1413 nbd_set_handlers(client);
1414
1415 if (exp) {
1416 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1417 }
1418out:
1419 g_free(data);
1420}
1421
1422void nbd_client_new(NBDExport *exp,
1423 QIOChannelSocket *sioc,
1424 QCryptoTLSCreds *tlscreds,
1425 const char *tlsaclname,
1426 void (*close_fn)(NBDClient *))
1427{
1428 NBDClient *client;
1429 NBDClientNewData *data = g_new(NBDClientNewData, 1);
1430
1431 client = g_malloc0(sizeof(NBDClient));
1432 client->refcount = 1;
1433 client->exp = exp;
1434 client->tlscreds = tlscreds;
1435 if (tlscreds) {
1436 object_ref(OBJECT(client->tlscreds));
1437 }
1438 client->tlsaclname = g_strdup(tlsaclname);
1439 client->sioc = sioc;
1440 object_ref(OBJECT(client->sioc));
1441 client->ioc = QIO_CHANNEL(sioc);
1442 object_ref(OBJECT(client->ioc));
1443 client->can_read = true;
1444 client->close = close_fn;
1445
1446 data->client = client;
1447 data->co = qemu_coroutine_create(nbd_co_client_start, data);
1448 qemu_coroutine_enter(data->co);
1449}
1450