1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include "qemu/osdep.h"
20#include "qapi/error.h"
21#include "nbd-internal.h"
22
23static int system_errno_to_nbd_errno(int err)
24{
25 switch (err) {
26 case 0:
27 return NBD_SUCCESS;
28 case EPERM:
29 case EROFS:
30 return NBD_EPERM;
31 case EIO:
32 return NBD_EIO;
33 case ENOMEM:
34 return NBD_ENOMEM;
35#ifdef EDQUOT
36 case EDQUOT:
37#endif
38 case EFBIG:
39 case ENOSPC:
40 return NBD_ENOSPC;
41 case EINVAL:
42 default:
43 return NBD_EINVAL;
44 }
45}
46
47
48
49typedef struct NBDRequest NBDRequest;
50
51struct NBDRequest {
52 QSIMPLEQ_ENTRY(NBDRequest) entry;
53 NBDClient *client;
54 uint8_t *data;
55 bool complete;
56};
57
58struct NBDExport {
59 int refcount;
60 void (*close)(NBDExport *exp);
61
62 BlockBackend *blk;
63 char *name;
64 off_t dev_offset;
65 off_t size;
66 uint16_t nbdflags;
67 QTAILQ_HEAD(, NBDClient) clients;
68 QTAILQ_ENTRY(NBDExport) next;
69
70 AioContext *ctx;
71
72 Notifier eject_notifier;
73};
74
75static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
76
77struct NBDClient {
78 int refcount;
79 void (*close)(NBDClient *client);
80
81 NBDExport *exp;
82 QCryptoTLSCreds *tlscreds;
83 char *tlsaclname;
84 QIOChannelSocket *sioc;
85 QIOChannel *ioc;
86
87 Coroutine *recv_coroutine;
88
89 CoMutex send_lock;
90 Coroutine *send_coroutine;
91
92 bool can_read;
93
94 QTAILQ_ENTRY(NBDClient) next;
95 int nb_requests;
96 bool closing;
97};
98
99
100
101static void nbd_set_handlers(NBDClient *client);
102static void nbd_unset_handlers(NBDClient *client);
103static void nbd_update_can_read(NBDClient *client);
104
105static gboolean nbd_negotiate_continue(QIOChannel *ioc,
106 GIOCondition condition,
107 void *opaque)
108{
109 qemu_coroutine_enter(opaque);
110 return TRUE;
111}
112
113static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
114{
115 ssize_t ret;
116 guint watch;
117
118 assert(qemu_in_coroutine());
119
120 watch = qio_channel_add_watch(ioc,
121 G_IO_IN,
122 nbd_negotiate_continue,
123 qemu_coroutine_self(),
124 NULL);
125 ret = read_sync(ioc, buffer, size);
126 g_source_remove(watch);
127 return ret;
128
129}
130
131static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size)
132{
133 ssize_t ret;
134 guint watch;
135
136 assert(qemu_in_coroutine());
137
138 watch = qio_channel_add_watch(ioc,
139 G_IO_OUT,
140 nbd_negotiate_continue,
141 qemu_coroutine_self(),
142 NULL);
143 ret = write_sync(ioc, buffer, size);
144 g_source_remove(watch);
145 return ret;
146}
147
148static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
149{
150 ssize_t ret, dropped = size;
151 uint8_t *buffer = g_malloc(MIN(65536, size));
152
153 while (size > 0) {
154 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size));
155 if (ret < 0) {
156 g_free(buffer);
157 return ret;
158 }
159
160 assert(ret <= size);
161 size -= ret;
162 }
163
164 g_free(buffer);
165 return dropped;
166}
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
196{
197 uint64_t magic;
198 uint32_t len;
199
200 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
201
202 magic = cpu_to_be64(NBD_REP_MAGIC);
203 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
204 LOG("write failed (rep magic)");
205 return -EINVAL;
206 }
207 opt = cpu_to_be32(opt);
208 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
209 LOG("write failed (rep opt)");
210 return -EINVAL;
211 }
212 type = cpu_to_be32(type);
213 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
214 LOG("write failed (rep type)");
215 return -EINVAL;
216 }
217 len = cpu_to_be32(0);
218 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
219 LOG("write failed (rep data length)");
220 return -EINVAL;
221 }
222 return 0;
223}
224
225static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
226{
227 uint64_t magic, name_len;
228 uint32_t opt, type, len;
229
230 TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
231 name_len = strlen(exp->name);
232 magic = cpu_to_be64(NBD_REP_MAGIC);
233 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
234 LOG("write failed (magic)");
235 return -EINVAL;
236 }
237 opt = cpu_to_be32(NBD_OPT_LIST);
238 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
239 LOG("write failed (opt)");
240 return -EINVAL;
241 }
242 type = cpu_to_be32(NBD_REP_SERVER);
243 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
244 LOG("write failed (reply type)");
245 return -EINVAL;
246 }
247 len = cpu_to_be32(name_len + sizeof(len));
248 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
249 LOG("write failed (length)");
250 return -EINVAL;
251 }
252 len = cpu_to_be32(name_len);
253 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
254 LOG("write failed (length)");
255 return -EINVAL;
256 }
257 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) {
258 LOG("write failed (buffer)");
259 return -EINVAL;
260 }
261 return 0;
262}
263
264static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
265{
266 NBDExport *exp;
267
268 if (length) {
269 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
270 return -EIO;
271 }
272 return nbd_negotiate_send_rep(client->ioc,
273 NBD_REP_ERR_INVALID, NBD_OPT_LIST);
274 }
275
276
277 QTAILQ_FOREACH(exp, &exports, next) {
278 if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
279 return -EINVAL;
280 }
281 }
282
283 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
284}
285
286static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
287{
288 int rc = -EINVAL;
289 char name[NBD_MAX_NAME_SIZE + 1];
290
291
292
293
294 TRACE("Checking length");
295 if (length >= sizeof(name)) {
296 LOG("Bad length received");
297 goto fail;
298 }
299 if (nbd_negotiate_read(client->ioc, name, length) != length) {
300 LOG("read failed");
301 goto fail;
302 }
303 name[length] = '\0';
304
305 TRACE("Client requested export '%s'", name);
306
307 client->exp = nbd_export_find(name);
308 if (!client->exp) {
309 LOG("export not found");
310 goto fail;
311 }
312
313 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
314 nbd_export_get(client->exp);
315 rc = 0;
316fail:
317 return rc;
318}
319
320
321static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
322 uint32_t length)
323{
324 QIOChannel *ioc;
325 QIOChannelTLS *tioc;
326 struct NBDTLSHandshakeData data = { 0 };
327
328 TRACE("Setting up TLS");
329 ioc = client->ioc;
330 if (length) {
331 if (nbd_negotiate_drop_sync(ioc, length) != length) {
332 return NULL;
333 }
334 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
335 return NULL;
336 }
337
338 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
339 NBD_OPT_STARTTLS) < 0) {
340 return NULL;
341 }
342
343 tioc = qio_channel_tls_new_server(ioc,
344 client->tlscreds,
345 client->tlsaclname,
346 NULL);
347 if (!tioc) {
348 return NULL;
349 }
350
351 TRACE("Starting TLS handshake");
352 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
353 qio_channel_tls_handshake(tioc,
354 nbd_tls_handshake,
355 &data,
356 NULL);
357
358 if (!data.complete) {
359 g_main_loop_run(data.loop);
360 }
361 g_main_loop_unref(data.loop);
362 if (data.error) {
363 object_unref(OBJECT(tioc));
364 error_free(data.error);
365 return NULL;
366 }
367
368 return QIO_CHANNEL(tioc);
369}
370
371
372static int nbd_negotiate_options(NBDClient *client)
373{
374 uint32_t flags;
375 bool fixedNewstyle = false;
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) !=
392 sizeof(flags)) {
393 LOG("read failed");
394 return -EIO;
395 }
396 TRACE("Checking client flags");
397 be32_to_cpus(&flags);
398 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
399 TRACE("Client supports fixed newstyle handshake");
400 fixedNewstyle = true;
401 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
402 }
403 if (flags != 0) {
404 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
405 return -EIO;
406 }
407
408 while (1) {
409 int ret;
410 uint32_t clientflags, length;
411 uint64_t magic;
412
413 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) !=
414 sizeof(magic)) {
415 LOG("read failed");
416 return -EINVAL;
417 }
418 TRACE("Checking opts magic");
419 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
420 LOG("Bad magic received");
421 return -EINVAL;
422 }
423
424 if (nbd_negotiate_read(client->ioc, &clientflags,
425 sizeof(clientflags)) != sizeof(clientflags)) {
426 LOG("read failed");
427 return -EINVAL;
428 }
429 clientflags = be32_to_cpu(clientflags);
430
431 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) !=
432 sizeof(length)) {
433 LOG("read failed");
434 return -EINVAL;
435 }
436 length = be32_to_cpu(length);
437
438 TRACE("Checking option 0x%" PRIx32, clientflags);
439 if (client->tlscreds &&
440 client->ioc == (QIOChannel *)client->sioc) {
441 QIOChannel *tioc;
442 if (!fixedNewstyle) {
443 TRACE("Unsupported option 0x%" PRIx32, clientflags);
444 return -EINVAL;
445 }
446 switch (clientflags) {
447 case NBD_OPT_STARTTLS:
448 tioc = nbd_negotiate_handle_starttls(client, length);
449 if (!tioc) {
450 return -EIO;
451 }
452 object_unref(OBJECT(client->ioc));
453 client->ioc = QIO_CHANNEL(tioc);
454 break;
455
456 case NBD_OPT_EXPORT_NAME:
457
458 TRACE("Option 0x%x not permitted before TLS", clientflags);
459 return -EINVAL;
460
461 default:
462 TRACE("Option 0x%" PRIx32 " not permitted before TLS",
463 clientflags);
464 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
465 return -EIO;
466 }
467 ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
468 clientflags);
469 if (ret < 0) {
470 return ret;
471 }
472 break;
473 }
474 } else if (fixedNewstyle) {
475 switch (clientflags) {
476 case NBD_OPT_LIST:
477 ret = nbd_negotiate_handle_list(client, length);
478 if (ret < 0) {
479 return ret;
480 }
481 break;
482
483 case NBD_OPT_ABORT:
484 return -EINVAL;
485
486 case NBD_OPT_EXPORT_NAME:
487 return nbd_negotiate_handle_export_name(client, length);
488
489 case NBD_OPT_STARTTLS:
490 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
491 return -EIO;
492 }
493 if (client->tlscreds) {
494 TRACE("TLS already enabled");
495 ret = nbd_negotiate_send_rep(client->ioc,
496 NBD_REP_ERR_INVALID,
497 clientflags);
498 } else {
499 TRACE("TLS not configured");
500 ret = nbd_negotiate_send_rep(client->ioc,
501 NBD_REP_ERR_POLICY,
502 clientflags);
503 }
504 if (ret < 0) {
505 return ret;
506 }
507 break;
508 default:
509 TRACE("Unsupported option 0x%" PRIx32, clientflags);
510 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
511 return -EIO;
512 }
513 ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
514 clientflags);
515 if (ret < 0) {
516 return ret;
517 }
518 break;
519 }
520 } else {
521
522
523
524
525 switch (clientflags) {
526 case NBD_OPT_EXPORT_NAME:
527 return nbd_negotiate_handle_export_name(client, length);
528
529 default:
530 TRACE("Unsupported option 0x%" PRIx32, clientflags);
531 return -EINVAL;
532 }
533 }
534 }
535}
536
537typedef struct {
538 NBDClient *client;
539 Coroutine *co;
540} NBDClientNewData;
541
542static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
543{
544 NBDClient *client = data->client;
545 char buf[8 + 8 + 8 + 128];
546 int rc;
547 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
548 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
549 bool oldStyle;
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569 qio_channel_set_blocking(client->ioc, false, NULL);
570 rc = -EINVAL;
571
572 TRACE("Beginning negotiation.");
573 memset(buf, 0, sizeof(buf));
574 memcpy(buf, "NBDMAGIC", 8);
575
576 oldStyle = client->exp != NULL && !client->tlscreds;
577 if (oldStyle) {
578 TRACE("advertising size %" PRIu64 " and flags %x",
579 client->exp->size, client->exp->nbdflags | myflags);
580 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
581 stq_be_p(buf + 16, client->exp->size);
582 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
583 } else {
584 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
585 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
586 }
587
588 if (oldStyle) {
589 if (client->tlscreds) {
590 TRACE("TLS cannot be enabled with oldstyle protocol");
591 goto fail;
592 }
593 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) {
594 LOG("write failed");
595 goto fail;
596 }
597 } else {
598 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) {
599 LOG("write failed");
600 goto fail;
601 }
602 rc = nbd_negotiate_options(client);
603 if (rc != 0) {
604 LOG("option negotiation failed");
605 goto fail;
606 }
607
608 TRACE("advertising size %" PRIu64 " and flags %x",
609 client->exp->size, client->exp->nbdflags | myflags);
610 stq_be_p(buf + 18, client->exp->size);
611 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
612 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) !=
613 sizeof(buf) - 18) {
614 LOG("write failed");
615 goto fail;
616 }
617 }
618
619 TRACE("Negotiation succeeded.");
620 rc = 0;
621fail:
622 return rc;
623}
624
625static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
626{
627 uint8_t buf[NBD_REQUEST_SIZE];
628 uint32_t magic;
629 ssize_t ret;
630
631 ret = read_sync(ioc, buf, sizeof(buf));
632 if (ret < 0) {
633 return ret;
634 }
635
636 if (ret != sizeof(buf)) {
637 LOG("read failed");
638 return -EINVAL;
639 }
640
641
642
643
644
645
646
647
648
649 magic = ldl_be_p(buf);
650 request->type = ldl_be_p(buf + 4);
651 request->handle = ldq_be_p(buf + 8);
652 request->from = ldq_be_p(buf + 16);
653 request->len = ldl_be_p(buf + 24);
654
655 TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32
656 ", from = %" PRIu64 " , len = %" PRIu32 " }",
657 magic, request->type, request->from, request->len);
658
659 if (magic != NBD_REQUEST_MAGIC) {
660 LOG("invalid magic (got 0x%" PRIx32 ")", magic);
661 return -EINVAL;
662 }
663 return 0;
664}
665
666static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply)
667{
668 uint8_t buf[NBD_REPLY_SIZE];
669 ssize_t ret;
670
671 reply->error = system_errno_to_nbd_errno(reply->error);
672
673 TRACE("Sending response to client: { .error = %" PRId32
674 ", handle = %" PRIu64 " }",
675 reply->error, reply->handle);
676
677
678
679
680
681
682 stl_be_p(buf, NBD_REPLY_MAGIC);
683 stl_be_p(buf + 4, reply->error);
684 stq_be_p(buf + 8, reply->handle);
685
686 ret = write_sync(ioc, buf, sizeof(buf));
687 if (ret < 0) {
688 return ret;
689 }
690
691 if (ret != sizeof(buf)) {
692 LOG("writing to socket failed");
693 return -EINVAL;
694 }
695 return 0;
696}
697
698#define MAX_NBD_REQUESTS 16
699
700void nbd_client_get(NBDClient *client)
701{
702 client->refcount++;
703}
704
705void nbd_client_put(NBDClient *client)
706{
707 if (--client->refcount == 0) {
708
709
710
711 assert(client->closing);
712
713 nbd_unset_handlers(client);
714 object_unref(OBJECT(client->sioc));
715 object_unref(OBJECT(client->ioc));
716 if (client->tlscreds) {
717 object_unref(OBJECT(client->tlscreds));
718 }
719 g_free(client->tlsaclname);
720 if (client->exp) {
721 QTAILQ_REMOVE(&client->exp->clients, client, next);
722 nbd_export_put(client->exp);
723 }
724 g_free(client);
725 }
726}
727
728static void client_close(NBDClient *client)
729{
730 if (client->closing) {
731 return;
732 }
733
734 client->closing = true;
735
736
737
738
739 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
740 NULL);
741
742
743 if (client->close) {
744 client->close(client);
745 }
746}
747
748static NBDRequest *nbd_request_get(NBDClient *client)
749{
750 NBDRequest *req;
751
752 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
753 client->nb_requests++;
754 nbd_update_can_read(client);
755
756 req = g_new0(NBDRequest, 1);
757 nbd_client_get(client);
758 req->client = client;
759 return req;
760}
761
762static void nbd_request_put(NBDRequest *req)
763{
764 NBDClient *client = req->client;
765
766 if (req->data) {
767 qemu_vfree(req->data);
768 }
769 g_free(req);
770
771 client->nb_requests--;
772 nbd_update_can_read(client);
773 nbd_client_put(client);
774}
775
776static void blk_aio_attached(AioContext *ctx, void *opaque)
777{
778 NBDExport *exp = opaque;
779 NBDClient *client;
780
781 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
782
783 exp->ctx = ctx;
784
785 QTAILQ_FOREACH(client, &exp->clients, next) {
786 nbd_set_handlers(client);
787 }
788}
789
790static void blk_aio_detach(void *opaque)
791{
792 NBDExport *exp = opaque;
793 NBDClient *client;
794
795 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
796
797 QTAILQ_FOREACH(client, &exp->clients, next) {
798 nbd_unset_handlers(client);
799 }
800
801 exp->ctx = NULL;
802}
803
804static void nbd_eject_notifier(Notifier *n, void *data)
805{
806 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
807 nbd_export_close(exp);
808}
809
810NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
811 uint16_t nbdflags, void (*close)(NBDExport *),
812 Error **errp)
813{
814 NBDExport *exp = g_malloc0(sizeof(NBDExport));
815 exp->refcount = 1;
816 QTAILQ_INIT(&exp->clients);
817 exp->blk = blk;
818 exp->dev_offset = dev_offset;
819 exp->nbdflags = nbdflags;
820 exp->size = size < 0 ? blk_getlength(blk) : size;
821 if (exp->size < 0) {
822 error_setg_errno(errp, -exp->size,
823 "Failed to determine the NBD export's length");
824 goto fail;
825 }
826 exp->size -= exp->size % BDRV_SECTOR_SIZE;
827
828 exp->close = close;
829 exp->ctx = blk_get_aio_context(blk);
830 blk_ref(blk);
831 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
832
833 exp->eject_notifier.notify = nbd_eject_notifier;
834 blk_add_remove_bs_notifier(blk, &exp->eject_notifier);
835
836
837
838
839
840
841 aio_context_acquire(exp->ctx);
842 blk_invalidate_cache(blk, NULL);
843 aio_context_release(exp->ctx);
844 return exp;
845
846fail:
847 g_free(exp);
848 return NULL;
849}
850
851NBDExport *nbd_export_find(const char *name)
852{
853 NBDExport *exp;
854 QTAILQ_FOREACH(exp, &exports, next) {
855 if (strcmp(name, exp->name) == 0) {
856 return exp;
857 }
858 }
859
860 return NULL;
861}
862
863void nbd_export_set_name(NBDExport *exp, const char *name)
864{
865 if (exp->name == name) {
866 return;
867 }
868
869 nbd_export_get(exp);
870 if (exp->name != NULL) {
871 g_free(exp->name);
872 exp->name = NULL;
873 QTAILQ_REMOVE(&exports, exp, next);
874 nbd_export_put(exp);
875 }
876 if (name != NULL) {
877 nbd_export_get(exp);
878 exp->name = g_strdup(name);
879 QTAILQ_INSERT_TAIL(&exports, exp, next);
880 }
881 nbd_export_put(exp);
882}
883
884void nbd_export_close(NBDExport *exp)
885{
886 NBDClient *client, *next;
887
888 nbd_export_get(exp);
889 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
890 client_close(client);
891 }
892 nbd_export_set_name(exp, NULL);
893 nbd_export_put(exp);
894}
895
896void nbd_export_get(NBDExport *exp)
897{
898 assert(exp->refcount > 0);
899 exp->refcount++;
900}
901
902void nbd_export_put(NBDExport *exp)
903{
904 assert(exp->refcount > 0);
905 if (exp->refcount == 1) {
906 nbd_export_close(exp);
907 }
908
909 if (--exp->refcount == 0) {
910 assert(exp->name == NULL);
911
912 if (exp->close) {
913 exp->close(exp);
914 }
915
916 if (exp->blk) {
917 notifier_remove(&exp->eject_notifier);
918 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
919 blk_aio_detach, exp);
920 blk_unref(exp->blk);
921 exp->blk = NULL;
922 }
923
924 g_free(exp);
925 }
926}
927
928BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
929{
930 return exp->blk;
931}
932
933void nbd_export_close_all(void)
934{
935 NBDExport *exp, *next;
936
937 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
938 nbd_export_close(exp);
939 }
940}
941
942static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
943 int len)
944{
945 NBDClient *client = req->client;
946 ssize_t rc, ret;
947
948 g_assert(qemu_in_coroutine());
949 qemu_co_mutex_lock(&client->send_lock);
950 client->send_coroutine = qemu_coroutine_self();
951 nbd_set_handlers(client);
952
953 if (!len) {
954 rc = nbd_send_reply(client->ioc, reply);
955 } else {
956 qio_channel_set_cork(client->ioc, true);
957 rc = nbd_send_reply(client->ioc, reply);
958 if (rc >= 0) {
959 ret = write_sync(client->ioc, req->data, len);
960 if (ret != len) {
961 rc = -EIO;
962 }
963 }
964 qio_channel_set_cork(client->ioc, false);
965 }
966
967 client->send_coroutine = NULL;
968 nbd_set_handlers(client);
969 qemu_co_mutex_unlock(&client->send_lock);
970 return rc;
971}
972
973
974
975
976
977
978static ssize_t nbd_co_receive_request(NBDRequest *req,
979 struct nbd_request *request)
980{
981 NBDClient *client = req->client;
982 uint32_t command;
983 ssize_t rc;
984
985 g_assert(qemu_in_coroutine());
986 client->recv_coroutine = qemu_coroutine_self();
987 nbd_update_can_read(client);
988
989 rc = nbd_receive_request(client->ioc, request);
990 if (rc < 0) {
991 if (rc != -EAGAIN) {
992 rc = -EIO;
993 }
994 goto out;
995 }
996
997 TRACE("Decoding type");
998
999 command = request->type & NBD_CMD_MASK_COMMAND;
1000 if (command != NBD_CMD_WRITE) {
1001
1002 req->complete = true;
1003 }
1004
1005 if (command == NBD_CMD_DISC) {
1006
1007
1008 TRACE("Request type is DISCONNECT");
1009 rc = -EIO;
1010 goto out;
1011 }
1012
1013
1014
1015
1016 if ((request->from + request->len) < request->from) {
1017 LOG("integer overflow detected, you're probably being attacked");
1018 rc = -EINVAL;
1019 goto out;
1020 }
1021
1022 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
1023 if (request->len > NBD_MAX_BUFFER_SIZE) {
1024 LOG("len (%" PRIu32" ) is larger than max len (%u)",
1025 request->len, NBD_MAX_BUFFER_SIZE);
1026 rc = -EINVAL;
1027 goto out;
1028 }
1029
1030 req->data = blk_try_blockalign(client->exp->blk, request->len);
1031 if (req->data == NULL) {
1032 rc = -ENOMEM;
1033 goto out;
1034 }
1035 }
1036 if (command == NBD_CMD_WRITE) {
1037 TRACE("Reading %" PRIu32 " byte(s)", request->len);
1038
1039 if (read_sync(client->ioc, req->data, request->len) != request->len) {
1040 LOG("reading from socket failed");
1041 rc = -EIO;
1042 goto out;
1043 }
1044 req->complete = true;
1045 }
1046
1047
1048 if (request->from + request->len > client->exp->size) {
1049 LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
1050 ", Size: %" PRIu64, request->from, request->len,
1051 (uint64_t)client->exp->size);
1052 rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
1053 goto out;
1054 }
1055 if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) {
1056 LOG("unsupported flags (got 0x%x)",
1057 request->type & ~NBD_CMD_MASK_COMMAND);
1058 rc = -EINVAL;
1059 goto out;
1060 }
1061
1062 rc = 0;
1063
1064out:
1065 client->recv_coroutine = NULL;
1066 nbd_update_can_read(client);
1067
1068 return rc;
1069}
1070
1071static void nbd_trip(void *opaque)
1072{
1073 NBDClient *client = opaque;
1074 NBDExport *exp = client->exp;
1075 NBDRequest *req;
1076 struct nbd_request request;
1077 struct nbd_reply reply;
1078 ssize_t ret;
1079 uint32_t command;
1080 int flags;
1081
1082 TRACE("Reading request.");
1083 if (client->closing) {
1084 return;
1085 }
1086
1087 req = nbd_request_get(client);
1088 ret = nbd_co_receive_request(req, &request);
1089 if (ret == -EAGAIN) {
1090 goto done;
1091 }
1092 if (ret == -EIO) {
1093 goto out;
1094 }
1095
1096 reply.handle = request.handle;
1097 reply.error = 0;
1098
1099 if (ret < 0) {
1100 reply.error = -ret;
1101 goto error_reply;
1102 }
1103 command = request.type & NBD_CMD_MASK_COMMAND;
1104
1105 if (client->closing) {
1106
1107
1108
1109
1110 goto done;
1111 }
1112
1113 switch (command) {
1114 case NBD_CMD_READ:
1115 TRACE("Request type is READ");
1116
1117 if (request.type & NBD_CMD_FLAG_FUA) {
1118 ret = blk_co_flush(exp->blk);
1119 if (ret < 0) {
1120 LOG("flush failed");
1121 reply.error = -ret;
1122 goto error_reply;
1123 }
1124 }
1125
1126 ret = blk_pread(exp->blk, request.from + exp->dev_offset,
1127 req->data, request.len);
1128 if (ret < 0) {
1129 LOG("reading from file failed");
1130 reply.error = -ret;
1131 goto error_reply;
1132 }
1133
1134 TRACE("Read %" PRIu32" byte(s)", request.len);
1135 if (nbd_co_send_reply(req, &reply, request.len) < 0)
1136 goto out;
1137 break;
1138 case NBD_CMD_WRITE:
1139 TRACE("Request type is WRITE");
1140
1141 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1142 TRACE("Server is read-only, return error");
1143 reply.error = EROFS;
1144 goto error_reply;
1145 }
1146
1147 TRACE("Writing to device");
1148
1149 flags = 0;
1150 if (request.type & NBD_CMD_FLAG_FUA) {
1151 flags |= BDRV_REQ_FUA;
1152 }
1153 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
1154 req->data, request.len, flags);
1155 if (ret < 0) {
1156 LOG("writing to file failed");
1157 reply.error = -ret;
1158 goto error_reply;
1159 }
1160
1161 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1162 goto out;
1163 }
1164 break;
1165
1166 case NBD_CMD_DISC:
1167
1168 abort();
1169
1170 case NBD_CMD_FLUSH:
1171 TRACE("Request type is FLUSH");
1172
1173 ret = blk_co_flush(exp->blk);
1174 if (ret < 0) {
1175 LOG("flush failed");
1176 reply.error = -ret;
1177 }
1178 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1179 goto out;
1180 }
1181 break;
1182 case NBD_CMD_TRIM:
1183 TRACE("Request type is TRIM");
1184 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
1185 request.len);
1186 if (ret < 0) {
1187 LOG("discard failed");
1188 reply.error = -ret;
1189 }
1190 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1191 goto out;
1192 }
1193 break;
1194 default:
1195 LOG("invalid request type (%" PRIu32 ") received", request.type);
1196 reply.error = EINVAL;
1197 error_reply:
1198
1199
1200
1201 if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
1202 goto out;
1203 }
1204 break;
1205 }
1206
1207 TRACE("Request/Reply complete");
1208
1209done:
1210 nbd_request_put(req);
1211 return;
1212
1213out:
1214 nbd_request_put(req);
1215 client_close(client);
1216}
1217
1218static void nbd_read(void *opaque)
1219{
1220 NBDClient *client = opaque;
1221
1222 if (client->recv_coroutine) {
1223 qemu_coroutine_enter(client->recv_coroutine);
1224 } else {
1225 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client));
1226 }
1227}
1228
1229static void nbd_restart_write(void *opaque)
1230{
1231 NBDClient *client = opaque;
1232
1233 qemu_coroutine_enter(client->send_coroutine);
1234}
1235
1236static void nbd_set_handlers(NBDClient *client)
1237{
1238 if (client->exp && client->exp->ctx) {
1239 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1240 true,
1241 client->can_read ? nbd_read : NULL,
1242 client->send_coroutine ? nbd_restart_write : NULL,
1243 client);
1244 }
1245}
1246
1247static void nbd_unset_handlers(NBDClient *client)
1248{
1249 if (client->exp && client->exp->ctx) {
1250 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1251 true, NULL, NULL, NULL);
1252 }
1253}
1254
1255static void nbd_update_can_read(NBDClient *client)
1256{
1257 bool can_read = client->recv_coroutine ||
1258 client->nb_requests < MAX_NBD_REQUESTS;
1259
1260 if (can_read != client->can_read) {
1261 client->can_read = can_read;
1262 nbd_set_handlers(client);
1263
1264
1265
1266 }
1267}
1268
1269static coroutine_fn void nbd_co_client_start(void *opaque)
1270{
1271 NBDClientNewData *data = opaque;
1272 NBDClient *client = data->client;
1273 NBDExport *exp = client->exp;
1274
1275 if (exp) {
1276 nbd_export_get(exp);
1277 }
1278 if (nbd_negotiate(data)) {
1279 client_close(client);
1280 goto out;
1281 }
1282 qemu_co_mutex_init(&client->send_lock);
1283 nbd_set_handlers(client);
1284
1285 if (exp) {
1286 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1287 }
1288out:
1289 g_free(data);
1290}
1291
1292void nbd_client_new(NBDExport *exp,
1293 QIOChannelSocket *sioc,
1294 QCryptoTLSCreds *tlscreds,
1295 const char *tlsaclname,
1296 void (*close_fn)(NBDClient *))
1297{
1298 NBDClient *client;
1299 NBDClientNewData *data = g_new(NBDClientNewData, 1);
1300
1301 client = g_malloc0(sizeof(NBDClient));
1302 client->refcount = 1;
1303 client->exp = exp;
1304 client->tlscreds = tlscreds;
1305 if (tlscreds) {
1306 object_ref(OBJECT(client->tlscreds));
1307 }
1308 client->tlsaclname = g_strdup(tlsaclname);
1309 client->sioc = sioc;
1310 object_ref(OBJECT(client->sioc));
1311 client->ioc = QIO_CHANNEL(sioc);
1312 object_ref(OBJECT(client->ioc));
1313 client->can_read = true;
1314 client->close = close_fn;
1315
1316 data->client = client;
1317 data->co = qemu_coroutine_create(nbd_co_client_start, data);
1318 qemu_coroutine_enter(data->co);
1319}
1320