1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include "qemu/osdep.h"
20#include "qapi/error.h"
21#include "nbd-internal.h"
22
23static int system_errno_to_nbd_errno(int err)
24{
25 switch (err) {
26 case 0:
27 return NBD_SUCCESS;
28 case EPERM:
29 case EROFS:
30 return NBD_EPERM;
31 case EIO:
32 return NBD_EIO;
33 case ENOMEM:
34 return NBD_ENOMEM;
35#ifdef EDQUOT
36 case EDQUOT:
37#endif
38 case EFBIG:
39 case ENOSPC:
40 return NBD_ENOSPC;
41 case EINVAL:
42 default:
43 return NBD_EINVAL;
44 }
45}
46
47
48
49typedef struct NBDRequest NBDRequest;
50
51struct NBDRequest {
52 QSIMPLEQ_ENTRY(NBDRequest) entry;
53 NBDClient *client;
54 uint8_t *data;
55};
56
57struct NBDExport {
58 int refcount;
59 void (*close)(NBDExport *exp);
60
61 BlockBackend *blk;
62 char *name;
63 off_t dev_offset;
64 off_t size;
65 uint16_t nbdflags;
66 QTAILQ_HEAD(, NBDClient) clients;
67 QTAILQ_ENTRY(NBDExport) next;
68
69 AioContext *ctx;
70
71 Notifier eject_notifier;
72};
73
74static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
75
76struct NBDClient {
77 int refcount;
78 void (*close)(NBDClient *client);
79
80 NBDExport *exp;
81 QCryptoTLSCreds *tlscreds;
82 char *tlsaclname;
83 QIOChannelSocket *sioc;
84 QIOChannel *ioc;
85
86 Coroutine *recv_coroutine;
87
88 CoMutex send_lock;
89 Coroutine *send_coroutine;
90
91 bool can_read;
92
93 QTAILQ_ENTRY(NBDClient) next;
94 int nb_requests;
95 bool closing;
96};
97
98
99
100static void nbd_set_handlers(NBDClient *client);
101static void nbd_unset_handlers(NBDClient *client);
102static void nbd_update_can_read(NBDClient *client);
103
104static gboolean nbd_negotiate_continue(QIOChannel *ioc,
105 GIOCondition condition,
106 void *opaque)
107{
108 qemu_coroutine_enter(opaque, NULL);
109 return TRUE;
110}
111
112static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
113{
114 ssize_t ret;
115 guint watch;
116
117 assert(qemu_in_coroutine());
118
119 watch = qio_channel_add_watch(ioc,
120 G_IO_IN,
121 nbd_negotiate_continue,
122 qemu_coroutine_self(),
123 NULL);
124 ret = read_sync(ioc, buffer, size);
125 g_source_remove(watch);
126 return ret;
127
128}
129
130static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size)
131{
132 ssize_t ret;
133 guint watch;
134
135 assert(qemu_in_coroutine());
136
137 watch = qio_channel_add_watch(ioc,
138 G_IO_OUT,
139 nbd_negotiate_continue,
140 qemu_coroutine_self(),
141 NULL);
142 ret = write_sync(ioc, buffer, size);
143 g_source_remove(watch);
144 return ret;
145}
146
147static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
148{
149 ssize_t ret, dropped = size;
150 uint8_t *buffer = g_malloc(MIN(65536, size));
151
152 while (size > 0) {
153 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size));
154 if (ret < 0) {
155 g_free(buffer);
156 return ret;
157 }
158
159 assert(ret <= size);
160 size -= ret;
161 }
162
163 g_free(buffer);
164 return dropped;
165}
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
195{
196 uint64_t magic;
197 uint32_t len;
198
199 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
200
201 magic = cpu_to_be64(NBD_REP_MAGIC);
202 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
203 LOG("write failed (rep magic)");
204 return -EINVAL;
205 }
206 opt = cpu_to_be32(opt);
207 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
208 LOG("write failed (rep opt)");
209 return -EINVAL;
210 }
211 type = cpu_to_be32(type);
212 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
213 LOG("write failed (rep type)");
214 return -EINVAL;
215 }
216 len = cpu_to_be32(0);
217 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
218 LOG("write failed (rep data length)");
219 return -EINVAL;
220 }
221 return 0;
222}
223
224static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
225{
226 uint64_t magic, name_len;
227 uint32_t opt, type, len;
228
229 TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
230 name_len = strlen(exp->name);
231 magic = cpu_to_be64(NBD_REP_MAGIC);
232 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
233 LOG("write failed (magic)");
234 return -EINVAL;
235 }
236 opt = cpu_to_be32(NBD_OPT_LIST);
237 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
238 LOG("write failed (opt)");
239 return -EINVAL;
240 }
241 type = cpu_to_be32(NBD_REP_SERVER);
242 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
243 LOG("write failed (reply type)");
244 return -EINVAL;
245 }
246 len = cpu_to_be32(name_len + sizeof(len));
247 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
248 LOG("write failed (length)");
249 return -EINVAL;
250 }
251 len = cpu_to_be32(name_len);
252 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
253 LOG("write failed (length)");
254 return -EINVAL;
255 }
256 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) {
257 LOG("write failed (buffer)");
258 return -EINVAL;
259 }
260 return 0;
261}
262
263static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
264{
265 NBDExport *exp;
266
267 if (length) {
268 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
269 return -EIO;
270 }
271 return nbd_negotiate_send_rep(client->ioc,
272 NBD_REP_ERR_INVALID, NBD_OPT_LIST);
273 }
274
275
276 QTAILQ_FOREACH(exp, &exports, next) {
277 if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
278 return -EINVAL;
279 }
280 }
281
282 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
283}
284
285static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
286{
287 int rc = -EINVAL;
288 char name[256];
289
290
291
292
293 TRACE("Checking length");
294 if (length > 255) {
295 LOG("Bad length received");
296 goto fail;
297 }
298 if (nbd_negotiate_read(client->ioc, name, length) != length) {
299 LOG("read failed");
300 goto fail;
301 }
302 name[length] = '\0';
303
304 TRACE("Client requested export '%s'", name);
305
306 client->exp = nbd_export_find(name);
307 if (!client->exp) {
308 LOG("export not found");
309 goto fail;
310 }
311
312 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
313 nbd_export_get(client->exp);
314 rc = 0;
315fail:
316 return rc;
317}
318
319
320static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
321 uint32_t length)
322{
323 QIOChannel *ioc;
324 QIOChannelTLS *tioc;
325 struct NBDTLSHandshakeData data = { 0 };
326
327 TRACE("Setting up TLS");
328 ioc = client->ioc;
329 if (length) {
330 if (nbd_negotiate_drop_sync(ioc, length) != length) {
331 return NULL;
332 }
333 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
334 return NULL;
335 }
336
337 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS);
338
339 tioc = qio_channel_tls_new_server(ioc,
340 client->tlscreds,
341 client->tlsaclname,
342 NULL);
343 if (!tioc) {
344 return NULL;
345 }
346
347 TRACE("Starting TLS handshake");
348 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
349 qio_channel_tls_handshake(tioc,
350 nbd_tls_handshake,
351 &data,
352 NULL);
353
354 if (!data.complete) {
355 g_main_loop_run(data.loop);
356 }
357 g_main_loop_unref(data.loop);
358 if (data.error) {
359 object_unref(OBJECT(tioc));
360 error_free(data.error);
361 return NULL;
362 }
363
364 return QIO_CHANNEL(tioc);
365}
366
367
368static int nbd_negotiate_options(NBDClient *client)
369{
370 uint32_t flags;
371 bool fixedNewstyle = false;
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) !=
388 sizeof(flags)) {
389 LOG("read failed");
390 return -EIO;
391 }
392 TRACE("Checking client flags");
393 be32_to_cpus(&flags);
394 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
395 TRACE("Client supports fixed newstyle handshake");
396 fixedNewstyle = true;
397 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
398 }
399 if (flags != 0) {
400 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
401 return -EIO;
402 }
403
404 while (1) {
405 int ret;
406 uint32_t clientflags, length;
407 uint64_t magic;
408
409 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) !=
410 sizeof(magic)) {
411 LOG("read failed");
412 return -EINVAL;
413 }
414 TRACE("Checking opts magic");
415 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
416 LOG("Bad magic received");
417 return -EINVAL;
418 }
419
420 if (nbd_negotiate_read(client->ioc, &clientflags,
421 sizeof(clientflags)) != sizeof(clientflags)) {
422 LOG("read failed");
423 return -EINVAL;
424 }
425 clientflags = be32_to_cpu(clientflags);
426
427 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) !=
428 sizeof(length)) {
429 LOG("read failed");
430 return -EINVAL;
431 }
432 length = be32_to_cpu(length);
433
434 TRACE("Checking option 0x%" PRIx32, clientflags);
435 if (client->tlscreds &&
436 client->ioc == (QIOChannel *)client->sioc) {
437 QIOChannel *tioc;
438 if (!fixedNewstyle) {
439 TRACE("Unsupported option 0x%" PRIx32, clientflags);
440 return -EINVAL;
441 }
442 switch (clientflags) {
443 case NBD_OPT_STARTTLS:
444 tioc = nbd_negotiate_handle_starttls(client, length);
445 if (!tioc) {
446 return -EIO;
447 }
448 object_unref(OBJECT(client->ioc));
449 client->ioc = QIO_CHANNEL(tioc);
450 break;
451
452 case NBD_OPT_EXPORT_NAME:
453
454 TRACE("Option 0x%x not permitted before TLS", clientflags);
455 return -EINVAL;
456
457 default:
458 TRACE("Option 0x%" PRIx32 " not permitted before TLS",
459 clientflags);
460 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
461 return -EIO;
462 }
463 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
464 clientflags);
465 break;
466 }
467 } else if (fixedNewstyle) {
468 switch (clientflags) {
469 case NBD_OPT_LIST:
470 ret = nbd_negotiate_handle_list(client, length);
471 if (ret < 0) {
472 return ret;
473 }
474 break;
475
476 case NBD_OPT_ABORT:
477 return -EINVAL;
478
479 case NBD_OPT_EXPORT_NAME:
480 return nbd_negotiate_handle_export_name(client, length);
481
482 case NBD_OPT_STARTTLS:
483 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
484 return -EIO;
485 }
486 if (client->tlscreds) {
487 TRACE("TLS already enabled");
488 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID,
489 clientflags);
490 } else {
491 TRACE("TLS not configured");
492 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY,
493 clientflags);
494 }
495 break;
496 default:
497 TRACE("Unsupported option 0x%" PRIx32, clientflags);
498 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
499 return -EIO;
500 }
501 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
502 clientflags);
503 break;
504 }
505 } else {
506
507
508
509
510 switch (clientflags) {
511 case NBD_OPT_EXPORT_NAME:
512 return nbd_negotiate_handle_export_name(client, length);
513
514 default:
515 TRACE("Unsupported option 0x%" PRIx32, clientflags);
516 return -EINVAL;
517 }
518 }
519 }
520}
521
522typedef struct {
523 NBDClient *client;
524 Coroutine *co;
525} NBDClientNewData;
526
527static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
528{
529 NBDClient *client = data->client;
530 char buf[8 + 8 + 8 + 128];
531 int rc;
532 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
533 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
534 bool oldStyle;
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554 qio_channel_set_blocking(client->ioc, false, NULL);
555 rc = -EINVAL;
556
557 TRACE("Beginning negotiation.");
558 memset(buf, 0, sizeof(buf));
559 memcpy(buf, "NBDMAGIC", 8);
560
561 oldStyle = client->exp != NULL && !client->tlscreds;
562 if (oldStyle) {
563 TRACE("advertising size %" PRIu64 " and flags %x",
564 client->exp->size, client->exp->nbdflags | myflags);
565 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
566 stq_be_p(buf + 16, client->exp->size);
567 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
568 } else {
569 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
570 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
571 }
572
573 if (oldStyle) {
574 if (client->tlscreds) {
575 TRACE("TLS cannot be enabled with oldstyle protocol");
576 goto fail;
577 }
578 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) {
579 LOG("write failed");
580 goto fail;
581 }
582 } else {
583 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) {
584 LOG("write failed");
585 goto fail;
586 }
587 rc = nbd_negotiate_options(client);
588 if (rc != 0) {
589 LOG("option negotiation failed");
590 goto fail;
591 }
592
593 TRACE("advertising size %" PRIu64 " and flags %x",
594 client->exp->size, client->exp->nbdflags | myflags);
595 stq_be_p(buf + 18, client->exp->size);
596 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
597 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) !=
598 sizeof(buf) - 18) {
599 LOG("write failed");
600 goto fail;
601 }
602 }
603
604 TRACE("Negotiation succeeded.");
605 rc = 0;
606fail:
607 return rc;
608}
609
610#ifdef __linux__
611
612int nbd_disconnect(int fd)
613{
614 ioctl(fd, NBD_CLEAR_QUE);
615 ioctl(fd, NBD_DISCONNECT);
616 ioctl(fd, NBD_CLEAR_SOCK);
617 return 0;
618}
619
620#else
621
622int nbd_disconnect(int fd)
623{
624 return -ENOTSUP;
625}
626#endif
627
628static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
629{
630 uint8_t buf[NBD_REQUEST_SIZE];
631 uint32_t magic;
632 ssize_t ret;
633
634 ret = read_sync(ioc, buf, sizeof(buf));
635 if (ret < 0) {
636 return ret;
637 }
638
639 if (ret != sizeof(buf)) {
640 LOG("read failed");
641 return -EINVAL;
642 }
643
644
645
646
647
648
649
650
651
652 magic = ldl_be_p(buf);
653 request->type = ldl_be_p(buf + 4);
654 request->handle = ldq_be_p(buf + 8);
655 request->from = ldq_be_p(buf + 16);
656 request->len = ldl_be_p(buf + 24);
657
658 TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32
659 ", from = %" PRIu64 " , len = %" PRIu32 " }",
660 magic, request->type, request->from, request->len);
661
662 if (magic != NBD_REQUEST_MAGIC) {
663 LOG("invalid magic (got 0x%" PRIx32 ")", magic);
664 return -EINVAL;
665 }
666 return 0;
667}
668
669static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply)
670{
671 uint8_t buf[NBD_REPLY_SIZE];
672 ssize_t ret;
673
674 reply->error = system_errno_to_nbd_errno(reply->error);
675
676 TRACE("Sending response to client: { .error = %" PRId32
677 ", handle = %" PRIu64 " }",
678 reply->error, reply->handle);
679
680
681
682
683
684
685 stl_be_p(buf, NBD_REPLY_MAGIC);
686 stl_be_p(buf + 4, reply->error);
687 stq_be_p(buf + 8, reply->handle);
688
689 ret = write_sync(ioc, buf, sizeof(buf));
690 if (ret < 0) {
691 return ret;
692 }
693
694 if (ret != sizeof(buf)) {
695 LOG("writing to socket failed");
696 return -EINVAL;
697 }
698 return 0;
699}
700
701#define MAX_NBD_REQUESTS 16
702
703void nbd_client_get(NBDClient *client)
704{
705 client->refcount++;
706}
707
708void nbd_client_put(NBDClient *client)
709{
710 if (--client->refcount == 0) {
711
712
713
714 assert(client->closing);
715
716 nbd_unset_handlers(client);
717 object_unref(OBJECT(client->sioc));
718 object_unref(OBJECT(client->ioc));
719 if (client->tlscreds) {
720 object_unref(OBJECT(client->tlscreds));
721 }
722 g_free(client->tlsaclname);
723 if (client->exp) {
724 QTAILQ_REMOVE(&client->exp->clients, client, next);
725 nbd_export_put(client->exp);
726 }
727 g_free(client);
728 }
729}
730
731static void client_close(NBDClient *client)
732{
733 if (client->closing) {
734 return;
735 }
736
737 client->closing = true;
738
739
740
741
742 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
743 NULL);
744
745
746 if (client->close) {
747 client->close(client);
748 }
749}
750
751static NBDRequest *nbd_request_get(NBDClient *client)
752{
753 NBDRequest *req;
754
755 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
756 client->nb_requests++;
757 nbd_update_can_read(client);
758
759 req = g_new0(NBDRequest, 1);
760 nbd_client_get(client);
761 req->client = client;
762 return req;
763}
764
765static void nbd_request_put(NBDRequest *req)
766{
767 NBDClient *client = req->client;
768
769 if (req->data) {
770 qemu_vfree(req->data);
771 }
772 g_free(req);
773
774 client->nb_requests--;
775 nbd_update_can_read(client);
776 nbd_client_put(client);
777}
778
779static void blk_aio_attached(AioContext *ctx, void *opaque)
780{
781 NBDExport *exp = opaque;
782 NBDClient *client;
783
784 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
785
786 exp->ctx = ctx;
787
788 QTAILQ_FOREACH(client, &exp->clients, next) {
789 nbd_set_handlers(client);
790 }
791}
792
793static void blk_aio_detach(void *opaque)
794{
795 NBDExport *exp = opaque;
796 NBDClient *client;
797
798 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
799
800 QTAILQ_FOREACH(client, &exp->clients, next) {
801 nbd_unset_handlers(client);
802 }
803
804 exp->ctx = NULL;
805}
806
807static void nbd_eject_notifier(Notifier *n, void *data)
808{
809 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
810 nbd_export_close(exp);
811}
812
813NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
814 uint16_t nbdflags, void (*close)(NBDExport *),
815 Error **errp)
816{
817 NBDExport *exp = g_malloc0(sizeof(NBDExport));
818 exp->refcount = 1;
819 QTAILQ_INIT(&exp->clients);
820 exp->blk = blk;
821 exp->dev_offset = dev_offset;
822 exp->nbdflags = nbdflags;
823 exp->size = size < 0 ? blk_getlength(blk) : size;
824 if (exp->size < 0) {
825 error_setg_errno(errp, -exp->size,
826 "Failed to determine the NBD export's length");
827 goto fail;
828 }
829 exp->size -= exp->size % BDRV_SECTOR_SIZE;
830
831 exp->close = close;
832 exp->ctx = blk_get_aio_context(blk);
833 blk_ref(blk);
834 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
835
836 exp->eject_notifier.notify = nbd_eject_notifier;
837 blk_add_remove_bs_notifier(blk, &exp->eject_notifier);
838
839
840
841
842
843
844 aio_context_acquire(exp->ctx);
845 blk_invalidate_cache(blk, NULL);
846 aio_context_release(exp->ctx);
847 return exp;
848
849fail:
850 g_free(exp);
851 return NULL;
852}
853
854NBDExport *nbd_export_find(const char *name)
855{
856 NBDExport *exp;
857 QTAILQ_FOREACH(exp, &exports, next) {
858 if (strcmp(name, exp->name) == 0) {
859 return exp;
860 }
861 }
862
863 return NULL;
864}
865
866void nbd_export_set_name(NBDExport *exp, const char *name)
867{
868 if (exp->name == name) {
869 return;
870 }
871
872 nbd_export_get(exp);
873 if (exp->name != NULL) {
874 g_free(exp->name);
875 exp->name = NULL;
876 QTAILQ_REMOVE(&exports, exp, next);
877 nbd_export_put(exp);
878 }
879 if (name != NULL) {
880 nbd_export_get(exp);
881 exp->name = g_strdup(name);
882 QTAILQ_INSERT_TAIL(&exports, exp, next);
883 }
884 nbd_export_put(exp);
885}
886
887void nbd_export_close(NBDExport *exp)
888{
889 NBDClient *client, *next;
890
891 nbd_export_get(exp);
892 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
893 client_close(client);
894 }
895 nbd_export_set_name(exp, NULL);
896 nbd_export_put(exp);
897}
898
899void nbd_export_get(NBDExport *exp)
900{
901 assert(exp->refcount > 0);
902 exp->refcount++;
903}
904
905void nbd_export_put(NBDExport *exp)
906{
907 assert(exp->refcount > 0);
908 if (exp->refcount == 1) {
909 nbd_export_close(exp);
910 }
911
912 if (--exp->refcount == 0) {
913 assert(exp->name == NULL);
914
915 if (exp->close) {
916 exp->close(exp);
917 }
918
919 if (exp->blk) {
920 notifier_remove(&exp->eject_notifier);
921 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
922 blk_aio_detach, exp);
923 blk_unref(exp->blk);
924 exp->blk = NULL;
925 }
926
927 g_free(exp);
928 }
929}
930
931BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
932{
933 return exp->blk;
934}
935
936void nbd_export_close_all(void)
937{
938 NBDExport *exp, *next;
939
940 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
941 nbd_export_close(exp);
942 }
943}
944
945static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
946 int len)
947{
948 NBDClient *client = req->client;
949 ssize_t rc, ret;
950
951 g_assert(qemu_in_coroutine());
952 qemu_co_mutex_lock(&client->send_lock);
953 client->send_coroutine = qemu_coroutine_self();
954 nbd_set_handlers(client);
955
956 if (!len) {
957 rc = nbd_send_reply(client->ioc, reply);
958 } else {
959 qio_channel_set_cork(client->ioc, true);
960 rc = nbd_send_reply(client->ioc, reply);
961 if (rc >= 0) {
962 ret = write_sync(client->ioc, req->data, len);
963 if (ret != len) {
964 rc = -EIO;
965 }
966 }
967 qio_channel_set_cork(client->ioc, false);
968 }
969
970 client->send_coroutine = NULL;
971 nbd_set_handlers(client);
972 qemu_co_mutex_unlock(&client->send_lock);
973 return rc;
974}
975
976static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
977{
978 NBDClient *client = req->client;
979 uint32_t command;
980 ssize_t rc;
981
982 g_assert(qemu_in_coroutine());
983 client->recv_coroutine = qemu_coroutine_self();
984 nbd_update_can_read(client);
985
986 rc = nbd_receive_request(client->ioc, request);
987 if (rc < 0) {
988 if (rc != -EAGAIN) {
989 rc = -EIO;
990 }
991 goto out;
992 }
993
994 if ((request->from + request->len) < request->from) {
995 LOG("integer overflow detected! "
996 "you're probably being attacked");
997 rc = -EINVAL;
998 goto out;
999 }
1000
1001 TRACE("Decoding type");
1002
1003 command = request->type & NBD_CMD_MASK_COMMAND;
1004 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
1005 if (request->len > NBD_MAX_BUFFER_SIZE) {
1006 LOG("len (%" PRIu32" ) is larger than max len (%u)",
1007 request->len, NBD_MAX_BUFFER_SIZE);
1008 rc = -EINVAL;
1009 goto out;
1010 }
1011
1012 req->data = blk_try_blockalign(client->exp->blk, request->len);
1013 if (req->data == NULL) {
1014 rc = -ENOMEM;
1015 goto out;
1016 }
1017 }
1018 if (command == NBD_CMD_WRITE) {
1019 TRACE("Reading %" PRIu32 " byte(s)", request->len);
1020
1021 if (read_sync(client->ioc, req->data, request->len) != request->len) {
1022 LOG("reading from socket failed");
1023 rc = -EIO;
1024 goto out;
1025 }
1026 }
1027 rc = 0;
1028
1029out:
1030 client->recv_coroutine = NULL;
1031 nbd_update_can_read(client);
1032
1033 return rc;
1034}
1035
1036static void nbd_trip(void *opaque)
1037{
1038 NBDClient *client = opaque;
1039 NBDExport *exp = client->exp;
1040 NBDRequest *req;
1041 struct nbd_request request;
1042 struct nbd_reply reply;
1043 ssize_t ret;
1044 uint32_t command;
1045
1046 TRACE("Reading request.");
1047 if (client->closing) {
1048 return;
1049 }
1050
1051 req = nbd_request_get(client);
1052 ret = nbd_co_receive_request(req, &request);
1053 if (ret == -EAGAIN) {
1054 goto done;
1055 }
1056 if (ret == -EIO) {
1057 goto out;
1058 }
1059
1060 reply.handle = request.handle;
1061 reply.error = 0;
1062
1063 if (ret < 0) {
1064 reply.error = -ret;
1065 goto error_reply;
1066 }
1067 command = request.type & NBD_CMD_MASK_COMMAND;
1068 if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
1069 LOG("From: %" PRIu64 ", Len: %" PRIu32", Size: %" PRIu64
1070 ", Offset: %" PRIu64 "\n",
1071 request.from, request.len,
1072 (uint64_t)exp->size, (uint64_t)exp->dev_offset);
1073 LOG("requested operation past EOF--bad client?");
1074 goto invalid_request;
1075 }
1076
1077 if (client->closing) {
1078
1079
1080
1081
1082 goto done;
1083 }
1084
1085 switch (command) {
1086 case NBD_CMD_READ:
1087 TRACE("Request type is READ");
1088
1089 if (request.type & NBD_CMD_FLAG_FUA) {
1090 ret = blk_co_flush(exp->blk);
1091 if (ret < 0) {
1092 LOG("flush failed");
1093 reply.error = -ret;
1094 goto error_reply;
1095 }
1096 }
1097
1098 ret = blk_pread(exp->blk, request.from + exp->dev_offset,
1099 req->data, request.len);
1100 if (ret < 0) {
1101 LOG("reading from file failed");
1102 reply.error = -ret;
1103 goto error_reply;
1104 }
1105
1106 TRACE("Read %" PRIu32" byte(s)", request.len);
1107 if (nbd_co_send_reply(req, &reply, request.len) < 0)
1108 goto out;
1109 break;
1110 case NBD_CMD_WRITE:
1111 TRACE("Request type is WRITE");
1112
1113 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1114 TRACE("Server is read-only, return error");
1115 reply.error = EROFS;
1116 goto error_reply;
1117 }
1118
1119 TRACE("Writing to device");
1120
1121 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
1122 req->data, request.len);
1123 if (ret < 0) {
1124 LOG("writing to file failed");
1125 reply.error = -ret;
1126 goto error_reply;
1127 }
1128
1129 if (request.type & NBD_CMD_FLAG_FUA) {
1130 ret = blk_co_flush(exp->blk);
1131 if (ret < 0) {
1132 LOG("flush failed");
1133 reply.error = -ret;
1134 goto error_reply;
1135 }
1136 }
1137
1138 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1139 goto out;
1140 }
1141 break;
1142 case NBD_CMD_DISC:
1143 TRACE("Request type is DISCONNECT");
1144 errno = 0;
1145 goto out;
1146 case NBD_CMD_FLUSH:
1147 TRACE("Request type is FLUSH");
1148
1149 ret = blk_co_flush(exp->blk);
1150 if (ret < 0) {
1151 LOG("flush failed");
1152 reply.error = -ret;
1153 }
1154 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1155 goto out;
1156 }
1157 break;
1158 case NBD_CMD_TRIM:
1159 TRACE("Request type is TRIM");
1160
1161
1162 if (request.len >= BDRV_SECTOR_SIZE) {
1163 request.len -= (request.from + request.len) % BDRV_SECTOR_SIZE;
1164 ret = blk_co_discard(exp->blk,
1165 DIV_ROUND_UP(request.from + exp->dev_offset,
1166 BDRV_SECTOR_SIZE),
1167 request.len / BDRV_SECTOR_SIZE);
1168 if (ret < 0) {
1169 LOG("discard failed");
1170 reply.error = -ret;
1171 }
1172 } else {
1173 TRACE("trim request too small, ignoring");
1174 }
1175 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1176 goto out;
1177 }
1178 break;
1179 default:
1180 LOG("invalid request type (%" PRIu32 ") received", request.type);
1181 invalid_request:
1182 reply.error = EINVAL;
1183 error_reply:
1184 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1185 goto out;
1186 }
1187 break;
1188 }
1189
1190 TRACE("Request/Reply complete");
1191
1192done:
1193 nbd_request_put(req);
1194 return;
1195
1196out:
1197 nbd_request_put(req);
1198 client_close(client);
1199}
1200
1201static void nbd_read(void *opaque)
1202{
1203 NBDClient *client = opaque;
1204
1205 if (client->recv_coroutine) {
1206 qemu_coroutine_enter(client->recv_coroutine, NULL);
1207 } else {
1208 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
1209 }
1210}
1211
1212static void nbd_restart_write(void *opaque)
1213{
1214 NBDClient *client = opaque;
1215
1216 qemu_coroutine_enter(client->send_coroutine, NULL);
1217}
1218
1219static void nbd_set_handlers(NBDClient *client)
1220{
1221 if (client->exp && client->exp->ctx) {
1222 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1223 true,
1224 client->can_read ? nbd_read : NULL,
1225 client->send_coroutine ? nbd_restart_write : NULL,
1226 client);
1227 }
1228}
1229
1230static void nbd_unset_handlers(NBDClient *client)
1231{
1232 if (client->exp && client->exp->ctx) {
1233 aio_set_fd_handler(client->exp->ctx, client->sioc->fd,
1234 true, NULL, NULL, NULL);
1235 }
1236}
1237
1238static void nbd_update_can_read(NBDClient *client)
1239{
1240 bool can_read = client->recv_coroutine ||
1241 client->nb_requests < MAX_NBD_REQUESTS;
1242
1243 if (can_read != client->can_read) {
1244 client->can_read = can_read;
1245 nbd_set_handlers(client);
1246
1247
1248
1249 }
1250}
1251
1252static coroutine_fn void nbd_co_client_start(void *opaque)
1253{
1254 NBDClientNewData *data = opaque;
1255 NBDClient *client = data->client;
1256 NBDExport *exp = client->exp;
1257
1258 if (exp) {
1259 nbd_export_get(exp);
1260 }
1261 if (nbd_negotiate(data)) {
1262 client_close(client);
1263 goto out;
1264 }
1265 qemu_co_mutex_init(&client->send_lock);
1266 nbd_set_handlers(client);
1267
1268 if (exp) {
1269 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1270 }
1271out:
1272 g_free(data);
1273}
1274
1275void nbd_client_new(NBDExport *exp,
1276 QIOChannelSocket *sioc,
1277 QCryptoTLSCreds *tlscreds,
1278 const char *tlsaclname,
1279 void (*close_fn)(NBDClient *))
1280{
1281 NBDClient *client;
1282 NBDClientNewData *data = g_new(NBDClientNewData, 1);
1283
1284 client = g_malloc0(sizeof(NBDClient));
1285 client->refcount = 1;
1286 client->exp = exp;
1287 client->tlscreds = tlscreds;
1288 if (tlscreds) {
1289 object_ref(OBJECT(client->tlscreds));
1290 }
1291 client->tlsaclname = g_strdup(tlsaclname);
1292 client->sioc = sioc;
1293 object_ref(OBJECT(client->sioc));
1294 client->ioc = QIO_CHANNEL(sioc);
1295 object_ref(OBJECT(client->ioc));
1296 client->can_read = true;
1297 client->close = close_fn;
1298
1299 data->client = client;
1300 data->co = qemu_coroutine_create(nbd_co_client_start);
1301 qemu_coroutine_enter(data->co, data);
1302}
1303