1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "trace.h"
27#include "sysemu/block-backend.h"
28#include "block/aio-wait.h"
29#include "block/blockjob.h"
30#include "block/blockjob_int.h"
31#include "block/block_int.h"
32#include "block/coroutines.h"
33#include "block/dirty-bitmap.h"
34#include "block/write-threshold.h"
35#include "qemu/cutils.h"
36#include "qemu/memalign.h"
37#include "qapi/error.h"
38#include "qemu/error-report.h"
39#include "qemu/main-loop.h"
40#include "sysemu/replay.h"
41
42
43#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
44
45static void bdrv_parent_cb_resize(BlockDriverState *bs);
46static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
47 int64_t offset, int64_t bytes, BdrvRequestFlags flags);
48
49static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
50{
51 BdrvChild *c, *next;
52
53 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
54 if (c == ignore) {
55 continue;
56 }
57 bdrv_parent_drained_begin_single(c);
58 }
59}
60
61void bdrv_parent_drained_end_single(BdrvChild *c)
62{
63 GLOBAL_STATE_CODE();
64
65 assert(c->quiesced_parent);
66 c->quiesced_parent = false;
67
68 if (c->klass->drained_end) {
69 c->klass->drained_end(c);
70 }
71}
72
73static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
74{
75 BdrvChild *c;
76
77 QLIST_FOREACH(c, &bs->parents, next_parent) {
78 if (c == ignore) {
79 continue;
80 }
81 bdrv_parent_drained_end_single(c);
82 }
83}
84
85bool bdrv_parent_drained_poll_single(BdrvChild *c)
86{
87 if (c->klass->drained_poll) {
88 return c->klass->drained_poll(c);
89 }
90 return false;
91}
92
93static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
94 bool ignore_bds_parents)
95{
96 BdrvChild *c, *next;
97 bool busy = false;
98
99 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
100 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
101 continue;
102 }
103 busy |= bdrv_parent_drained_poll_single(c);
104 }
105
106 return busy;
107}
108
109void bdrv_parent_drained_begin_single(BdrvChild *c)
110{
111 GLOBAL_STATE_CODE();
112
113 assert(!c->quiesced_parent);
114 c->quiesced_parent = true;
115
116 if (c->klass->drained_begin) {
117 c->klass->drained_begin(c);
118 }
119}
120
121static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
122{
123 dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
124 src->pdiscard_alignment);
125 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
126 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
127 dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
128 src->max_hw_transfer);
129 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
130 src->opt_mem_alignment);
131 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
132 src->min_mem_alignment);
133 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
134 dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
135}
136
137typedef struct BdrvRefreshLimitsState {
138 BlockDriverState *bs;
139 BlockLimits old_bl;
140} BdrvRefreshLimitsState;
141
142static void bdrv_refresh_limits_abort(void *opaque)
143{
144 BdrvRefreshLimitsState *s = opaque;
145
146 s->bs->bl = s->old_bl;
147}
148
149static TransactionActionDrv bdrv_refresh_limits_drv = {
150 .abort = bdrv_refresh_limits_abort,
151 .clean = g_free,
152};
153
154
155void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
156{
157 ERRP_GUARD();
158 BlockDriver *drv = bs->drv;
159 BdrvChild *c;
160 bool have_limits;
161
162 GLOBAL_STATE_CODE();
163
164 if (tran) {
165 BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
166 *s = (BdrvRefreshLimitsState) {
167 .bs = bs,
168 .old_bl = bs->bl,
169 };
170 tran_add(tran, &bdrv_refresh_limits_drv, s);
171 }
172
173 memset(&bs->bl, 0, sizeof(bs->bl));
174
175 if (!drv) {
176 return;
177 }
178
179
180 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
181 drv->bdrv_aio_preadv ||
182 drv->bdrv_co_preadv_part) ? 1 : 512;
183
184
185 have_limits = false;
186 QLIST_FOREACH(c, &bs->children, next) {
187 if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
188 {
189 bdrv_merge_limits(&bs->bl, &c->bs->bl);
190 have_limits = true;
191 }
192
193 if (c->role & BDRV_CHILD_FILTERED) {
194 bs->bl.has_variable_length |= c->bs->bl.has_variable_length;
195 }
196 }
197
198 if (!have_limits) {
199 bs->bl.min_mem_alignment = 512;
200 bs->bl.opt_mem_alignment = qemu_real_host_page_size();
201
202
203 bs->bl.max_iov = IOV_MAX;
204 }
205
206
207 if (drv->bdrv_refresh_limits) {
208 drv->bdrv_refresh_limits(bs, errp);
209 if (*errp) {
210 return;
211 }
212 }
213
214 if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
215 error_setg(errp, "Driver requires too large request alignment");
216 }
217}
218
219
220
221
222
223
224void bdrv_enable_copy_on_read(BlockDriverState *bs)
225{
226 IO_CODE();
227 qatomic_inc(&bs->copy_on_read);
228}
229
230void bdrv_disable_copy_on_read(BlockDriverState *bs)
231{
232 int old = qatomic_fetch_dec(&bs->copy_on_read);
233 IO_CODE();
234 assert(old >= 1);
235}
236
237typedef struct {
238 Coroutine *co;
239 BlockDriverState *bs;
240 bool done;
241 bool begin;
242 bool poll;
243 BdrvChild *parent;
244} BdrvCoDrainData;
245
246
247bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
248 bool ignore_bds_parents)
249{
250 GLOBAL_STATE_CODE();
251
252 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
253 return true;
254 }
255
256 if (qatomic_read(&bs->in_flight)) {
257 return true;
258 }
259
260 return false;
261}
262
263static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
264 BdrvChild *ignore_parent)
265{
266 return bdrv_drain_poll(bs, ignore_parent, false);
267}
268
269static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
270 bool poll);
271static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
272
273static void bdrv_co_drain_bh_cb(void *opaque)
274{
275 BdrvCoDrainData *data = opaque;
276 Coroutine *co = data->co;
277 BlockDriverState *bs = data->bs;
278
279 if (bs) {
280 AioContext *ctx = bdrv_get_aio_context(bs);
281 aio_context_acquire(ctx);
282 bdrv_dec_in_flight(bs);
283 if (data->begin) {
284 bdrv_do_drained_begin(bs, data->parent, data->poll);
285 } else {
286 assert(!data->poll);
287 bdrv_do_drained_end(bs, data->parent);
288 }
289 aio_context_release(ctx);
290 } else {
291 assert(data->begin);
292 bdrv_drain_all_begin();
293 }
294
295 data->done = true;
296 aio_co_wake(co);
297}
298
299static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
300 bool begin,
301 BdrvChild *parent,
302 bool poll)
303{
304 BdrvCoDrainData data;
305 Coroutine *self = qemu_coroutine_self();
306 AioContext *ctx = bdrv_get_aio_context(bs);
307 AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
308
309
310
311
312 assert(qemu_in_coroutine());
313 data = (BdrvCoDrainData) {
314 .co = self,
315 .bs = bs,
316 .done = false,
317 .begin = begin,
318 .parent = parent,
319 .poll = poll,
320 };
321
322 if (bs) {
323 bdrv_inc_in_flight(bs);
324 }
325
326
327
328
329
330
331
332
333
334 if (ctx != co_ctx) {
335 aio_context_release(ctx);
336 }
337 replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
338 bdrv_co_drain_bh_cb, &data);
339
340 qemu_coroutine_yield();
341
342
343 assert(data.done);
344
345
346 if (ctx != co_ctx) {
347 aio_context_acquire(ctx);
348 }
349}
350
351static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
352 bool poll)
353{
354 IO_OR_GS_CODE();
355
356 if (qemu_in_coroutine()) {
357 bdrv_co_yield_to_drain(bs, true, parent, poll);
358 return;
359 }
360
361 GLOBAL_STATE_CODE();
362
363
364 if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
365 bdrv_parent_drained_begin(bs, parent);
366 if (bs->drv && bs->drv->bdrv_drain_begin) {
367 bs->drv->bdrv_drain_begin(bs);
368 }
369 }
370
371
372
373
374
375
376
377
378
379
380 if (poll) {
381 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
382 }
383}
384
385void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
386{
387 bdrv_do_drained_begin(bs, parent, false);
388}
389
390void bdrv_drained_begin(BlockDriverState *bs)
391{
392 IO_OR_GS_CODE();
393 bdrv_do_drained_begin(bs, NULL, true);
394}
395
396
397
398
399
400static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
401{
402 int old_quiesce_counter;
403
404 IO_OR_GS_CODE();
405
406 if (qemu_in_coroutine()) {
407 bdrv_co_yield_to_drain(bs, false, parent, false);
408 return;
409 }
410 assert(bs->quiesce_counter > 0);
411 GLOBAL_STATE_CODE();
412
413
414 old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
415 if (old_quiesce_counter == 1) {
416 if (bs->drv && bs->drv->bdrv_drain_end) {
417 bs->drv->bdrv_drain_end(bs);
418 }
419 bdrv_parent_drained_end(bs, parent);
420 }
421}
422
423void bdrv_drained_end(BlockDriverState *bs)
424{
425 IO_OR_GS_CODE();
426 bdrv_do_drained_end(bs, NULL);
427}
428
429void bdrv_drain(BlockDriverState *bs)
430{
431 IO_OR_GS_CODE();
432 bdrv_drained_begin(bs);
433 bdrv_drained_end(bs);
434}
435
436static void bdrv_drain_assert_idle(BlockDriverState *bs)
437{
438 BdrvChild *child, *next;
439
440 assert(qatomic_read(&bs->in_flight) == 0);
441 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
442 bdrv_drain_assert_idle(child->bs);
443 }
444}
445
/*
 * Number of open drain-all sections: incremented by
 * bdrv_drain_all_begin_nopoll(), decremented by bdrv_drain_all_end().
 * Starts at zero (static storage).
 */
unsigned int bdrv_drain_all_count;
447
448static bool bdrv_drain_all_poll(void)
449{
450 BlockDriverState *bs = NULL;
451 bool result = false;
452 GLOBAL_STATE_CODE();
453
454
455
456 while ((bs = bdrv_next_all_states(bs))) {
457 AioContext *aio_context = bdrv_get_aio_context(bs);
458 aio_context_acquire(aio_context);
459 result |= bdrv_drain_poll(bs, NULL, true);
460 aio_context_release(aio_context);
461 }
462
463 return result;
464}
465
466
467
468
469
470
471
472
473
474
475
476
477
478void bdrv_drain_all_begin_nopoll(void)
479{
480 BlockDriverState *bs = NULL;
481 GLOBAL_STATE_CODE();
482
483
484
485
486
487
488 if (replay_events_enabled()) {
489 return;
490 }
491
492
493
494 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
495 assert(bdrv_drain_all_count < INT_MAX);
496 bdrv_drain_all_count++;
497
498
499
500 while ((bs = bdrv_next_all_states(bs))) {
501 AioContext *aio_context = bdrv_get_aio_context(bs);
502
503 aio_context_acquire(aio_context);
504 bdrv_do_drained_begin(bs, NULL, false);
505 aio_context_release(aio_context);
506 }
507}
508
509void bdrv_drain_all_begin(void)
510{
511 BlockDriverState *bs = NULL;
512
513 if (qemu_in_coroutine()) {
514 bdrv_co_yield_to_drain(NULL, true, NULL, true);
515 return;
516 }
517
518
519
520
521
522
523 if (replay_events_enabled()) {
524 return;
525 }
526
527 bdrv_drain_all_begin_nopoll();
528
529
530 AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
531
532 while ((bs = bdrv_next_all_states(bs))) {
533 bdrv_drain_assert_idle(bs);
534 }
535}
536
537void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
538{
539 GLOBAL_STATE_CODE();
540
541 g_assert(bs->quiesce_counter > 0);
542 g_assert(!bs->refcnt);
543
544 while (bs->quiesce_counter) {
545 bdrv_do_drained_end(bs, NULL);
546 }
547}
548
549void bdrv_drain_all_end(void)
550{
551 BlockDriverState *bs = NULL;
552 GLOBAL_STATE_CODE();
553
554
555
556
557
558
559 if (replay_events_enabled()) {
560 return;
561 }
562
563 while ((bs = bdrv_next_all_states(bs))) {
564 AioContext *aio_context = bdrv_get_aio_context(bs);
565
566 aio_context_acquire(aio_context);
567 bdrv_do_drained_end(bs, NULL);
568 aio_context_release(aio_context);
569 }
570
571 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
572 assert(bdrv_drain_all_count > 0);
573 bdrv_drain_all_count--;
574}
575
/*
 * Drain all nodes once: open a drain-all section (which waits for the
 * nodes to become idle) and close it again immediately.
 */
void bdrv_drain_all(void)
{
    GLOBAL_STATE_CODE();

    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}
582
583
584
585
586
587
588static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
589{
590 if (req->serialising) {
591 qatomic_dec(&req->bs->serialising_in_flight);
592 }
593
594 qemu_co_mutex_lock(&req->bs->reqs_lock);
595 QLIST_REMOVE(req, list);
596 qemu_co_queue_restart_all(&req->wait_queue);
597 qemu_co_mutex_unlock(&req->bs->reqs_lock);
598}
599
600
601
602
603static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
604 BlockDriverState *bs,
605 int64_t offset,
606 int64_t bytes,
607 enum BdrvTrackedRequestType type)
608{
609 bdrv_check_request(offset, bytes, &error_abort);
610
611 *req = (BdrvTrackedRequest){
612 .bs = bs,
613 .offset = offset,
614 .bytes = bytes,
615 .type = type,
616 .co = qemu_coroutine_self(),
617 .serialising = false,
618 .overlap_offset = offset,
619 .overlap_bytes = bytes,
620 };
621
622 qemu_co_queue_init(&req->wait_queue);
623
624 qemu_co_mutex_lock(&bs->reqs_lock);
625 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
626 qemu_co_mutex_unlock(&bs->reqs_lock);
627}
628
629static bool tracked_request_overlaps(BdrvTrackedRequest *req,
630 int64_t offset, int64_t bytes)
631{
632 bdrv_check_request(offset, bytes, &error_abort);
633
634
635 if (offset >= req->overlap_offset + req->overlap_bytes) {
636 return false;
637 }
638
639 if (req->overlap_offset >= offset + bytes) {
640 return false;
641 }
642 return true;
643}
644
645
646static coroutine_fn BdrvTrackedRequest *
647bdrv_find_conflicting_request(BdrvTrackedRequest *self)
648{
649 BdrvTrackedRequest *req;
650
651 QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
652 if (req == self || (!req->serialising && !self->serialising)) {
653 continue;
654 }
655 if (tracked_request_overlaps(req, self->overlap_offset,
656 self->overlap_bytes))
657 {
658
659
660
661
662
663 assert(qemu_coroutine_self() != req->co);
664
665
666
667
668
669
670 if (!req->waiting_for) {
671 return req;
672 }
673 }
674 }
675
676 return NULL;
677}
678
679
680static void coroutine_fn
681bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
682{
683 BdrvTrackedRequest *req;
684
685 while ((req = bdrv_find_conflicting_request(self))) {
686 self->waiting_for = req;
687 qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
688 self->waiting_for = NULL;
689 }
690}
691
692
693static void tracked_request_set_serialising(BdrvTrackedRequest *req,
694 uint64_t align)
695{
696 int64_t overlap_offset = req->offset & ~(align - 1);
697 int64_t overlap_bytes =
698 ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
699
700 bdrv_check_request(req->offset, req->bytes, &error_abort);
701
702 if (!req->serialising) {
703 qatomic_inc(&req->bs->serialising_in_flight);
704 req->serialising = true;
705 }
706
707 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
708 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
709}
710
711
712
713
714
715BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
716{
717 BdrvTrackedRequest *req;
718 Coroutine *self = qemu_coroutine_self();
719 IO_CODE();
720
721 QLIST_FOREACH(req, &bs->tracked_requests, list) {
722 if (req->co == self) {
723 return req;
724 }
725 }
726
727 return NULL;
728}
729
730
731
732
733void coroutine_fn GRAPH_RDLOCK
734bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
735 int64_t *cluster_offset, int64_t *cluster_bytes)
736{
737 BlockDriverInfo bdi;
738 IO_CODE();
739 if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
740 *cluster_offset = offset;
741 *cluster_bytes = bytes;
742 } else {
743 int64_t c = bdi.cluster_size;
744 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
745 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
746 }
747}
748
749static int coroutine_fn GRAPH_RDLOCK bdrv_get_cluster_size(BlockDriverState *bs)
750{
751 BlockDriverInfo bdi;
752 int ret;
753
754 ret = bdrv_co_get_info(bs, &bdi);
755 if (ret < 0 || bdi.cluster_size == 0) {
756 return bs->bl.request_alignment;
757 } else {
758 return bdi.cluster_size;
759 }
760}
761
762void bdrv_inc_in_flight(BlockDriverState *bs)
763{
764 IO_CODE();
765 qatomic_inc(&bs->in_flight);
766}
767
768void bdrv_wakeup(BlockDriverState *bs)
769{
770 IO_CODE();
771 aio_wait_kick();
772}
773
774void bdrv_dec_in_flight(BlockDriverState *bs)
775{
776 IO_CODE();
777 qatomic_dec(&bs->in_flight);
778 bdrv_wakeup(bs);
779}
780
781static void coroutine_fn
782bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
783{
784 BlockDriverState *bs = self->bs;
785
786 if (!qatomic_read(&bs->serialising_in_flight)) {
787 return;
788 }
789
790 qemu_co_mutex_lock(&bs->reqs_lock);
791 bdrv_wait_serialising_requests_locked(self);
792 qemu_co_mutex_unlock(&bs->reqs_lock);
793}
794
795void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
796 uint64_t align)
797{
798 IO_CODE();
799
800 qemu_co_mutex_lock(&req->bs->reqs_lock);
801
802 tracked_request_set_serialising(req, align);
803 bdrv_wait_serialising_requests_locked(req);
804
805 qemu_co_mutex_unlock(&req->bs->reqs_lock);
806}
807
808int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
809 QEMUIOVector *qiov, size_t qiov_offset,
810 Error **errp)
811{
812
813
814
815
816 if (offset < 0) {
817 error_setg(errp, "offset is negative: %" PRIi64, offset);
818 return -EIO;
819 }
820
821 if (bytes < 0) {
822 error_setg(errp, "bytes is negative: %" PRIi64, bytes);
823 return -EIO;
824 }
825
826 if (bytes > BDRV_MAX_LENGTH) {
827 error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
828 bytes, BDRV_MAX_LENGTH);
829 return -EIO;
830 }
831
832 if (offset > BDRV_MAX_LENGTH) {
833 error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
834 offset, BDRV_MAX_LENGTH);
835 return -EIO;
836 }
837
838 if (offset > BDRV_MAX_LENGTH - bytes) {
839 error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
840 "exceeds maximum(%" PRIi64 ")", offset, bytes,
841 BDRV_MAX_LENGTH);
842 return -EIO;
843 }
844
845 if (!qiov) {
846 return 0;
847 }
848
849
850
851
852
853 if (qiov_offset > qiov->size) {
854 error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
855 qiov_offset, qiov->size);
856 return -EIO;
857 }
858
859 if (bytes > qiov->size - qiov_offset) {
860 error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
861 "vector size(%zu)", bytes, qiov_offset, qiov->size);
862 return -EIO;
863 }
864
865 return 0;
866}
867
868int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
869{
870 return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
871}
872
873static int bdrv_check_request32(int64_t offset, int64_t bytes,
874 QEMUIOVector *qiov, size_t qiov_offset)
875{
876 int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
877 if (ret < 0) {
878 return ret;
879 }
880
881 if (bytes > BDRV_REQUEST_MAX_BYTES) {
882 return -EIO;
883 }
884
885 return 0;
886}
887
888
889
890
891
892
893
894
895
896
897int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
898{
899 int ret;
900 int64_t target_size, bytes, offset = 0;
901 BlockDriverState *bs = child->bs;
902 IO_CODE();
903
904 target_size = bdrv_getlength(bs);
905 if (target_size < 0) {
906 return target_size;
907 }
908
909 for (;;) {
910 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
911 if (bytes <= 0) {
912 return 0;
913 }
914 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
915 if (ret < 0) {
916 return ret;
917 }
918 if (ret & BDRV_BLOCK_ZERO) {
919 offset += bytes;
920 continue;
921 }
922 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
923 if (ret < 0) {
924 return ret;
925 }
926 offset += bytes;
927 }
928}
929
930
931
932
933
934
935
936int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
937 int64_t bytes, const void *buf,
938 BdrvRequestFlags flags)
939{
940 int ret;
941 IO_CODE();
942 assert_bdrv_graph_readable();
943
944 ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
945 if (ret < 0) {
946 return ret;
947 }
948
949 ret = bdrv_co_flush(child->bs);
950 if (ret < 0) {
951 return ret;
952 }
953
954 return 0;
955}
956
957typedef struct CoroutineIOCompletion {
958 Coroutine *coroutine;
959 int ret;
960} CoroutineIOCompletion;
961
962static void bdrv_co_io_em_complete(void *opaque, int ret)
963{
964 CoroutineIOCompletion *co = opaque;
965
966 co->ret = ret;
967 aio_co_wake(co->coroutine);
968}
969
970static int coroutine_fn GRAPH_RDLOCK
971bdrv_driver_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
972 QEMUIOVector *qiov, size_t qiov_offset, int flags)
973{
974 BlockDriver *drv = bs->drv;
975 int64_t sector_num;
976 unsigned int nb_sectors;
977 QEMUIOVector local_qiov;
978 int ret;
979 assert_bdrv_graph_readable();
980
981 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
982 assert(!(flags & ~bs->supported_read_flags));
983
984 if (!drv) {
985 return -ENOMEDIUM;
986 }
987
988 if (drv->bdrv_co_preadv_part) {
989 return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
990 flags);
991 }
992
993 if (qiov_offset > 0 || bytes != qiov->size) {
994 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
995 qiov = &local_qiov;
996 }
997
998 if (drv->bdrv_co_preadv) {
999 ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1000 goto out;
1001 }
1002
1003 if (drv->bdrv_aio_preadv) {
1004 BlockAIOCB *acb;
1005 CoroutineIOCompletion co = {
1006 .coroutine = qemu_coroutine_self(),
1007 };
1008
1009 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1010 bdrv_co_io_em_complete, &co);
1011 if (acb == NULL) {
1012 ret = -EIO;
1013 goto out;
1014 } else {
1015 qemu_coroutine_yield();
1016 ret = co.ret;
1017 goto out;
1018 }
1019 }
1020
1021 sector_num = offset >> BDRV_SECTOR_BITS;
1022 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1023
1024 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1025 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1026 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1027 assert(drv->bdrv_co_readv);
1028
1029 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1030
1031out:
1032 if (qiov == &local_qiov) {
1033 qemu_iovec_destroy(&local_qiov);
1034 }
1035
1036 return ret;
1037}
1038
1039static int coroutine_fn GRAPH_RDLOCK
1040bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
1041 QEMUIOVector *qiov, size_t qiov_offset,
1042 BdrvRequestFlags flags)
1043{
1044 BlockDriver *drv = bs->drv;
1045 bool emulate_fua = false;
1046 int64_t sector_num;
1047 unsigned int nb_sectors;
1048 QEMUIOVector local_qiov;
1049 int ret;
1050 assert_bdrv_graph_readable();
1051
1052 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1053
1054 if (!drv) {
1055 return -ENOMEDIUM;
1056 }
1057
1058 if ((flags & BDRV_REQ_FUA) &&
1059 (~bs->supported_write_flags & BDRV_REQ_FUA)) {
1060 flags &= ~BDRV_REQ_FUA;
1061 emulate_fua = true;
1062 }
1063
1064 flags &= bs->supported_write_flags;
1065
1066 if (drv->bdrv_co_pwritev_part) {
1067 ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
1068 flags);
1069 goto emulate_flags;
1070 }
1071
1072 if (qiov_offset > 0 || bytes != qiov->size) {
1073 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1074 qiov = &local_qiov;
1075 }
1076
1077 if (drv->bdrv_co_pwritev) {
1078 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
1079 goto emulate_flags;
1080 }
1081
1082 if (drv->bdrv_aio_pwritev) {
1083 BlockAIOCB *acb;
1084 CoroutineIOCompletion co = {
1085 .coroutine = qemu_coroutine_self(),
1086 };
1087
1088 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
1089 bdrv_co_io_em_complete, &co);
1090 if (acb == NULL) {
1091 ret = -EIO;
1092 } else {
1093 qemu_coroutine_yield();
1094 ret = co.ret;
1095 }
1096 goto emulate_flags;
1097 }
1098
1099 sector_num = offset >> BDRV_SECTOR_BITS;
1100 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1101
1102 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1103 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1104 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1105
1106 assert(drv->bdrv_co_writev);
1107 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
1108
1109emulate_flags:
1110 if (ret == 0 && emulate_fua) {
1111 ret = bdrv_co_flush(bs);
1112 }
1113
1114 if (qiov == &local_qiov) {
1115 qemu_iovec_destroy(&local_qiov);
1116 }
1117
1118 return ret;
1119}
1120
1121static int coroutine_fn GRAPH_RDLOCK
1122bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
1123 int64_t bytes, QEMUIOVector *qiov,
1124 size_t qiov_offset)
1125{
1126 BlockDriver *drv = bs->drv;
1127 QEMUIOVector local_qiov;
1128 int ret;
1129 assert_bdrv_graph_readable();
1130
1131 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1132
1133 if (!drv) {
1134 return -ENOMEDIUM;
1135 }
1136
1137 if (!block_driver_can_compress(drv)) {
1138 return -ENOTSUP;
1139 }
1140
1141 if (drv->bdrv_co_pwritev_compressed_part) {
1142 return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
1143 qiov, qiov_offset);
1144 }
1145
1146 if (qiov_offset == 0) {
1147 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1148 }
1149
1150 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1151 ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
1152 qemu_iovec_destroy(&local_qiov);
1153
1154 return ret;
1155}
1156
1157static int coroutine_fn GRAPH_RDLOCK
1158bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
1159 QEMUIOVector *qiov, size_t qiov_offset, int flags)
1160{
1161 BlockDriverState *bs = child->bs;
1162
1163
1164
1165
1166
1167
1168 void *bounce_buffer = NULL;
1169
1170 BlockDriver *drv = bs->drv;
1171 int64_t cluster_offset;
1172 int64_t cluster_bytes;
1173 int64_t skip_bytes;
1174 int ret;
1175 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1176 BDRV_REQUEST_MAX_BYTES);
1177 int64_t progress = 0;
1178 bool skip_write;
1179
1180 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1181
1182 if (!drv) {
1183 return -ENOMEDIUM;
1184 }
1185
1186
1187
1188
1189
1190 skip_write = (bs->open_flags & BDRV_O_INACTIVE);
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1207 skip_bytes = offset - cluster_offset;
1208
1209 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1210 cluster_offset, cluster_bytes);
1211
1212 while (cluster_bytes) {
1213 int64_t pnum;
1214
1215 if (skip_write) {
1216 ret = 1;
1217 pnum = MIN(cluster_bytes, max_transfer);
1218 } else {
1219 ret = bdrv_is_allocated(bs, cluster_offset,
1220 MIN(cluster_bytes, max_transfer), &pnum);
1221 if (ret < 0) {
1222
1223
1224
1225
1226
1227 pnum = MIN(cluster_bytes, max_transfer);
1228 }
1229
1230
1231 if (ret == 0 && pnum == 0) {
1232 assert(progress >= bytes);
1233 break;
1234 }
1235
1236 assert(skip_bytes < pnum);
1237 }
1238
1239 if (ret <= 0) {
1240 QEMUIOVector local_qiov;
1241
1242
1243 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1244 if (!bounce_buffer) {
1245 int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
1246 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
1247 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
1248
1249 bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
1250 if (!bounce_buffer) {
1251 ret = -ENOMEM;
1252 goto err;
1253 }
1254 }
1255 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1256
1257 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1258 &local_qiov, 0, 0);
1259 if (ret < 0) {
1260 goto err;
1261 }
1262
1263 bdrv_co_debug_event(bs, BLKDBG_COR_WRITE);
1264 if (drv->bdrv_co_pwrite_zeroes &&
1265 buffer_is_zero(bounce_buffer, pnum)) {
1266
1267
1268
1269 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1270 BDRV_REQ_WRITE_UNCHANGED);
1271 } else {
1272
1273
1274
1275 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1276 &local_qiov, 0,
1277 BDRV_REQ_WRITE_UNCHANGED);
1278 }
1279
1280 if (ret < 0) {
1281
1282
1283
1284
1285
1286 goto err;
1287 }
1288
1289 if (!(flags & BDRV_REQ_PREFETCH)) {
1290 qemu_iovec_from_buf(qiov, qiov_offset + progress,
1291 bounce_buffer + skip_bytes,
1292 MIN(pnum - skip_bytes, bytes - progress));
1293 }
1294 } else if (!(flags & BDRV_REQ_PREFETCH)) {
1295
1296 ret = bdrv_driver_preadv(bs, offset + progress,
1297 MIN(pnum - skip_bytes, bytes - progress),
1298 qiov, qiov_offset + progress, 0);
1299 if (ret < 0) {
1300 goto err;
1301 }
1302 }
1303
1304 cluster_offset += pnum;
1305 cluster_bytes -= pnum;
1306 progress += pnum - skip_bytes;
1307 skip_bytes = 0;
1308 }
1309 ret = 0;
1310
1311err:
1312 qemu_vfree(bounce_buffer);
1313 return ret;
1314}
1315
1316
1317
1318
1319
1320
1321static int coroutine_fn GRAPH_RDLOCK
1322bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
1323 int64_t offset, int64_t bytes, int64_t align,
1324 QEMUIOVector *qiov, size_t qiov_offset, int flags)
1325{
1326 BlockDriverState *bs = child->bs;
1327 int64_t total_bytes, max_bytes;
1328 int ret = 0;
1329 int64_t bytes_remaining = bytes;
1330 int max_transfer;
1331
1332 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1333 assert(is_power_of_2(align));
1334 assert((offset & (align - 1)) == 0);
1335 assert((bytes & (align - 1)) == 0);
1336 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1337 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1338 align);
1339
1340
1341
1342
1343
1344
1345
1346 assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
1347 BDRV_REQ_REGISTERED_BUF)));
1348
1349
1350 if (flags & BDRV_REQ_COPY_ON_READ) {
1351
1352
1353
1354
1355
1356 bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
1357 } else {
1358 bdrv_wait_serialising_requests(req);
1359 }
1360
1361 if (flags & BDRV_REQ_COPY_ON_READ) {
1362 int64_t pnum;
1363
1364
1365 flags &= ~BDRV_REQ_COPY_ON_READ;
1366
1367 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1368 if (ret < 0) {
1369 goto out;
1370 }
1371
1372 if (!ret || pnum != bytes) {
1373 ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
1374 qiov, qiov_offset, flags);
1375 goto out;
1376 } else if (flags & BDRV_REQ_PREFETCH) {
1377 goto out;
1378 }
1379 }
1380
1381
1382 total_bytes = bdrv_co_getlength(bs);
1383 if (total_bytes < 0) {
1384 ret = total_bytes;
1385 goto out;
1386 }
1387
1388 assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
1389
1390 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1391 if (bytes <= max_bytes && bytes <= max_transfer) {
1392 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
1393 goto out;
1394 }
1395
1396 while (bytes_remaining) {
1397 int64_t num;
1398
1399 if (max_bytes) {
1400 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1401 assert(num);
1402
1403 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1404 num, qiov,
1405 qiov_offset + bytes - bytes_remaining,
1406 flags);
1407 max_bytes -= num;
1408 } else {
1409 num = bytes_remaining;
1410 ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
1411 0, bytes_remaining);
1412 }
1413 if (ret < 0) {
1414 goto out;
1415 }
1416 bytes_remaining -= num;
1417 }
1418
1419out:
1420 return ret < 0 ? ret : 0;
1421}
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453typedef struct BdrvRequestPadding {
1454 uint8_t *buf;
1455 size_t buf_len;
1456 uint8_t *tail_buf;
1457 size_t head;
1458 size_t tail;
1459 bool merge_reads;
1460 bool write;
1461 QEMUIOVector local_qiov;
1462
1463 uint8_t *collapse_bounce_buf;
1464 size_t collapse_len;
1465 QEMUIOVector pre_collapse_qiov;
1466} BdrvRequestPadding;
1467
1468static bool bdrv_init_padding(BlockDriverState *bs,
1469 int64_t offset, int64_t bytes,
1470 bool write,
1471 BdrvRequestPadding *pad)
1472{
1473 int64_t align = bs->bl.request_alignment;
1474 int64_t sum;
1475
1476 bdrv_check_request(offset, bytes, &error_abort);
1477 assert(align <= INT_MAX);
1478 assert(align <= SIZE_MAX / 2);
1479
1480 memset(pad, 0, sizeof(*pad));
1481
1482 pad->head = offset & (align - 1);
1483 pad->tail = ((offset + bytes) & (align - 1));
1484 if (pad->tail) {
1485 pad->tail = align - pad->tail;
1486 }
1487
1488 if (!pad->head && !pad->tail) {
1489 return false;
1490 }
1491
1492 assert(bytes);
1493
1494 sum = pad->head + bytes + pad->tail;
1495 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1496 pad->buf = qemu_blockalign(bs, pad->buf_len);
1497 pad->merge_reads = sum == pad->buf_len;
1498 if (pad->tail) {
1499 pad->tail_buf = pad->buf + pad->buf_len - align;
1500 }
1501
1502 pad->write = write;
1503
1504 return true;
1505}
1506
1507static int coroutine_fn GRAPH_RDLOCK
1508bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
1509 BdrvRequestPadding *pad, bool zero_middle)
1510{
1511 QEMUIOVector local_qiov;
1512 BlockDriverState *bs = child->bs;
1513 uint64_t align = bs->bl.request_alignment;
1514 int ret;
1515
1516 assert(req->serialising && pad->buf);
1517
1518 if (pad->head || pad->merge_reads) {
1519 int64_t bytes = pad->merge_reads ? pad->buf_len : align;
1520
1521 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1522
1523 if (pad->head) {
1524 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1525 }
1526 if (pad->merge_reads && pad->tail) {
1527 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1528 }
1529 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1530 align, &local_qiov, 0, 0);
1531 if (ret < 0) {
1532 return ret;
1533 }
1534 if (pad->head) {
1535 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1536 }
1537 if (pad->merge_reads && pad->tail) {
1538 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1539 }
1540
1541 if (pad->merge_reads) {
1542 goto zero_mem;
1543 }
1544 }
1545
1546 if (pad->tail) {
1547 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1548
1549 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1550 ret = bdrv_aligned_preadv(
1551 child, req,
1552 req->overlap_offset + req->overlap_bytes - align,
1553 align, align, &local_qiov, 0, 0);
1554 if (ret < 0) {
1555 return ret;
1556 }
1557 bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1558 }
1559
1560zero_mem:
1561 if (zero_middle) {
1562 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1563 }
1564
1565 return 0;
1566}
1567
1568
1569
1570
1571static void bdrv_padding_finalize(BdrvRequestPadding *pad)
1572{
1573 if (pad->collapse_bounce_buf) {
1574 if (!pad->write) {
1575
1576
1577
1578
1579 qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
1580 pad->collapse_bounce_buf, pad->collapse_len);
1581 }
1582 qemu_vfree(pad->collapse_bounce_buf);
1583 qemu_iovec_destroy(&pad->pre_collapse_qiov);
1584 }
1585 if (pad->buf) {
1586 qemu_vfree(pad->buf);
1587 qemu_iovec_destroy(&pad->local_qiov);
1588 }
1589 memset(pad, 0, sizeof(*pad));
1590}
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
/*
 * Build pad->local_qiov as: head padding (if any), then @bytes bytes of
 * @iov starting at @iov_offset, then tail padding (if any).
 *
 * If adding the padding elements would push the element count above
 * IOV_MAX, the first few user elements are replaced by a single bounce
 * buffer (pad->collapse_bounce_buf) so that the result has exactly
 * IOV_MAX elements.  The replaced elements are remembered in
 * pad->pre_collapse_qiov.
 *
 * Returns 0 on success, -EINVAL if head + bytes + tail overflows size_t.
 */
static int bdrv_create_padded_qiov(BlockDriverState *bs,
                                   BdrvRequestPadding *pad,
                                   struct iovec *iov, int niov,
                                   size_t iov_offset, size_t bytes)
{
    int padded_niov, surplus_count, collapse_count;

    /* The surplus computation below relies on this */
    assert(niov <= IOV_MAX);

    /*
     * Reject requests whose padded length (head + bytes + tail) would not
     * fit into a size_t.
     */
    if (SIZE_MAX - pad->head < bytes ||
        SIZE_MAX - pad->head - bytes < pad->tail)
    {
        return -EINVAL;
    }

    /* Element count if everything were simply concatenated */
    padded_niov = !!pad->head + niov + !!pad->tail;

    qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));

    if (pad->head) {
        qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
    }

    /*
     * Too many elements: merge the leading user elements into one bounce
     * buffer so the total comes out at IOV_MAX.
     */
    if (padded_niov > IOV_MAX) {
        /*
         * Only head and tail can push us over the limit, so the surplus is
         * at most 2.  Removing N elements requires merging N + 1 of them
         * into one.
         */
        surplus_count = padded_niov - IOV_MAX;
        assert(surplus_count <= !!pad->head + !!pad->tail);
        collapse_count = surplus_count + 1;

        /*
         * Remember the elements being collapsed.  SIZE_MAX is passed as the
         * byte count; presumably this takes everything those elements hold
         * past iov_offset (TODO confirm against qemu_iovec_concat_iov()).
         */
        qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
        qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
                              collapse_count, iov_offset, SIZE_MAX);
        iov += collapse_count;
        iov_offset = 0;
        niov -= collapse_count;
        bytes -= pad->pre_collapse_qiov.size;

        /*
         * Allocate the bounce buffer.  For writes it must already contain
         * the user data; for reads it is copied back in
         * bdrv_padding_finalize().
         */
        pad->collapse_len = pad->pre_collapse_qiov.size;
        pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
        if (pad->write) {
            qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
                              pad->collapse_bounce_buf, pad->collapse_len);
        }
        qemu_iovec_add(&pad->local_qiov,
                       pad->collapse_bounce_buf, pad->collapse_len);
    }

    /* The remaining (un-collapsed) user elements */
    qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);

    if (pad->tail) {
        qemu_iovec_add(&pad->local_qiov,
                       pad->buf + pad->buf_len - pad->tail, pad->tail);
    }

    assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
    return 0;
}
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701static int bdrv_pad_request(BlockDriverState *bs,
1702 QEMUIOVector **qiov, size_t *qiov_offset,
1703 int64_t *offset, int64_t *bytes,
1704 bool write,
1705 BdrvRequestPadding *pad, bool *padded,
1706 BdrvRequestFlags *flags)
1707{
1708 int ret;
1709 struct iovec *sliced_iov;
1710 int sliced_niov;
1711 size_t sliced_head, sliced_tail;
1712
1713
1714 ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
1715 if (ret < 0) {
1716 return ret;
1717 }
1718
1719 if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
1720 if (padded) {
1721 *padded = false;
1722 }
1723 return 0;
1724 }
1725
1726 sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
1727 &sliced_head, &sliced_tail,
1728 &sliced_niov);
1729
1730
1731 assert(*bytes <= SIZE_MAX);
1732 ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
1733 sliced_head, *bytes);
1734 if (ret < 0) {
1735 bdrv_padding_finalize(pad);
1736 return ret;
1737 }
1738 *bytes += pad->head + pad->tail;
1739 *offset -= pad->head;
1740 *qiov = &pad->local_qiov;
1741 *qiov_offset = 0;
1742 if (padded) {
1743 *padded = true;
1744 }
1745 if (flags) {
1746
1747 *flags &= ~BDRV_REQ_REGISTERED_BUF;
1748 }
1749
1750 return 0;
1751}
1752
1753int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1754 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1755 BdrvRequestFlags flags)
1756{
1757 IO_CODE();
1758 return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
1759}
1760
1761int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
1762 int64_t offset, int64_t bytes,
1763 QEMUIOVector *qiov, size_t qiov_offset,
1764 BdrvRequestFlags flags)
1765{
1766 BlockDriverState *bs = child->bs;
1767 BdrvTrackedRequest req;
1768 BdrvRequestPadding pad;
1769 int ret;
1770 IO_CODE();
1771
1772 trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
1773
1774 if (!bdrv_co_is_inserted(bs)) {
1775 return -ENOMEDIUM;
1776 }
1777
1778 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
1779 if (ret < 0) {
1780 return ret;
1781 }
1782
1783 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
1784
1785
1786
1787
1788
1789
1790
1791
1792 return 0;
1793 }
1794
1795 bdrv_inc_in_flight(bs);
1796
1797
1798 if (qatomic_read(&bs->copy_on_read)) {
1799 flags |= BDRV_REQ_COPY_ON_READ;
1800 }
1801
1802 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
1803 &pad, NULL, &flags);
1804 if (ret < 0) {
1805 goto fail;
1806 }
1807
1808 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1809 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1810 bs->bl.request_alignment,
1811 qiov, qiov_offset, flags);
1812 tracked_request_end(&req);
1813 bdrv_padding_finalize(&pad);
1814
1815fail:
1816 bdrv_dec_in_flight(bs);
1817
1818 return ret;
1819}
1820
/*
 * Zero the range [offset, offset + bytes) of @bs.
 *
 * The range is processed in chunks: an unaligned head up to the next
 * alignment boundary, then aligned chunks of at most max_write_zeroes
 * bytes, then the unaligned tail.  Each chunk is first offered to the
 * driver's bdrv_co_pwrite_zeroes callback; if that returns -ENOTSUP and
 * BDRV_REQ_NO_FALLBACK is not set, a zero-filled bounce buffer is written
 * with bdrv_driver_pwritev() instead.
 *
 * Returns 0 on success, -ENOMEDIUM without a driver, -ENOTSUP when
 * BDRV_REQ_NO_FALLBACK is requested but not supported, -EINVAL for
 * BDRV_REQ_REGISTERED_BUF, -ENOMEM if the bounce buffer cannot be
 * allocated, or the error of the failing chunk or flush.
 */
static int coroutine_fn GRAPH_RDLOCK
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
                         BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    void *buf = NULL;
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
                                            INT64_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    assert_bdrv_graph_readable();
    bdrv_check_request(offset, bytes, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    /* NO_FALLBACK was requested but the driver cannot honour it */
    if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
        return -ENOTSUP;
    }

    /* A zero write carries no caller buffer, so this flag is rejected */
    if (flags & BDRV_REQ_REGISTERED_BUF) {
        return -EINVAL;
    }

    /* NOTE(review): presumably drops cached block-status data that overlaps */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int64_t num = bytes;

        /*
         * Split the request so that the unaligned head and tail are
         * handled as their own chunks, separate from the aligned middle.
         */
        if (head) {
            /*
             * Stop at the next alignment boundary (or earlier if the
             * request ends first or exceeds max_transfer).
             */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Cut the chunk at the last alignment boundary */
            num -= tail;
        }

        /* Respect the per-request zeroing limit */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* Preferred path: the driver's dedicated zeroing callback */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                /* FUA was masked out above; emulate it with a final flush */
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fallback: write an explicit buffer of zeroes */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /*
                 * Strip FUA from the individual writes and flush once at
                 * the end instead.
                 */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            if (buf == NULL) {
                buf = qemu_try_blockalign0(bs, num);
                if (buf == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }
            qemu_iovec_init_buf(&qiov, buf, num);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);

            /*
             * Keep the buffer only if it is max_transfer bytes long, i.e.
             * large enough for any later chunk; otherwise free it so the
             * next iteration allocates a suitably sized one.
             */
            if (num < max_transfer) {
                qemu_vfree(buf);
                buf = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(buf);
    return ret;
}
1944
1945static inline int coroutine_fn GRAPH_RDLOCK
1946bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
1947 BdrvTrackedRequest *req, int flags)
1948{
1949 BlockDriverState *bs = child->bs;
1950
1951 bdrv_check_request(offset, bytes, &error_abort);
1952
1953 if (bdrv_is_read_only(bs)) {
1954 return -EPERM;
1955 }
1956
1957 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1958 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1959 assert(!(flags & ~BDRV_REQ_MASK));
1960 assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
1961
1962 if (flags & BDRV_REQ_SERIALISING) {
1963 QEMU_LOCK_GUARD(&bs->reqs_lock);
1964
1965 tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
1966
1967 if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
1968 return -EBUSY;
1969 }
1970
1971 bdrv_wait_serialising_requests_locked(req);
1972 } else {
1973 bdrv_wait_serialising_requests(req);
1974 }
1975
1976 assert(req->overlap_offset <= offset);
1977 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1978 assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
1979 child->perm & BLK_PERM_RESIZE);
1980
1981 switch (req->type) {
1982 case BDRV_TRACKED_WRITE:
1983 case BDRV_TRACKED_DISCARD:
1984 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1985 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1986 } else {
1987 assert(child->perm & BLK_PERM_WRITE);
1988 }
1989 bdrv_write_threshold_check_write(bs, offset, bytes);
1990 return 0;
1991 case BDRV_TRACKED_TRUNCATE:
1992 assert(child->perm & BLK_PERM_RESIZE);
1993 return 0;
1994 default:
1995 abort();
1996 }
1997}
1998
1999static inline void coroutine_fn
2000bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
2001 BdrvTrackedRequest *req, int ret)
2002{
2003 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
2004 BlockDriverState *bs = child->bs;
2005
2006 bdrv_check_request(offset, bytes, &error_abort);
2007
2008 qatomic_inc(&bs->write_gen);
2009
2010
2011
2012
2013
2014
2015
2016
2017 if (ret == 0 &&
2018 (req->type == BDRV_TRACKED_TRUNCATE ||
2019 end_sector > bs->total_sectors) &&
2020 req->type != BDRV_TRACKED_DISCARD) {
2021 bs->total_sectors = end_sector;
2022 bdrv_parent_cb_resize(bs);
2023 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
2024 }
2025 if (req->bytes) {
2026 switch (req->type) {
2027 case BDRV_TRACKED_WRITE:
2028 stat64_max(&bs->wr_highest_offset, offset + bytes);
2029
2030 case BDRV_TRACKED_DISCARD:
2031 bdrv_set_dirty(bs, offset, bytes);
2032 break;
2033 default:
2034 break;
2035 }
2036 }
2037}
2038
2039
2040
2041
2042
/*
 * Write an already-aligned request to the driver.
 *
 * @offset and @bytes must be multiples of @align (a power of two).  The
 * request is dispatched as a zero write, a compressed write, a single
 * driver write, or a sequence of writes of at most max_transfer bytes.
 * If zero detection is enabled and the data is all zeroes, the request is
 * converted into a zero write.
 *
 * Returns 0 on success, -ENOMEDIUM without a driver, -EPERM if the node
 * has read-only bitmaps, or the negative value from preparation or from
 * the driver.
 */
static int coroutine_fn GRAPH_RDLOCK
bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,
                     int64_t offset, int64_t bytes, int64_t align,
                     QEMUIOVector *qiov, size_t qiov_offset,
                     BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    /* Largest chunk per driver call, rounded down to a multiple of align */
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);

    /* Zero detection: turn an all-zero data write into a zero write */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }

        /* The zero-write path rejects this flag (returns -EINVAL) */
        flags &= ~BDRV_REQ_REGISTERED_BUF;
    }

    if (ret < 0) {
        /* Preparation failed: skip the write, but still run the finish step */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_co_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                             qiov, qiov_offset);
    } else if (bytes <= max_transfer) {
        bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
    } else {
        /* Too large for one call: split into max_transfer-sized chunks */
        bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /*
                 * FUA is not natively supported here; keep the flag only
                 * on the last chunk so any emulation happens once.
                 */
                local_flags &= ~BDRV_REQ_FUA;
            }

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, qiov,
                                      qiov_offset + bytes - bytes_remaining,
                                      local_flags);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_co_debug_event(bs, BLKDBG_PWRITEV_DONE);

    /* Normalise any non-negative driver result to 0 */
    if (ret >= 0) {
        ret = 0;
    }
    bdrv_co_write_req_finish(child, offset, bytes, req, ret);

    return ret;
}
2130
2131static int coroutine_fn GRAPH_RDLOCK
2132bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
2133 BdrvRequestFlags flags, BdrvTrackedRequest *req)
2134{
2135 BlockDriverState *bs = child->bs;
2136 QEMUIOVector local_qiov;
2137 uint64_t align = bs->bl.request_alignment;
2138 int ret = 0;
2139 bool padding;
2140 BdrvRequestPadding pad;
2141
2142
2143 flags &= ~BDRV_REQ_REGISTERED_BUF;
2144
2145 padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
2146 if (padding) {
2147 assert(!(flags & BDRV_REQ_NO_WAIT));
2148 bdrv_make_request_serialising(req, align);
2149
2150 bdrv_padding_rmw_read(child, req, &pad, true);
2151
2152 if (pad.head || pad.merge_reads) {
2153 int64_t aligned_offset = offset & ~(align - 1);
2154 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2155
2156 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2157 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2158 align, &local_qiov, 0,
2159 flags & ~BDRV_REQ_ZERO_WRITE);
2160 if (ret < 0 || pad.merge_reads) {
2161
2162 goto out;
2163 }
2164 offset += write_bytes - pad.head;
2165 bytes -= write_bytes - pad.head;
2166 }
2167 }
2168
2169 assert(!bytes || (offset & (align - 1)) == 0);
2170 if (bytes >= align) {
2171
2172 int64_t aligned_bytes = bytes & ~(align - 1);
2173 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2174 NULL, 0, flags);
2175 if (ret < 0) {
2176 goto out;
2177 }
2178 bytes -= aligned_bytes;
2179 offset += aligned_bytes;
2180 }
2181
2182 assert(!bytes || (offset & (align - 1)) == 0);
2183 if (bytes) {
2184 assert(align == pad.tail + bytes);
2185
2186 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2187 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2188 &local_qiov, 0,
2189 flags & ~BDRV_REQ_ZERO_WRITE);
2190 }
2191
2192out:
2193 bdrv_padding_finalize(&pad);
2194
2195 return ret;
2196}
2197
2198
2199
2200
2201int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2202 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
2203 BdrvRequestFlags flags)
2204{
2205 IO_CODE();
2206 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2207}
2208
2209int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2210 int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
2211 BdrvRequestFlags flags)
2212{
2213 BlockDriverState *bs = child->bs;
2214 BdrvTrackedRequest req;
2215 uint64_t align = bs->bl.request_alignment;
2216 BdrvRequestPadding pad;
2217 int ret;
2218 bool padded = false;
2219 IO_CODE();
2220
2221 trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
2222
2223 if (!bdrv_co_is_inserted(bs)) {
2224 return -ENOMEDIUM;
2225 }
2226
2227 if (flags & BDRV_REQ_ZERO_WRITE) {
2228 ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
2229 } else {
2230 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
2231 }
2232 if (ret < 0) {
2233 return ret;
2234 }
2235
2236
2237 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2238 !QEMU_IS_ALIGNED(offset | bytes, align))
2239 {
2240 return -ENOTSUP;
2241 }
2242
2243 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
2244
2245
2246
2247
2248
2249
2250
2251
2252 return 0;
2253 }
2254
2255 if (!(flags & BDRV_REQ_ZERO_WRITE)) {
2256
2257
2258
2259
2260
2261 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
2262 &pad, &padded, &flags);
2263 if (ret < 0) {
2264 return ret;
2265 }
2266 }
2267
2268 bdrv_inc_in_flight(bs);
2269 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2270
2271 if (flags & BDRV_REQ_ZERO_WRITE) {
2272 assert(!padded);
2273 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2274 goto out;
2275 }
2276
2277 if (padded) {
2278
2279
2280
2281
2282
2283
2284 assert(!(flags & BDRV_REQ_NO_WAIT));
2285 bdrv_make_request_serialising(&req, align);
2286 bdrv_padding_rmw_read(child, &req, &pad, false);
2287 }
2288
2289 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2290 qiov, qiov_offset, flags);
2291
2292 bdrv_padding_finalize(&pad);
2293
2294out:
2295 tracked_request_end(&req);
2296 bdrv_dec_in_flight(bs);
2297
2298 return ret;
2299}
2300
2301int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2302 int64_t bytes, BdrvRequestFlags flags)
2303{
2304 IO_CODE();
2305 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2306 assert_bdrv_graph_readable();
2307
2308 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2309 flags &= ~BDRV_REQ_MAY_UNMAP;
2310 }
2311
2312 return bdrv_co_pwritev(child, offset, bytes, NULL,
2313 BDRV_REQ_ZERO_WRITE | flags);
2314}
2315
2316
2317
2318
2319int bdrv_flush_all(void)
2320{
2321 BdrvNextIterator it;
2322 BlockDriverState *bs = NULL;
2323 int result = 0;
2324
2325 GLOBAL_STATE_CODE();
2326
2327
2328
2329
2330
2331
2332 if (replay_events_enabled()) {
2333 return result;
2334 }
2335
2336 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2337 AioContext *aio_context = bdrv_get_aio_context(bs);
2338 int ret;
2339
2340 aio_context_acquire(aio_context);
2341 ret = bdrv_flush(bs);
2342 if (ret < 0 && !result) {
2343 result = ret;
2344 }
2345 aio_context_release(aio_context);
2346 }
2347
2348 return result;
2349}
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
/*
 * Query the allocation status of the range starting at @offset on @bs
 * (this node only; backing layers are handled by the caller).
 *
 * @want_zero is passed through to the driver and additionally enables the
 * extra zero detection below (backing-file size check, recursion into the
 * protocol layer, filling the block-status cache).
 *
 * On return, *pnum (must be non-NULL) holds the number of bytes from
 * @offset that share the returned status; it is 0 on error, at or past
 * end of file, or for a zero-length request.  *map and *file are set when
 * the pointers are non-NULL.
 *
 * Returns a mask of BDRV_BLOCK_* flags (BDRV_BLOCK_EOF is added whenever
 * the reported range ends at end of file) or a negative value on error.
 */
static int coroutine_fn GRAPH_RDLOCK
bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
                     int64_t offset, int64_t bytes,
                     int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    int64_t total_size;
    int64_t n;
    int ret;
    int64_t local_map = 0;
    BlockDriverState *local_file = NULL;
    int64_t aligned_offset, aligned_bytes;
    uint32_t align;
    bool has_filtered_child;

    assert(pnum);
    assert_bdrv_graph_readable();
    *pnum = 0;
    total_size = bdrv_co_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        goto early_out;
    }

    /* At or beyond end of file: report EOF with *pnum == 0 */
    if (offset >= total_size) {
        ret = BDRV_BLOCK_EOF;
        goto early_out;
    }
    if (!bytes) {
        ret = 0;
        goto early_out;
    }

    /* Clamp the request to end of file */
    n = total_size - offset;
    if (n < bytes) {
        bytes = n;
    }

    /* NOTE(review): presumably bdrv_co_getlength() fails without a driver */
    assert(bs->drv);
    has_filtered_child = bdrv_filter_child(bs);
    if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
        /* No driver callback and not a filter: report everything as data */
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Widen the request to request_alignment boundaries */
    align = bs->bl.request_alignment;
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    if (bs->drv->bdrv_co_block_status) {
        /*
         * The block-status cache is consulted only for nodes without
         * children.  On a hit, bdrv_bsc_is_data() has set *pnum and the
         * driver callback is skipped.
         * NOTE(review): exact cache semantics live in the bdrv_bsc_*
         * helpers, which are not visible here.
         */
        if (QLIST_EMPTY(&bs->children) &&
            bdrv_bsc_is_data(bs, aligned_offset, pnum))
        {
            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
            local_file = bs;
            local_map = aligned_offset;
        } else {
            ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
                                                aligned_bytes, pnum, &local_map,
                                                &local_file);

            /*
             * Feed the cache only with results that are exactly
             * DATA | OFFSET_VALID, obtained with want_zero set, on a node
             * without children.
             */
            if (want_zero &&
                ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
                QLIST_EMPTY(&bs->children))
            {
                /*
                 * A childless node reporting OFFSET_VALID is expected to
                 * map onto itself at the same offset.
                 */
                assert(local_file == bs);
                assert(local_map == aligned_offset);
                bdrv_bsc_fill(bs, aligned_offset, *pnum);
            }
        }
    } else {
        /* Filter without its own callback: defer to the filtered child */

        local_file = bdrv_filter_bs(bs);
        assert(local_file);

        *pnum = aligned_bytes;
        local_map = aligned_offset;
        ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
    }
    if (ret < 0) {
        *pnum = 0;
        goto out;
    }

    /*
     * The result must be a non-zero multiple of request_alignment.  Undo
     * the widening: clamp *pnum and shift the mapping back to @offset.
     */
    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
           align > offset - aligned_offset);
    if (ret & BDRV_BLOCK_RECURSE) {
        assert(ret & BDRV_BLOCK_DATA);
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        assert(!(ret & BDRV_BLOCK_ZERO));
    }

    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        /* Pass-through: the answer is whatever the referenced node says */
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (bs->drv->supports_backing) {
        /* Unallocated in a format that can have a backing file */
        BlockDriverState *cow_bs = bdrv_cow_bs(bs);

        if (!cow_bs) {
            /* No backing node, so the range is reported as zero */
            ret |= BDRV_BLOCK_ZERO;
        } else if (want_zero) {
            int64_t size2 = bdrv_co_getlength(cow_bs);

            /* Beyond the end of the backing node: also reported as zero */
            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* Optionally refine a data range by asking the underlying file node */
    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
        local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /*
             * Errors from the nested query are ignored; it only adds
             * extra information.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * The mapped range starts at/after the end of the
                 * underlying file, or is zero up to its end: report zero
                 * without shrinking *pnum.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Shrink to the range the underlying node reported */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}
2604
/*
 * Query block status for [offset, offset + bytes), walking from @bs down
 * the filter/COW chain until an allocated range is found or @base is
 * reached.
 *
 * @base is excluded from the walk unless @include_base is true (which
 * requires @base to be non-NULL).  *depth (if @depth is non-NULL) counts
 * the nodes queried.  @want_zero, @pnum, @map and @file are passed to
 * bdrv_co_block_status().
 *
 * Returns a mask of BDRV_BLOCK_* flags or a negative value on error.
 * When @bs == @base and @include_base is false, returns 0 with
 * *pnum = @bytes.
 */
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
                                  BlockDriverState *base,
                                  bool include_base,
                                  bool want_zero,
                                  int64_t offset,
                                  int64_t bytes,
                                  int64_t *pnum,
                                  int64_t *map,
                                  BlockDriverState **file,
                                  int *depth)
{
    int ret;
    BlockDriverState *p;
    int64_t eof = 0;
    int dummy;
    IO_CODE();

    assert(!include_base || base); /* include_base requires a base */
    assert_bdrv_graph_readable();

    /* Let the code below update *depth unconditionally */
    if (!depth) {
        depth = &dummy;
    }
    *depth = 0;

    if (!include_base && bs == base) {
        *pnum = bytes;
        return 0;
    }

    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
    ++*depth;
    if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
        return ret;
    }

    /* Remember where the top node ends, to restore the EOF flag later */
    if (ret & BDRV_BLOCK_EOF) {
        eof = offset + *pnum;
    }

    assert(*pnum <= bytes);
    bytes = *pnum;

    for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
         p = bdrv_filter_or_cow_bs(p))
    {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        ++*depth;
        if (ret < 0) {
            return ret;
        }
        if (*pnum == 0) {
            /*
             * This layer is shorter than the layers above it, and @offset
             * lies beyond its end.  The range is unallocated in every
             * layer above, so it is reported as zero and attributed to
             * this layer.
             */
            assert(ret & BDRV_BLOCK_EOF);
            *pnum = bytes;
            if (file) {
                *file = p;
            }
            ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
            break;
        }
        if (ret & BDRV_BLOCK_ALLOCATED) {
            /*
             * Found allocated data in a lower layer.  Its EOF flag refers
             * to that layer rather than to @bs, so drop it; it is
             * recomputed from the top node's end below.
             */
            ret &= ~BDRV_BLOCK_EOF;
            break;
        }

        if (p == base) {
            assert(include_base);
            break;
        }

        /*
         * Still unallocated: continue downwards, restricted to the range
         * this layer reported.
         */
        assert(*pnum <= bytes);
        bytes = *pnum;
    }

    /* Restore EOF if the final range ends where the top node ends */
    if (offset + *pnum == eof) {
        ret |= BDRV_BLOCK_EOF;
    }

    return ret;
}
2707
2708int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
2709 BlockDriverState *base,
2710 int64_t offset, int64_t bytes,
2711 int64_t *pnum, int64_t *map,
2712 BlockDriverState **file)
2713{
2714 IO_CODE();
2715 return bdrv_co_common_block_status_above(bs, base, false, true, offset,
2716 bytes, pnum, map, file, NULL);
2717}
2718
2719int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2720 int64_t offset, int64_t bytes, int64_t *pnum,
2721 int64_t *map, BlockDriverState **file)
2722{
2723 IO_CODE();
2724 return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
2725 pnum, map, file, NULL);
2726}
2727
2728int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2729 int64_t *pnum, int64_t *map, BlockDriverState **file)
2730{
2731 IO_CODE();
2732 return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
2733 offset, bytes, pnum, map, file);
2734}
2735
2736
2737
2738
2739
2740
2741
2742
2743int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
2744 int64_t bytes)
2745{
2746 int ret;
2747 int64_t pnum = bytes;
2748 IO_CODE();
2749
2750 if (!bytes) {
2751 return 1;
2752 }
2753
2754 ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
2755 bytes, &pnum, NULL, NULL, NULL);
2756
2757 if (ret < 0) {
2758 return ret;
2759 }
2760
2761 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
2762}
2763
2764int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
2765 int64_t bytes, int64_t *pnum)
2766{
2767 int ret;
2768 int64_t dummy;
2769 IO_CODE();
2770
2771 ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
2772 bytes, pnum ? pnum : &dummy, NULL,
2773 NULL, NULL);
2774 if (ret < 0) {
2775 return ret;
2776 }
2777 return !!(ret & BDRV_BLOCK_ALLOCATED);
2778}
2779
2780int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
2781 int64_t *pnum)
2782{
2783 int ret;
2784 int64_t dummy;
2785 IO_CODE();
2786
2787 ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
2788 bytes, pnum ? pnum : &dummy, NULL,
2789 NULL, NULL);
2790 if (ret < 0) {
2791 return ret;
2792 }
2793 return !!(ret & BDRV_BLOCK_ALLOCATED);
2794}
2795
2796
2797int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2798 BlockDriverState *base,
2799 bool include_base, int64_t offset,
2800 int64_t bytes, int64_t *pnum)
2801{
2802 int depth;
2803 int ret;
2804 IO_CODE();
2805
2806 ret = bdrv_co_common_block_status_above(top, base, include_base, false,
2807 offset, bytes, pnum, NULL, NULL,
2808 &depth);
2809 if (ret < 0) {
2810 return ret;
2811 }
2812
2813 if (ret & BDRV_BLOCK_ALLOCATED) {
2814 return depth;
2815 }
2816 return 0;
2817}
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836int bdrv_is_allocated_above(BlockDriverState *top,
2837 BlockDriverState *base,
2838 bool include_base, int64_t offset,
2839 int64_t bytes, int64_t *pnum)
2840{
2841 int depth;
2842 int ret;
2843 IO_CODE();
2844
2845 ret = bdrv_common_block_status_above(top, base, include_base, false,
2846 offset, bytes, pnum, NULL, NULL,
2847 &depth);
2848 if (ret < 0) {
2849 return ret;
2850 }
2851
2852 if (ret & BDRV_BLOCK_ALLOCATED) {
2853 return depth;
2854 }
2855 return 0;
2856}
2857
2858int coroutine_fn
2859bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2860{
2861 BlockDriver *drv = bs->drv;
2862 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2863 int ret;
2864 IO_CODE();
2865 assert_bdrv_graph_readable();
2866
2867 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2868 if (ret < 0) {
2869 return ret;
2870 }
2871
2872 if (!drv) {
2873 return -ENOMEDIUM;
2874 }
2875
2876 bdrv_inc_in_flight(bs);
2877
2878 if (drv->bdrv_co_load_vmstate) {
2879 ret = drv->bdrv_co_load_vmstate(bs, qiov, pos);
2880 } else if (child_bs) {
2881 ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
2882 } else {
2883 ret = -ENOTSUP;
2884 }
2885
2886 bdrv_dec_in_flight(bs);
2887
2888 return ret;
2889}
2890
2891int coroutine_fn
2892bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2893{
2894 BlockDriver *drv = bs->drv;
2895 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2896 int ret;
2897 IO_CODE();
2898 assert_bdrv_graph_readable();
2899
2900 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2901 if (ret < 0) {
2902 return ret;
2903 }
2904
2905 if (!drv) {
2906 return -ENOMEDIUM;
2907 }
2908
2909 bdrv_inc_in_flight(bs);
2910
2911 if (drv->bdrv_co_save_vmstate) {
2912 ret = drv->bdrv_co_save_vmstate(bs, qiov, pos);
2913 } else if (child_bs) {
2914 ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
2915 } else {
2916 ret = -ENOTSUP;
2917 }
2918
2919 bdrv_dec_in_flight(bs);
2920
2921 return ret;
2922}
2923
2924int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2925 int64_t pos, int size)
2926{
2927 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2928 int ret = bdrv_writev_vmstate(bs, &qiov, pos);
2929 IO_CODE();
2930
2931 return ret < 0 ? ret : size;
2932}
2933
2934int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2935 int64_t pos, int size)
2936{
2937 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2938 int ret = bdrv_readv_vmstate(bs, &qiov, pos);
2939 IO_CODE();
2940
2941 return ret < 0 ? ret : size;
2942}
2943
2944
2945
2946
2947void bdrv_aio_cancel(BlockAIOCB *acb)
2948{
2949 IO_CODE();
2950 qemu_aio_ref(acb);
2951 bdrv_aio_cancel_async(acb);
2952 while (acb->refcnt > 1) {
2953 if (acb->aiocb_info->get_aio_context) {
2954 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2955 } else if (acb->bs) {
2956
2957
2958
2959
2960 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2961 aio_poll(bdrv_get_aio_context(acb->bs), true);
2962 } else {
2963 abort();
2964 }
2965 }
2966 qemu_aio_unref(acb);
2967}
2968
2969
2970
2971
2972void bdrv_aio_cancel_async(BlockAIOCB *acb)
2973{
2974 IO_CODE();
2975 if (acb->aiocb_info->cancel_async) {
2976 acb->aiocb_info->cancel_async(acb);
2977 }
2978}
2979
2980
2981
2982
/*
 * Flush @bs and then, recursively, every child on which @bs holds
 * BLK_PERM_WRITE or BLK_PERM_WRITE_UNCHANGED.
 *
 * Only one flush per node is active at a time: later callers wait on
 * bs->flush_queue while bs->active_flush_req is set.
 *
 * Returns 0 on success, including the case where nothing is done because
 * the node is not inserted, is read-only, or bdrv_is_sg() is true; returns
 * a negative errno on failure.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);
    BdrvChild *child;
    int current_gen;
    int ret = 0;
    IO_CODE();

    assert_bdrv_graph_readable();
    bdrv_inc_in_flight(bs);

    /* Nothing to flush for these nodes; leave with ret == 0 */
    if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    /* Sample the write generation that this flush will cover */
    current_gen = qatomic_read(&bs->write_gen);

    /* Wait until any previously started flush on this node has completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* From here until "out" this coroutine is the node's active flush */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /*
     * A driver-provided bdrv_co_flush replaces every step below, including
     * the recursion into the children.
     */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /*
     * The flush-to-OS step runs regardless of BDRV_O_NO_FLUSH; that flag is
     * only checked afterwards.
     */
    BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* With BDRV_O_NO_FLUSH, skip forcing this node's data to disk */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_children;
    }

    /* Nothing was written since the last successful flush of this node */
    if (bs->flushed_gen == current_gen) {
        goto flush_children;
    }

    BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /*
         * bs->drv is re-checked here.
         * NOTE(review): presumably an earlier driver callback can leave the
         * node without a driver even when it reported success -- confirm.
         */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        /*
         * AIO-style driver: submit, then yield.  The result is read from
         * co.ret after resuming; presumably bdrv_co_io_em_complete() stores
         * it there and re-enters this coroutine.
         */
        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * The driver offers no flush-to-disk callback at all.  This is
         * reported as success rather than as an error.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /*
     * Flush the children that this node may write to.  Every such child is
     * flushed even if an earlier one failed; the first error is returned.
     */
flush_children:
    ret = 0;
    QLIST_FOREACH(child, &bs->children, next) {
        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
            int this_child_ret = bdrv_co_flush(child->bs);
            if (!ret) {
                ret = this_child_ret;
            }
        }
    }

out:
    /* Remember the flushed generation only if everything succeeded */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Wake the next waiting flush, if any; the return value is not needed */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}
3107
/*
 * Discard the range [offset, offset + bytes) on the node behind @child.
 *
 * The request is handed to the driver in fragments: an unaligned head and
 * tail are issued separately from the aligned middle, and every fragment is
 * capped at the node's max_pdiscard limit.  A fragment for which the driver
 * returns -ENOTSUP is skipped without failing the whole request.
 *
 * Returns 0 on success, and also when discarding is disabled for the node
 * (BDRV_O_UNMAP not set) or the driver has no discard callback.  Returns
 * -ENOMEDIUM if there is no usable node/driver, -EPERM if the node has
 * read-only bitmaps, or another negative errno from the request checks or
 * the driver.
 */
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int ret;
    int64_t max_pdiscard;
    int head, tail, align;
    BlockDriverState *bs = child->bs;
    IO_CODE();
    assert_bdrv_graph_readable();

    if (!bs || !bs->drv || !bdrv_co_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_request(offset, bytes, NULL);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing if discard is not enabled for this node */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    /* Do nothing if the driver cannot discard at all */
    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Drop cached block-status data overlapping the discarded range */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    /*
     * Nothing is rounded away here: the unaligned parts are still passed to
     * the driver, but as separate fragments.  head and tail are the
     * misalignment of the start and end of the request relative to align.
     */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    /* Per-fragment size limit; a max_pdiscard of 0 is treated as unlimited */
    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /*
             * Issue small fragments until the next alignment boundary.  If
             * the distance is not a multiple of request_alignment, only the
             * sub-request_alignment remainder goes out first.
             */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten this fragment so that it ends on a boundary */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                /*
                 * Split the tail itself: issue the request_alignment
                 * multiple now and leave the remainder for the next round.
                 */
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        /* Limit the fragment size */
        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        /* bs->drv is re-checked on every iteration before it is used */
        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            /* AIO-style driver: submit, yield, then read the result */
            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        /* -ENOTSUP for a single fragment does not fail the request */
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    /* Finish with the original range recorded in req, not the loop state */
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}
3228
3229int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
3230{
3231 BlockDriver *drv = bs->drv;
3232 CoroutineIOCompletion co = {
3233 .coroutine = qemu_coroutine_self(),
3234 };
3235 BlockAIOCB *acb;
3236 IO_CODE();
3237 assert_bdrv_graph_readable();
3238
3239 bdrv_inc_in_flight(bs);
3240 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
3241 co.ret = -ENOTSUP;
3242 goto out;
3243 }
3244
3245 if (drv->bdrv_co_ioctl) {
3246 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
3247 } else {
3248 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
3249 if (!acb) {
3250 co.ret = -ENOTSUP;
3251 goto out;
3252 }
3253 qemu_coroutine_yield();
3254 }
3255out:
3256 bdrv_dec_in_flight(bs);
3257 return co.ret;
3258}
3259
3260int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
3261 unsigned int *nr_zones,
3262 BlockZoneDescriptor *zones)
3263{
3264 BlockDriver *drv = bs->drv;
3265 CoroutineIOCompletion co = {
3266 .coroutine = qemu_coroutine_self(),
3267 };
3268 IO_CODE();
3269
3270 bdrv_inc_in_flight(bs);
3271 if (!drv || !drv->bdrv_co_zone_report || bs->bl.zoned == BLK_Z_NONE) {
3272 co.ret = -ENOTSUP;
3273 goto out;
3274 }
3275 co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones);
3276out:
3277 bdrv_dec_in_flight(bs);
3278 return co.ret;
3279}
3280
3281int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
3282 int64_t offset, int64_t len)
3283{
3284 BlockDriver *drv = bs->drv;
3285 CoroutineIOCompletion co = {
3286 .coroutine = qemu_coroutine_self(),
3287 };
3288 IO_CODE();
3289
3290 bdrv_inc_in_flight(bs);
3291 if (!drv || !drv->bdrv_co_zone_mgmt || bs->bl.zoned == BLK_Z_NONE) {
3292 co.ret = -ENOTSUP;
3293 goto out;
3294 }
3295 co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len);
3296out:
3297 bdrv_dec_in_flight(bs);
3298 return co.ret;
3299}
3300
3301int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset,
3302 QEMUIOVector *qiov,
3303 BdrvRequestFlags flags)
3304{
3305 int ret;
3306 BlockDriver *drv = bs->drv;
3307 CoroutineIOCompletion co = {
3308 .coroutine = qemu_coroutine_self(),
3309 };
3310 IO_CODE();
3311
3312 ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL);
3313 if (ret < 0) {
3314 return ret;
3315 }
3316
3317 bdrv_inc_in_flight(bs);
3318 if (!drv || !drv->bdrv_co_zone_append || bs->bl.zoned == BLK_Z_NONE) {
3319 co.ret = -ENOTSUP;
3320 goto out;
3321 }
3322 co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags);
3323out:
3324 bdrv_dec_in_flight(bs);
3325 return co.ret;
3326}
3327
3328void *qemu_blockalign(BlockDriverState *bs, size_t size)
3329{
3330 IO_CODE();
3331 return qemu_memalign(bdrv_opt_mem_align(bs), size);
3332}
3333
3334void *qemu_blockalign0(BlockDriverState *bs, size_t size)
3335{
3336 IO_CODE();
3337 return memset(qemu_blockalign(bs, size), 0, size);
3338}
3339
3340void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
3341{
3342 size_t align = bdrv_opt_mem_align(bs);
3343 IO_CODE();
3344
3345
3346 assert(align > 0);
3347 if (size == 0) {
3348 size = align;
3349 }
3350
3351 return qemu_try_memalign(align, size);
3352}
3353
3354void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
3355{
3356 void *mem = qemu_try_blockalign(bs, size);
3357 IO_CODE();
3358
3359 if (mem) {
3360 memset(mem, 0, size);
3361 }
3362
3363 return mem;
3364}
3365
3366
3367static void GRAPH_RDLOCK
3368bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
3369 BdrvChild *final_child)
3370{
3371 BdrvChild *child;
3372
3373 GLOBAL_STATE_CODE();
3374 assert_bdrv_graph_readable();
3375
3376 QLIST_FOREACH(child, &bs->children, next) {
3377 if (child == final_child) {
3378 break;
3379 }
3380
3381 bdrv_unregister_buf(child->bs, host, size);
3382 }
3383
3384 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3385 bs->drv->bdrv_unregister_buf(bs, host, size);
3386 }
3387}
3388
3389bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
3390 Error **errp)
3391{
3392 BdrvChild *child;
3393
3394 GLOBAL_STATE_CODE();
3395 GRAPH_RDLOCK_GUARD_MAINLOOP();
3396
3397 if (bs->drv && bs->drv->bdrv_register_buf) {
3398 if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
3399 return false;
3400 }
3401 }
3402 QLIST_FOREACH(child, &bs->children, next) {
3403 if (!bdrv_register_buf(child->bs, host, size, errp)) {
3404 bdrv_register_buf_rollback(bs, host, size, child);
3405 return false;
3406 }
3407 }
3408 return true;
3409}
3410
3411void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
3412{
3413 BdrvChild *child;
3414
3415 GLOBAL_STATE_CODE();
3416 GRAPH_RDLOCK_GUARD_MAINLOOP();
3417
3418 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3419 bs->drv->bdrv_unregister_buf(bs, host, size);
3420 }
3421 QLIST_FOREACH(child, &bs->children, next) {
3422 bdrv_unregister_buf(child->bs, host, size);
3423 }
3424}
3425
/*
 * Common implementation behind bdrv_co_copy_range_from() and
 * bdrv_co_copy_range_to(): copy @bytes bytes from @src at @src_offset to
 * @dst at @dst_offset by delegating to the block drivers.
 *
 * @recurse_src selects which side is driven:
 *   - true:  the source driver's bdrv_co_copy_range_from callback is called
 *            under a tracked read request on src->bs;
 *   - false: the destination driver's bdrv_co_copy_range_to callback is
 *            called under a tracked write request on dst->bs.
 *
 * If @write_flags contains BDRV_REQ_ZERO_WRITE the call turns into
 * bdrv_co_pwrite_zeroes() on @dst and @src is not looked at.
 *
 * Returns the driver's result, -ENOMEDIUM if either side is missing or not
 * inserted, -ENOTSUP if either driver lacks its copy-range callback or
 * either node is encrypted, or the error from the request checks.
 */
static int coroutine_fn GRAPH_RDLOCK bdrv_co_copy_range_internal(
        BdrvChild *src, int64_t src_offset, BdrvChild *dst,
        int64_t dst_offset, int64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;
    assert_bdrv_graph_readable();

    /* Neither NO_FALLBACK nor NO_WAIT is accepted on either side */
    assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(read_flags & BDRV_REQ_NO_WAIT));
    assert(!(write_flags & BDRV_REQ_NO_WAIT));

    /* The destination is validated first ... */
    if (!dst || !dst->bs || !bdrv_co_is_inserted(dst->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }
    /* ... because a zero write needs no source at all */
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs || !bdrv_co_is_inserted(src->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }

    /* Both drivers must offer their callback; encrypted nodes are refused */
    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

    if (recurse_src) {
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING is not accepted in the read flags */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        bdrv_wait_serialising_requests(&req);

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        /* The write request is finished even if prepare or the copy failed */
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}
3504
3505
3506
3507
3508
3509int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
3510 BdrvChild *dst, int64_t dst_offset,
3511 int64_t bytes,
3512 BdrvRequestFlags read_flags,
3513 BdrvRequestFlags write_flags)
3514{
3515 IO_CODE();
3516 assert_bdrv_graph_readable();
3517 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3518 read_flags, write_flags);
3519 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3520 bytes, read_flags, write_flags, true);
3521}
3522
3523
3524
3525
3526
3527int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
3528 BdrvChild *dst, int64_t dst_offset,
3529 int64_t bytes,
3530 BdrvRequestFlags read_flags,
3531 BdrvRequestFlags write_flags)
3532{
3533 IO_CODE();
3534 assert_bdrv_graph_readable();
3535 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3536 read_flags, write_flags);
3537 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3538 bytes, read_flags, write_flags, false);
3539}
3540
3541int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
3542 BdrvChild *dst, int64_t dst_offset,
3543 int64_t bytes, BdrvRequestFlags read_flags,
3544 BdrvRequestFlags write_flags)
3545{
3546 IO_CODE();
3547 assert_bdrv_graph_readable();
3548
3549 return bdrv_co_copy_range_from(src, src_offset,
3550 dst, dst_offset,
3551 bytes, read_flags, write_flags);
3552}
3553
3554static void bdrv_parent_cb_resize(BlockDriverState *bs)
3555{
3556 BdrvChild *c;
3557 QLIST_FOREACH(c, &bs->parents, next_parent) {
3558 if (c->klass->resize) {
3559 c->klass->resize(c);
3560 }
3561 }
3562}
3563
3564
3565
3566
3567
3568
3569
3570
/*
 * Resize the node behind @child to @offset bytes.
 *
 * @exact, @prealloc and @flags are forwarded to the driver's
 * bdrv_co_truncate callback, or to the filtered child when the driver has
 * no such callback.  BDRV_REQ_ZERO_WRITE is added to @flags when the image
 * grows and its COW child is longer than the old image size.
 *
 * Returns 0 on success and a negative errno on failure, in which case
 * @errp is set.  Note that a failure to refresh the sector count after a
 * successful driver truncate is also reported as an error.
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, BdrvRequestFlags flags,
                                  Error **errp)
{
    BlockDriverState *bs = child->bs;
    BdrvChild *filtered, *backing;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;
    IO_CODE();
    assert_bdrv_graph_readable();

    /* Without a driver there is nothing that could be resized */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    ret = bdrv_check_request(offset, 0, errp);
    if (ret < 0) {
        return ret;
    }

    old_size = bdrv_co_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    if (bdrv_is_read_only(bs)) {
        error_setg(errp, "Image is read-only");
        return -EACCES;
    }

    /* new_bytes is the amount the image grows by; 0 when shrinking */
    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    /* The tracked range is the newly added area (empty when not growing) */
    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /*
     * When growing, the request is made serialising.
     * NOTE(review): presumably so that no concurrent write can touch the
     * new area while it is being set up -- confirm.
     */
    if (new_bytes) {
        bdrv_make_request_serialising(&req, 1);
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    filtered = bdrv_filter_child(bs);
    backing = bdrv_cow_child(bs);

    /*
     * If the image grows and its COW child is longer than the old image
     * size, request that the new area be zeroed.
     * NOTE(review): presumably because leaving it unallocated would let the
     * COW child's content show through in the new area -- confirm.
     */
    if (new_bytes && backing) {
        int64_t backing_len;

        backing_len = bdrv_co_getlength(backing->bs);
        if (backing_len < 0) {
            ret = backing_len;
            error_setg_errno(errp, -ret, "Could not get backing file size");
            goto out;
        }

        if (backing_len > old_size) {
            flags |= BDRV_REQ_ZERO_WRITE;
        }
    }

    if (drv->bdrv_co_truncate) {
        /* Refuse flags the driver has not declared support for */
        if (flags & ~bs->supported_truncate_flags) {
            error_setg(errp, "Block driver does not support requested flags");
            ret = -ENOTSUP;
            goto out;
        }
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
    } else if (filtered) {
        /* No driver callback: pass the resize on to the filtered child */
        ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    /* On success, continue with the size the node actually reports */
    ret = bdrv_co_refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /*
     * The truncation itself succeeded at this point, even if refreshing the
     * sector count failed, so the request is finished with 0 rather than
     * with ret.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}
3699
3700void bdrv_cancel_in_flight(BlockDriverState *bs)
3701{
3702 GLOBAL_STATE_CODE();
3703 if (!bs || !bs->drv) {
3704 return;
3705 }
3706
3707 if (bs->drv->bdrv_cancel_in_flight) {
3708 bs->drv->bdrv_cancel_in_flight(bs);
3709 }
3710}
3711
3712int coroutine_fn
3713bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
3714 QEMUIOVector *qiov, size_t qiov_offset)
3715{
3716 BlockDriverState *bs = child->bs;
3717 BlockDriver *drv = bs->drv;
3718 int ret;
3719 IO_CODE();
3720 assert_bdrv_graph_readable();
3721
3722 if (!drv) {
3723 return -ENOMEDIUM;
3724 }
3725
3726 if (!drv->bdrv_co_preadv_snapshot) {
3727 return -ENOTSUP;
3728 }
3729
3730 bdrv_inc_in_flight(bs);
3731 ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
3732 bdrv_dec_in_flight(bs);
3733
3734 return ret;
3735}
3736
3737int coroutine_fn
3738bdrv_co_snapshot_block_status(BlockDriverState *bs,
3739 bool want_zero, int64_t offset, int64_t bytes,
3740 int64_t *pnum, int64_t *map,
3741 BlockDriverState **file)
3742{
3743 BlockDriver *drv = bs->drv;
3744 int ret;
3745 IO_CODE();
3746 assert_bdrv_graph_readable();
3747
3748 if (!drv) {
3749 return -ENOMEDIUM;
3750 }
3751
3752 if (!drv->bdrv_co_snapshot_block_status) {
3753 return -ENOTSUP;
3754 }
3755
3756 bdrv_inc_in_flight(bs);
3757 ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
3758 pnum, map, file);
3759 bdrv_dec_in_flight(bs);
3760
3761 return ret;
3762}
3763
3764int coroutine_fn
3765bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
3766{
3767 BlockDriver *drv = bs->drv;
3768 int ret;
3769 IO_CODE();
3770 assert_bdrv_graph_readable();
3771
3772 if (!drv) {
3773 return -ENOMEDIUM;
3774 }
3775
3776 if (!drv->bdrv_co_pdiscard_snapshot) {
3777 return -ENOTSUP;
3778 }
3779
3780 bdrv_inc_in_flight(bs);
3781 ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
3782 bdrv_dec_in_flight(bs);
3783
3784 return ret;
3785}
3786