/*
 * Block layer I/O functions
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

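/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */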
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags);

static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->klass->drained_end) {
        c->klass->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
    IO_OR_GS_CODE();
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->klass->drained_poll) {
        return c->klass->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
    IO_OR_GS_CODE();
    c->parent_quiesce_counter++;
    if (c->klass->drained_begin) {
        c->klass->drained_begin(c);
    }
    if (poll) {
        AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
    }
}

static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
                                  src->pdiscard_alignment);
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
                                        src->max_hw_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
    dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
}

typedef struct BdrvRefreshLimitsState {
    BlockDriverState *bs;
    BlockLimits old_bl;
} BdrvRefreshLimitsState;

static void bdrv_refresh_limits_abort(void *opaque)
{
    BdrvRefreshLimitsState *s = opaque;

    s->bs->bl = s->old_bl;
}

static TransactionActionDrv bdrv_refresh_limits_drv = {
    .abort = bdrv_refresh_limits_abort,
    .clean = g_free,
};

void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
    ERRP_GUARD();
    BlockDriver *drv = bs->drv;
    BdrvChild *c;
    bool have_limits;

    GLOBAL_STATE_CODE();

    if (tran) {
        BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
        *s = (BdrvRefreshLimitsState) {
            .bs = bs,
            .old_bl = bs->bl,
        };
        tran_add(tran, &bdrv_refresh_limits_drv, s);
    }

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv ||
                                drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
    have_limits = false;
    QLIST_FOREACH(c, &bs->children, next) {
        if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
        {
            bdrv_merge_limits(&bs->bl, &c->bs->bl);
            have_limits = true;
        }
    }

    if (!have_limits) {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = qemu_real_host_page_size();

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
        if (*errp) {
            return;
        }
    }

    if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
        error_setg(errp, "Driver requires too large request alignment");
    }
}

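/*
 * The copy-on-read flag is actually a reference count; multiple users may use
 * the feature without worrying about clashing with other users, because the
 * flag is only disabled again once every bdrv_enable_copy_on_read() call has
 * been paired with a bdrv_disable_copy_on_read() call.
 */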
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = qatomic_fetch_dec(&bs->copy_on_read);
    IO_CODE();
    assert(old >= 1);
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

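    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */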
    qatomic_mb_set(&data->done, true);
    if (!data->begin) {
        qatomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}

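/* Schedule the driver's .bdrv_co_drain_begin()/.bdrv_co_drain_end() callback */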
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        qatomic_inc(drained_end_counter);
    }

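    /* Make sure the driver callback completes during the polling phase for
     * drain_begin. */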
    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}

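/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */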
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;
    IO_OR_GS_CODE();

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (qatomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        aio_context_acquire(ctx);
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        aio_context_release(ctx);
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;
    Coroutine *self = qemu_coroutine_self();
    AioContext *ctx = bdrv_get_aio_context(bs);
    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter(). */

    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = self,
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }

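    /*
     * Temporarily drop the lock across yield or we would get deadlocks.
     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
     *
     * When we yield below, the lock for the current context will be
     * released, so if this is actually the lock that protects bs, don't
     * drop it a second time.
     */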
    if (ctx != co_ctx) {
        aio_context_release(ctx);
    }
    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);

    /* Reacquire the AioContext of bs if we dropped it */
    if (ctx != co_ctx) {
        aio_context_acquire(ctx);
    }
}

void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    IO_OR_GS_CODE();
    assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

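    /*
     * Wait for drained requests to finish.
     *
     * Polling only once at the top level is sufficient:
     * bdrv_drain_poll_top_level() recurses into the whole subtree, so the
     * non-polling recursive calls above are covered by this single poll.
     */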
    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}

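/*
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee will be incremented
 * once for every background operation scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must remain valid
 * until the pointee reaches 0.  That implies that whoever sets up the
 * pointee has to poll until it is 0.
 *
 * We use atomic operations to access *drained_end_counter, because
 * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
 *     @bs may contain nodes in different AioContexts,
 * (2) bdrv_drain_all_end() uses the same counter for all nodes,
 *     regardless of their AioContext.
 */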
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    IO_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(qatomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;
    GLOBAL_STATE_CODE();

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * BQL, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}

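/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.  Each call must be paired with a subsequent
 * bdrv_drain_all_end().
 */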
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;
    GLOBAL_STATE_CODE();

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    g_assert(bs->quiesce_counter > 0);
    g_assert(!bs->refcnt);

    while (bs->quiesce_counter) {
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
    }
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    GLOBAL_STATE_CODE();
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

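/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */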
static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        qatomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

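/**
 * Add an active request to the tracked requests list
 */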
static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
                                               BlockDriverState *bs,
                                               int64_t offset,
                                               int64_t bytes,
                                               enum BdrvTrackedRequestType type)
{
    bdrv_check_request(offset, bytes, &error_abort);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, int64_t bytes)
{
    bdrv_check_request(offset, bytes, &error_abort);

    /*        aaaa   bbbb */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}

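/* Called with self->bs->reqs_lock held */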
static coroutine_fn BdrvTrackedRequest *
bdrv_find_conflicting_request(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;

    QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
        if (req == self || (!req->serialising && !self->serialising)) {
            continue;
        }
        if (tracked_request_overlaps(req, self->overlap_offset,
                                     self->overlap_bytes))
        {
            /*
             * Hitting this means there was a reentrant request, for
             * example, a block driver issuing nested requests.  This must
             * never happen since it means deadlock.
             */
            assert(qemu_coroutine_self() != req->co);

            /*
             * If the request is already (indirectly) waiting for us, or
             * will wait for us as soon as it wakes up, then just go on
             * (instead of producing a deadlock in the former case).
             */
            if (!req->waiting_for) {
                return req;
            }
        }
    }

    return NULL;
}

/* Called with self->bs->reqs_lock held */
static void coroutine_fn
bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;

    while ((req = bdrv_find_conflicting_request(self))) {
        self->waiting_for = req;
        qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
        self->waiting_for = NULL;
    }
}

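/* Called with req->bs->reqs_lock held */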
static void tracked_request_set_serialising(BdrvTrackedRequest *req,
                                            uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    int64_t overlap_bytes =
        ROUND_UP(req->offset + req->bytes, align) - overlap_offset;

    bdrv_check_request(req->offset, req->bytes, &error_abort);

    if (!req->serialising) {
        qatomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

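/*
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */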
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
    BdrvTrackedRequest *req;
    Coroutine *self = qemu_coroutine_self();
    IO_CODE();

    QLIST_FOREACH(req, &bs->tracked_requests, list) {
        if (req->co == self) {
            return req;
        }
    }

    return NULL;
}

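/**
 * Round a region to cluster boundaries
 */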
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;
    IO_CODE();
    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}

static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

void bdrv_inc_in_flight(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_inc(&bs->in_flight);
}

void bdrv_wakeup(BlockDriverState *bs)
{
    IO_CODE();
    aio_wait_kick();
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

static void coroutine_fn
bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;

    if (!qatomic_read(&bs->serialising_in_flight)) {
        return;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bdrv_wait_serialising_requests_locked(self);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
                                                uint64_t align)
{
    IO_CODE();

    qemu_co_mutex_lock(&req->bs->reqs_lock);

    tracked_request_set_serialising(req, align);
    bdrv_wait_serialising_requests_locked(req);

    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
                            QEMUIOVector *qiov, size_t qiov_offset,
                            Error **errp)
{
    /*
     * Check generic offset/bytes correctness
     */

    if (offset < 0) {
        error_setg(errp, "offset is negative: %" PRIi64, offset);
        return -EIO;
    }

    if (bytes < 0) {
        error_setg(errp, "bytes is negative: %" PRIi64, bytes);
        return -EIO;
    }

    if (bytes > BDRV_MAX_LENGTH) {
        error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   bytes, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH) {
        error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   offset, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH - bytes) {
        error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
                   "exceeds maximum(%" PRIi64 ")", offset, bytes,
                   BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (!qiov) {
        return 0;
    }

    /*
     * Check qiov and qiov_offset
     */

    if (qiov_offset > qiov->size) {
        error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
                   qiov_offset, qiov->size);
        return -EIO;
    }

    if (bytes > qiov->size - qiov_offset) {
        error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
                   "vector size(%zu)", bytes, qiov_offset, qiov->size);
        return -EIO;
    }

    return 0;
}

int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
{
    return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
}

static int bdrv_check_request32(int64_t offset, int64_t bytes,
                                QEMUIOVector *qiov, size_t qiov_offset)
{
    int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
    if (ret < 0) {
        return ret;
    }

    if (bytes > BDRV_REQUEST_MAX_BYTES) {
        return -EIO;
    }

    return 0;
}

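/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success.
 */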
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;
    IO_CODE();

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            return ret;
        }
        offset += bytes;
    }
}

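/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */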
int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
                                     int64_t bytes, const void *buf,
                                     BdrvRequestFlags flags)
{
    int ret;
    IO_CODE();

    ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_co_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           int64_t offset, int64_t bytes,
                                           QEMUIOVector *qiov,
                                           size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(!(flags & ~bs->supported_read_flags));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv_part) {
        return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags);
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_preadv) {
        ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
        goto out;
    }

    if (drv->bdrv_aio_preadv) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
                                   bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
            goto out;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
            goto out;
        }
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);
    assert(drv->bdrv_co_readv);

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            int64_t offset, int64_t bytes,
                                            QEMUIOVector *qiov,
                                            size_t qiov_offset,
                                            BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    bool emulate_fua = false;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if ((flags & BDRV_REQ_FUA) &&
        (~bs->supported_write_flags & BDRV_REQ_FUA)) {
        flags &= ~BDRV_REQ_FUA;
        emulate_fua = true;
    }

    flags &= bs->supported_write_flags;

    if (drv->bdrv_co_pwritev_part) {
        ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags);
        goto emulate_flags;
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
        goto emulate_flags;
    }

    if (drv->bdrv_aio_pwritev) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
                                    bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);

    assert(drv->bdrv_co_writev);
    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);

emulate_flags:
    if (ret == 0 && emulate_fua) {
        ret = bdrv_co_flush(bs);
    }

    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
                               int64_t bytes, QEMUIOVector *qiov,
                               size_t qiov_offset)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!block_driver_can_compress(drv)) {
        return -ENOTSUP;
    }

    if (drv->bdrv_co_pwritev_compressed_part) {
        return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
                                                    qiov, qiov_offset);
    }

    if (qiov_offset == 0) {
        return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
    }

    qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
    ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
    qemu_iovec_destroy(&local_qiov);

    return ret;
}

static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
        size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;

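    /*
     * Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */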
    void *bounce_buffer = NULL;

    BlockDriver *drv = bs->drv;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    int64_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    int64_t progress = 0;
    bool skip_write;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    /*
     * Do not write anything when the BDS is inactive.  That is not
     * allowed, and it would not help.
     */
    skip_write = (bs->open_flags & BDRV_O_INACTIVE);

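    /*
     * Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which is
     * one reason we loop rather than doing it all at once.
     */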
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    while (cluster_bytes) {
        int64_t pnum;

        if (skip_write) {
            ret = 1; /* "already allocated", so nothing will be copied */
            pnum = MIN(cluster_bytes, max_transfer);
        } else {
            ret = bdrv_is_allocated(bs, cluster_offset,
                                    MIN(cluster_bytes, max_transfer), &pnum);
            if (ret < 0) {
                /*
                 * Safe to treat errors in querying allocation as if
                 * unallocated; we'll probably fail again soon on the
                 * read, but at least that will set a decent errno.
                 */
                pnum = MIN(cluster_bytes, max_transfer);
            }

            /* Stop at EOF if the image ends in the middle of the cluster */
            if (ret == 0 && pnum == 0) {
                assert(progress >= bytes);
                break;
            }

            assert(skip_bytes < pnum);
        }

        if (ret <= 0) {
            QEMUIOVector local_qiov;

            /* Must copy-on-read; use a bounce buffer */
            pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            if (!bounce_buffer) {
                int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
                int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
                int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);

                bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
                if (!bounce_buffer) {
                    ret = -ENOMEM;
                    goto err;
                }
            }
            qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * that still correctly reads as zero? */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
                                               BDRV_REQ_WRITE_UNCHANGED);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0,
                                          BDRV_REQ_WRITE_UNCHANGED);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            if (!(flags & BDRV_REQ_PREFETCH)) {
                qemu_iovec_from_buf(qiov, qiov_offset + progress,
                                    bounce_buffer + skip_bytes,
                                    MIN(pnum - skip_bytes, bytes - progress));
            }
        } else if (!(flags & BDRV_REQ_PREFETCH)) {
            /* Read directly into the destination */
            ret = bdrv_driver_preadv(bs, offset + progress,
                                     MIN(pnum - skip_bytes, bytes - progress),
                                     qiov, qiov_offset + progress, 0);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /*
     * TODO: We would need a per-BDS .supported_read_flags and
     * potential fallback support, if we ever implement any read flags
     * to pass through to drivers.  For now, there aren't any
     * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization
     * hint.
     */
    assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
                       BDRV_REQ_REGISTERED_BUF)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
    } else {
        bdrv_wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
        flags &= ~BDRV_REQ_COPY_ON_READ;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
                                           qiov, qiov_offset, flags);
            goto out;
        } else if (flags & BDRV_REQ_PREFETCH) {
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
        goto out;
    }

    while (bytes_remaining) {
        int64_t num;

        if (max_bytes) {
            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, qiov,
                                     qiov_offset + bytes - bytes_remaining,
                                     flags);
            max_bytes -= num;
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
                                    0, bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}

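/*
 * Padding of an unaligned request to the driver's request_alignment:
 *
 *  |<---- align ----->|                     |<----- align ---->|
 *  |<- head ->|<------------- bytes ------------->|<-- tail -->|
 *
 * @buf is an aligned allocation that holds the @head and @tail padding; @head
 * is read into the start of @buf, @tail into @tail_buf (the last align bytes
 * of @buf).  @merge_reads is true when the padded request fits into a single
 * buffer of @buf_len bytes, in which case head and tail are read together.
 */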
typedef struct BdrvRequestPadding {
    uint8_t *buf;
    size_t buf_len;
    uint8_t *tail_buf;
    size_t head;
    size_t tail;
    bool merge_reads;
    QEMUIOVector local_qiov;
} BdrvRequestPadding;

static bool bdrv_init_padding(BlockDriverState *bs,
                              int64_t offset, int64_t bytes,
                              BdrvRequestPadding *pad)
{
    int64_t align = bs->bl.request_alignment;
    int64_t sum;

    bdrv_check_request(offset, bytes, &error_abort);
    assert(align <= INT_MAX);
    assert(align <= SIZE_MAX / 2);

    memset(pad, 0, sizeof(*pad));

    pad->head = offset & (align - 1);
    pad->tail = ((offset + bytes) & (align - 1));
    if (pad->tail) {
        pad->tail = align - pad->tail;
    }

    if (!pad->head && !pad->tail) {
        return false;
    }

    assert(bytes);

    sum = pad->head + bytes + pad->tail;
    pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
    pad->buf = qemu_blockalign(bs, pad->buf_len);
    pad->merge_reads = sum == pad->buf_len;
    if (pad->tail) {
        pad->tail_buf = pad->buf + pad->buf_len - align;
    }

    return true;
}

static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child,
                                              BdrvTrackedRequest *req,
                                              BdrvRequestPadding *pad,
                                              bool zero_middle)
{
    QEMUIOVector local_qiov;
    BlockDriverState *bs = child->bs;
    uint64_t align = bs->bl.request_alignment;
    int ret;

    assert(req->serialising && pad->buf);

    if (pad->head || pad->merge_reads) {
        int64_t bytes = pad->merge_reads ? pad->buf_len : align;

        qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        }
        ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                  align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        }

        if (pad->merge_reads) {
            goto zero_mem;
        }
    }

    if (pad->tail) {
        qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(
                child, req,
                req->overlap_offset + req->overlap_bytes - align,
                align, align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
    }

zero_mem:
    if (zero_middle) {
        memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
    }

    return 0;
}

static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
    if (pad->buf) {
        qemu_vfree(pad->buf);
        qemu_iovec_destroy(&pad->local_qiov);
    }
    memset(pad, 0, sizeof(*pad));
}

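/*
 * bdrv_pad_request
 *
 * Exchange request parameters with padded request if needed. Don't include RMW
 * read of padding, bdrv_padding_rmw_read() should be called separately if
 * needed.
 *
 * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
 *  - on function start they represent original request
 *  - on failure or when padding is not needed they are unchanged
 *  - on success when padding is needed they represent padded request
 */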
static int bdrv_pad_request(BlockDriverState *bs,
                            QEMUIOVector **qiov, size_t *qiov_offset,
                            int64_t *offset, int64_t *bytes,
                            BdrvRequestPadding *pad, bool *padded,
                            BdrvRequestFlags *flags)
{
    int ret;

    bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);

    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
        if (padded) {
            *padded = false;
        }
        return 0;
    }

    ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
                                   *qiov, *qiov_offset, *bytes,
                                   pad->buf + pad->buf_len - pad->tail,
                                   pad->tail);
    if (ret < 0) {
        bdrv_padding_destroy(pad);
        return ret;
    }
    *bytes += pad->head + pad->tail;
    *offset -= pad->head;
    *qiov = &pad->local_qiov;
    *qiov_offset = 0;
    if (padded) {
        *padded = true;
    }
    if (flags) {
        /* Can't use optimization hint with bounce buffer */
        *flags &= ~BDRV_REQ_REGISTERED_BUF;
    }

    return 0;
}

int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    IO_CODE();
    return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
    int64_t offset, int64_t bytes,
    QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    BdrvRequestPadding pad;
    int ret;
    IO_CODE();

    trace_bdrv_co_preadv_part(bs, offset, bytes, flags);

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    if (ret < 0) {
        return ret;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * Aligning zero request is nonsense. Even if driver has special meaning
         * of zero-length (like qcow2_co_pwritev_compressed_part), we can't pass
         * it to driver due to request_alignment.
         *
         * Still, no reason to return an error if someone does an unaligned
         * zero-length read occasionally.
         */
        return 0;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (qatomic_read(&bs->copy_on_read)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
                           NULL, &flags);
    if (ret < 0) {
        goto fail;
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                              bs->bl.request_alignment,
                              qiov, qiov_offset, flags);
    tracked_request_end(&req);
    bdrv_padding_destroy(&pad);

fail:
    bdrv_dec_in_flight(bs);

    return ret;
}

static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    void *buf = NULL;
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
                                            INT64_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    bdrv_check_request(offset, bytes, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
        return -ENOTSUP;
    }

    /* By definition there is no user buffer so this flag doesn't make sense */
    if (flags & BDRV_REQ_REGISTERED_BUF) {
        return -EINVAL;
    }

    /* Invalidate the cached block-status data range if this write overlaps */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int64_t num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            if (buf == NULL) {
                buf = qemu_try_blockalign0(bs, num);
                if (buf == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }
            qemu_iovec_init_buf(&qiov, buf, num);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(buf);
                buf = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(buf);
    return ret;
}

static inline int coroutine_fn
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
                          BdrvTrackedRequest *req, int flags)
{
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    if (bdrv_is_read_only(bs)) {
        return -EPERM;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));

    if (flags & BDRV_REQ_SERIALISING) {
        QEMU_LOCK_GUARD(&bs->reqs_lock);

        tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));

        if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
            return -EBUSY;
        }

        bdrv_wait_serialising_requests_locked(req);
    } else {
        bdrv_wait_serialising_requests(req);
    }

    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
           child->perm & BLK_PERM_RESIZE);

    switch (req->type) {
    case BDRV_TRACKED_WRITE:
    case BDRV_TRACKED_DISCARD:
        if (flags & BDRV_REQ_WRITE_UNCHANGED) {
            assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
        } else {
            assert(child->perm & BLK_PERM_WRITE);
        }
        bdrv_write_threshold_check_write(bs, offset, bytes);
        return 0;
    case BDRV_TRACKED_TRUNCATE:
        assert(child->perm & BLK_PERM_RESIZE);
        return 0;
    default:
        abort();
    }
}

static inline void coroutine_fn
bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
                         BdrvTrackedRequest *req, int ret)
{
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    qatomic_inc(&bs->write_gen);

    /*
     * Discard cannot extend the image, but in error handling cases, such as
     * when reverting a qcow2 cluster allocation, the discarded range can pass
     * the end of image file, so we cannot assert about BDRV_TRACKED_DISCARD
     * here. Note the comparison is biased due to the rounding of end_sector
     * to a sector boundary.
     */
    if (ret == 0 &&
        (req->type == BDRV_TRACKED_TRUNCATE ||
         end_sector > bs->total_sectors) &&
        req->type != BDRV_TRACKED_DISCARD) {
        bs->total_sectors = end_sector;
        bdrv_parent_cb_resize(bs);
        bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
    }
    if (req->bytes) {
        switch (req->type) {
        case BDRV_TRACKED_WRITE:
            stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
        case BDRV_TRACKED_DISCARD:
            bdrv_set_dirty(bs, offset, bytes);
            break;
        default:
            break;
        }
    }
}

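/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */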
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                             qiov, qiov_offset);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, qiov,
                                      qiov_offset + bytes - bytes_remaining,
                                      local_flags);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    if (ret >= 0) {
        ret = 0;
    }
    bdrv_co_write_req_finish(child, offset, bytes, req, ret);

    return ret;
}

static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                int64_t bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    QEMUIOVector local_qiov;
    uint64_t align = bs->bl.request_alignment;
    int ret = 0;
    bool padding;
    BdrvRequestPadding pad;

    /* This flag doesn't make sense for padding or zero writes */
    flags &= ~BDRV_REQ_REGISTERED_BUF;

    padding = bdrv_init_padding(bs, offset, bytes, &pad);
    if (padding) {
        assert(!(flags & BDRV_REQ_NO_WAIT));
        bdrv_make_request_serialising(req, align);

        bdrv_padding_rmw_read(child, req, &pad, true);

        if (pad.head || pad.merge_reads) {
            int64_t aligned_offset = offset & ~(align - 1);
            int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;

            qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
            ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
                                       align, &local_qiov, 0,
                                       flags & ~BDRV_REQ_ZERO_WRITE);
            if (ret < 0 || pad.merge_reads) {
                /* Error or all work is done */
                goto out;
            }
            offset += write_bytes - pad.head;
            bytes -= write_bytes - pad.head;
        }
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        int64_t aligned_bytes = bytes & ~(align - 1);
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, 0, flags);
        if (ret < 0) {
            goto out;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == pad.tail + bytes);

        qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, 0,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
    }

out:
    bdrv_padding_destroy(&pad);

    return ret;
}

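/*
 * Handle a write request in coroutine context
 */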
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    IO_CODE();
    return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    BdrvRequestPadding pad;
    int ret;
    bool padded = false;
    IO_CODE();

    trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
    } else {
        ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    }
    if (ret < 0) {
        return ret;
    }

    /* If the request is misaligned then we can't make it efficient */
    if ((flags & BDRV_REQ_NO_FALLBACK) &&
        !QEMU_IS_ALIGNED(offset | bytes, align))
    {
        return -ENOTSUP;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * Aligning zero request is nonsense. Even if driver has special meaning
         * of zero-length (like qcow2_co_pwritev_compressed_part), we can't pass
         * it to driver due to request_alignment.
         *
         * Still, no reason to return an error if someone does an unaligned
         * zero-length write occasionally.
         */
        return 0;
    }

    if (!(flags & BDRV_REQ_ZERO_WRITE)) {
        /*
         * Pad request for following read-modify-write cycle.
         * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
         * alignment only if there is no ZERO flag.
         */
        ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
                               &padded, &flags);
        if (ret < 0) {
            return ret;
        }
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        assert(!padded);
        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

    if (padded) {
        /*
         * Request was unaligned to request_alignment and therefore
         * padded.  We are going to do read-modify-write, and must
         * serialize the request to prevent interactions of the
         * widened region with other transactions.
         */
        assert(!(flags & BDRV_REQ_NO_WAIT));
        bdrv_make_request_serialising(&req, align);
        bdrv_padding_rmw_read(child, &req, &pad, false);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               qiov, qiov_offset, flags);

    bdrv_padding_destroy(&pad);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                       int64_t bytes, BdrvRequestFlags flags)
{
    IO_CODE();
    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
}

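/*
 * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
 */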
int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;
    int result = 0;

    GLOBAL_STATE_CODE();

    /*
     * bdrv queue is managed by record/replay,
     * creating new flush request for stopping
     * the VM may break the determinism
     */
    if (replay_events_enabled()) {
        return result;
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}

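/*
 * Returns the allocation status of the specified offset.
 *
 * The returned value is a bit-or of BDRV_BLOCK_* flags.
 *
 * If 'want_zero' is true, the caller is querying for mapping
 * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and _ZERO.
 * Note that a false value does not mean non-zero data; it means unknown.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the max value 'pnum' should be set to.  If bytes goes
 * beyond the end of the disk image it will be clamped; if 'pnum' is set to
 * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'map' and 'file' (if non-NULL) are set to the host offset and the
 * BlockDriverState where the data is stored, when BDRV_BLOCK_OFFSET_VALID
 * is returned.
 */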
static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
                                             bool want_zero,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum, int64_t *map,
                                             BlockDriverState **file)
{
    int64_t total_size;
    int64_t n;
    int ret;
    int64_t local_map = 0;
    BlockDriverState *local_file = NULL;
    int64_t aligned_offset, aligned_bytes;
    uint32_t align;
    bool has_filtered_child;

    assert(pnum);
    *pnum = 0;
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        goto early_out;
    }

    if (offset >= total_size) {
        ret = BDRV_BLOCK_EOF;
        goto early_out;
    }
    if (!bytes) {
        ret = 0;
        goto early_out;
    }

    n = total_size - offset;
    if (n < bytes) {
        bytes = n;
    }

    /* Must be non-NULL or bdrv_getlength() would have failed */
    assert(bs->drv);
    has_filtered_child = bdrv_filter_child(bs);
    if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
    align = bs->bl.request_alignment;
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    if (bs->drv->bdrv_co_block_status) {
        /*
         * Use the block-status cache only for protocol nodes: Format
         * drivers are generally quick to inquire the status, but protocol
         * drivers often need to get information from outside of qemu, so
         * we do not want to invoke them more often than necessary.
         *
         * The cache is therefore consulted only for nodes without
         * children; for all other nodes, the driver is queried so that
         * e.g. filtered status is reported correctly.
         */
        if (QLIST_EMPTY(&bs->children) &&
            bdrv_bsc_is_data(bs, aligned_offset, pnum))
        {
            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
            local_file = bs;
            local_map = aligned_offset;
        } else {
            ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
                                                aligned_bytes, pnum, &local_map,
                                                &local_file);

            /*
             * Fill the block-status cache only when the driver reported a
             * plain data region on a node without children (i.e. on a
             * protocol node), because that is the only situation the
             * cache can represent.  Check want_zero, because we only want
             * to update the cache when we have accurate information about
             * what is zero and what is data.
             */
            if (want_zero &&
                ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
                QLIST_EMPTY(&bs->children))
            {
                /*
                 * When a protocol driver reports BLOCK_OFFSET_VALID, the
                 * returned local_map value must be the same as the offset
                 * we have passed (aligned_offset), and local_file must be
                 * the node itself.  Assert this, because the cache-hit
                 * branch above relies on it when reading from the cache.
                 */
                assert(local_file == bs);
                assert(local_map == aligned_offset);
                bdrv_bsc_fill(bs, aligned_offset, *pnum);
            }
        }
    } else {
        /* Default code for filters */

        local_file = bdrv_filter_bs(bs);
        assert(local_file);

        *pnum = aligned_bytes;
        local_map = aligned_offset;
        ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
    }
    if (ret < 0) {
        *pnum = 0;
        goto out;
    }

    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
           align > offset - aligned_offset);
    if (ret & BDRV_BLOCK_RECURSE) {
        assert(ret & BDRV_BLOCK_DATA);
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        assert(!(ret & BDRV_BLOCK_ZERO));
    }

    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (bs->drv->supports_backing) {
        BlockDriverState *cow_bs = bdrv_cow_bs(bs);

        if (!cow_bs) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (want_zero) {
            int64_t size2 = bdrv_getlength(cow_bs);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
        local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /*
             * Ignore errors.  This is just providing extra information,
             * it is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

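/*
 * Walk the chain from @bs down towards @base (including @bs; @base itself
 * only if @include_base is true) and return the block status of the first
 * node that reports the range as allocated.  If @depth is non-NULL, it is
 * set to the number of nodes queried before the walk terminated.
 */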
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
                                  BlockDriverState *base,
                                  bool include_base,
                                  bool want_zero,
                                  int64_t offset,
                                  int64_t bytes,
                                  int64_t *pnum,
                                  int64_t *map,
                                  BlockDriverState **file,
                                  int *depth)
{
    int ret;
    BlockDriverState *p;
    int64_t eof = 0;
    int dummy;
    IO_CODE();

    assert(!include_base || base);

    if (!depth) {
        depth = &dummy;
    }
    *depth = 0;

    if (!include_base && bs == base) {
        *pnum = bytes;
        return 0;
    }

    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
    ++*depth;
    if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
        return ret;
    }

    if (ret & BDRV_BLOCK_EOF) {
        eof = offset + *pnum;
    }

    assert(*pnum <= bytes);
    bytes = *pnum;

    for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
         p = bdrv_filter_or_cow_bs(p))
    {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        ++*depth;
        if (ret < 0) {
            return ret;
        }
        if (*pnum == 0) {
            /*
             * The top layer deferred to this layer, and because this layer
             * is short, any zeroes that we synthesize beyond EOF behave as
             * if they were allocated at this layer.
             *
             * We don't include BDRV_BLOCK_EOF into ret, as the upper layer
             * may be larger.  We'll add BDRV_BLOCK_EOF if needed at the end
             * of the function, see 'if (offset + *pnum == eof)'.
             */
            assert(ret & BDRV_BLOCK_EOF);
            *pnum = bytes;
            if (file) {
                *file = p;
            }
            ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
            break;
        }
        if (ret & BDRV_BLOCK_ALLOCATED) {
            /*
             * We've found the node and the status, we must break.
             *
             * Drop BDRV_BLOCK_EOF, as it's not for the upper layer, which
             * may be larger.  We'll add BDRV_BLOCK_EOF if needed at the end
             * of the function, see 'if (offset + *pnum == eof)'.
             */
            ret &= ~BDRV_BLOCK_EOF;
            break;
        }

        if (p == base) {
            assert(include_base);
            break;
        }

        /*
         * The [offset, offset + *pnum) region is unallocated on this layer,
         * so continue diving towards the base.
         */
        assert(*pnum <= bytes);
        bytes = *pnum;
    }

    if (offset + *pnum == eof) {
        ret |= BDRV_BLOCK_EOF;
    }

    return ret;
}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    IO_CODE();
    return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
                                          pnum, map, file, NULL);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    IO_CODE();
    return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
                                   offset, bytes, pnum, map, file);
}

/*
 * Check @bs (and its backing chain) to see if the range defined
 * by @offset and @bytes is known to read as zeroes.
 * Return 1 if that is the case, 0 otherwise and -errno on error.
 * This test is meant to be fast rather than accurate so returning 0
 * does not necessarily mean non-zero data.
 */
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes)
{
    int ret;
    int64_t pnum = bytes;
    IO_CODE();

    if (!bytes) {
        return 1;
    }

    ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
                                            bytes, &pnum, NULL, NULL, NULL);

    if (ret < 0) {
        return ret;
    }

    return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
}

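/*
 * Return 1 if (a prefix of) the given range is allocated in @bs itself
 * (i.e. ignoring the backing chain), 0 otherwise, and a negative errno on
 * failure.  If non-NULL, *pnum is set as in bdrv_co_block_status().
 */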
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum)
{
    int ret;
    int64_t dummy;
    IO_CODE();

    ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL, NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return a positive depth if (a prefix of) the given range is allocated
 * in any image between BASE and TOP (BASE is only included if
 * include_base is set).  Depth 1 is TOP, 2 is the first backing layer,
 * and so forth.  BASE can be NULL to check whether the given offset is
 * allocated in any image of the chain.  Return 0 otherwise, or a
 * negative errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            bool include_base, int64_t offset,
                            int64_t bytes, int64_t *pnum)
{
    int depth;
    int ret = bdrv_common_block_status_above(top, base, include_base, false,
                                             offset, bytes, pnum, NULL, NULL,
                                             &depth);
    IO_CODE();
    if (ret < 0) {
        return ret;
    }

    if (ret & BDRV_BLOCK_ALLOCATED) {
        return depth;
    }
    return 0;
}

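/*
 * Read VM state from @pos: prefer the driver's .bdrv_load_vmstate
 * callback, otherwise forward the request to the primary child.
 */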
int coroutine_fn
bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret;
    IO_CODE();

    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
    if (ret < 0) {
        return ret;
    }

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

    if (drv->bdrv_load_vmstate) {
        ret = drv->bdrv_load_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
    } else {
        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

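/*
 * Write VM state at @pos: prefer the driver's .bdrv_save_vmstate
 * callback, otherwise forward the request to the primary child.
 */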
int coroutine_fn
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret;
    IO_CODE();

    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
    if (ret < 0) {
        return ret;
    }

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

    if (drv->bdrv_save_vmstate) {
        ret = drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
    } else {
        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

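/*
 * Linear-buffer convenience wrappers around the vectored vmstate
 * functions above; both return @size on success and a negative errno
 * on failure.
 */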
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_writev_vmstate(bs, &qiov, pos);
    IO_CODE();

    return ret < 0 ? ret : size;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_readv_vmstate(bs, &qiov, pos);
    IO_CODE();

    return ret < 0 ? ret : size;
}

/**************************************************************/
/* async I/Os */

void bdrv_aio_cancel(BlockAIOCB *acb)
{
    IO_CODE();
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /*
             * qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    IO_CODE();
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

/**************************************************************/
/* Coroutine block device emulation */

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);
    BdrvChild *child;
    int current_gen;
    int ret = 0;
    IO_CODE();

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_children;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_children;
    }

    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work in
         * that case either.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_children:
    ret = 0;
    QLIST_FOREACH(child, &bs->children, next) {
        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
            int this_child_ret = bdrv_co_flush(child->bs);
            if (!ret) {
                ret = this_child_ret;
            }
        }
    }

out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int ret;
    int64_t max_pdiscard;
    int head, tail, align;
    BlockDriverState *bs = child->bs;
    IO_CODE();

    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_request(offset, bytes, NULL);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Invalidate the cached block-status data range if this discard overlaps */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Deal with tail if possible */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

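/*
 * Forward an ioctl to the driver, preferring the coroutine callback and
 * falling back to the AIO-based one; -ENOTSUP if neither is implemented.
 */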
int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;
    IO_CODE();

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

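/*
 * Buffer allocation helpers aligned for I/O on @bs: the plain variants
 * abort on allocation failure (qemu_memalign() never returns NULL),
 * while the _try_ variants below return NULL instead.
 */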
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    IO_CODE();
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    IO_CODE();
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);
    IO_CODE();

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);
    IO_CODE();

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}

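/*
 * Plug/unplug batching of queued I/O requests: plugging recurses into all
 * children first and notifies the driver on the 0 -> 1 transition of
 * bs->io_plugged; unplugging notifies the driver on the 1 -> 0 transition
 * and then recurses into the children.
 */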
void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;
    IO_CODE();

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;
    IO_CODE();

    assert(bs->io_plugged);
    if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}

/* Helper that undoes bdrv_register_buf() when it fails partway through */
static void bdrv_register_buf_rollback(BlockDriverState *bs,
                                       void *host,
                                       size_t size,
                                       BdrvChild *final_child)
{
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        if (child == final_child) {
            break;
        }

        bdrv_unregister_buf(child->bs, host, size);
    }

    if (bs->drv && bs->drv->bdrv_unregister_buf) {
        bs->drv->bdrv_unregister_buf(bs, host, size);
    }
}

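/*
 * Register @host/@size with the driver of @bs and all of its children,
 * presumably so that drivers can pre-register memory regions for more
 * efficient I/O.  On failure, whatever was registered so far is rolled
 * back and false is returned.
 */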
bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
                       Error **errp)
{
    BdrvChild *child;

    GLOBAL_STATE_CODE();
    if (bs->drv && bs->drv->bdrv_register_buf) {
        if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
            return false;
        }
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_register_buf(child->bs, host, size, errp)) {
            bdrv_register_buf_rollback(bs, host, size, child);
            return false;
        }
    }
    return true;
}

void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
{
    BdrvChild *child;

    GLOBAL_STATE_CODE();
    if (bs->drv && bs->drv->bdrv_unregister_buf) {
        bs->drv->bdrv_unregister_buf(bs, host, size);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_unregister_buf(child->bs, host, size);
    }
}

static int coroutine_fn bdrv_co_copy_range_internal(
        BdrvChild *src, int64_t src_offset, BdrvChild *dst,
        int64_t dst_offset, int64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;

    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
    assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(read_flags & BDRV_REQ_NO_WAIT));
    assert(!(write_flags & BDRV_REQ_NO_WAIT));

    if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }

    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

    if (recurse_src) {
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING is only for write operation */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        bdrv_wait_serialising_requests(&req);

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}

/* Copy range from @src to @dst.
 *
 * See bdrv_co_copy_range() for the parameter and return value
 * semantics. */
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
                                         BdrvChild *dst, int64_t dst_offset,
                                         int64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags)
{
    IO_CODE();
    trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, true);
}

/* Copy range from @src to @dst.
 *
 * See bdrv_co_copy_range() for the parameter and return value
 * semantics. */
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
                                       BdrvChild *dst, int64_t dst_offset,
                                       int64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags)
{
    IO_CODE();
    trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, false);
}

int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                    BdrvChild *dst, int64_t dst_offset,
                                    int64_t bytes, BdrvRequestFlags read_flags,
                                    BdrvRequestFlags write_flags)
{
    IO_CODE();
    return bdrv_co_copy_range_from(src, src_offset,
                                   dst, dst_offset,
                                   bytes, read_flags, write_flags);
}

static void bdrv_parent_cb_resize(BlockDriverState *bs)
{
    BdrvChild *c;
    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->klass->resize) {
            c->klass->resize(c);
        }
    }
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as the given 'offset' (per the semantics of the given
 * preallocation mode).
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, BdrvRequestFlags flags,
                                  Error **errp)
{
    BlockDriverState *bs = child->bs;
    BdrvChild *filtered, *backing;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;
    IO_CODE();

    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    ret = bdrv_check_request(offset, 0, errp);
    if (ret < 0) {
        return ret;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    if (bdrv_is_read_only(bs)) {
        error_setg(errp, "Image is read-only");
        return -EACCES;
    }

    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /* If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
    if (new_bytes) {
        bdrv_make_request_serialising(&req, 1);
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    filtered = bdrv_filter_child(bs);
    backing = bdrv_cow_child(bs);

    /*
     * If the image has a backing file that is large enough that it would
     * provide data for the new area, we cannot leave it unallocated because
     * then the backing file content would become visible. Instead, zero-fill
     * the new area.
     *
     * Note that if the image has a backing file, but was opened without the
     * backing file, taking care of keeping things consistent with that backing
     * file is the user's responsibility.
     */
    if (new_bytes && backing) {
        int64_t backing_len;

        backing_len = bdrv_getlength(backing->bs);
        if (backing_len < 0) {
            ret = backing_len;
            error_setg_errno(errp, -ret, "Could not get backing file size");
            goto out;
        }

        if (backing_len > old_size) {
            flags |= BDRV_REQ_ZERO_WRITE;
        }
    }

    if (drv->bdrv_co_truncate) {
        if (flags & ~bs->supported_truncate_flags) {
            error_setg(errp, "Block driver does not support requested flags");
            ret = -ENOTSUP;
            goto out;
        }
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
    } else if (filtered) {
        ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /*
     * It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

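/*
 * Ask the driver of @bs to cancel its in-flight requests, if it provides
 * a .bdrv_cancel_in_flight callback; otherwise this is a no-op.
 */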
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();
    if (!bs || !bs->drv) {
        return;
    }

    if (bs->drv->bdrv_cancel_in_flight) {
        bs->drv->bdrv_cancel_in_flight(bs);
    }
}

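/*
 * The _snapshot functions below forward a request to the driver's
 * snapshot-access callbacks (presumably for use through a snapshot-access
 * node); drivers that do not implement them return -ENOTSUP.
 */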
int coroutine_fn
bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
                        QEMUIOVector *qiov, size_t qiov_offset)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_preadv_snapshot) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn
bdrv_co_snapshot_block_status(BlockDriverState *bs,
                              bool want_zero, int64_t offset, int64_t bytes,
                              int64_t *pnum, int64_t *map,
                              BlockDriverState **file)
{
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_snapshot_block_status) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
                                             pnum, map, file);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn
bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_pdiscard_snapshot) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
    bdrv_dec_in_flight(bs);

    return ret;
}