#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags);

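/*
 * Begin a quiesced section for all parents of @bs, except @ignore and,
 * when @ignore_bds_parents is true, any parent that is itself a
 * BlockDriverState.
 */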
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->klass->drained_end) {
        c->klass->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
    IO_OR_GS_CODE();
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->klass->drained_poll) {
        return c->klass->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
    IO_OR_GS_CODE();
    c->parent_quiesce_counter++;
    if (c->klass->drained_begin) {
        c->klass->drained_begin(c);
    }
    if (poll) {
        AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
    }
}

/*
 * Merge a child's block limits into the parent's: alignments and minimum
 * requirements grow (MAX), transfer and iov limits shrink to the strictest
 * non-zero value (MIN_NON_ZERO).
 */
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
                                  src->pdiscard_alignment);
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
                                        src->max_hw_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
    dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
}

typedef struct BdrvRefreshLimitsState {
    BlockDriverState *bs;
    BlockLimits old_bl;
} BdrvRefreshLimitsState;

static void bdrv_refresh_limits_abort(void *opaque)
{
    BdrvRefreshLimitsState *s = opaque;

    s->bs->bl = s->old_bl;
}

static TransactionActionDrv bdrv_refresh_limits_drv = {
    .abort = bdrv_refresh_limits_abort,
    .clean = g_free,
};

void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
    ERRP_GUARD();
    BlockDriver *drv = bs->drv;
    BdrvChild *c;
    bool have_limits;

    GLOBAL_STATE_CODE();

    if (tran) {
        BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
        *s = (BdrvRefreshLimitsState) {
            .bs = bs,
            .old_bl = bs->bl,
        };
        tran_add(tran, &bdrv_refresh_limits_drv, s);
    }

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv ||
                                drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
    have_limits = false;
    QLIST_FOREACH(c, &bs->children, next) {
        if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
        {
            bdrv_merge_limits(&bs->bl, &c->bs->bl);
            have_limits = true;
        }
    }

    if (!have_limits) {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = qemu_real_host_page_size();

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
        if (*errp) {
            return;
        }
    }

    if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
        error_setg(errp, "Driver requires too large request alignment");
    }
}
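
/*
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clashing its various users.
 * Copy-on-read stays enabled until all users have called to disable it.
 */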
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = qatomic_fetch_dec(&bs->copy_on_read);
    IO_CODE();
    assert(old >= 1);
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
    qatomic_mb_set(&data->done, true);
    if (!data->begin) {
        qatomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}
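
/*
 * Invoke the driver's bdrv_co_drain_begin/end callback for @bs, if the driver
 * implements one, by scheduling a coroutine in the node's AioContext.  For
 * the "end" case, *drained_end_counter is incremented here and decremented
 * again once that coroutine has finished.
 */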
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        qatomic_inc(drained_end_counter);
    }

    /* Make sure the driver callback completes during the polling phase for
     * drain_begin. */
    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}
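
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */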
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;
    IO_OR_GS_CODE();

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (qatomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        aio_context_acquire(ctx);
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        aio_context_release(ctx);
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;
    Coroutine *self = qemu_coroutine_self();
    AioContext *ctx = bdrv_get_aio_context(bs);
    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);

    /*
     * Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter().
     */
    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = self,
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }

    /*
     * Temporarily drop the AioContext lock across the yield, or we would
     * deadlock: bdrv_co_drain_bh_cb() reacquires it as needed.
     *
     * When we yield below, the lock of the current coroutine's context is
     * released, so if that is the lock that protects bs, don't drop it a
     * second time.
     */
    if (ctx != co_ctx) {
        aio_context_release(ctx);
    }
    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);

    /* Reacquire the AioContext of bs if we dropped it */
    if (ctx != co_ctx) {
        aio_context_acquire(ctx);
    }
}

void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    IO_OR_GS_CODE();
    assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

    /*
     * Wait for drained requests to finish.
     *
     * Polling once for the top-level node is enough: for recursive drains,
     * bdrv_drain_poll_top_level() also checks all children, so polling in
     * the recursive calls above would be redundant.
     */
    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}
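
/**
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee will be incremented
 * once for every background operation scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must remain valid
 * until the pointee reaches 0.  That implies that whoever sets up the
 * pointee has to poll until it is 0.
 *
 * We use atomic operations to access *drained_end_counter, because the
 * same counter may be shared by nodes in different AioContexts (for
 * example, bdrv_drain_all_end() uses one counter for all nodes).
 */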
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    IO_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(qatomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;
    GLOBAL_STATE_CODE();

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}
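
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */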
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;
    GLOBAL_STATE_CODE();

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    g_assert(bs->quiesce_counter > 0);
    g_assert(!bs->refcnt);

    while (bs->quiesce_counter) {
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
    }
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    GLOBAL_STATE_CODE();
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}
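
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */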
static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        qatomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
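
/**
 * Add an active request to the tracked requests list
 */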
static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
                                               BlockDriverState *bs,
                                               int64_t offset,
                                               int64_t bytes,
                                               enum BdrvTrackedRequestType type)
{
    bdrv_check_request(offset, bytes, &error_abort);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

782static bool tracked_request_overlaps(BdrvTrackedRequest *req,
783 int64_t offset, int64_t bytes)
784{
785 bdrv_check_request(offset, bytes, &error_abort);
786
787
788 if (offset >= req->overlap_offset + req->overlap_bytes) {
789 return false;
790 }
791
792 if (req->overlap_offset >= offset + bytes) {
793 return false;
794 }
795 return true;
796}
797
798
799static coroutine_fn BdrvTrackedRequest *
800bdrv_find_conflicting_request(BdrvTrackedRequest *self)
801{
802 BdrvTrackedRequest *req;
803
804 QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
805 if (req == self || (!req->serialising && !self->serialising)) {
806 continue;
807 }
808 if (tracked_request_overlaps(req, self->overlap_offset,
809 self->overlap_bytes))
810 {
811
812
813
814
815
816 assert(qemu_coroutine_self() != req->co);
817
818
819
820
821
822
823 if (!req->waiting_for) {
824 return req;
825 }
826 }
827 }
828
829 return NULL;
830}
831
832
833static void coroutine_fn
834bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
835{
836 BdrvTrackedRequest *req;
837
838 while ((req = bdrv_find_conflicting_request(self))) {
839 self->waiting_for = req;
840 qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
841 self->waiting_for = NULL;
842 }
843}
844
845
846static void tracked_request_set_serialising(BdrvTrackedRequest *req,
847 uint64_t align)
848{
849 int64_t overlap_offset = req->offset & ~(align - 1);
850 int64_t overlap_bytes =
851 ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
852
853 bdrv_check_request(req->offset, req->bytes, &error_abort);
854
855 if (!req->serialising) {
856 qatomic_inc(&req->bs->serialising_in_flight);
857 req->serialising = true;
858 }
859
860 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
861 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
862}
863
864
865
866
867
868BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
869{
870 BdrvTrackedRequest *req;
871 Coroutine *self = qemu_coroutine_self();
872 IO_CODE();
873
874 QLIST_FOREACH(req, &bs->tracked_requests, list) {
875 if (req->co == self) {
876 return req;
877 }
878 }
879
880 return NULL;
881}
882
883
884
885
886void bdrv_round_to_clusters(BlockDriverState *bs,
887 int64_t offset, int64_t bytes,
888 int64_t *cluster_offset,
889 int64_t *cluster_bytes)
890{
891 BlockDriverInfo bdi;
892 IO_CODE();
893 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
894 *cluster_offset = offset;
895 *cluster_bytes = bytes;
896 } else {
897 int64_t c = bdi.cluster_size;
898 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
899 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
900 }
901}
902
903static int bdrv_get_cluster_size(BlockDriverState *bs)
904{
905 BlockDriverInfo bdi;
906 int ret;
907
908 ret = bdrv_get_info(bs, &bdi);
909 if (ret < 0 || bdi.cluster_size == 0) {
910 return bs->bl.request_alignment;
911 } else {
912 return bdi.cluster_size;
913 }
914}
915
916void bdrv_inc_in_flight(BlockDriverState *bs)
917{
918 IO_CODE();
919 qatomic_inc(&bs->in_flight);
920}
921
922void bdrv_wakeup(BlockDriverState *bs)
923{
924 IO_CODE();
925 aio_wait_kick();
926}
927
928void bdrv_dec_in_flight(BlockDriverState *bs)
929{
930 IO_CODE();
931 qatomic_dec(&bs->in_flight);
932 bdrv_wakeup(bs);
933}
934
935static void coroutine_fn
936bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
937{
938 BlockDriverState *bs = self->bs;
939
940 if (!qatomic_read(&bs->serialising_in_flight)) {
941 return;
942 }
943
944 qemu_co_mutex_lock(&bs->reqs_lock);
945 bdrv_wait_serialising_requests_locked(self);
946 qemu_co_mutex_unlock(&bs->reqs_lock);
947}
948
949void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
950 uint64_t align)
951{
952 IO_CODE();
953
954 qemu_co_mutex_lock(&req->bs->reqs_lock);
955
956 tracked_request_set_serialising(req, align);
957 bdrv_wait_serialising_requests_locked(req);
958
959 qemu_co_mutex_unlock(&req->bs->reqs_lock);
960}
961
962int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
963 QEMUIOVector *qiov, size_t qiov_offset,
964 Error **errp)
965{
966
967
968
969
970 if (offset < 0) {
971 error_setg(errp, "offset is negative: %" PRIi64, offset);
972 return -EIO;
973 }
974
975 if (bytes < 0) {
976 error_setg(errp, "bytes is negative: %" PRIi64, bytes);
977 return -EIO;
978 }
979
980 if (bytes > BDRV_MAX_LENGTH) {
981 error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
982 bytes, BDRV_MAX_LENGTH);
983 return -EIO;
984 }
985
986 if (offset > BDRV_MAX_LENGTH) {
987 error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
988 offset, BDRV_MAX_LENGTH);
989 return -EIO;
990 }
991
992 if (offset > BDRV_MAX_LENGTH - bytes) {
993 error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
994 "exceeds maximum(%" PRIi64 ")", offset, bytes,
995 BDRV_MAX_LENGTH);
996 return -EIO;
997 }
998
999 if (!qiov) {
1000 return 0;
1001 }
1002
1003
1004
1005
1006
1007 if (qiov_offset > qiov->size) {
1008 error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
1009 qiov_offset, qiov->size);
1010 return -EIO;
1011 }
1012
1013 if (bytes > qiov->size - qiov_offset) {
1014 error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
1015 "vector size(%zu)", bytes, qiov_offset, qiov->size);
1016 return -EIO;
1017 }
1018
1019 return 0;
1020}
1021
1022int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
1023{
1024 return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
1025}
1026
1027static int bdrv_check_request32(int64_t offset, int64_t bytes,
1028 QEMUIOVector *qiov, size_t qiov_offset)
1029{
1030 int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
1031 if (ret < 0) {
1032 return ret;
1033 }
1034
1035 if (bytes > BDRV_REQUEST_MAX_BYTES) {
1036 return -EIO;
1037 }
1038
1039 return 0;
1040}
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
1052{
1053 int ret;
1054 int64_t target_size, bytes, offset = 0;
1055 BlockDriverState *bs = child->bs;
1056 IO_CODE();
1057
1058 target_size = bdrv_getlength(bs);
1059 if (target_size < 0) {
1060 return target_size;
1061 }
1062
1063 for (;;) {
1064 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
1065 if (bytes <= 0) {
1066 return 0;
1067 }
1068 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
1069 if (ret < 0) {
1070 return ret;
1071 }
1072 if (ret & BDRV_BLOCK_ZERO) {
1073 offset += bytes;
1074 continue;
1075 }
1076 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
1077 if (ret < 0) {
1078 return ret;
1079 }
1080 offset += bytes;
1081 }
1082}
1083
1084
1085
1086
1087
1088
1089
1090int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
1091 int64_t bytes, const void *buf,
1092 BdrvRequestFlags flags)
1093{
1094 int ret;
1095 IO_CODE();
1096
1097 ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
1098 if (ret < 0) {
1099 return ret;
1100 }
1101
1102 ret = bdrv_co_flush(child->bs);
1103 if (ret < 0) {
1104 return ret;
1105 }
1106
1107 return 0;
1108}
1109
1110typedef struct CoroutineIOCompletion {
1111 Coroutine *coroutine;
1112 int ret;
1113} CoroutineIOCompletion;
1114
1115static void bdrv_co_io_em_complete(void *opaque, int ret)
1116{
1117 CoroutineIOCompletion *co = opaque;
1118
1119 co->ret = ret;
1120 aio_co_wake(co->coroutine);
1121}
1122
1123static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1124 int64_t offset, int64_t bytes,
1125 QEMUIOVector *qiov,
1126 size_t qiov_offset, int flags)
1127{
1128 BlockDriver *drv = bs->drv;
1129 int64_t sector_num;
1130 unsigned int nb_sectors;
1131 QEMUIOVector local_qiov;
1132 int ret;
1133
1134 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1135 assert(!(flags & ~bs->supported_read_flags));
1136
1137 if (!drv) {
1138 return -ENOMEDIUM;
1139 }
1140
1141 if (drv->bdrv_co_preadv_part) {
1142 return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
1143 flags);
1144 }
1145
1146 if (qiov_offset > 0 || bytes != qiov->size) {
1147 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1148 qiov = &local_qiov;
1149 }
1150
1151 if (drv->bdrv_co_preadv) {
1152 ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1153 goto out;
1154 }
1155
1156 if (drv->bdrv_aio_preadv) {
1157 BlockAIOCB *acb;
1158 CoroutineIOCompletion co = {
1159 .coroutine = qemu_coroutine_self(),
1160 };
1161
1162 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1163 bdrv_co_io_em_complete, &co);
1164 if (acb == NULL) {
1165 ret = -EIO;
1166 goto out;
1167 } else {
1168 qemu_coroutine_yield();
1169 ret = co.ret;
1170 goto out;
1171 }
1172 }
1173
1174 sector_num = offset >> BDRV_SECTOR_BITS;
1175 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1176
1177 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1178 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1179 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1180 assert(drv->bdrv_co_readv);
1181
1182 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1183
1184out:
1185 if (qiov == &local_qiov) {
1186 qemu_iovec_destroy(&local_qiov);
1187 }
1188
1189 return ret;
1190}
1191
1192static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1193 int64_t offset, int64_t bytes,
1194 QEMUIOVector *qiov,
1195 size_t qiov_offset,
1196 BdrvRequestFlags flags)
1197{
1198 BlockDriver *drv = bs->drv;
1199 bool emulate_fua = false;
1200 int64_t sector_num;
1201 unsigned int nb_sectors;
1202 QEMUIOVector local_qiov;
1203 int ret;
1204
1205 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1206
1207 if (!drv) {
1208 return -ENOMEDIUM;
1209 }
1210
1211 if ((flags & BDRV_REQ_FUA) &&
1212 (~bs->supported_write_flags & BDRV_REQ_FUA)) {
1213 flags &= ~BDRV_REQ_FUA;
1214 emulate_fua = true;
1215 }
1216
1217 flags &= bs->supported_write_flags;
1218
1219 if (drv->bdrv_co_pwritev_part) {
1220 ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
1221 flags);
1222 goto emulate_flags;
1223 }
1224
1225 if (qiov_offset > 0 || bytes != qiov->size) {
1226 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1227 qiov = &local_qiov;
1228 }
1229
1230 if (drv->bdrv_co_pwritev) {
1231 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
1232 goto emulate_flags;
1233 }
1234
1235 if (drv->bdrv_aio_pwritev) {
1236 BlockAIOCB *acb;
1237 CoroutineIOCompletion co = {
1238 .coroutine = qemu_coroutine_self(),
1239 };
1240
1241 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
1242 bdrv_co_io_em_complete, &co);
1243 if (acb == NULL) {
1244 ret = -EIO;
1245 } else {
1246 qemu_coroutine_yield();
1247 ret = co.ret;
1248 }
1249 goto emulate_flags;
1250 }
1251
1252 sector_num = offset >> BDRV_SECTOR_BITS;
1253 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1254
1255 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1256 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1257 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1258
1259 assert(drv->bdrv_co_writev);
1260 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
1261
1262emulate_flags:
1263 if (ret == 0 && emulate_fua) {
1264 ret = bdrv_co_flush(bs);
1265 }
1266
1267 if (qiov == &local_qiov) {
1268 qemu_iovec_destroy(&local_qiov);
1269 }
1270
1271 return ret;
1272}
1273
1274static int coroutine_fn
1275bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
1276 int64_t bytes, QEMUIOVector *qiov,
1277 size_t qiov_offset)
1278{
1279 BlockDriver *drv = bs->drv;
1280 QEMUIOVector local_qiov;
1281 int ret;
1282
1283 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1284
1285 if (!drv) {
1286 return -ENOMEDIUM;
1287 }
1288
1289 if (!block_driver_can_compress(drv)) {
1290 return -ENOTSUP;
1291 }
1292
1293 if (drv->bdrv_co_pwritev_compressed_part) {
1294 return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
1295 qiov, qiov_offset);
1296 }
1297
1298 if (qiov_offset == 0) {
1299 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1300 }
1301
1302 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1303 ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
1304 qemu_iovec_destroy(&local_qiov);
1305
1306 return ret;
1307}
1308
1309static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1310 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1311 size_t qiov_offset, int flags)
1312{
1313 BlockDriverState *bs = child->bs;
1314
1315
1316
1317
1318
1319
1320 void *bounce_buffer = NULL;
1321
1322 BlockDriver *drv = bs->drv;
1323 int64_t cluster_offset;
1324 int64_t cluster_bytes;
1325 int64_t skip_bytes;
1326 int ret;
1327 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1328 BDRV_REQUEST_MAX_BYTES);
1329 int64_t progress = 0;
1330 bool skip_write;
1331
1332 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1333
1334 if (!drv) {
1335 return -ENOMEDIUM;
1336 }
1337
1338
1339
1340
1341
1342 skip_write = (bs->open_flags & BDRV_O_INACTIVE);
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1359 skip_bytes = offset - cluster_offset;
1360
1361 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1362 cluster_offset, cluster_bytes);
1363
1364 while (cluster_bytes) {
1365 int64_t pnum;
1366
1367 if (skip_write) {
1368 ret = 1;
1369 pnum = MIN(cluster_bytes, max_transfer);
1370 } else {
1371 ret = bdrv_is_allocated(bs, cluster_offset,
1372 MIN(cluster_bytes, max_transfer), &pnum);
1373 if (ret < 0) {
1374
1375
1376
1377
1378
1379 pnum = MIN(cluster_bytes, max_transfer);
1380 }
1381
1382
1383 if (ret == 0 && pnum == 0) {
1384 assert(progress >= bytes);
1385 break;
1386 }
1387
1388 assert(skip_bytes < pnum);
1389 }
1390
1391 if (ret <= 0) {
1392 QEMUIOVector local_qiov;
1393
1394
1395 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1396 if (!bounce_buffer) {
1397 int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
1398 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
1399 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
1400
1401 bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
1402 if (!bounce_buffer) {
1403 ret = -ENOMEM;
1404 goto err;
1405 }
1406 }
1407 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1408
1409 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1410 &local_qiov, 0, 0);
1411 if (ret < 0) {
1412 goto err;
1413 }
1414
1415 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1416 if (drv->bdrv_co_pwrite_zeroes &&
1417 buffer_is_zero(bounce_buffer, pnum)) {
1418
1419
1420
1421 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1422 BDRV_REQ_WRITE_UNCHANGED);
1423 } else {
1424
1425
1426
1427 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1428 &local_qiov, 0,
1429 BDRV_REQ_WRITE_UNCHANGED);
1430 }
1431
1432 if (ret < 0) {
1433
1434
1435
1436
1437
1438 goto err;
1439 }
1440
1441 if (!(flags & BDRV_REQ_PREFETCH)) {
1442 qemu_iovec_from_buf(qiov, qiov_offset + progress,
1443 bounce_buffer + skip_bytes,
1444 MIN(pnum - skip_bytes, bytes - progress));
1445 }
1446 } else if (!(flags & BDRV_REQ_PREFETCH)) {
1447
1448 ret = bdrv_driver_preadv(bs, offset + progress,
1449 MIN(pnum - skip_bytes, bytes - progress),
1450 qiov, qiov_offset + progress, 0);
1451 if (ret < 0) {
1452 goto err;
1453 }
1454 }
1455
1456 cluster_offset += pnum;
1457 cluster_bytes -= pnum;
1458 progress += pnum - skip_bytes;
1459 skip_bytes = 0;
1460 }
1461 ret = 0;
1462
1463err:
1464 qemu_vfree(bounce_buffer);
1465 return ret;
1466}
1467
1468
1469
1470
1471
1472
1473static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1474 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
1475 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
1476{
1477 BlockDriverState *bs = child->bs;
1478 int64_t total_bytes, max_bytes;
1479 int ret = 0;
1480 int64_t bytes_remaining = bytes;
1481 int max_transfer;
1482
1483 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1484 assert(is_power_of_2(align));
1485 assert((offset & (align - 1)) == 0);
1486 assert((bytes & (align - 1)) == 0);
1487 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1488 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1489 align);
1490
1491
1492
1493
1494
1495
1496
1497 assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
1498 BDRV_REQ_REGISTERED_BUF)));
1499
1500
1501 if (flags & BDRV_REQ_COPY_ON_READ) {
1502
1503
1504
1505
1506
1507 bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
1508 } else {
1509 bdrv_wait_serialising_requests(req);
1510 }
1511
1512 if (flags & BDRV_REQ_COPY_ON_READ) {
1513 int64_t pnum;
1514
1515
1516 flags &= ~BDRV_REQ_COPY_ON_READ;
1517
1518 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1519 if (ret < 0) {
1520 goto out;
1521 }
1522
1523 if (!ret || pnum != bytes) {
1524 ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
1525 qiov, qiov_offset, flags);
1526 goto out;
1527 } else if (flags & BDRV_REQ_PREFETCH) {
1528 goto out;
1529 }
1530 }
1531
1532
1533 total_bytes = bdrv_getlength(bs);
1534 if (total_bytes < 0) {
1535 ret = total_bytes;
1536 goto out;
1537 }
1538
1539 assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
1540
1541 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1542 if (bytes <= max_bytes && bytes <= max_transfer) {
1543 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
1544 goto out;
1545 }
1546
1547 while (bytes_remaining) {
1548 int64_t num;
1549
1550 if (max_bytes) {
1551 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1552 assert(num);
1553
1554 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1555 num, qiov,
1556 qiov_offset + bytes - bytes_remaining,
1557 flags);
1558 max_bytes -= num;
1559 } else {
1560 num = bytes_remaining;
1561 ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
1562 0, bytes_remaining);
1563 }
1564 if (ret < 0) {
1565 goto out;
1566 }
1567 bytes_remaining -= num;
1568 }
1569
1570out:
1571 return ret < 0 ? ret : 0;
1572}
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596typedef struct BdrvRequestPadding {
1597 uint8_t *buf;
1598 size_t buf_len;
1599 uint8_t *tail_buf;
1600 size_t head;
1601 size_t tail;
1602 bool merge_reads;
1603 QEMUIOVector local_qiov;
1604} BdrvRequestPadding;
1605
1606static bool bdrv_init_padding(BlockDriverState *bs,
1607 int64_t offset, int64_t bytes,
1608 BdrvRequestPadding *pad)
1609{
1610 int64_t align = bs->bl.request_alignment;
1611 int64_t sum;
1612
1613 bdrv_check_request(offset, bytes, &error_abort);
1614 assert(align <= INT_MAX);
1615 assert(align <= SIZE_MAX / 2);
1616
1617 memset(pad, 0, sizeof(*pad));
1618
1619 pad->head = offset & (align - 1);
1620 pad->tail = ((offset + bytes) & (align - 1));
1621 if (pad->tail) {
1622 pad->tail = align - pad->tail;
1623 }
1624
1625 if (!pad->head && !pad->tail) {
1626 return false;
1627 }
1628
1629 assert(bytes);
1630
1631 sum = pad->head + bytes + pad->tail;
1632 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1633 pad->buf = qemu_blockalign(bs, pad->buf_len);
1634 pad->merge_reads = sum == pad->buf_len;
1635 if (pad->tail) {
1636 pad->tail_buf = pad->buf + pad->buf_len - align;
1637 }
1638
1639 return true;
1640}
1641
1642static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child,
1643 BdrvTrackedRequest *req,
1644 BdrvRequestPadding *pad,
1645 bool zero_middle)
1646{
1647 QEMUIOVector local_qiov;
1648 BlockDriverState *bs = child->bs;
1649 uint64_t align = bs->bl.request_alignment;
1650 int ret;
1651
1652 assert(req->serialising && pad->buf);
1653
1654 if (pad->head || pad->merge_reads) {
1655 int64_t bytes = pad->merge_reads ? pad->buf_len : align;
1656
1657 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1658
1659 if (pad->head) {
1660 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1661 }
1662 if (pad->merge_reads && pad->tail) {
1663 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1664 }
1665 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1666 align, &local_qiov, 0, 0);
1667 if (ret < 0) {
1668 return ret;
1669 }
1670 if (pad->head) {
1671 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1672 }
1673 if (pad->merge_reads && pad->tail) {
1674 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1675 }
1676
1677 if (pad->merge_reads) {
1678 goto zero_mem;
1679 }
1680 }
1681
1682 if (pad->tail) {
1683 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1684
1685 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1686 ret = bdrv_aligned_preadv(
1687 child, req,
1688 req->overlap_offset + req->overlap_bytes - align,
1689 align, align, &local_qiov, 0, 0);
1690 if (ret < 0) {
1691 return ret;
1692 }
1693 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1694 }
1695
1696zero_mem:
1697 if (zero_middle) {
1698 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1699 }
1700
1701 return 0;
1702}
1703
1704static void bdrv_padding_destroy(BdrvRequestPadding *pad)
1705{
1706 if (pad->buf) {
1707 qemu_vfree(pad->buf);
1708 qemu_iovec_destroy(&pad->local_qiov);
1709 }
1710 memset(pad, 0, sizeof(*pad));
1711}
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725static int bdrv_pad_request(BlockDriverState *bs,
1726 QEMUIOVector **qiov, size_t *qiov_offset,
1727 int64_t *offset, int64_t *bytes,
1728 BdrvRequestPadding *pad, bool *padded,
1729 BdrvRequestFlags *flags)
1730{
1731 int ret;
1732
1733 bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
1734
1735 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
1736 if (padded) {
1737 *padded = false;
1738 }
1739 return 0;
1740 }
1741
1742 ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
1743 *qiov, *qiov_offset, *bytes,
1744 pad->buf + pad->buf_len - pad->tail,
1745 pad->tail);
1746 if (ret < 0) {
1747 bdrv_padding_destroy(pad);
1748 return ret;
1749 }
1750 *bytes += pad->head + pad->tail;
1751 *offset -= pad->head;
1752 *qiov = &pad->local_qiov;
1753 *qiov_offset = 0;
1754 if (padded) {
1755 *padded = true;
1756 }
1757 if (flags) {
1758
1759 *flags &= ~BDRV_REQ_REGISTERED_BUF;
1760 }
1761
1762 return 0;
1763}
1764
1765int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1766 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1767 BdrvRequestFlags flags)
1768{
1769 IO_CODE();
1770 return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
1771}
1772
1773int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
1774 int64_t offset, int64_t bytes,
1775 QEMUIOVector *qiov, size_t qiov_offset,
1776 BdrvRequestFlags flags)
1777{
1778 BlockDriverState *bs = child->bs;
1779 BdrvTrackedRequest req;
1780 BdrvRequestPadding pad;
1781 int ret;
1782 IO_CODE();
1783
1784 trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
1785
1786 if (!bdrv_is_inserted(bs)) {
1787 return -ENOMEDIUM;
1788 }
1789
1790 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
1791 if (ret < 0) {
1792 return ret;
1793 }
1794
1795 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
1796
1797
1798
1799
1800
1801
1802
1803
1804 return 0;
1805 }
1806
1807 bdrv_inc_in_flight(bs);
1808
1809
1810 if (qatomic_read(&bs->copy_on_read)) {
1811 flags |= BDRV_REQ_COPY_ON_READ;
1812 }
1813
1814 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
1815 NULL, &flags);
1816 if (ret < 0) {
1817 goto fail;
1818 }
1819
1820 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1821 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1822 bs->bl.request_alignment,
1823 qiov, qiov_offset, flags);
1824 tracked_request_end(&req);
1825 bdrv_padding_destroy(&pad);
1826
1827fail:
1828 bdrv_dec_in_flight(bs);
1829
1830 return ret;
1831}
1832
1833static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1834 int64_t offset, int64_t bytes, BdrvRequestFlags flags)
1835{
1836 BlockDriver *drv = bs->drv;
1837 QEMUIOVector qiov;
1838 void *buf = NULL;
1839 int ret = 0;
1840 bool need_flush = false;
1841 int head = 0;
1842 int tail = 0;
1843
1844 int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
1845 INT64_MAX);
1846 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1847 bs->bl.request_alignment);
1848 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1849
1850 bdrv_check_request(offset, bytes, &error_abort);
1851
1852 if (!drv) {
1853 return -ENOMEDIUM;
1854 }
1855
1856 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1857 return -ENOTSUP;
1858 }
1859
1860
1861 if (flags & BDRV_REQ_REGISTERED_BUF) {
1862 return -EINVAL;
1863 }
1864
1865
1866 bdrv_bsc_invalidate_range(bs, offset, bytes);
1867
1868 assert(alignment % bs->bl.request_alignment == 0);
1869 head = offset % alignment;
1870 tail = (offset + bytes) % alignment;
1871 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1872 assert(max_write_zeroes >= bs->bl.request_alignment);
1873
1874 while (bytes > 0 && !ret) {
1875 int64_t num = bytes;
1876
1877
1878
1879
1880
1881 if (head) {
1882
1883
1884
1885 num = MIN(MIN(bytes, max_transfer), alignment - head);
1886 head = (head + num) % alignment;
1887 assert(num < max_write_zeroes);
1888 } else if (tail && num > alignment) {
1889
1890 num -= tail;
1891 }
1892
1893
1894 if (num > max_write_zeroes) {
1895 num = max_write_zeroes;
1896 }
1897
1898 ret = -ENOTSUP;
1899
1900 if (drv->bdrv_co_pwrite_zeroes) {
1901 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1902 flags & bs->supported_zero_flags);
1903 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1904 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1905 need_flush = true;
1906 }
1907 } else {
1908 assert(!bs->supported_zero_flags);
1909 }
1910
1911 if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
1912
1913 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1914
1915 if ((flags & BDRV_REQ_FUA) &&
1916 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1917
1918
1919 write_flags &= ~BDRV_REQ_FUA;
1920 need_flush = true;
1921 }
1922 num = MIN(num, max_transfer);
1923 if (buf == NULL) {
1924 buf = qemu_try_blockalign0(bs, num);
1925 if (buf == NULL) {
1926 ret = -ENOMEM;
1927 goto fail;
1928 }
1929 }
1930 qemu_iovec_init_buf(&qiov, buf, num);
1931
1932 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
1933
1934
1935
1936
1937 if (num < max_transfer) {
1938 qemu_vfree(buf);
1939 buf = NULL;
1940 }
1941 }
1942
1943 offset += num;
1944 bytes -= num;
1945 }
1946
1947fail:
1948 if (ret == 0 && need_flush) {
1949 ret = bdrv_co_flush(bs);
1950 }
1951 qemu_vfree(buf);
1952 return ret;
1953}
1954
1955static inline int coroutine_fn
1956bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
1957 BdrvTrackedRequest *req, int flags)
1958{
1959 BlockDriverState *bs = child->bs;
1960
1961 bdrv_check_request(offset, bytes, &error_abort);
1962
1963 if (bdrv_is_read_only(bs)) {
1964 return -EPERM;
1965 }
1966
1967 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1968 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1969 assert(!(flags & ~BDRV_REQ_MASK));
1970 assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
1971
1972 if (flags & BDRV_REQ_SERIALISING) {
1973 QEMU_LOCK_GUARD(&bs->reqs_lock);
1974
1975 tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
1976
1977 if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
1978 return -EBUSY;
1979 }
1980
1981 bdrv_wait_serialising_requests_locked(req);
1982 } else {
1983 bdrv_wait_serialising_requests(req);
1984 }
1985
1986 assert(req->overlap_offset <= offset);
1987 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1988 assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
1989 child->perm & BLK_PERM_RESIZE);
1990
1991 switch (req->type) {
1992 case BDRV_TRACKED_WRITE:
1993 case BDRV_TRACKED_DISCARD:
1994 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1995 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1996 } else {
1997 assert(child->perm & BLK_PERM_WRITE);
1998 }
1999 bdrv_write_threshold_check_write(bs, offset, bytes);
2000 return 0;
2001 case BDRV_TRACKED_TRUNCATE:
2002 assert(child->perm & BLK_PERM_RESIZE);
2003 return 0;
2004 default:
2005 abort();
2006 }
2007}
2008
2009static inline void coroutine_fn
2010bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
2011 BdrvTrackedRequest *req, int ret)
2012{
2013 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
2014 BlockDriverState *bs = child->bs;
2015
2016 bdrv_check_request(offset, bytes, &error_abort);
2017
2018 qatomic_inc(&bs->write_gen);
2019
2020
2021
2022
2023
2024
2025
2026
2027 if (ret == 0 &&
2028 (req->type == BDRV_TRACKED_TRUNCATE ||
2029 end_sector > bs->total_sectors) &&
2030 req->type != BDRV_TRACKED_DISCARD) {
2031 bs->total_sectors = end_sector;
2032 bdrv_parent_cb_resize(bs);
2033 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
2034 }
2035 if (req->bytes) {
2036 switch (req->type) {
2037 case BDRV_TRACKED_WRITE:
2038 stat64_max(&bs->wr_highest_offset, offset + bytes);
2039
2040 case BDRV_TRACKED_DISCARD:
2041 bdrv_set_dirty(bs, offset, bytes);
2042 break;
2043 default:
2044 break;
2045 }
2046 }
2047}
2048
2049
2050
2051
2052
2053static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
2054 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
2055 int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
2056 BdrvRequestFlags flags)
2057{
2058 BlockDriverState *bs = child->bs;
2059 BlockDriver *drv = bs->drv;
2060 int ret;
2061
2062 int64_t bytes_remaining = bytes;
2063 int max_transfer;
2064
2065 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
2066
2067 if (!drv) {
2068 return -ENOMEDIUM;
2069 }
2070
2071 if (bdrv_has_readonly_bitmaps(bs)) {
2072 return -EPERM;
2073 }
2074
2075 assert(is_power_of_2(align));
2076 assert((offset & (align - 1)) == 0);
2077 assert((bytes & (align - 1)) == 0);
2078 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
2079 align);
2080
2081 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
2082
2083 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
2084 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
2085 qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
2086 flags |= BDRV_REQ_ZERO_WRITE;
2087 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
2088 flags |= BDRV_REQ_MAY_UNMAP;
2089 }
2090
2091
2092 flags &= ~BDRV_REQ_REGISTERED_BUF;
2093 }
2094
2095 if (ret < 0) {
2096
2097 } else if (flags & BDRV_REQ_ZERO_WRITE) {
2098 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
2099 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
2100 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
2101 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
2102 qiov, qiov_offset);
2103 } else if (bytes <= max_transfer) {
2104 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2105 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
2106 } else {
2107 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2108 while (bytes_remaining) {
2109 int num = MIN(bytes_remaining, max_transfer);
2110 int local_flags = flags;
2111
2112 assert(num);
2113 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
2114 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
2115
2116
2117 local_flags &= ~BDRV_REQ_FUA;
2118 }
2119
2120 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
2121 num, qiov,
2122 qiov_offset + bytes - bytes_remaining,
2123 local_flags);
2124 if (ret < 0) {
2125 break;
2126 }
2127 bytes_remaining -= num;
2128 }
2129 }
2130 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
2131
2132 if (ret >= 0) {
2133 ret = 0;
2134 }
2135 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
2136
2137 return ret;
2138}
2139
2140static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
2141 int64_t offset,
2142 int64_t bytes,
2143 BdrvRequestFlags flags,
2144 BdrvTrackedRequest *req)
2145{
2146 BlockDriverState *bs = child->bs;
2147 QEMUIOVector local_qiov;
2148 uint64_t align = bs->bl.request_alignment;
2149 int ret = 0;
2150 bool padding;
2151 BdrvRequestPadding pad;
2152
2153
2154 flags &= ~BDRV_REQ_REGISTERED_BUF;
2155
2156 padding = bdrv_init_padding(bs, offset, bytes, &pad);
2157 if (padding) {
2158 assert(!(flags & BDRV_REQ_NO_WAIT));
2159 bdrv_make_request_serialising(req, align);
2160
2161 bdrv_padding_rmw_read(child, req, &pad, true);
2162
2163 if (pad.head || pad.merge_reads) {
2164 int64_t aligned_offset = offset & ~(align - 1);
2165 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2166
2167 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2168 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2169 align, &local_qiov, 0,
2170 flags & ~BDRV_REQ_ZERO_WRITE);
2171 if (ret < 0 || pad.merge_reads) {
2172
2173 goto out;
2174 }
2175 offset += write_bytes - pad.head;
2176 bytes -= write_bytes - pad.head;
2177 }
2178 }
2179
2180 assert(!bytes || (offset & (align - 1)) == 0);
2181 if (bytes >= align) {
2182
2183 int64_t aligned_bytes = bytes & ~(align - 1);
2184 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2185 NULL, 0, flags);
2186 if (ret < 0) {
2187 goto out;
2188 }
2189 bytes -= aligned_bytes;
2190 offset += aligned_bytes;
2191 }
2192
2193 assert(!bytes || (offset & (align - 1)) == 0);
2194 if (bytes) {
2195 assert(align == pad.tail + bytes);
2196
2197 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2198 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2199 &local_qiov, 0,
2200 flags & ~BDRV_REQ_ZERO_WRITE);
2201 }
2202
2203out:
2204 bdrv_padding_destroy(&pad);
2205
2206 return ret;
2207}
2208
2209
2210
2211
2212int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2213 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
2214 BdrvRequestFlags flags)
2215{
2216 IO_CODE();
2217 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2218}
2219
2220int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2221 int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
2222 BdrvRequestFlags flags)
2223{
2224 BlockDriverState *bs = child->bs;
2225 BdrvTrackedRequest req;
2226 uint64_t align = bs->bl.request_alignment;
2227 BdrvRequestPadding pad;
2228 int ret;
2229 bool padded = false;
2230 IO_CODE();
2231
2232 trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
2233
2234 if (!bdrv_is_inserted(bs)) {
2235 return -ENOMEDIUM;
2236 }
2237
2238 if (flags & BDRV_REQ_ZERO_WRITE) {
2239 ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
2240 } else {
2241 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
2242 }
2243 if (ret < 0) {
2244 return ret;
2245 }
2246
2247
2248 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2249 !QEMU_IS_ALIGNED(offset | bytes, align))
2250 {
2251 return -ENOTSUP;
2252 }
2253
2254 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
2255
2256
2257
2258
2259
2260
2261
2262
2263 return 0;
2264 }
2265
2266 if (!(flags & BDRV_REQ_ZERO_WRITE)) {
2267
2268
2269
2270
2271
2272 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
2273 &padded, &flags);
2274 if (ret < 0) {
2275 return ret;
2276 }
2277 }
2278
2279 bdrv_inc_in_flight(bs);
2280 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2281
2282 if (flags & BDRV_REQ_ZERO_WRITE) {
2283 assert(!padded);
2284 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2285 goto out;
2286 }
2287
2288 if (padded) {
2289
2290
2291
2292
2293
2294
2295 assert(!(flags & BDRV_REQ_NO_WAIT));
2296 bdrv_make_request_serialising(&req, align);
2297 bdrv_padding_rmw_read(child, &req, &pad, false);
2298 }
2299
2300 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2301 qiov, qiov_offset, flags);
2302
2303 bdrv_padding_destroy(&pad);
2304
2305out:
2306 tracked_request_end(&req);
2307 bdrv_dec_in_flight(bs);
2308
2309 return ret;
2310}
2311
2312int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2313 int64_t bytes, BdrvRequestFlags flags)
2314{
2315 IO_CODE();
2316 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2317
2318 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2319 flags &= ~BDRV_REQ_MAY_UNMAP;
2320 }
2321
2322 return bdrv_co_pwritev(child, offset, bytes, NULL,
2323 BDRV_REQ_ZERO_WRITE | flags);
2324}
2325
2326
2327
2328
2329int bdrv_flush_all(void)
2330{
2331 BdrvNextIterator it;
2332 BlockDriverState *bs = NULL;
2333 int result = 0;
2334
2335 GLOBAL_STATE_CODE();
2336
2337
2338
2339
2340
2341
2342 if (replay_events_enabled()) {
2343 return result;
2344 }
2345
2346 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2347 AioContext *aio_context = bdrv_get_aio_context(bs);
2348 int ret;
2349
2350 aio_context_acquire(aio_context);
2351 ret = bdrv_flush(bs);
2352 if (ret < 0 && !result) {
2353 result = ret;
2354 }
2355 aio_context_release(aio_context);
2356 }
2357
2358 return result;
2359}
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2389 bool want_zero,
2390 int64_t offset, int64_t bytes,
2391 int64_t *pnum, int64_t *map,
2392 BlockDriverState **file)
2393{
2394 int64_t total_size;
2395 int64_t n;
2396 int ret;
2397 int64_t local_map = 0;
2398 BlockDriverState *local_file = NULL;
2399 int64_t aligned_offset, aligned_bytes;
2400 uint32_t align;
2401 bool has_filtered_child;
2402
2403 assert(pnum);
2404 *pnum = 0;
2405 total_size = bdrv_getlength(bs);
2406 if (total_size < 0) {
2407 ret = total_size;
2408 goto early_out;
2409 }
2410
2411 if (offset >= total_size) {
2412 ret = BDRV_BLOCK_EOF;
2413 goto early_out;
2414 }
2415 if (!bytes) {
2416 ret = 0;
2417 goto early_out;
2418 }
2419
2420 n = total_size - offset;
2421 if (n < bytes) {
2422 bytes = n;
2423 }
2424
2425
2426 assert(bs->drv);
2427 has_filtered_child = bdrv_filter_child(bs);
2428 if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
2429 *pnum = bytes;
2430 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2431 if (offset + bytes == total_size) {
2432 ret |= BDRV_BLOCK_EOF;
2433 }
2434 if (bs->drv->protocol_name) {
2435 ret |= BDRV_BLOCK_OFFSET_VALID;
2436 local_map = offset;
2437 local_file = bs;
2438 }
2439 goto early_out;
2440 }
2441
2442 bdrv_inc_in_flight(bs);
2443
2444
2445 align = bs->bl.request_alignment;
2446 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2447 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2448
2449 if (bs->drv->bdrv_co_block_status) {
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471 if (QLIST_EMPTY(&bs->children) &&
2472 bdrv_bsc_is_data(bs, aligned_offset, pnum))
2473 {
2474 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
2475 local_file = bs;
2476 local_map = aligned_offset;
2477 } else {
2478 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2479 aligned_bytes, pnum, &local_map,
2480 &local_file);
2481
            /*
             * Note that checking QLIST_EMPTY(&bs->children) is also done when
             * the cache is queried above.  Technically, we do not need to
             * check it here; the worst that can happen is that we fill the
             * cache for non-protocol nodes, and then it is never used.
             * However, filling the cache requires an RCU update, so double
             * check here to avoid such an update if possible.
             *
             * Check want_zero, because we only want to update the cache when
             * we have accurate information about what is zero and what is
             * data.
             */
2493 if (want_zero &&
2494 ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
2495 QLIST_EMPTY(&bs->children))
2496 {
                /*
                 * A leaf node that reports the whole range as data with a
                 * valid offset must map it onto itself at the very same
                 * offset (asserted below), so the result can simply be
                 * entered into the block-status cache.
                 */
2508 assert(local_file == bs);
2509 assert(local_map == aligned_offset);
2510 bdrv_bsc_fill(bs, aligned_offset, *pnum);
2511 }
2512 }
2513 } else {
        /* Default code for filters */
2515
2516 local_file = bdrv_filter_bs(bs);
2517 assert(local_file);
2518
2519 *pnum = aligned_bytes;
2520 local_map = aligned_offset;
2521 ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2522 }
2523 if (ret < 0) {
2524 *pnum = 0;
2525 goto out;
2526 }
2527
    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
2532 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2533 align > offset - aligned_offset);
2534 if (ret & BDRV_BLOCK_RECURSE) {
2535 assert(ret & BDRV_BLOCK_DATA);
2536 assert(ret & BDRV_BLOCK_OFFSET_VALID);
2537 assert(!(ret & BDRV_BLOCK_ZERO));
2538 }
2539
2540 *pnum -= offset - aligned_offset;
2541 if (*pnum > bytes) {
2542 *pnum = bytes;
2543 }
2544 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2545 local_map += offset - aligned_offset;
2546 }
2547
2548 if (ret & BDRV_BLOCK_RAW) {
2549 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2550 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2551 *pnum, pnum, &local_map, &local_file);
2552 goto out;
2553 }
2554
2555 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2556 ret |= BDRV_BLOCK_ALLOCATED;
2557 } else if (bs->drv->supports_backing) {
2558 BlockDriverState *cow_bs = bdrv_cow_bs(bs);
2559
2560 if (!cow_bs) {
2561 ret |= BDRV_BLOCK_ZERO;
2562 } else if (want_zero) {
2563 int64_t size2 = bdrv_getlength(cow_bs);
2564
2565 if (size2 >= 0 && offset >= size2) {
2566 ret |= BDRV_BLOCK_ZERO;
2567 }
2568 }
2569 }
2570
2571 if (want_zero && ret & BDRV_BLOCK_RECURSE &&
2572 local_file && local_file != bs &&
2573 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2574 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2575 int64_t file_pnum;
2576 int ret2;
2577
2578 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2579 *pnum, &file_pnum, NULL, NULL);
2580 if (ret2 >= 0) {
            /*
             * Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
2584 if (ret2 & BDRV_BLOCK_EOF &&
2585 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
2591 ret |= BDRV_BLOCK_ZERO;
2592 } else {
                /* Limit request to the range reported by the protocol driver */
2594 *pnum = file_pnum;
2595 ret |= (ret2 & BDRV_BLOCK_ZERO);
2596 }
2597 }
2598 }
2599
2600out:
2601 bdrv_dec_in_flight(bs);
2602 if (ret >= 0 && offset + *pnum == total_size) {
2603 ret |= BDRV_BLOCK_EOF;
2604 }
2605early_out:
2606 if (file) {
2607 *file = local_file;
2608 }
2609 if (map) {
2610 *map = local_map;
2611 }
2612 return ret;
2613}
2614
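/*
 * Like bdrv_co_block_status(), but walks down the filter/backing chain of @bs
 * until an allocated region is found or @base is reached (@base itself is only
 * queried if @include_base is true).  If @depth is non-NULL, it is set to the
 * number of nodes that were queried, with 1 meaning @bs itself.
 */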
2615int coroutine_fn
2616bdrv_co_common_block_status_above(BlockDriverState *bs,
2617 BlockDriverState *base,
2618 bool include_base,
2619 bool want_zero,
2620 int64_t offset,
2621 int64_t bytes,
2622 int64_t *pnum,
2623 int64_t *map,
2624 BlockDriverState **file,
2625 int *depth)
2626{
2627 int ret;
2628 BlockDriverState *p;
2629 int64_t eof = 0;
2630 int dummy;
2631 IO_CODE();
2632
2633 assert(!include_base || base);
2634
2635 if (!depth) {
2636 depth = &dummy;
2637 }
2638 *depth = 0;
2639
2640 if (!include_base && bs == base) {
2641 *pnum = bytes;
2642 return 0;
2643 }
2644
2645 ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
2646 ++*depth;
2647 if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
2648 return ret;
2649 }
2650
2651 if (ret & BDRV_BLOCK_EOF) {
2652 eof = offset + *pnum;
2653 }
2654
2655 assert(*pnum <= bytes);
2656 bytes = *pnum;
2657
2658 for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
2659 p = bdrv_filter_or_cow_bs(p))
2660 {
2661 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2662 file);
2663 ++*depth;
2664 if (ret < 0) {
2665 return ret;
2666 }
2667 if (*pnum == 0) {
            /*
             * The top layer deferred to this layer, and because this layer is
             * short, any zeroes that we synthesize beyond EOF behave as if
             * they were allocated at this layer.
             *
             * We don't set BDRV_BLOCK_EOF in ret here, because the caller's
             * layer may be larger; the top layer's EOF is re-applied at the
             * end of the function if needed.
             */
2677 assert(ret & BDRV_BLOCK_EOF);
2678 *pnum = bytes;
2679 if (file) {
2680 *file = p;
2681 }
2682 ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
2683 break;
2684 }
2685 if (ret & BDRV_BLOCK_ALLOCATED) {
            /*
             * We've found the node and the status, we must break.
             *
             * Drop BDRV_BLOCK_EOF, because it refers to this (possibly
             * shorter) node; the top layer's EOF is restored at the end of
             * the function if applicable.
             */
2693 ret &= ~BDRV_BLOCK_EOF;
2694 break;
2695 }
2696
2697 if (p == base) {
2698 assert(include_base);
2699 break;
2700 }
2701
        /*
         * OK, [offset, offset + *pnum) region is unallocated on this layer,
         * let's continue the diving.
         */
2706 assert(*pnum <= bytes);
2707 bytes = *pnum;
2708 }
2709
2710 if (offset + *pnum == eof) {
2711 ret |= BDRV_BLOCK_EOF;
2712 }
2713
2714 return ret;
2715}
2716
2717int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2718 int64_t offset, int64_t bytes, int64_t *pnum,
2719 int64_t *map, BlockDriverState **file)
2720{
2721 IO_CODE();
2722 return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
2723 pnum, map, file, NULL);
2724}
2725
2726int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2727 int64_t *pnum, int64_t *map, BlockDriverState **file)
2728{
2729 IO_CODE();
2730 return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
2731 offset, bytes, pnum, map, file);
2732}
2733
/*
 * Check @bs (and its backing chain) to see if the range defined
 * by @offset and @bytes is known to read as zeroes.
 * Return 1 if that is the case, 0 otherwise and -errno on error.
 * This test is meant to be fast rather than accurate so returning 0
 * does not guarantee non-zero data.
 */
2741int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
2742 int64_t bytes)
2743{
2744 int ret;
2745 int64_t pnum = bytes;
2746 IO_CODE();
2747
2748 if (!bytes) {
2749 return 1;
2750 }
2751
2752 ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
2753 bytes, &pnum, NULL, NULL, NULL);
2754
2755 if (ret < 0) {
2756 return ret;
2757 }
2758
2759 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
2760}
2761
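/*
 * Return 1 if (a prefix of) the given range is allocated in @bs itself (i.e.
 * without looking at its backing file), 0 otherwise, and -errno on error.
 * If @pnum is non-NULL, it is set to the number of bytes for which the
 * answer is valid.
 */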
2762int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
2763 int64_t *pnum)
2764{
2765 int ret;
2766 int64_t dummy;
2767 IO_CODE();
2768
2769 ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
2770 bytes, pnum ? pnum : &dummy, NULL,
2771 NULL, NULL);
2772 if (ret < 0) {
2773 return ret;
2774 }
2775 return !!(ret & BDRV_BLOCK_ALLOCATED);
2776}
2777
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return a positive depth if (a prefix of) the given range is allocated
 * in any image between BASE and TOP (BASE is only included if include_base
 * is set).  Depth 1 is TOP, 2 is the first backing layer, and so forth.
 *
 * Return 0 if the given range is unallocated in the whole chain, or a
 * negative errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in which
 * case 'pnum' may be increased by the caller).
 */
2795int bdrv_is_allocated_above(BlockDriverState *top,
2796 BlockDriverState *base,
2797 bool include_base, int64_t offset,
2798 int64_t bytes, int64_t *pnum)
2799{
2800 int depth;
2801 int ret = bdrv_common_block_status_above(top, base, include_base, false,
2802 offset, bytes, pnum, NULL, NULL,
2803 &depth);
2804 IO_CODE();
2805 if (ret < 0) {
2806 return ret;
2807 }
2808
2809 if (ret & BDRV_BLOCK_ALLOCATED) {
2810 return depth;
2811 }
2812 return 0;
2813}
2814
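/*
 * Read back VM state that was stored with bdrv_co_writev_vmstate() from @bs
 * at position @pos.  If the driver does not implement the operation itself,
 * the request is forwarded to the primary child (e.g. the protocol node under
 * a format driver); -ENOTSUP is returned if nobody can handle it.
 */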
2815int coroutine_fn
2816bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2817{
2818 BlockDriver *drv = bs->drv;
2819 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2820 int ret;
2821 IO_CODE();
2822
2823 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2824 if (ret < 0) {
2825 return ret;
2826 }
2827
2828 if (!drv) {
2829 return -ENOMEDIUM;
2830 }
2831
2832 bdrv_inc_in_flight(bs);
2833
2834 if (drv->bdrv_load_vmstate) {
2835 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2836 } else if (child_bs) {
2837 ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
2838 } else {
2839 ret = -ENOTSUP;
2840 }
2841
2842 bdrv_dec_in_flight(bs);
2843
2844 return ret;
2845}
2846
2847int coroutine_fn
2848bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2849{
2850 BlockDriver *drv = bs->drv;
2851 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2852 int ret;
2853 IO_CODE();
2854
2855 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2856 if (ret < 0) {
2857 return ret;
2858 }
2859
2860 if (!drv) {
2861 return -ENOMEDIUM;
2862 }
2863
2864 bdrv_inc_in_flight(bs);
2865
2866 if (drv->bdrv_save_vmstate) {
2867 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2868 } else if (child_bs) {
2869 ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
2870 } else {
2871 ret = -ENOTSUP;
2872 }
2873
2874 bdrv_dec_in_flight(bs);
2875
2876 return ret;
2877}
2878
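/*
 * Convenience wrappers around bdrv_writev_vmstate()/bdrv_readv_vmstate() that
 * operate on a plain buffer and return @size on success or -errno on failure.
 */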
2879int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2880 int64_t pos, int size)
2881{
2882 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2883 int ret = bdrv_writev_vmstate(bs, &qiov, pos);
2884 IO_CODE();
2885
2886 return ret < 0 ? ret : size;
2887}
2888
2889int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2890 int64_t pos, int size)
2891{
2892 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2893 int ret = bdrv_readv_vmstate(bs, &qiov, pos);
2894 IO_CODE();
2895
2896 return ret < 0 ? ret : size;
2897}
2898
/*
 * Synchronously cancel an AIO request: ask for cancellation and then poll the
 * request's AioContext until its completion callback has run.
 */
2902void bdrv_aio_cancel(BlockAIOCB *acb)
2903{
2904 IO_CODE();
2905 qemu_aio_ref(acb);
2906 bdrv_aio_cancel_async(acb);
2907 while (acb->refcnt > 1) {
2908 if (acb->aiocb_info->get_aio_context) {
2909 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2910 } else if (acb->bs) {
            /*
             * qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
2915 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2916 aio_poll(bdrv_get_aio_context(acb->bs), true);
2917 } else {
2918 abort();
2919 }
2920 }
2921 qemu_aio_unref(acb);
2922}
2923
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
2927void bdrv_aio_cancel_async(BlockAIOCB *acb)
2928{
2929 IO_CODE();
2930 if (acb->aiocb_info->cancel_async) {
2931 acb->aiocb_info->cancel_async(acb);
2932 }
2933}
2934
/*
 * Flush cached data of @bs to stable storage: either through the driver's
 * single flush callback, or via flush-to-OS and flush-to-disk followed by a
 * recursive flush of all children with write permission.
 */
2938int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2939{
2940 BdrvChild *primary_child = bdrv_primary_child(bs);
2941 BdrvChild *child;
2942 int current_gen;
2943 int ret = 0;
2944 IO_CODE();
2945
2946 bdrv_inc_in_flight(bs);
2947
2948 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2949 bdrv_is_sg(bs)) {
2950 goto early_exit;
2951 }
2952
2953 qemu_co_mutex_lock(&bs->reqs_lock);
2954 current_gen = qatomic_read(&bs->write_gen);
2955
    /* Wait until any previous flushes are completed */
2957 while (bs->active_flush_req) {
2958 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
2959 }
2960
    /* Flushes reach this point in nondecreasing current_gen order.  */
2962 bs->active_flush_req = true;
2963 qemu_co_mutex_unlock(&bs->reqs_lock);
2964
    /* Write back all layers by calling one driver function */
2966 if (bs->drv->bdrv_co_flush) {
2967 ret = bs->drv->bdrv_co_flush(bs);
2968 goto out;
2969 }
2970
    /* Write back cached data to the OS even with cache=unsafe */
2972 BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
2973 if (bs->drv->bdrv_co_flush_to_os) {
2974 ret = bs->drv->bdrv_co_flush_to_os(bs);
2975 if (ret < 0) {
2976 goto out;
2977 }
2978 }
2979
    /* But don't actually force it to the disk with cache=unsafe */
2981 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2982 goto flush_children;
2983 }
2984
    /* Check if we really need to flush anything */
2986 if (bs->flushed_gen == current_gen) {
2987 goto flush_children;
2988 }
2989
2990 BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
2991 if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
2994 ret = -ENOMEDIUM;
2995 goto out;
2996 }
2997 if (bs->drv->bdrv_co_flush_to_disk) {
2998 ret = bs->drv->bdrv_co_flush_to_disk(bs);
2999 } else if (bs->drv->bdrv_aio_flush) {
3000 BlockAIOCB *acb;
3001 CoroutineIOCompletion co = {
3002 .coroutine = qemu_coroutine_self(),
3003 };
3004
3005 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3006 if (acb == NULL) {
3007 ret = -EIO;
3008 } else {
3009 qemu_coroutine_yield();
3010 ret = co.ret;
3011 }
3012 } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
3024 ret = 0;
3025 }
3026
3027 if (ret < 0) {
3028 goto out;
3029 }
3030
    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
3034flush_children:
3035 ret = 0;
3036 QLIST_FOREACH(child, &bs->children, next) {
3037 if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
3038 int this_child_ret = bdrv_co_flush(child->bs);
3039 if (!ret) {
3040 ret = this_child_ret;
3041 }
3042 }
3043 }
3044
3045out:
    /* Notify any pending flushes that we have completed */
3047 if (ret == 0) {
3048 bs->flushed_gen = current_gen;
3049 }
3050
3051 qemu_co_mutex_lock(&bs->reqs_lock);
3052 bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
3054 qemu_co_queue_next(&bs->flush_queue);
3055 qemu_co_mutex_unlock(&bs->reqs_lock);
3056
3057early_exit:
3058 bdrv_dec_in_flight(bs);
3059 return ret;
3060}
3061
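/*
 * Discard (unmap) @bytes at @offset.  Discard is advisory: it is a no-op if
 * the node was not opened with BDRV_O_UNMAP or the driver does not implement
 * it, the request is fragmented according to the driver's discard alignment
 * and maximum discard size, and -ENOTSUP from the driver is ignored.
 */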
3062int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
3063 int64_t bytes)
3064{
3065 BdrvTrackedRequest req;
3066 int ret;
3067 int64_t max_pdiscard;
3068 int head, tail, align;
3069 BlockDriverState *bs = child->bs;
3070 IO_CODE();
3071
3072 if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
3073 return -ENOMEDIUM;
3074 }
3075
3076 if (bdrv_has_readonly_bitmaps(bs)) {
3077 return -EPERM;
3078 }
3079
3080 ret = bdrv_check_request(offset, bytes, NULL);
3081 if (ret < 0) {
3082 return ret;
3083 }
3084
    /* Do nothing if disabled.  */
3086 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3087 return 0;
3088 }
3089
3090 if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
3091 return 0;
3092 }
3093
    /* Invalidate the block-status cache for any range touched by this discard */
3095 bdrv_bsc_invalidate_range(bs, offset, bytes);
3096
    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
3102 align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
3103 assert(align % bs->bl.request_alignment == 0);
3104 head = offset % align;
3105 tail = (offset + bytes) % align;
3106
3107 bdrv_inc_in_flight(bs);
3108 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
3109
3110 ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
3111 if (ret < 0) {
3112 goto out;
3113 }
3114
3115 max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
3116 align);
3117 assert(max_pdiscard >= bs->bl.request_alignment);
3118
3119 while (bytes > 0) {
3120 int64_t num = bytes;
3121
3122 if (head) {
            /* Make small requests to get to alignment boundaries. */
3124 num = MIN(bytes, align - head);
3125 if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
3126 num %= bs->bl.request_alignment;
3127 }
3128 head = (head + num) % align;
3129 assert(num < max_pdiscard);
3130 } else if (tail) {
3131 if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
3133 num -= tail;
3134 } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
3135 tail > bs->bl.request_alignment) {
3136 tail %= bs->bl.request_alignment;
3137 num -= tail;
3138 }
3139 }
3140
3141 if (num > max_pdiscard) {
3142 num = max_pdiscard;
3143 }
3144
3145 if (!bs->drv) {
3146 ret = -ENOMEDIUM;
3147 goto out;
3148 }
3149 if (bs->drv->bdrv_co_pdiscard) {
3150 ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
3151 } else {
3152 BlockAIOCB *acb;
3153 CoroutineIOCompletion co = {
3154 .coroutine = qemu_coroutine_self(),
3155 };
3156
3157 acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
3158 bdrv_co_io_em_complete, &co);
3159 if (acb == NULL) {
3160 ret = -EIO;
3161 goto out;
3162 } else {
3163 qemu_coroutine_yield();
3164 ret = co.ret;
3165 }
3166 }
3167 if (ret && ret != -ENOTSUP) {
3168 goto out;
3169 }
3170
3171 offset += num;
3172 bytes -= num;
3173 }
3174 ret = 0;
3175out:
3176 bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
3177 tracked_request_end(&req);
3178 bdrv_dec_in_flight(bs);
3179 return ret;
3180}
3181
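/*
 * Issue a device-specific ioctl on @bs, using the driver's coroutine ioctl
 * callback if available and falling back to the AIO variant otherwise;
 * returns -ENOTSUP when neither is implemented.
 */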
3182int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
3183{
3184 BlockDriver *drv = bs->drv;
3185 CoroutineIOCompletion co = {
3186 .coroutine = qemu_coroutine_self(),
3187 };
3188 BlockAIOCB *acb;
3189 IO_CODE();
3190
3191 bdrv_inc_in_flight(bs);
3192 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
3193 co.ret = -ENOTSUP;
3194 goto out;
3195 }
3196
3197 if (drv->bdrv_co_ioctl) {
3198 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
3199 } else {
3200 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
3201 if (!acb) {
3202 co.ret = -ENOTSUP;
3203 goto out;
3204 }
3205 qemu_coroutine_yield();
3206 }
3207out:
3208 bdrv_dec_in_flight(bs);
3209 return co.ret;
3210}
3211
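/*
 * Buffer allocation helpers: return memory aligned to the node's optimal
 * memory alignment (bdrv_opt_mem_align()), suitable for O_DIRECT-style I/O.
 * The *0 variants additionally zero the buffer; the try_* variants return
 * NULL on allocation failure instead of aborting.  Free with qemu_vfree().
 *
 * Illustrative use (a sketch, not taken from this file):
 *
 *     uint8_t *buf = qemu_blockalign(bs, 64 * 1024);
 *     ... perform aligned I/O on buf ...
 *     qemu_vfree(buf);
 */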
3212void *qemu_blockalign(BlockDriverState *bs, size_t size)
3213{
3214 IO_CODE();
3215 return qemu_memalign(bdrv_opt_mem_align(bs), size);
3216}
3217
3218void *qemu_blockalign0(BlockDriverState *bs, size_t size)
3219{
3220 IO_CODE();
3221 return memset(qemu_blockalign(bs, size), 0, size);
3222}
3223
3224void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
3225{
3226 size_t align = bdrv_opt_mem_align(bs);
3227 IO_CODE();
3228
    /* Ensure that NULL is never returned on success */
3230 assert(align > 0);
3231 if (size == 0) {
3232 size = align;
3233 }
3234
3235 return qemu_try_memalign(align, size);
3236}
3237
3238void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
3239{
3240 void *mem = qemu_try_blockalign(bs, size);
3241 IO_CODE();
3242
3243 if (mem) {
3244 memset(mem, 0, size);
3245 }
3246
3247 return mem;
3248}
3249
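/*
 * Plug/unplug request submission for the whole subtree rooted at @bs.  While
 * plugged, drivers that implement the callbacks may batch submitted requests
 * and only flush them to the host on unplug; calls nest via the io_plugged
 * counter.
 */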
3250void bdrv_io_plug(BlockDriverState *bs)
3251{
3252 BdrvChild *child;
3253 IO_CODE();
3254
3255 QLIST_FOREACH(child, &bs->children, next) {
3256 bdrv_io_plug(child->bs);
3257 }
3258
3259 if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
3260 BlockDriver *drv = bs->drv;
3261 if (drv && drv->bdrv_io_plug) {
3262 drv->bdrv_io_plug(bs);
3263 }
3264 }
3265}
3266
3267void bdrv_io_unplug(BlockDriverState *bs)
3268{
3269 BdrvChild *child;
3270 IO_CODE();
3271
3272 assert(bs->io_plugged);
3273 if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
3274 BlockDriver *drv = bs->drv;
3275 if (drv && drv->bdrv_io_unplug) {
3276 drv->bdrv_io_unplug(bs);
3277 }
3278 }
3279
3280 QLIST_FOREACH(child, &bs->children, next) {
3281 bdrv_io_unplug(child->bs);
3282 }
3283}
3284
/* Helper that rolls back bdrv_register_buf() on the children registered so far */
3286static void bdrv_register_buf_rollback(BlockDriverState *bs,
3287 void *host,
3288 size_t size,
3289 BdrvChild *final_child)
3290{
3291 BdrvChild *child;
3292
3293 QLIST_FOREACH(child, &bs->children, next) {
3294 if (child == final_child) {
3295 break;
3296 }
3297
3298 bdrv_unregister_buf(child->bs, host, size);
3299 }
3300
3301 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3302 bs->drv->bdrv_unregister_buf(bs, host, size);
3303 }
3304}
3305
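/*
 * Register @host/@size as an I/O buffer with @bs and all of its children so
 * that drivers can prepare it for efficient I/O (e.g. pre-mapping).  If any
 * registration fails, the ones that already succeeded are rolled back and
 * false is returned.
 */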
3306bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
3307 Error **errp)
3308{
3309 BdrvChild *child;
3310
3311 GLOBAL_STATE_CODE();
3312 if (bs->drv && bs->drv->bdrv_register_buf) {
3313 if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
3314 return false;
3315 }
3316 }
3317 QLIST_FOREACH(child, &bs->children, next) {
3318 if (!bdrv_register_buf(child->bs, host, size, errp)) {
3319 bdrv_register_buf_rollback(bs, host, size, child);
3320 return false;
3321 }
3322 }
3323 return true;
3324}
3325
3326void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
3327{
3328 BdrvChild *child;
3329
3330 GLOBAL_STATE_CODE();
3331 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3332 bs->drv->bdrv_unregister_buf(bs, host, size);
3333 }
3334 QLIST_FOREACH(child, &bs->children, next) {
3335 bdrv_unregister_buf(child->bs, host, size);
3336 }
3337}
3338
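/*
 * Common implementation of bdrv_co_copy_range_from() and _to(): validate both
 * sides, divert zero writes to bdrv_co_pwrite_zeroes(), and track the request
 * on either the source or the destination node depending on @recurse_src.
 */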
3339static int coroutine_fn bdrv_co_copy_range_internal(
3340 BdrvChild *src, int64_t src_offset, BdrvChild *dst,
3341 int64_t dst_offset, int64_t bytes,
3342 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
3343 bool recurse_src)
3344{
3345 BdrvTrackedRequest req;
3346 int ret;
3347
    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
3349 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
3350 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
3351 assert(!(read_flags & BDRV_REQ_NO_WAIT));
3352 assert(!(write_flags & BDRV_REQ_NO_WAIT));
3353
3354 if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
3355 return -ENOMEDIUM;
3356 }
3357 ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
3358 if (ret) {
3359 return ret;
3360 }
3361 if (write_flags & BDRV_REQ_ZERO_WRITE) {
3362 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
3363 }
3364
3365 if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
3366 return -ENOMEDIUM;
3367 }
3368 ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
3369 if (ret) {
3370 return ret;
3371 }
3372
3373 if (!src->bs->drv->bdrv_co_copy_range_from
3374 || !dst->bs->drv->bdrv_co_copy_range_to
3375 || src->bs->encrypted || dst->bs->encrypted) {
3376 return -ENOTSUP;
3377 }
3378
3379 if (recurse_src) {
3380 bdrv_inc_in_flight(src->bs);
3381 tracked_request_begin(&req, src->bs, src_offset, bytes,
3382 BDRV_TRACKED_READ);
3383
        /* BDRV_REQ_SERIALISING is only for write operation */
3385 assert(!(read_flags & BDRV_REQ_SERIALISING));
3386 bdrv_wait_serialising_requests(&req);
3387
3388 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
3389 src, src_offset,
3390 dst, dst_offset,
3391 bytes,
3392 read_flags, write_flags);
3393
3394 tracked_request_end(&req);
3395 bdrv_dec_in_flight(src->bs);
3396 } else {
3397 bdrv_inc_in_flight(dst->bs);
3398 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3399 BDRV_TRACKED_WRITE);
3400 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3401 write_flags);
3402 if (!ret) {
3403 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3404 src, src_offset,
3405 dst, dst_offset,
3406 bytes,
3407 read_flags, write_flags);
3408 }
3409 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3410 tracked_request_end(&req);
3411 bdrv_dec_in_flight(dst->bs);
3412 }
3413
3414 return ret;
3415}
3416
/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
3421int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
3422 BdrvChild *dst, int64_t dst_offset,
3423 int64_t bytes,
3424 BdrvRequestFlags read_flags,
3425 BdrvRequestFlags write_flags)
3426{
3427 IO_CODE();
3428 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3429 read_flags, write_flags);
3430 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3431 bytes, read_flags, write_flags, true);
3432}
3433
/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
3438int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
3439 BdrvChild *dst, int64_t dst_offset,
3440 int64_t bytes,
3441 BdrvRequestFlags read_flags,
3442 BdrvRequestFlags write_flags)
3443{
3444 IO_CODE();
3445 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3446 read_flags, write_flags);
3447 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3448 bytes, read_flags, write_flags, false);
3449}
3450
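/*
 * Copy @bytes from @src to @dst, offloading to the drivers' copy-range
 * implementations where available; this public entry point simply starts the
 * operation on the source side.
 */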
3451int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
3452 BdrvChild *dst, int64_t dst_offset,
3453 int64_t bytes, BdrvRequestFlags read_flags,
3454 BdrvRequestFlags write_flags)
3455{
3456 IO_CODE();
3457 return bdrv_co_copy_range_from(src, src_offset,
3458 dst, dst_offset,
3459 bytes, read_flags, write_flags);
3460}
3461
3462static void bdrv_parent_cb_resize(BlockDriverState *bs)
3463{
3464 BdrvChild *c;
3465 QLIST_FOREACH(c, &bs->parents, next_parent) {
3466 if (c->klass->resize) {
3467 c->klass->resize(c);
3468 }
3469 }
3470}
3471
/*
 * Truncate file to 'offset' bytes (needed only for file protocols).
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as the given 'offset' (it may remain larger).
 */
3479int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
3480 PreallocMode prealloc, BdrvRequestFlags flags,
3481 Error **errp)
3482{
3483 BlockDriverState *bs = child->bs;
3484 BdrvChild *filtered, *backing;
3485 BlockDriver *drv = bs->drv;
3486 BdrvTrackedRequest req;
3487 int64_t old_size, new_bytes;
3488 int ret;
3489 IO_CODE();
3490
    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
3492 if (!drv) {
3493 error_setg(errp, "No medium inserted");
3494 return -ENOMEDIUM;
3495 }
3496 if (offset < 0) {
3497 error_setg(errp, "Image size cannot be negative");
3498 return -EINVAL;
3499 }
3500
3501 ret = bdrv_check_request(offset, 0, errp);
3502 if (ret < 0) {
3503 return ret;
3504 }
3505
3506 old_size = bdrv_getlength(bs);
3507 if (old_size < 0) {
3508 error_setg_errno(errp, -old_size, "Failed to get old image size");
3509 return old_size;
3510 }
3511
3512 if (bdrv_is_read_only(bs)) {
3513 error_setg(errp, "Image is read-only");
3514 return -EACCES;
3515 }
3516
3517 if (offset > old_size) {
3518 new_bytes = offset - old_size;
3519 } else {
3520 new_bytes = 0;
3521 }
3522
3523 bdrv_inc_in_flight(bs);
3524 tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
3525 BDRV_TRACKED_TRUNCATE);
3526
    /* If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
3530 if (new_bytes) {
3531 bdrv_make_request_serialising(&req, 1);
3532 }
3533 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
3534 0);
3535 if (ret < 0) {
3536 error_setg_errno(errp, -ret,
3537 "Failed to prepare request for truncation");
3538 goto out;
3539 }
3540
3541 filtered = bdrv_filter_child(bs);
3542 backing = bdrv_cow_child(bs);
3543
    /*
     * If the image has a backing file that is large enough that it would
     * provide data for the new area, we cannot leave it unallocated because
     * then the backing file content would become visible. Instead, zero-fill
     * the new area.
     *
     * Note that if the image has a backing file, but was opened without the
     * backing file, taking care of keeping things consistent with that backing
     * file is the user's responsibility.
     */
3554 if (new_bytes && backing) {
3555 int64_t backing_len;
3556
3557 backing_len = bdrv_getlength(backing->bs);
3558 if (backing_len < 0) {
3559 ret = backing_len;
3560 error_setg_errno(errp, -ret, "Could not get backing file size");
3561 goto out;
3562 }
3563
3564 if (backing_len > old_size) {
3565 flags |= BDRV_REQ_ZERO_WRITE;
3566 }
3567 }
3568
3569 if (drv->bdrv_co_truncate) {
3570 if (flags & ~bs->supported_truncate_flags) {
3571 error_setg(errp, "Block driver does not support requested flags");
3572 ret = -ENOTSUP;
3573 goto out;
3574 }
3575 ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
3576 } else if (filtered) {
3577 ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
3578 } else {
3579 error_setg(errp, "Image format driver does not support resize");
3580 ret = -ENOTSUP;
3581 goto out;
3582 }
3583 if (ret < 0) {
3584 goto out;
3585 }
3586
3587 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3588 if (ret < 0) {
3589 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3590 } else {
3591 offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3592 }
3593
    /*
     * It is possible that truncation succeeded but refreshing the total
     * sector count failed; that does not change how the request must be
     * finished, so pass 0 so that dirty bitmaps etc. are handled correctly.
     */
3596 bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
3597
3598out:
3599 tracked_request_end(&req);
3600 bdrv_dec_in_flight(bs);
3601
3602 return ret;
3603}
3604
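/*
 * Ask the driver to cancel all in-flight requests on @bs, if it implements
 * the callback; nodes without a driver or without the callback are silently
 * ignored.
 */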
3605void bdrv_cancel_in_flight(BlockDriverState *bs)
3606{
3607 GLOBAL_STATE_CODE();
3608 if (!bs || !bs->drv) {
3609 return;
3610 }
3611
3612 if (bs->drv->bdrv_cancel_in_flight) {
3613 bs->drv->bdrv_cancel_in_flight(bs);
3614 }
3615}
3616
3617int coroutine_fn
3618bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
3619 QEMUIOVector *qiov, size_t qiov_offset)
3620{
3621 BlockDriverState *bs = child->bs;
3622 BlockDriver *drv = bs->drv;
3623 int ret;
3624 IO_CODE();
3625
3626 if (!drv) {
3627 return -ENOMEDIUM;
3628 }
3629
3630 if (!drv->bdrv_co_preadv_snapshot) {
3631 return -ENOTSUP;
3632 }
3633
3634 bdrv_inc_in_flight(bs);
3635 ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
3636 bdrv_dec_in_flight(bs);
3637
3638 return ret;
3639}
3640
3641int coroutine_fn
3642bdrv_co_snapshot_block_status(BlockDriverState *bs,
3643 bool want_zero, int64_t offset, int64_t bytes,
3644 int64_t *pnum, int64_t *map,
3645 BlockDriverState **file)
3646{
3647 BlockDriver *drv = bs->drv;
3648 int ret;
3649 IO_CODE();
3650
3651 if (!drv) {
3652 return -ENOMEDIUM;
3653 }
3654
3655 if (!drv->bdrv_co_snapshot_block_status) {
3656 return -ENOTSUP;
3657 }
3658
3659 bdrv_inc_in_flight(bs);
3660 ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
3661 pnum, map, file);
3662 bdrv_dec_in_flight(bs);
3663
3664 return ret;
3665}
3666
3667int coroutine_fn
3668bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
3669{
3670 BlockDriver *drv = bs->drv;
3671 int ret;
3672 IO_CODE();
3673
3674 if (!drv) {
3675 return -ENOMEDIUM;
3676 }
3677
3678 if (!drv->bdrv_co_pdiscard_snapshot) {
3679 return -ENOTSUP;
3680 }
3681
3682 bdrv_inc_in_flight(bs);
3683 ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
3684 bdrv_dec_in_flight(bs);
3685
3686 return ret;
3687}
3688