#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Maximum bounce buffer for copy-on-read and write-zeroes requests, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);

47static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
48 bool ignore_bds_parents)
49{
50 BdrvChild *c, *next;
51
52 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
53 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
54 continue;
55 }
56 bdrv_parent_drained_begin_single(c, false);
57 }
58}
59
60static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
61 int *drained_end_counter)
62{
63 assert(c->parent_quiesce_counter > 0);
64 c->parent_quiesce_counter--;
65 if (c->role->drained_end) {
66 c->role->drained_end(c, drained_end_counter);
67 }
68}
69
70void bdrv_parent_drained_end_single(BdrvChild *c)
71{
72 int drained_end_counter = 0;
73 bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
74 BDRV_POLL_WHILE(c->bs, atomic_read(&drained_end_counter) > 0);
75}
76
77static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
78 bool ignore_bds_parents,
79 int *drained_end_counter)
80{
81 BdrvChild *c;
82
83 QLIST_FOREACH(c, &bs->parents, next_parent) {
84 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
85 continue;
86 }
87 bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
88 }
89}
90
91static bool bdrv_parent_drained_poll_single(BdrvChild *c)
92{
93 if (c->role->drained_poll) {
94 return c->role->drained_poll(c);
95 }
96 return false;
97}
98
99static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
100 bool ignore_bds_parents)
101{
102 BdrvChild *c, *next;
103 bool busy = false;
104
105 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
106 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
107 continue;
108 }
109 busy |= bdrv_parent_drained_poll_single(c);
110 }
111
112 return busy;
113}
114
115void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
116{
117 c->parent_quiesce_counter++;
118 if (c->role->drained_begin) {
119 c->role->drained_begin(c);
120 }
121 if (poll) {
122 BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
123 }
124}
125
126static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
127{
128 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
129 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
130 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
131 src->opt_mem_alignment);
132 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
133 src->min_mem_alignment);
134 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
135}
136
137void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
138{
139 BlockDriver *drv = bs->drv;
140 Error *local_err = NULL;
141
142 memset(&bs->bl, 0, sizeof(bs->bl));
143
144 if (!drv) {
145 return;
146 }

    /* Default alignment based on whether driver has byte interface */
149 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
150 drv->bdrv_aio_preadv ||
151 drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
154 if (bs->file) {
155 bdrv_refresh_limits(bs->file->bs, &local_err);
156 if (local_err) {
157 error_propagate(errp, local_err);
158 return;
159 }
160 bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
161 } else {
162 bs->bl.min_mem_alignment = 512;
163 bs->bl.opt_mem_alignment = qemu_real_host_page_size;

        /* Safe default since most protocols use readv()/writev()/etc */
166 bs->bl.max_iov = IOV_MAX;
167 }
168
169 if (bs->backing) {
170 bdrv_refresh_limits(bs->backing->bs, &local_err);
171 if (local_err) {
172 error_propagate(errp, local_err);
173 return;
174 }
175 bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
176 }

    /* Then let the driver override it */
179 if (drv->bdrv_refresh_limits) {
180 drv->bdrv_refresh_limits(bs, errp);
181 }
182}

/*
 * The copy-on-read flag is actually a reference count, so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
189void bdrv_enable_copy_on_read(BlockDriverState *bs)
190{
191 atomic_inc(&bs->copy_on_read);
192}
193
194void bdrv_disable_copy_on_read(BlockDriverState *bs)
195{
196 int old = atomic_fetch_dec(&bs->copy_on_read);
197 assert(old >= 1);
198}
199
200typedef struct {
201 Coroutine *co;
202 BlockDriverState *bs;
203 bool done;
204 bool begin;
205 bool recursive;
206 bool poll;
207 BdrvChild *parent;
208 bool ignore_bds_parents;
209 int *drained_end_counter;
210} BdrvCoDrainData;
211
212static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
213{
214 BdrvCoDrainData *data = opaque;
215 BlockDriverState *bs = data->bs;
216
217 if (data->begin) {
218 bs->drv->bdrv_co_drain_begin(bs);
219 } else {
220 bs->drv->bdrv_co_drain_end(bs);
221 }

    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
224 atomic_mb_set(&data->done, true);
225 if (!data->begin) {
226 atomic_dec(data->drained_end_counter);
227 }
228 bdrv_dec_in_flight(bs);
229
230 g_free(data);
231}

/* Invoke the BlockDriver's .bdrv_co_drain_begin/end callback for @bs, if any */
234static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
235 int *drained_end_counter)
236{
237 BdrvCoDrainData *data;
238
239 if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
240 (!begin && !bs->drv->bdrv_co_drain_end)) {
241 return;
242 }
243
244 data = g_new(BdrvCoDrainData, 1);
245 *data = (BdrvCoDrainData) {
246 .bs = bs,
247 .done = false,
248 .begin = begin,
249 .drained_end_counter = drained_end_counter,
250 };
251
252 if (!begin) {
253 atomic_inc(drained_end_counter);
254 }

    /* Make sure the driver callback completes during the polling phase for
     * drain_begin. */
258 bdrv_inc_in_flight(bs);
259 data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
260 aio_co_schedule(bdrv_get_aio_context(bs), data->co);
261}

/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
264bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
265 BdrvChild *ignore_parent, bool ignore_bds_parents)
266{
267 BdrvChild *child, *next;
268
269 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
270 return true;
271 }
272
273 if (atomic_read(&bs->in_flight)) {
274 return true;
275 }
276
277 if (recursive) {
278 assert(!ignore_bds_parents);
279 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
280 if (bdrv_drain_poll(child->bs, recursive, child, false)) {
281 return true;
282 }
283 }
284 }
285
286 return false;
287}
288
289static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
290 BdrvChild *ignore_parent)
291{
292 return bdrv_drain_poll(bs, recursive, ignore_parent, false);
293}
294
295static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
296 BdrvChild *parent, bool ignore_bds_parents,
297 bool poll);
298static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
299 BdrvChild *parent, bool ignore_bds_parents,
300 int *drained_end_counter);
301
302static void bdrv_co_drain_bh_cb(void *opaque)
303{
304 BdrvCoDrainData *data = opaque;
305 Coroutine *co = data->co;
306 BlockDriverState *bs = data->bs;
307
308 if (bs) {
309 AioContext *ctx = bdrv_get_aio_context(bs);
310 AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
318 if (ctx == co_ctx) {
319 aio_context_acquire(ctx);
320 }
321 bdrv_dec_in_flight(bs);
322 if (data->begin) {
323 assert(!data->drained_end_counter);
324 bdrv_do_drained_begin(bs, data->recursive, data->parent,
325 data->ignore_bds_parents, data->poll);
326 } else {
327 assert(!data->poll);
328 bdrv_do_drained_end(bs, data->recursive, data->parent,
329 data->ignore_bds_parents,
330 data->drained_end_counter);
331 }
332 if (ctx == co_ctx) {
333 aio_context_release(ctx);
334 }
335 } else {
336 assert(data->begin);
337 bdrv_drain_all_begin();
338 }
339
340 data->done = true;
341 aio_co_wake(co);
342}
343
344static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
345 bool begin, bool recursive,
346 BdrvChild *parent,
347 bool ignore_bds_parents,
348 bool poll,
349 int *drained_end_counter)
350{
351 BdrvCoDrainData data;

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter(). */
356 assert(qemu_in_coroutine());
357 data = (BdrvCoDrainData) {
358 .co = qemu_coroutine_self(),
359 .bs = bs,
360 .done = false,
361 .begin = begin,
362 .recursive = recursive,
363 .parent = parent,
364 .ignore_bds_parents = ignore_bds_parents,
365 .poll = poll,
366 .drained_end_counter = drained_end_counter,
367 };
368
369 if (bs) {
370 bdrv_inc_in_flight(bs);
371 }
372 replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
373 bdrv_co_drain_bh_cb, &data);
374
375 qemu_coroutine_yield();

    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
378 assert(data.done);
379}
380
381void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
382 BdrvChild *parent, bool ignore_bds_parents)
383{
384 assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
387 if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
388 aio_disable_external(bdrv_get_aio_context(bs));
389 }
390
391 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
392 bdrv_drain_invoke(bs, true, NULL);
393}
394
395static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
396 BdrvChild *parent, bool ignore_bds_parents,
397 bool poll)
398{
399 BdrvChild *child, *next;
400
401 if (qemu_in_coroutine()) {
402 bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
403 poll, NULL);
404 return;
405 }
406
407 bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
408
409 if (recursive) {
410 assert(!ignore_bds_parents);
411 bs->recursive_quiesce_counter++;
412 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
413 bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
414 false);
415 }
416 }

    /*
     * Wait for drained requests to finish.
     *
     * Polling the top-level node alone is sufficient: bdrv_drain_poll_top_level()
     * (via bdrv_drain_poll()) recurses into the children itself, so a single
     * BDRV_POLL_WHILE() covers the whole subtree.
     */
427 if (poll) {
428 assert(!ignore_bds_parents);
429 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
430 }
431}
432
433void bdrv_drained_begin(BlockDriverState *bs)
434{
435 bdrv_do_drained_begin(bs, false, NULL, false, true);
436}
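
/*
 * Illustrative sketch (not part of this file): a caller that needs all I/O on
 * a node quiesced, e.g. around a graph or device configuration change,
 * brackets the critical section with the drained-section API defined above:
 *
 *     bdrv_drained_begin(bs);
 *     ... no new requests start and none are in flight here ...
 *     bdrv_drained_end(bs);
 *
 * bdrv_subtree_drained_begin()/bdrv_subtree_drained_end() below work the same
 * way but also quiesce every node in the subtree under @bs.
 */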
437
438void bdrv_subtree_drained_begin(BlockDriverState *bs)
439{
440 bdrv_do_drained_begin(bs, true, NULL, false, true);
441}

/*
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee will be incremented
 * once for every background operation scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must remain valid
 * until the pointee reaches 0.  That implies that whoever sets up the
 * pointee has to poll until it is 0.
 *
 * *drained_end_counter is accessed with atomic operations because the
 * subgraph of @bs may span multiple AioContexts, and bdrv_drain_all_end()
 * uses a single counter for all nodes regardless of their AioContext.
 */
457static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
458 BdrvChild *parent, bool ignore_bds_parents,
459 int *drained_end_counter)
460{
461 BdrvChild *child;
462 int old_quiesce_counter;
463
464 assert(drained_end_counter != NULL);
465
466 if (qemu_in_coroutine()) {
467 bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
468 false, drained_end_counter);
469 return;
470 }
471 assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
474 bdrv_drain_invoke(bs, false, drained_end_counter);
475 bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
476 drained_end_counter);
477
478 old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
479 if (old_quiesce_counter == 1) {
480 aio_enable_external(bdrv_get_aio_context(bs));
481 }
482
483 if (recursive) {
484 assert(!ignore_bds_parents);
485 bs->recursive_quiesce_counter--;
486 QLIST_FOREACH(child, &bs->children, next) {
487 bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
488 drained_end_counter);
489 }
490 }
491}
492
493void bdrv_drained_end(BlockDriverState *bs)
494{
495 int drained_end_counter = 0;
496 bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
497 BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
498}
499
500void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
501{
502 bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
503}
504
505void bdrv_subtree_drained_end(BlockDriverState *bs)
506{
507 int drained_end_counter = 0;
508 bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
509 BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
510}
511
512void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
513{
514 int i;
515
516 for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
517 bdrv_do_drained_begin(child->bs, true, child, false, true);
518 }
519}
520
521void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
522{
523 int drained_end_counter = 0;
524 int i;
525
526 for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
527 bdrv_do_drained_end(child->bs, true, child, false,
528 &drained_end_counter);
529 }
530
531 BDRV_POLL_WHILE(child->bs, atomic_read(&drained_end_counter) > 0);
532}

/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until the next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the
 * BlockDriverState's AioContext.
 */
541void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
542{
543 assert(qemu_in_coroutine());
544 bdrv_drained_begin(bs);
545 bdrv_drained_end(bs);
546}
547
548void bdrv_drain(BlockDriverState *bs)
549{
550 bdrv_drained_begin(bs);
551 bdrv_drained_end(bs);
552}
553
554static void bdrv_drain_assert_idle(BlockDriverState *bs)
555{
556 BdrvChild *child, *next;
557
558 assert(atomic_read(&bs->in_flight) == 0);
559 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
560 bdrv_drain_assert_idle(child->bs);
561 }
562}
563
564unsigned int bdrv_drain_all_count = 0;
565
566static bool bdrv_drain_all_poll(void)
567{
568 BlockDriverState *bs = NULL;
569 bool result = false;

    /* bdrv_drain_poll() can't make changes to the graph and we hold the BQL,
     * so iterating bdrv_next_all_states() is safe here. */
573 while ((bs = bdrv_next_all_states(bs))) {
574 AioContext *aio_context = bdrv_get_aio_context(bs);
575 aio_context_acquire(aio_context);
576 result |= bdrv_drain_poll(bs, false, NULL, true);
577 aio_context_release(aio_context);
578 }
579
580 return result;
581}

/*
 * Wait for pending requests to complete across all BlockDriverStates.
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients.  It must be
 * paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between the
 * bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
595void bdrv_drain_all_begin(void)
596{
597 BlockDriverState *bs = NULL;
598
599 if (qemu_in_coroutine()) {
600 bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
601 return;
602 }

    /*
     * The bdrv request queue is managed by record/replay; waiting for the
     * remaining I/O requests to finish here could block forever.
     */
609 if (replay_events_enabled()) {
610 return;
611 }
612
613
614
615 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
616 assert(bdrv_drain_all_count < INT_MAX);
617 bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet.  The graph
     * cannot change during this loop. */
621 while ((bs = bdrv_next_all_states(bs))) {
622 AioContext *aio_context = bdrv_get_aio_context(bs);
623
624 aio_context_acquire(aio_context);
625 bdrv_do_drained_begin(bs, false, NULL, true, false);
626 aio_context_release(aio_context);
627 }

    /* Now poll the in-flight requests */
630 AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
631
632 while ((bs = bdrv_next_all_states(bs))) {
633 bdrv_drain_assert_idle(bs);
634 }
635}
636
637void bdrv_drain_all_end(void)
638{
639 BlockDriverState *bs = NULL;
640 int drained_end_counter = 0;

    /*
     * The bdrv request queue is managed by record/replay;
     * bdrv_drain_all_begin() returned early in that case, so there is
     * nothing to undo here.
     */
647 if (replay_events_enabled()) {
648 return;
649 }
650
651 while ((bs = bdrv_next_all_states(bs))) {
652 AioContext *aio_context = bdrv_get_aio_context(bs);
653
654 aio_context_acquire(aio_context);
655 bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
656 aio_context_release(aio_context);
657 }
658
659 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
660 AIO_WAIT_WHILE(NULL, atomic_read(&drained_end_counter) > 0);
661
662 assert(bdrv_drain_all_count > 0);
663 bdrv_drain_all_count--;
664}
665
666void bdrv_drain_all(void)
667{
668 bdrv_drain_all_begin();
669 bdrv_drain_all_end();
670}
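
/*
 * Illustrative sketch (not part of this file): code that must observe a
 * globally quiescent block layer, for example before inspecting state that
 * any node could still be changing, uses the all-node variant:
 *
 *     bdrv_drain_all_begin();
 *     ... every BlockDriverState is quiesced here ...
 *     bdrv_drain_all_end();
 *
 * bdrv_drain_all() above is simply this begin/end pair back to back.
 */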

/*
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
677static void tracked_request_end(BdrvTrackedRequest *req)
678{
679 if (req->serialising) {
680 atomic_dec(&req->bs->serialising_in_flight);
681 }
682
683 qemu_co_mutex_lock(&req->bs->reqs_lock);
684 QLIST_REMOVE(req, list);
685 qemu_co_queue_restart_all(&req->wait_queue);
686 qemu_co_mutex_unlock(&req->bs->reqs_lock);
687}

/*
 * Add an active request to the tracked requests list
 */
692static void tracked_request_begin(BdrvTrackedRequest *req,
693 BlockDriverState *bs,
694 int64_t offset,
695 uint64_t bytes,
696 enum BdrvTrackedRequestType type)
697{
698 assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
699
700 *req = (BdrvTrackedRequest){
701 .bs = bs,
702 .offset = offset,
703 .bytes = bytes,
704 .type = type,
705 .co = qemu_coroutine_self(),
706 .serialising = false,
707 .overlap_offset = offset,
708 .overlap_bytes = bytes,
709 };
710
711 qemu_co_queue_init(&req->wait_queue);
712
713 qemu_co_mutex_lock(&bs->reqs_lock);
714 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
715 qemu_co_mutex_unlock(&bs->reqs_lock);
716}
717
718void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
719{
720 int64_t overlap_offset = req->offset & ~(align - 1);
721 uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
722 - overlap_offset;
723
724 if (!req->serialising) {
725 atomic_inc(&req->bs->serialising_in_flight);
726 req->serialising = true;
727 }
728
729 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
730 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
731}
732
733static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
734{
    /*
     * Return true if the request is serialising and protects no wider range
     * than the request itself, i.e. overlap_offset/overlap_bytes were not
     * expanded beyond the request by bdrv_mark_request_serialising().
     */
741 return req->serialising && (req->offset == req->overlap_offset) &&
742 (req->bytes == req->overlap_bytes);
743}

/*
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */
749BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
750{
751 BdrvTrackedRequest *req;
752 Coroutine *self = qemu_coroutine_self();
753
754 QLIST_FOREACH(req, &bs->tracked_requests, list) {
755 if (req->co == self) {
756 return req;
757 }
758 }
759
760 return NULL;
761}

/*
 * Round a region to cluster boundaries
 */
766void bdrv_round_to_clusters(BlockDriverState *bs,
767 int64_t offset, int64_t bytes,
768 int64_t *cluster_offset,
769 int64_t *cluster_bytes)
770{
771 BlockDriverInfo bdi;
772
773 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
774 *cluster_offset = offset;
775 *cluster_bytes = bytes;
776 } else {
777 int64_t c = bdi.cluster_size;
778 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
779 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
780 }
781}
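
/*
 * Worked example (illustrative, values assumed): with a cluster size of 65536
 * bytes, bdrv_round_to_clusters(bs, 70000, 1000, &off, &cbytes) yields
 * off = 65536 and cbytes = 65536, i.e. the smallest cluster-aligned region
 * covering [70000, 71000).  If the driver reports no cluster size, offset and
 * bytes are passed through unchanged.
 */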
782
783static int bdrv_get_cluster_size(BlockDriverState *bs)
784{
785 BlockDriverInfo bdi;
786 int ret;
787
788 ret = bdrv_get_info(bs, &bdi);
789 if (ret < 0 || bdi.cluster_size == 0) {
790 return bs->bl.request_alignment;
791 } else {
792 return bdi.cluster_size;
793 }
794}
795
796static bool tracked_request_overlaps(BdrvTrackedRequest *req,
797 int64_t offset, uint64_t bytes)
798{
    /* The queried range starts at or after the tracked request ends */
800 if (offset >= req->overlap_offset + req->overlap_bytes) {
801 return false;
802 }
    /* The tracked request starts at or after the queried range ends */
804 if (req->overlap_offset >= offset + bytes) {
805 return false;
806 }
807 return true;
808}
809
810void bdrv_inc_in_flight(BlockDriverState *bs)
811{
812 atomic_inc(&bs->in_flight);
813}
814
815void bdrv_wakeup(BlockDriverState *bs)
816{
817 aio_wait_kick();
818}
819
820void bdrv_dec_in_flight(BlockDriverState *bs)
821{
822 atomic_dec(&bs->in_flight);
823 bdrv_wakeup(bs);
824}
825
826bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
827{
828 BlockDriverState *bs = self->bs;
829 BdrvTrackedRequest *req;
830 bool retry;
831 bool waited = false;
832
833 if (!atomic_read(&bs->serialising_in_flight)) {
834 return false;
835 }
836
837 do {
838 retry = false;
839 qemu_co_mutex_lock(&bs->reqs_lock);
840 QLIST_FOREACH(req, &bs->tracked_requests, list) {
841 if (req == self || (!req->serialising && !self->serialising)) {
842 continue;
843 }
844 if (tracked_request_overlaps(req, self->overlap_offset,
845 self->overlap_bytes))
846 {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
851 assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
856 if (!req->waiting_for) {
857 self->waiting_for = req;
858 qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
859 self->waiting_for = NULL;
860 retry = true;
861 waited = true;
862 break;
863 }
864 }
865 }
866 qemu_co_mutex_unlock(&bs->reqs_lock);
867 } while (retry);
868
869 return waited;
870}
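
/*
 * Illustrative sketch (not part of this file): the tracked-request helpers
 * above are used by the request entry points in this file roughly as follows
 * (simplified; see bdrv_co_pwritev_part() for the real sequence, and note
 * that "needs_serialisation" is a made-up placeholder condition):
 *
 *     bdrv_inc_in_flight(bs);
 *     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
 *     if (needs_serialisation) {
 *         bdrv_mark_request_serialising(&req, bdrv_get_cluster_size(bs));
 *     }
 *     bdrv_wait_serialising_requests(&req);
 *     ... perform the actual I/O ...
 *     tracked_request_end(&req);
 *     bdrv_dec_in_flight(bs);
 */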
871
872static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
873 size_t size)
874{
875 if (size > BDRV_REQUEST_MAX_BYTES) {
876 return -EIO;
877 }
878
879 if (!bdrv_is_inserted(bs)) {
880 return -ENOMEDIUM;
881 }
882
883 if (offset < 0) {
884 return -EIO;
885 }
886
887 return 0;
888}
889
890typedef struct RwCo {
891 BdrvChild *child;
892 int64_t offset;
893 QEMUIOVector *qiov;
894 bool is_write;
895 int ret;
896 BdrvRequestFlags flags;
897} RwCo;
898
899static void coroutine_fn bdrv_rw_co_entry(void *opaque)
900{
901 RwCo *rwco = opaque;
902
903 if (!rwco->is_write) {
904 rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
905 rwco->qiov->size, rwco->qiov,
906 rwco->flags);
907 } else {
908 rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
909 rwco->qiov->size, rwco->qiov,
910 rwco->flags);
911 }
912 aio_wait_kick();
913}

/*
 * Process a vectored synchronous request using coroutines
 */
918static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
919 QEMUIOVector *qiov, bool is_write,
920 BdrvRequestFlags flags)
921{
922 Coroutine *co;
923 RwCo rwco = {
924 .child = child,
925 .offset = offset,
926 .qiov = qiov,
927 .is_write = is_write,
928 .ret = NOT_DONE,
929 .flags = flags,
930 };
931
932 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
934 bdrv_rw_co_entry(&rwco);
935 } else {
936 co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
937 bdrv_coroutine_enter(child->bs, co);
938 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
939 }
940 return rwco.ret;
941}
942
943int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
944 int bytes, BdrvRequestFlags flags)
945{
946 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
947
948 return bdrv_prwv_co(child, offset, &qiov, true,
949 BDRV_REQ_ZERO_WRITE | flags);
950}

/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes.  Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success.  For error codes see bdrv_pwrite().
 */
961int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
962{
963 int ret;
964 int64_t target_size, bytes, offset = 0;
965 BlockDriverState *bs = child->bs;
966
967 target_size = bdrv_getlength(bs);
968 if (target_size < 0) {
969 return target_size;
970 }
971
972 for (;;) {
973 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
974 if (bytes <= 0) {
975 return 0;
976 }
977 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
978 if (ret < 0) {
979 return ret;
980 }
981 if (ret & BDRV_BLOCK_ZERO) {
982 offset += bytes;
983 continue;
984 }
985 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
986 if (ret < 0) {
987 return ret;
988 }
989 offset += bytes;
990 }
991}
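
/*
 * Illustrative sketch (not part of this file): a hypothetical caller that
 * wants an image to read back as all zeroes could do:
 *
 *     ret = bdrv_make_zero(child, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         error_report("zeroing failed: %s", strerror(-ret));
 *     }
 *
 * Ranges that already read as zeroes are skipped, so this is cheap on sparse
 * images.
 */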
992
993int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
994{
995 int ret;
996
997 ret = bdrv_prwv_co(child, offset, qiov, false, 0);
998 if (ret < 0) {
999 return ret;
1000 }
1001
1002 return qiov->size;
1003}

/* See bdrv_pwrite() for the return codes */
1006int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
1007{
1008 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1009
1010 if (bytes < 0) {
1011 return -EINVAL;
1012 }
1013
1014 return bdrv_preadv(child, offset, &qiov);
1015}
1016
1017int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
1018{
1019 int ret;
1020
1021 ret = bdrv_prwv_co(child, offset, qiov, true, 0);
1022 if (ret < 0) {
1023 return ret;
1024 }
1025
1026 return qiov->size;
1027}

/*
 * Return number of bytes written on success or < 0 on error.  Important
 * errors include:
 *  -EIO         generic I/O error (may happen for all errors)
 *  -ENOMEDIUM   no media inserted
 *  -EINVAL      invalid offset or number of bytes
 */
1035int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
1036{
1037 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1038
1039 if (bytes < 0) {
1040 return -EINVAL;
1041 }
1042
1043 return bdrv_pwritev(child, offset, &qiov);
1044}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
1052int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
1053 const void *buf, int count)
1054{
1055 int ret;
1056
1057 ret = bdrv_pwrite(child, offset, buf, count);
1058 if (ret < 0) {
1059 return ret;
1060 }
1061
1062 ret = bdrv_flush(child->bs);
1063 if (ret < 0) {
1064 return ret;
1065 }
1066
1067 return 0;
1068}
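
/*
 * Illustrative sketch (not part of this file): the synchronous helpers above
 * wrap the coroutine-based I/O path behind a blocking interface, e.g. for
 * format code updating a header field (buffer and offsets are hypothetical):
 *
 *     uint8_t buf[512];
 *
 *     if (bdrv_pread(child, 0, buf, sizeof(buf)) < 0) {
 *         return -EIO;
 *     }
 *     ... modify buf ...
 *     if (bdrv_pwrite_sync(child, 0, buf, sizeof(buf)) < 0) {
 *         return -EIO;
 *     }
 */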
1069
1070typedef struct CoroutineIOCompletion {
1071 Coroutine *coroutine;
1072 int ret;
1073} CoroutineIOCompletion;
1074
1075static void bdrv_co_io_em_complete(void *opaque, int ret)
1076{
1077 CoroutineIOCompletion *co = opaque;
1078
1079 co->ret = ret;
1080 aio_co_wake(co->coroutine);
1081}
1082
1083static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1084 uint64_t offset, uint64_t bytes,
1085 QEMUIOVector *qiov,
1086 size_t qiov_offset, int flags)
1087{
1088 BlockDriver *drv = bs->drv;
1089 int64_t sector_num;
1090 unsigned int nb_sectors;
1091 QEMUIOVector local_qiov;
1092 int ret;
1093
1094 assert(!(flags & ~BDRV_REQ_MASK));
1095 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1096
1097 if (!drv) {
1098 return -ENOMEDIUM;
1099 }
1100
1101 if (drv->bdrv_co_preadv_part) {
1102 return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
1103 flags);
1104 }
1105
1106 if (qiov_offset > 0 || bytes != qiov->size) {
1107 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1108 qiov = &local_qiov;
1109 }
1110
1111 if (drv->bdrv_co_preadv) {
1112 ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1113 goto out;
1114 }
1115
1116 if (drv->bdrv_aio_preadv) {
1117 BlockAIOCB *acb;
1118 CoroutineIOCompletion co = {
1119 .coroutine = qemu_coroutine_self(),
1120 };
1121
1122 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1123 bdrv_co_io_em_complete, &co);
1124 if (acb == NULL) {
1125 ret = -EIO;
1126 goto out;
1127 } else {
1128 qemu_coroutine_yield();
1129 ret = co.ret;
1130 goto out;
1131 }
1132 }
1133
1134 sector_num = offset >> BDRV_SECTOR_BITS;
1135 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1136
1137 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1138 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1139 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1140 assert(drv->bdrv_co_readv);
1141
1142 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1143
1144out:
1145 if (qiov == &local_qiov) {
1146 qemu_iovec_destroy(&local_qiov);
1147 }
1148
1149 return ret;
1150}
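
/*
 * Illustrative note (not part of this file): bdrv_driver_preadv() above picks
 * the most capable read interface the driver implements, in this order:
 * .bdrv_co_preadv_part, .bdrv_co_preadv, .bdrv_aio_preadv and finally the
 * sector-based .bdrv_co_readv, slicing the I/O vector beforehand when the
 * driver cannot take a qiov_offset.
 */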
1151
1152static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1153 uint64_t offset, uint64_t bytes,
1154 QEMUIOVector *qiov,
1155 size_t qiov_offset, int flags)
1156{
1157 BlockDriver *drv = bs->drv;
1158 int64_t sector_num;
1159 unsigned int nb_sectors;
1160 QEMUIOVector local_qiov;
1161 int ret;
1162
1163 assert(!(flags & ~BDRV_REQ_MASK));
1164 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1165
1166 if (!drv) {
1167 return -ENOMEDIUM;
1168 }
1169
1170 if (drv->bdrv_co_pwritev_part) {
1171 ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
1172 flags & bs->supported_write_flags);
1173 flags &= ~bs->supported_write_flags;
1174 goto emulate_flags;
1175 }
1176
1177 if (qiov_offset > 0 || bytes != qiov->size) {
1178 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1179 qiov = &local_qiov;
1180 }
1181
1182 if (drv->bdrv_co_pwritev) {
1183 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1184 flags & bs->supported_write_flags);
1185 flags &= ~bs->supported_write_flags;
1186 goto emulate_flags;
1187 }
1188
1189 if (drv->bdrv_aio_pwritev) {
1190 BlockAIOCB *acb;
1191 CoroutineIOCompletion co = {
1192 .coroutine = qemu_coroutine_self(),
1193 };
1194
1195 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1196 flags & bs->supported_write_flags,
1197 bdrv_co_io_em_complete, &co);
1198 flags &= ~bs->supported_write_flags;
1199 if (acb == NULL) {
1200 ret = -EIO;
1201 } else {
1202 qemu_coroutine_yield();
1203 ret = co.ret;
1204 }
1205 goto emulate_flags;
1206 }
1207
1208 sector_num = offset >> BDRV_SECTOR_BITS;
1209 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1210
1211 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1212 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1213 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1214
1215 assert(drv->bdrv_co_writev);
1216 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1217 flags & bs->supported_write_flags);
1218 flags &= ~bs->supported_write_flags;
1219
1220emulate_flags:
1221 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1222 ret = bdrv_co_flush(bs);
1223 }
1224
1225 if (qiov == &local_qiov) {
1226 qemu_iovec_destroy(&local_qiov);
1227 }
1228
1229 return ret;
1230}
1231
1232static int coroutine_fn
1233bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
1234 uint64_t bytes, QEMUIOVector *qiov,
1235 size_t qiov_offset)
1236{
1237 BlockDriver *drv = bs->drv;
1238 QEMUIOVector local_qiov;
1239 int ret;
1240
1241 if (!drv) {
1242 return -ENOMEDIUM;
1243 }
1244
1245 if (!block_driver_can_compress(drv)) {
1246 return -ENOTSUP;
1247 }
1248
1249 if (drv->bdrv_co_pwritev_compressed_part) {
1250 return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
1251 qiov, qiov_offset);
1252 }
1253
1254 if (qiov_offset == 0) {
1255 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1256 }
1257
1258 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1259 ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
1260 qemu_iovec_destroy(&local_qiov);
1261
1262 return ret;
1263}
1264
1265static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1266 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1267 size_t qiov_offset, int flags)
1268{
1269 BlockDriverState *bs = child->bs;

    /*
     * Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
1276 void *bounce_buffer = NULL;
1277
1278 BlockDriver *drv = bs->drv;
1279 int64_t cluster_offset;
1280 int64_t cluster_bytes;
1281 size_t skip_bytes;
1282 int ret;
1283 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1284 BDRV_REQUEST_MAX_BYTES);
1285 unsigned int progress = 0;
1286 bool skip_write;
1287
1288 if (!drv) {
1289 return -ENOMEDIUM;
1290 }

    /*
     * Do not write anything when the BDS is inactive.  That is not
     * allowed, and it would not help.
     */
1296 skip_write = (bs->open_flags & BDRV_O_INACTIVE);

    /*
     * FIXME We cannot require callers to have write permissions when all they
     * are doing is a read request.  If we did things right, write permissions
     * would be obtained anyway, but internally by the copy-on-read code.  As
     * long as it is implemented here rather than in a separate filter driver,
     * the copy-on-read code doesn't have its own BdrvChild, however, for which
     * it could request permissions.  Therefore we have to bypass the permission
     * system for the moment.
     */

    /*
     * Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which is
     * one reason we loop rather than doing it all at once.
     */
1312 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1313 skip_bytes = offset - cluster_offset;
1314
1315 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1316 cluster_offset, cluster_bytes);
1317
1318 while (cluster_bytes) {
1319 int64_t pnum;
1320
1321 if (skip_write) {
1322 ret = 1;
1323 pnum = MIN(cluster_bytes, max_transfer);
1324 } else {
1325 ret = bdrv_is_allocated(bs, cluster_offset,
1326 MIN(cluster_bytes, max_transfer), &pnum);
1327 if (ret < 0) {
1328
1329
1330
1331
1332
1333 pnum = MIN(cluster_bytes, max_transfer);
1334 }
1335
1336
1337 if (ret == 0 && pnum == 0) {
1338 assert(progress >= bytes);
1339 break;
1340 }
1341
1342 assert(skip_bytes < pnum);
1343 }
1344
1345 if (ret <= 0) {
1346 QEMUIOVector local_qiov;
1347
1348
1349 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1350 if (!bounce_buffer) {
1351 int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
1352 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
1353 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
1354
1355 bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
1356 if (!bounce_buffer) {
1357 ret = -ENOMEM;
1358 goto err;
1359 }
1360 }
1361 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1362
1363 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1364 &local_qiov, 0, 0);
1365 if (ret < 0) {
1366 goto err;
1367 }
1368
1369 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1370 if (drv->bdrv_co_pwrite_zeroes &&
1371 buffer_is_zero(bounce_buffer, pnum)) {
1372
1373
1374
1375 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1376 BDRV_REQ_WRITE_UNCHANGED);
1377 } else {
1378
1379
1380
1381 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1382 &local_qiov, 0,
1383 BDRV_REQ_WRITE_UNCHANGED);
1384 }
1385
1386 if (ret < 0) {
1387
1388
1389
1390
1391
1392 goto err;
1393 }
1394
1395 if (!(flags & BDRV_REQ_PREFETCH)) {
1396 qemu_iovec_from_buf(qiov, qiov_offset + progress,
1397 bounce_buffer + skip_bytes,
1398 pnum - skip_bytes);
1399 }
1400 } else if (!(flags & BDRV_REQ_PREFETCH)) {
1401
1402 ret = bdrv_driver_preadv(bs, offset + progress,
1403 MIN(pnum - skip_bytes, bytes - progress),
1404 qiov, qiov_offset + progress, 0);
1405 if (ret < 0) {
1406 goto err;
1407 }
1408 }
1409
1410 cluster_offset += pnum;
1411 cluster_bytes -= pnum;
1412 progress += pnum - skip_bytes;
1413 skip_bytes = 0;
1414 }
1415 ret = 0;
1416
1417err:
1418 qemu_vfree(bounce_buffer);
1419 return ret;
1420}

/*
 * Forwards an already correctly aligned request to the BlockDriver.  This
 * handles copy on read, zeroing after EOF, and fragmentation of large reads.
 */
1427static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1428 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1429 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
1430{
1431 BlockDriverState *bs = child->bs;
1432 int64_t total_bytes, max_bytes;
1433 int ret = 0;
1434 uint64_t bytes_remaining = bytes;
1435 int max_transfer;
1436
1437 assert(is_power_of_2(align));
1438 assert((offset & (align - 1)) == 0);
1439 assert((bytes & (align - 1)) == 0);
1440 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1441 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1442 align);
1443
1444
1445
1446
1447
1448 assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ |
1449 BDRV_REQ_PREFETCH)));

    /* Handle Copy on Read and associated serialisation */
1452 if (flags & BDRV_REQ_COPY_ON_READ) {
        /*
         * If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them.
         */
1458 bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
1459 }
1460
1461
1462 assert(!(flags & BDRV_REQ_SERIALISING));
1463
1464 if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1465 bdrv_wait_serialising_requests(req);
1466 }
1467
1468 if (flags & BDRV_REQ_COPY_ON_READ) {
1469 int64_t pnum;
1470
1471 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1472 if (ret < 0) {
1473 goto out;
1474 }
1475
1476 if (!ret || pnum != bytes) {
1477 ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
1478 qiov, qiov_offset, flags);
1479 goto out;
1480 } else if (flags & BDRV_REQ_PREFETCH) {
1481 goto out;
1482 }
1483 }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
1486 total_bytes = bdrv_getlength(bs);
1487 if (total_bytes < 0) {
1488 ret = total_bytes;
1489 goto out;
1490 }
1491
1492 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1493 if (bytes <= max_bytes && bytes <= max_transfer) {
1494 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
1495 goto out;
1496 }
1497
1498 while (bytes_remaining) {
1499 int num;
1500
1501 if (max_bytes) {
1502 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1503 assert(num);
1504
1505 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1506 num, qiov, bytes - bytes_remaining, 0);
1507 max_bytes -= num;
1508 } else {
1509 num = bytes_remaining;
1510 ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
1511 bytes_remaining);
1512 }
1513 if (ret < 0) {
1514 goto out;
1515 }
1516 bytes_remaining -= num;
1517 }
1518
1519out:
1520 return ret < 0 ? ret : 0;
1521}

/*
 * Request padding
 *
 * To satisfy bs->bl.request_alignment, an unaligned request is widened to
 * aligned boundaries.  @head is the number of padding bytes in front of the
 * original request and @tail the number behind it.
 *
 * @buf is an aligned bounce buffer for the padded areas; @tail_buf points at
 * the part of it used for the tail.  @merge_reads is true when the whole
 * padded request fits into @buf, so the head and tail can be brought in with
 * a single RMW read.
 *
 * @local_qiov is the widened I/O vector built by bdrv_pad_request() and is
 * freed again by bdrv_padding_destroy().
 */
1545typedef struct BdrvRequestPadding {
1546 uint8_t *buf;
1547 size_t buf_len;
1548 uint8_t *tail_buf;
1549 size_t head;
1550 size_t tail;
1551 bool merge_reads;
1552 QEMUIOVector local_qiov;
1553} BdrvRequestPadding;
1554
1555static bool bdrv_init_padding(BlockDriverState *bs,
1556 int64_t offset, int64_t bytes,
1557 BdrvRequestPadding *pad)
1558{
1559 uint64_t align = bs->bl.request_alignment;
1560 size_t sum;
1561
1562 memset(pad, 0, sizeof(*pad));
1563
1564 pad->head = offset & (align - 1);
1565 pad->tail = ((offset + bytes) & (align - 1));
1566 if (pad->tail) {
1567 pad->tail = align - pad->tail;
1568 }
1569
1570 if ((!pad->head && !pad->tail) || !bytes) {
1571 return false;
1572 }
1573
1574 sum = pad->head + bytes + pad->tail;
1575 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1576 pad->buf = qemu_blockalign(bs, pad->buf_len);
1577 pad->merge_reads = sum == pad->buf_len;
1578 if (pad->tail) {
1579 pad->tail_buf = pad->buf + pad->buf_len - align;
1580 }
1581
1582 return true;
1583}
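
/*
 * Worked example (illustrative, values assumed): with a request alignment of
 * 512 bytes, a request of 1000 bytes at offset 700 gets head = 700 % 512 = 188
 * and tail = 512 - (1700 % 512) = 348, so the padded request covers
 * [512, 2048).  Since 188 + 1000 + 348 = 1536 > 512 and both paddings are
 * present, buf_len = 2 * 512 = 1024 and merge_reads is false: the head and
 * tail are brought in with two separate RMW reads.
 */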
1584
1585static int bdrv_padding_rmw_read(BdrvChild *child,
1586 BdrvTrackedRequest *req,
1587 BdrvRequestPadding *pad,
1588 bool zero_middle)
1589{
1590 QEMUIOVector local_qiov;
1591 BlockDriverState *bs = child->bs;
1592 uint64_t align = bs->bl.request_alignment;
1593 int ret;
1594
1595 assert(req->serialising && pad->buf);
1596
1597 if (pad->head || pad->merge_reads) {
1598 uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
1599
1600 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1601
1602 if (pad->head) {
1603 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1604 }
1605 if (pad->merge_reads && pad->tail) {
1606 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1607 }
1608 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1609 align, &local_qiov, 0, 0);
1610 if (ret < 0) {
1611 return ret;
1612 }
1613 if (pad->head) {
1614 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1615 }
1616 if (pad->merge_reads && pad->tail) {
1617 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1618 }
1619
1620 if (pad->merge_reads) {
1621 goto zero_mem;
1622 }
1623 }
1624
1625 if (pad->tail) {
1626 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1627
1628 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1629 ret = bdrv_aligned_preadv(
1630 child, req,
1631 req->overlap_offset + req->overlap_bytes - align,
1632 align, align, &local_qiov, 0, 0);
1633 if (ret < 0) {
1634 return ret;
1635 }
1636 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1637 }
1638
1639zero_mem:
1640 if (zero_middle) {
1641 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1642 }
1643
1644 return 0;
1645}
1646
1647static void bdrv_padding_destroy(BdrvRequestPadding *pad)
1648{
1649 if (pad->buf) {
1650 qemu_vfree(pad->buf);
1651 qemu_iovec_destroy(&pad->local_qiov);
1652 }
1653}

/*
 * bdrv_pad_request
 *
 * Exchange request parameters with a padded request if needed.  The RMW read
 * of the padding is not performed here; call bdrv_padding_rmw_read()
 * separately if needed.
 *
 * All parameters except @bs are in-out: they describe the original request on
 * entry and the padded request (if padding was needed) on return.
 *
 * Returns whether padding was applied; the function itself cannot fail.
 */
1667static bool bdrv_pad_request(BlockDriverState *bs,
1668 QEMUIOVector **qiov, size_t *qiov_offset,
1669 int64_t *offset, unsigned int *bytes,
1670 BdrvRequestPadding *pad)
1671{
1672 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
1673 return false;
1674 }
1675
1676 qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
1677 *qiov, *qiov_offset, *bytes,
1678 pad->buf + pad->buf_len - pad->tail, pad->tail);
1679 *bytes += pad->head + pad->tail;
1680 *offset -= pad->head;
1681 *qiov = &pad->local_qiov;
1682 *qiov_offset = 0;
1683
1684 return true;
1685}
1686
1687int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1688 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1689 BdrvRequestFlags flags)
1690{
1691 return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
1692}
1693
1694int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
1695 int64_t offset, unsigned int bytes,
1696 QEMUIOVector *qiov, size_t qiov_offset,
1697 BdrvRequestFlags flags)
1698{
1699 BlockDriverState *bs = child->bs;
1700 BdrvTrackedRequest req;
1701 BdrvRequestPadding pad;
1702 int ret;
1703
1704 trace_bdrv_co_preadv(bs, offset, bytes, flags);
1705
1706 ret = bdrv_check_byte_request(bs, offset, bytes);
1707 if (ret < 0) {
1708 return ret;
1709 }
1710
1711 bdrv_inc_in_flight(bs);
1712
1713
1714 if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
1715 flags |= BDRV_REQ_COPY_ON_READ;
1716 }
1717
1718 bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
1719
1720 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1721 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1722 bs->bl.request_alignment,
1723 qiov, qiov_offset, flags);
1724 tracked_request_end(&req);
1725 bdrv_dec_in_flight(bs);
1726
1727 bdrv_padding_destroy(&pad);
1728
1729 return ret;
1730}
1731
1732static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1733 int64_t offset, int bytes, BdrvRequestFlags flags)
1734{
1735 BlockDriver *drv = bs->drv;
1736 QEMUIOVector qiov;
1737 void *buf = NULL;
1738 int ret = 0;
1739 bool need_flush = false;
1740 int head = 0;
1741 int tail = 0;
1742
1743 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
1744 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1745 bs->bl.request_alignment);
1746 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1747
1748 if (!drv) {
1749 return -ENOMEDIUM;
1750 }
1751
1752 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1753 return -ENOTSUP;
1754 }
1755
1756 assert(alignment % bs->bl.request_alignment == 0);
1757 head = offset % alignment;
1758 tail = (offset + bytes) % alignment;
1759 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1760 assert(max_write_zeroes >= bs->bl.request_alignment);
1761
1762 while (bytes > 0 && !ret) {
1763 int num = bytes;

        /*
         * Align the request: block drivers can expect the bulk of the request
         * to be aligned, with at most a small unaligned head and tail that do
         * not cross an alignment boundary.
         */
1769 if (head) {
1770
1771
1772
1773 num = MIN(MIN(bytes, max_transfer), alignment - head);
1774 head = (head + num) % alignment;
1775 assert(num < max_write_zeroes);
1776 } else if (tail && num > alignment) {
1777
1778 num -= tail;
1779 }
1780
1781
1782 if (num > max_write_zeroes) {
1783 num = max_write_zeroes;
1784 }
1785
1786 ret = -ENOTSUP;
1787
1788 if (drv->bdrv_co_pwrite_zeroes) {
1789 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1790 flags & bs->supported_zero_flags);
1791 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1792 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1793 need_flush = true;
1794 }
1795 } else {
1796 assert(!bs->supported_zero_flags);
1797 }
1798
1799 if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
1800
1801 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1802
1803 if ((flags & BDRV_REQ_FUA) &&
1804 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1805
1806
1807 write_flags &= ~BDRV_REQ_FUA;
1808 need_flush = true;
1809 }
1810 num = MIN(num, max_transfer);
1811 if (buf == NULL) {
1812 buf = qemu_try_blockalign0(bs, num);
1813 if (buf == NULL) {
1814 ret = -ENOMEM;
1815 goto fail;
1816 }
1817 }
1818 qemu_iovec_init_buf(&qiov, buf, num);
1819
1820 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
1821
1822
1823
1824
1825 if (num < max_transfer) {
1826 qemu_vfree(buf);
1827 buf = NULL;
1828 }
1829 }
1830
1831 offset += num;
1832 bytes -= num;
1833 }
1834
1835fail:
1836 if (ret == 0 && need_flush) {
1837 ret = bdrv_co_flush(bs);
1838 }
1839 qemu_vfree(buf);
1840 return ret;
1841}
1842
1843static inline int coroutine_fn
1844bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
1845 BdrvTrackedRequest *req, int flags)
1846{
1847 BlockDriverState *bs = child->bs;
1848 bool waited;
1849 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1850
1851 if (bs->read_only) {
1852 return -EPERM;
1853 }
1854
1855
1856 assert(!(flags & BDRV_REQ_NO_SERIALISING));
1857 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1858 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1859 assert(!(flags & ~BDRV_REQ_MASK));
1860
1861 if (flags & BDRV_REQ_SERIALISING) {
1862 bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
1863 }
1864
1865 waited = bdrv_wait_serialising_requests(req);
1866
1867 assert(!waited || !req->serialising ||
1868 is_request_serialising_and_aligned(req));
1869 assert(req->overlap_offset <= offset);
1870 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1871 assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
1872
1873 switch (req->type) {
1874 case BDRV_TRACKED_WRITE:
1875 case BDRV_TRACKED_DISCARD:
1876 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1877 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1878 } else {
1879 assert(child->perm & BLK_PERM_WRITE);
1880 }
1881 return notifier_with_return_list_notify(&bs->before_write_notifiers,
1882 req);
1883 case BDRV_TRACKED_TRUNCATE:
1884 assert(child->perm & BLK_PERM_RESIZE);
1885 return 0;
1886 default:
1887 abort();
1888 }
1889}
1890
1891static inline void coroutine_fn
1892bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
1893 BdrvTrackedRequest *req, int ret)
1894{
1895 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1896 BlockDriverState *bs = child->bs;
1897
1898 atomic_inc(&bs->write_gen);

    /*
     * Discard cannot extend the image, but in error handling cases, such as
     * when reverting a qcow2 internal snapshot, the discarded range can pass
     * the end of the image file, so we cannot assert about
     * BDRV_TRACKED_DISCARD here.  Instead, just skip it, since semantically a
     * discard request beyond EOF cannot expand the image anyway.
     */
1907 if (ret == 0 &&
1908 (req->type == BDRV_TRACKED_TRUNCATE ||
1909 end_sector > bs->total_sectors) &&
1910 req->type != BDRV_TRACKED_DISCARD) {
1911 bs->total_sectors = end_sector;
1912 bdrv_parent_cb_resize(bs);
1913 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
1914 }
1915 if (req->bytes) {
1916 switch (req->type) {
1917 case BDRV_TRACKED_WRITE:
1918 stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
1920 case BDRV_TRACKED_DISCARD:
1921 bdrv_set_dirty(bs, offset, bytes);
1922 break;
1923 default:
1924 break;
1925 }
1926 }
1927}
1928

/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
1933static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
1934 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1935 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
1936{
1937 BlockDriverState *bs = child->bs;
1938 BlockDriver *drv = bs->drv;
1939 int ret;
1940
1941 uint64_t bytes_remaining = bytes;
1942 int max_transfer;
1943
1944 if (!drv) {
1945 return -ENOMEDIUM;
1946 }
1947
1948 if (bdrv_has_readonly_bitmaps(bs)) {
1949 return -EPERM;
1950 }
1951
1952 assert(is_power_of_2(align));
1953 assert((offset & (align - 1)) == 0);
1954 assert((bytes & (align - 1)) == 0);
1955 assert(!qiov || qiov_offset + bytes <= qiov->size);
1956 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1957 align);
1958
1959 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
1960
1961 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
1962 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
1963 qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
1964 flags |= BDRV_REQ_ZERO_WRITE;
1965 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
1966 flags |= BDRV_REQ_MAY_UNMAP;
1967 }
1968 }
1969
1970 if (ret < 0) {
        /* Do nothing, the write notifier decided to fail this request */
1972 } else if (flags & BDRV_REQ_ZERO_WRITE) {
1973 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
1974 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
1975 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
1976 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
1977 qiov, qiov_offset);
1978 } else if (bytes <= max_transfer) {
1979 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1980 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
1981 } else {
1982 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1983 while (bytes_remaining) {
1984 int num = MIN(bytes_remaining, max_transfer);
1985 int local_flags = flags;
1986
1987 assert(num);
1988 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
1989 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1990
1991
1992 local_flags &= ~BDRV_REQ_FUA;
1993 }
1994
1995 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
1996 num, qiov, bytes - bytes_remaining,
1997 local_flags);
1998 if (ret < 0) {
1999 break;
2000 }
2001 bytes_remaining -= num;
2002 }
2003 }
2004 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
2005
2006 if (ret >= 0) {
2007 ret = 0;
2008 }
2009 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
2010
2011 return ret;
2012}
2013
2014static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
2015 int64_t offset,
2016 unsigned int bytes,
2017 BdrvRequestFlags flags,
2018 BdrvTrackedRequest *req)
2019{
2020 BlockDriverState *bs = child->bs;
2021 QEMUIOVector local_qiov;
2022 uint64_t align = bs->bl.request_alignment;
2023 int ret = 0;
2024 bool padding;
2025 BdrvRequestPadding pad;
2026
2027 padding = bdrv_init_padding(bs, offset, bytes, &pad);
2028 if (padding) {
2029 bdrv_mark_request_serialising(req, align);
2030 bdrv_wait_serialising_requests(req);
2031
2032 bdrv_padding_rmw_read(child, req, &pad, true);
2033
2034 if (pad.head || pad.merge_reads) {
2035 int64_t aligned_offset = offset & ~(align - 1);
2036 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2037
2038 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2039 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2040 align, &local_qiov, 0,
2041 flags & ~BDRV_REQ_ZERO_WRITE);
2042 if (ret < 0 || pad.merge_reads) {
2043
2044 goto out;
2045 }
2046 offset += write_bytes - pad.head;
2047 bytes -= write_bytes - pad.head;
2048 }
2049 }
2050
2051 assert(!bytes || (offset & (align - 1)) == 0);
2052 if (bytes >= align) {
2053
2054 uint64_t aligned_bytes = bytes & ~(align - 1);
2055 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2056 NULL, 0, flags);
2057 if (ret < 0) {
2058 goto out;
2059 }
2060 bytes -= aligned_bytes;
2061 offset += aligned_bytes;
2062 }
2063
2064 assert(!bytes || (offset & (align - 1)) == 0);
2065 if (bytes) {
2066 assert(align == pad.tail + bytes);
2067
2068 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2069 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2070 &local_qiov, 0,
2071 flags & ~BDRV_REQ_ZERO_WRITE);
2072 }
2073
2074out:
2075 bdrv_padding_destroy(&pad);
2076
2077 return ret;
2078}

/*
 * Handle a write request in coroutine context
 */
2083int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2084 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
2085 BdrvRequestFlags flags)
2086{
2087 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2088}
2089
2090int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2091 int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
2092 BdrvRequestFlags flags)
2093{
2094 BlockDriverState *bs = child->bs;
2095 BdrvTrackedRequest req;
2096 uint64_t align = bs->bl.request_alignment;
2097 BdrvRequestPadding pad;
2098 int ret;
2099
2100 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
2101
2102 if (!bs->drv) {
2103 return -ENOMEDIUM;
2104 }
2105
2106 ret = bdrv_check_byte_request(bs, offset, bytes);
2107 if (ret < 0) {
2108 return ret;
2109 }

    /* If the request is misaligned then we can't make it efficient */
2112 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2113 !QEMU_IS_ALIGNED(offset | bytes, align))
2114 {
2115 return -ENOTSUP;
2116 }
2117
2118 bdrv_inc_in_flight(bs);

    /*
     * Align the write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
2124 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2125
2126 if (flags & BDRV_REQ_ZERO_WRITE) {
2127 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2128 goto out;
2129 }
2130
2131 if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
2132 bdrv_mark_request_serialising(&req, align);
2133 bdrv_wait_serialising_requests(&req);
2134 bdrv_padding_rmw_read(child, &req, &pad, false);
2135 }
2136
2137 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2138 qiov, qiov_offset, flags);
2139
2140 bdrv_padding_destroy(&pad);
2141
2142out:
2143 tracked_request_end(&req);
2144 bdrv_dec_in_flight(bs);
2145
2146 return ret;
2147}
2148
2149int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2150 int bytes, BdrvRequestFlags flags)
2151{
2152 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2153
2154 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2155 flags &= ~BDRV_REQ_MAY_UNMAP;
2156 }
2157
2158 return bdrv_co_pwritev(child, offset, bytes, NULL,
2159 BDRV_REQ_ZERO_WRITE | flags);
2160}
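
/*
 * Illustrative sketch (not part of this file): a caller zeroing (and, if the
 * image was opened with BDRV_O_UNMAP, possibly unmapping) a range would do
 * something like:
 *
 *     ret = bdrv_co_pwrite_zeroes(child, offset, bytes, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         ... the range could not be zeroed; fall back or fail ...
 *     }
 *
 * Outside coroutine context, bdrv_pwrite_zeroes() earlier in this file
 * provides the same operation as a blocking call.
 */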

/*
 * Flush ALL BDSes regardless of whether they are reachable via a BlockBackend
 * or not.
 */
2165int bdrv_flush_all(void)
2166{
2167 BdrvNextIterator it;
2168 BlockDriverState *bs = NULL;
2169 int result = 0;

    /*
     * The bdrv request queue is managed by record/replay; creating new flush
     * requests while stopping the VM may break the determinism.
     */
2176 if (replay_events_enabled()) {
2177 return result;
2178 }
2179
2180 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2181 AioContext *aio_context = bdrv_get_aio_context(bs);
2182 int ret;
2183
2184 aio_context_acquire(aio_context);
2185 ret = bdrv_flush(bs);
2186 if (ret < 0 && !result) {
2187 result = ret;
2188 }
2189 aio_context_release(aio_context);
2190 }
2191
2192 return result;
2193}
2194
2195
2196typedef struct BdrvCoBlockStatusData {
2197 BlockDriverState *bs;
2198 BlockDriverState *base;
2199 bool want_zero;
2200 int64_t offset;
2201 int64_t bytes;
2202 int64_t *pnum;
2203 int64_t *map;
2204 BlockDriverState **file;
2205 int ret;
2206 bool done;
2207} BdrvCoBlockStatusData;
2208
2209int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
2210 bool want_zero,
2211 int64_t offset,
2212 int64_t bytes,
2213 int64_t *pnum,
2214 int64_t *map,
2215 BlockDriverState **file)
2216{
2217 assert(bs->file && bs->file->bs);
2218 *pnum = bytes;
2219 *map = offset;
2220 *file = bs->file->bs;
2221 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2222}
2223
2224int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
2225 bool want_zero,
2226 int64_t offset,
2227 int64_t bytes,
2228 int64_t *pnum,
2229 int64_t *map,
2230 BlockDriverState **file)
2231{
2232 assert(bs->backing && bs->backing->bs);
2233 *pnum = bytes;
2234 *map = offset;
2235 *file = bs->backing->bs;
2236 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2237}

/*
 * Returns the allocation status of the specified region.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their bytes are reported as allocated.
 *
 * If 'want_zero' is true, the caller is querying for mapping
 * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
 * _ZERO where possible; otherwise, the result favors larger 'pnum',
 * with a focus on accurate BDRV_BLOCK_ALLOCATED.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the max value 'pnum' should be set to.  If bytes goes
 * beyond the end of the disk image it will be clamped; if 'pnum' is set to
 * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are easily known to be in the
 * same allocated/unallocated state.  Note that a second call starting
 * at the original offset plus the returned pnum may have the same status.
 * The returned value is non-zero on success except at end-of-file.
 *
 * Returns negative errno on failure.  Otherwise, if the
 * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
 * set to the host mapping and BDS corresponding to the guest offset.
 */
2266static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2267 bool want_zero,
2268 int64_t offset, int64_t bytes,
2269 int64_t *pnum, int64_t *map,
2270 BlockDriverState **file)
2271{
2272 int64_t total_size;
2273 int64_t n;
2274 int ret;
2275 int64_t local_map = 0;
2276 BlockDriverState *local_file = NULL;
2277 int64_t aligned_offset, aligned_bytes;
2278 uint32_t align;
2279
2280 assert(pnum);
2281 *pnum = 0;
2282 total_size = bdrv_getlength(bs);
2283 if (total_size < 0) {
2284 ret = total_size;
2285 goto early_out;
2286 }
2287
2288 if (offset >= total_size) {
2289 ret = BDRV_BLOCK_EOF;
2290 goto early_out;
2291 }
2292 if (!bytes) {
2293 ret = 0;
2294 goto early_out;
2295 }
2296
2297 n = total_size - offset;
2298 if (n < bytes) {
2299 bytes = n;
2300 }

    /* Must be non-NULL or bdrv_getlength() would have failed */
2303 assert(bs->drv);
2304 if (!bs->drv->bdrv_co_block_status) {
2305 *pnum = bytes;
2306 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2307 if (offset + bytes == total_size) {
2308 ret |= BDRV_BLOCK_EOF;
2309 }
2310 if (bs->drv->protocol_name) {
2311 ret |= BDRV_BLOCK_OFFSET_VALID;
2312 local_map = offset;
2313 local_file = bs;
2314 }
2315 goto early_out;
2316 }
2317
2318 bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
2321 align = bs->bl.request_alignment;
2322 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2323 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2324
2325 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2326 aligned_bytes, pnum, &local_map,
2327 &local_file);
2328 if (ret < 0) {
2329 *pnum = 0;
2330 goto out;
2331 }
2332
2333
2334
2335
2336
2337 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2338 align > offset - aligned_offset);
2339 if (ret & BDRV_BLOCK_RECURSE) {
2340 assert(ret & BDRV_BLOCK_DATA);
2341 assert(ret & BDRV_BLOCK_OFFSET_VALID);
2342 assert(!(ret & BDRV_BLOCK_ZERO));
2343 }
2344
2345 *pnum -= offset - aligned_offset;
2346 if (*pnum > bytes) {
2347 *pnum = bytes;
2348 }
2349 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2350 local_map += offset - aligned_offset;
2351 }
2352
2353 if (ret & BDRV_BLOCK_RAW) {
2354 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2355 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2356 *pnum, pnum, &local_map, &local_file);
2357 goto out;
2358 }
2359
2360 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2361 ret |= BDRV_BLOCK_ALLOCATED;
2362 } else if (want_zero) {
2363 if (bdrv_unallocated_blocks_are_zero(bs)) {
2364 ret |= BDRV_BLOCK_ZERO;
2365 } else if (bs->backing) {
2366 BlockDriverState *bs2 = bs->backing->bs;
2367 int64_t size2 = bdrv_getlength(bs2);
2368
2369 if (size2 >= 0 && offset >= size2) {
2370 ret |= BDRV_BLOCK_ZERO;
2371 }
2372 }
2373 }
2374
2375 if (want_zero && ret & BDRV_BLOCK_RECURSE &&
2376 local_file && local_file != bs &&
2377 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2378 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2379 int64_t file_pnum;
2380 int ret2;
2381
2382 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2383 *pnum, &file_pnum, NULL, NULL);
2384 if (ret2 >= 0) {
2385
2386
2387
2388 if (ret2 & BDRV_BLOCK_EOF &&
2389 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
2390
2391
2392
2393
2394
2395 ret |= BDRV_BLOCK_ZERO;
2396 } else {
2397
2398 *pnum = file_pnum;
2399 ret |= (ret2 & BDRV_BLOCK_ZERO);
2400 }
2401 }
2402 }
2403
2404out:
2405 bdrv_dec_in_flight(bs);
2406 if (ret >= 0 && offset + *pnum == total_size) {
2407 ret |= BDRV_BLOCK_EOF;
2408 }
2409early_out:
2410 if (file) {
2411 *file = local_file;
2412 }
2413 if (map) {
2414 *map = local_map;
2415 }
2416 return ret;
2417}
2418
2419static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
2420 BlockDriverState *base,
2421 bool want_zero,
2422 int64_t offset,
2423 int64_t bytes,
2424 int64_t *pnum,
2425 int64_t *map,
2426 BlockDriverState **file)
2427{
2428 BlockDriverState *p;
2429 int ret = 0;
2430 bool first = true;
2431
2432 assert(bs != base);
2433 for (p = bs; p != base; p = backing_bs(p)) {
2434 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2435 file);
2436 if (ret < 0) {
2437 break;
2438 }
2439 if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read zeroes,
             * but we can only widen the result to the unallocated range we
             * learned from an earlier iteration.
             */
2446 *pnum = bytes;
2447 }
2448 if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
2449 break;
2450 }
2451
2452
2453 bytes = MIN(bytes, *pnum);
2454 first = false;
2455 }
2456 return ret;
2457}

/* Coroutine wrapper for bdrv_block_status_above() */
2460static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
2461{
2462 BdrvCoBlockStatusData *data = opaque;
2463
2464 data->ret = bdrv_co_block_status_above(data->bs, data->base,
2465 data->want_zero,
2466 data->offset, data->bytes,
2467 data->pnum, data->map, data->file);
2468 data->done = true;
2469 aio_wait_kick();
2470}

/*
 * Synchronous wrapper around bdrv_co_block_status_above().
 *
 * See bdrv_co_block_status_above() for details.
 */
2477static int bdrv_common_block_status_above(BlockDriverState *bs,
2478 BlockDriverState *base,
2479 bool want_zero, int64_t offset,
2480 int64_t bytes, int64_t *pnum,
2481 int64_t *map,
2482 BlockDriverState **file)
2483{
2484 Coroutine *co;
2485 BdrvCoBlockStatusData data = {
2486 .bs = bs,
2487 .base = base,
2488 .want_zero = want_zero,
2489 .offset = offset,
2490 .bytes = bytes,
2491 .pnum = pnum,
2492 .map = map,
2493 .file = file,
2494 .done = false,
2495 };
2496
2497 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
2499 bdrv_block_status_above_co_entry(&data);
2500 } else {
2501 co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
2502 bdrv_coroutine_enter(bs, co);
2503 BDRV_POLL_WHILE(bs, !data.done);
2504 }
2505 return data.ret;
2506}
2507
2508int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2509 int64_t offset, int64_t bytes, int64_t *pnum,
2510 int64_t *map, BlockDriverState **file)
2511{
2512 return bdrv_common_block_status_above(bs, base, true, offset, bytes,
2513 pnum, map, file);
2514}
2515
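/*
 * Report the block status of @bs itself; its backing chain is not
 * consulted (the search stops at bs's immediate backing file).
 */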
2516int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2517 int64_t *pnum, int64_t *map, BlockDriverState **file)
2518{
2519 return bdrv_block_status_above(bs, backing_bs(bs),
2520 offset, bytes, pnum, map, file);
2521}
2522
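/*
 * Check whether the given range is allocated in the top layer of @bs
 * (the backing chain is ignored).  Returns 1 if allocated, 0 if not,
 * negative errno on failure; *pnum receives the length of the region
 * with the same allocation status, as for bdrv_block_status().
 */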
2523int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2524 int64_t bytes, int64_t *pnum)
2525{
2526 int ret;
2527 int64_t dummy;
2528
2529 ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
2530 bytes, pnum ? pnum : &dummy, NULL,
2531 NULL);
2532 if (ret < 0) {
2533 return ret;
2534 }
2535 return !!(ret & BDRV_BLOCK_ALLOCATED);
2536}
2537
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return 1 if (a prefix of) the given range is allocated in any image
 * between BASE and TOP (BASE is only included if include_base is set).
 * BASE can be NULL to check if the given offset is allocated in any
 * image of the chain.  Return 0 otherwise, or negative errno on
 * failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */
2555int bdrv_is_allocated_above(BlockDriverState *top,
2556 BlockDriverState *base,
2557 bool include_base, int64_t offset,
2558 int64_t bytes, int64_t *pnum)
2559{
2560 BlockDriverState *intermediate;
2561 int ret;
2562 int64_t n = bytes;
2563
2564 assert(base || !include_base);
2565
2566 intermediate = top;
2567 while (include_base || intermediate != base) {
2568 int64_t pnum_inter;
2569 int64_t size_inter;
2570
2571 assert(intermediate);
2572 ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
2573 if (ret < 0) {
2574 return ret;
2575 }
2576 if (ret) {
2577 *pnum = pnum_inter;
2578 return 1;
2579 }
2580
2581 size_inter = bdrv_getlength(intermediate);
2582 if (size_inter < 0) {
2583 return size_inter;
2584 }
2585 if (n > pnum_inter &&
2586 (intermediate == top || offset + pnum_inter < size_inter)) {
2587 n = pnum_inter;
2588 }
2589
2590 if (intermediate == base) {
2591 break;
2592 }
2593
2594 intermediate = backing_bs(intermediate);
2595 }
2596
2597 *pnum = n;
2598 return 0;
2599}
2600
2601typedef struct BdrvVmstateCo {
2602 BlockDriverState *bs;
2603 QEMUIOVector *qiov;
2604 int64_t pos;
2605 bool is_read;
2606 int ret;
2607} BdrvVmstateCo;
2608
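/*
 * Read or write migration VM state at @pos: use the driver's own
 * load/save_vmstate callbacks when available, otherwise forward the
 * request to bs->file.
 */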
2609static int coroutine_fn
2610bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2611 bool is_read)
2612{
2613 BlockDriver *drv = bs->drv;
2614 int ret = -ENOTSUP;
2615
2616 bdrv_inc_in_flight(bs);
2617
2618 if (!drv) {
2619 ret = -ENOMEDIUM;
2620 } else if (drv->bdrv_load_vmstate) {
2621 if (is_read) {
2622 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2623 } else {
2624 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2625 }
2626 } else if (bs->file) {
2627 ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
2628 }
2629
2630 bdrv_dec_in_flight(bs);
2631 return ret;
2632}
2633
2634static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
2635{
2636 BdrvVmstateCo *co = opaque;
2637 co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
2638 aio_wait_kick();
2639}
2640
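/*
 * Synchronous wrapper around bdrv_co_rw_vmstate(): call it directly when
 * already in coroutine context, otherwise spawn a coroutine and poll
 * until it completes.
 */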
2641static inline int
2642bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2643 bool is_read)
2644{
2645 if (qemu_in_coroutine()) {
2646 return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
2647 } else {
2648 BdrvVmstateCo data = {
2649 .bs = bs,
2650 .qiov = qiov,
2651 .pos = pos,
2652 .is_read = is_read,
2653 .ret = -EINPROGRESS,
2654 };
2655 Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
2656
2657 bdrv_coroutine_enter(bs, co);
2658 BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
2659 return data.ret;
2660 }
2661}
2662
2663int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2664 int64_t pos, int size)
2665{
2666 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2667 int ret;
2668
2669 ret = bdrv_writev_vmstate(bs, &qiov, pos);
2670 if (ret < 0) {
2671 return ret;
2672 }
2673
2674 return size;
2675}
2676
2677int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2678{
2679 return bdrv_rw_vmstate(bs, qiov, pos, false);
2680}
2681
2682int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2683 int64_t pos, int size)
2684{
2685 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2686 int ret;
2687
2688 ret = bdrv_readv_vmstate(bs, &qiov, pos);
2689 if (ret < 0) {
2690 return ret;
2691 }
2692
2693 return size;
2694}
2695
2696int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2697{
2698 return bdrv_rw_vmstate(bs, qiov, pos, true);
2699}
2700
/**************************************************************/
/* async I/Os */

2704void bdrv_aio_cancel(BlockAIOCB *acb)
2705{
2706 qemu_aio_ref(acb);
2707 bdrv_aio_cancel_async(acb);
2708 while (acb->refcnt > 1) {
2709 if (acb->aiocb_info->get_aio_context) {
2710 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2711 } else if (acb->bs) {
            /*
             * qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
2716 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2717 aio_poll(bdrv_get_aio_context(acb->bs), true);
2718 } else {
2719 abort();
2720 }
2721 }
2722 qemu_aio_unref(acb);
2723}
2724
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
2728void bdrv_aio_cancel_async(BlockAIOCB *acb)
2729{
2730 if (acb->aiocb_info->cancel_async) {
2731 acb->aiocb_info->cancel_async(acb);
2732 }
2733}
2734
/**************************************************************/
/* Coroutine block device emulation */

2738typedef struct FlushCo {
2739 BlockDriverState *bs;
2740 int ret;
2741} FlushCo;
2742
2743
2744static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2745{
2746 FlushCo *rwco = opaque;
2747
2748 rwco->ret = bdrv_co_flush(rwco->bs);
2749 aio_wait_kick();
2750}
2751
2752int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2753{
2754 int current_gen;
2755 int ret = 0;
2756
2757 bdrv_inc_in_flight(bs);
2758
2759 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2760 bdrv_is_sg(bs)) {
2761 goto early_exit;
2762 }
2763
2764 qemu_co_mutex_lock(&bs->reqs_lock);
2765 current_gen = atomic_read(&bs->write_gen);
2766
    /* Wait until any previous flushes are completed */
2768 while (bs->active_flush_req) {
2769 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
2770 }
2771
    /* Flushes reach this point in nondecreasing current_gen order.  */
2773 bs->active_flush_req = true;
2774 qemu_co_mutex_unlock(&bs->reqs_lock);
2775
    /* Write back all layers by calling one driver function */
2777 if (bs->drv->bdrv_co_flush) {
2778 ret = bs->drv->bdrv_co_flush(bs);
2779 goto out;
2780 }
2781
    /* Write back cached data to the OS even with cache=unsafe */
2783 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
2784 if (bs->drv->bdrv_co_flush_to_os) {
2785 ret = bs->drv->bdrv_co_flush_to_os(bs);
2786 if (ret < 0) {
2787 goto out;
2788 }
2789 }
2790
    /* But don't actually force it to the disk with cache=unsafe */
2792 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2793 goto flush_parent;
2794 }
2795
    /* Check if we really need to flush anything */
2797 if (bs->flushed_gen == current_gen) {
2798 goto flush_parent;
2799 }
2800
2801 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
2802 if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
2805 ret = -ENOMEDIUM;
2806 goto out;
2807 }
2808 if (bs->drv->bdrv_co_flush_to_disk) {
2809 ret = bs->drv->bdrv_co_flush_to_disk(bs);
2810 } else if (bs->drv->bdrv_aio_flush) {
2811 BlockAIOCB *acb;
2812 CoroutineIOCompletion co = {
2813 .coroutine = qemu_coroutine_self(),
2814 };
2815
2816 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2817 if (acb == NULL) {
2818 ret = -EIO;
2819 } else {
2820 qemu_coroutine_yield();
2821 ret = co.ret;
2822 }
2823 } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
2835 ret = 0;
2836 }
2837
2838 if (ret < 0) {
2839 goto out;
2840 }
2841
    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
2845flush_parent:
2846 ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
2847out:
    /* Notify any pending flushes that we have completed */
2849 if (ret == 0) {
2850 bs->flushed_gen = current_gen;
2851 }
2852
2853 qemu_co_mutex_lock(&bs->reqs_lock);
2854 bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
2856 qemu_co_queue_next(&bs->flush_queue);
2857 qemu_co_mutex_unlock(&bs->reqs_lock);
2858
2859early_exit:
2860 bdrv_dec_in_flight(bs);
2861 return ret;
2862}
2863
2864int bdrv_flush(BlockDriverState *bs)
2865{
2866 Coroutine *co;
2867 FlushCo flush_co = {
2868 .bs = bs,
2869 .ret = NOT_DONE,
2870 };
2871
2872 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
2874 bdrv_flush_co_entry(&flush_co);
2875 } else {
2876 co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
2877 bdrv_coroutine_enter(bs, co);
2878 BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
2879 }
2880
2881 return flush_co.ret;
2882}
2883
2884typedef struct DiscardCo {
2885 BdrvChild *child;
2886 int64_t offset;
2887 int64_t bytes;
2888 int ret;
2889} DiscardCo;
2890static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
2891{
2892 DiscardCo *rwco = opaque;
2893
2894 rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
2895 aio_wait_kick();
2896}
2897
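/*
 * Discard @bytes at @offset in @child, splitting the request into an
 * unaligned head, aligned middle pieces and an unaligned tail according
 * to the node's pdiscard limits.  A driver returning -ENOTSUP for a
 * fragment is not treated as an error, since discard is only advisory.
 */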
2898int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
2899 int64_t bytes)
2900{
2901 BdrvTrackedRequest req;
2902 int max_pdiscard, ret;
2903 int head, tail, align;
2904 BlockDriverState *bs = child->bs;
2905
2906 if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
2907 return -ENOMEDIUM;
2908 }
2909
2910 if (bdrv_has_readonly_bitmaps(bs)) {
2911 return -EPERM;
2912 }
2913
2914 if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
2915 return -EIO;
2916 }
2917
    /* Do nothing if disabled.  */
2919 if (!(bs->open_flags & BDRV_O_UNMAP)) {
2920 return 0;
2921 }
2922
2923 if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
2924 return 0;
2925 }
2926
    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
2932 align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
2933 assert(align % bs->bl.request_alignment == 0);
2934 head = offset % align;
2935 tail = (offset + bytes) % align;
2936
2937 bdrv_inc_in_flight(bs);
2938 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
2939
2940 ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
2941 if (ret < 0) {
2942 goto out;
2943 }
2944
2945 max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
2946 align);
2947 assert(max_pdiscard >= bs->bl.request_alignment);
2948
2949 while (bytes > 0) {
2950 int64_t num = bytes;
2951
2952 if (head) {
            /* Make small requests to get to alignment boundaries. */
2954 num = MIN(bytes, align - head);
2955 if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
2956 num %= bs->bl.request_alignment;
2957 }
2958 head = (head + num) % align;
2959 assert(num < max_pdiscard);
2960 } else if (tail) {
2961 if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
2963 num -= tail;
2964 } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
2965 tail > bs->bl.request_alignment) {
2966 tail %= bs->bl.request_alignment;
2967 num -= tail;
2968 }
2969 }
2970
2971 if (num > max_pdiscard) {
2972 num = max_pdiscard;
2973 }
2974
2975 if (!bs->drv) {
2976 ret = -ENOMEDIUM;
2977 goto out;
2978 }
2979 if (bs->drv->bdrv_co_pdiscard) {
2980 ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
2981 } else {
2982 BlockAIOCB *acb;
2983 CoroutineIOCompletion co = {
2984 .coroutine = qemu_coroutine_self(),
2985 };
2986
2987 acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
2988 bdrv_co_io_em_complete, &co);
2989 if (acb == NULL) {
2990 ret = -EIO;
2991 goto out;
2992 } else {
2993 qemu_coroutine_yield();
2994 ret = co.ret;
2995 }
2996 }
2997 if (ret && ret != -ENOTSUP) {
2998 goto out;
2999 }
3000
3001 offset += num;
3002 bytes -= num;
3003 }
3004 ret = 0;
3005out:
3006 bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
3007 tracked_request_end(&req);
3008 bdrv_dec_in_flight(bs);
3009 return ret;
3010}
3011
3012int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
3013{
3014 Coroutine *co;
3015 DiscardCo rwco = {
3016 .child = child,
3017 .offset = offset,
3018 .bytes = bytes,
3019 .ret = NOT_DONE,
3020 };
3021
3022 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
3024 bdrv_pdiscard_co_entry(&rwco);
3025 } else {
3026 co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
3027 bdrv_coroutine_enter(child->bs, co);
3028 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
3029 }
3030
3031 return rwco.ret;
3032}
3033
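/* Forward an ioctl to the driver, via either its coroutine or AIO callback */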
3034int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
3035{
3036 BlockDriver *drv = bs->drv;
3037 CoroutineIOCompletion co = {
3038 .coroutine = qemu_coroutine_self(),
3039 };
3040 BlockAIOCB *acb;
3041
3042 bdrv_inc_in_flight(bs);
3043 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
3044 co.ret = -ENOTSUP;
3045 goto out;
3046 }
3047
3048 if (drv->bdrv_co_ioctl) {
3049 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
3050 } else {
3051 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
3052 if (!acb) {
3053 co.ret = -ENOTSUP;
3054 goto out;
3055 }
3056 qemu_coroutine_yield();
3057 }
3058out:
3059 bdrv_dec_in_flight(bs);
3060 return co.ret;
3061}
3062
3063void *qemu_blockalign(BlockDriverState *bs, size_t size)
3064{
3065 return qemu_memalign(bdrv_opt_mem_align(bs), size);
3066}
3067
3068void *qemu_blockalign0(BlockDriverState *bs, size_t size)
3069{
3070 return memset(qemu_blockalign(bs, size), 0, size);
3071}
3072
3073void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
3074{
3075 size_t align = bdrv_opt_mem_align(bs);
3076
    /* Ensure that NULL is never returned on success */
3078 assert(align > 0);
3079 if (size == 0) {
3080 size = align;
3081 }
3082
3083 return qemu_try_memalign(align, size);
3084}
3085
3086void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
3087{
3088 void *mem = qemu_try_blockalign(bs, size);
3089
3090 if (mem) {
3091 memset(mem, 0, size);
3092 }
3093
3094 return mem;
3095}
3096
/*
 * Check if all memory in this vector is sector aligned.
 */
3100bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
3101{
3102 int i;
3103 size_t alignment = bdrv_min_mem_align(bs);
3104
3105 for (i = 0; i < qiov->niov; i++) {
3106 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
3107 return false;
3108 }
3109 if (qiov->iov[i].iov_len % alignment) {
3110 return false;
3111 }
3112 }
3113
3114 return true;
3115}
3116
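/* Register a notifier that is invoked before each write request is handled */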
3117void bdrv_add_before_write_notifier(BlockDriverState *bs,
3118 NotifierWithReturn *notifier)
3119{
3120 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
3121}
3122
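/*
 * Plug I/O submission for the whole subtree: children are plugged first,
 * and the driver's own plug callback runs only when this node's plug
 * counter goes from zero to one.  Paired with bdrv_io_unplug().
 */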
3123void bdrv_io_plug(BlockDriverState *bs)
3124{
3125 BdrvChild *child;
3126
3127 QLIST_FOREACH(child, &bs->children, next) {
3128 bdrv_io_plug(child->bs);
3129 }
3130
3131 if (atomic_fetch_inc(&bs->io_plugged) == 0) {
3132 BlockDriver *drv = bs->drv;
3133 if (drv && drv->bdrv_io_plug) {
3134 drv->bdrv_io_plug(bs);
3135 }
3136 }
3137}
3138
3139void bdrv_io_unplug(BlockDriverState *bs)
3140{
3141 BdrvChild *child;
3142
3143 assert(bs->io_plugged);
3144 if (atomic_fetch_dec(&bs->io_plugged) == 1) {
3145 BlockDriver *drv = bs->drv;
3146 if (drv && drv->bdrv_io_unplug) {
3147 drv->bdrv_io_unplug(bs);
3148 }
3149 }
3150
3151 QLIST_FOREACH(child, &bs->children, next) {
3152 bdrv_io_unplug(child->bs);
3153 }
3154}
3155
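/*
 * Advertise a host buffer to this node's driver and to all children, so
 * that drivers which support it can pre-register the memory for I/O.
 * Undone by bdrv_unregister_buf().
 */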
3156void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
3157{
3158 BdrvChild *child;
3159
3160 if (bs->drv && bs->drv->bdrv_register_buf) {
3161 bs->drv->bdrv_register_buf(bs, host, size);
3162 }
3163 QLIST_FOREACH(child, &bs->children, next) {
3164 bdrv_register_buf(child->bs, host, size);
3165 }
3166}
3167
3168void bdrv_unregister_buf(BlockDriverState *bs, void *host)
3169{
3170 BdrvChild *child;
3171
3172 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3173 bs->drv->bdrv_unregister_buf(bs, host);
3174 }
3175 QLIST_FOREACH(child, &bs->children, next) {
3176 bdrv_unregister_buf(child->bs, host);
3177 }
3178}
3179
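/*
 * Common implementation of bdrv_co_copy_range_from() and _to(): validate
 * both ends, turn zero writes into bdrv_co_pwrite_zeroes(), and then call
 * the source driver's copy_range_from callback (with read tracking) or
 * the destination driver's copy_range_to callback (with write tracking),
 * depending on @recurse_src.
 */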
3180static int coroutine_fn bdrv_co_copy_range_internal(
3181 BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
3182 uint64_t dst_offset, uint64_t bytes,
3183 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
3184 bool recurse_src)
3185{
3186 BdrvTrackedRequest req;
3187 int ret;
3188
    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
3190 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
3191 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
3192
3193 if (!dst || !dst->bs) {
3194 return -ENOMEDIUM;
3195 }
3196 ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
3197 if (ret) {
3198 return ret;
3199 }
3200 if (write_flags & BDRV_REQ_ZERO_WRITE) {
3201 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
3202 }
3203
3204 if (!src || !src->bs) {
3205 return -ENOMEDIUM;
3206 }
3207 ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
3208 if (ret) {
3209 return ret;
3210 }
3211
3212 if (!src->bs->drv->bdrv_co_copy_range_from
3213 || !dst->bs->drv->bdrv_co_copy_range_to
3214 || src->bs->encrypted || dst->bs->encrypted) {
3215 return -ENOTSUP;
3216 }
3217
3218 if (recurse_src) {
3219 bdrv_inc_in_flight(src->bs);
3220 tracked_request_begin(&req, src->bs, src_offset, bytes,
3221 BDRV_TRACKED_READ);
3222
        /* BDRV_REQ_SERIALISING is only for write operation */
3224 assert(!(read_flags & BDRV_REQ_SERIALISING));
3225 if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
3226 bdrv_wait_serialising_requests(&req);
3227 }
3228
3229 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
3230 src, src_offset,
3231 dst, dst_offset,
3232 bytes,
3233 read_flags, write_flags);
3234
3235 tracked_request_end(&req);
3236 bdrv_dec_in_flight(src->bs);
3237 } else {
3238 bdrv_inc_in_flight(dst->bs);
3239 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3240 BDRV_TRACKED_WRITE);
3241 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3242 write_flags);
3243 if (!ret) {
3244 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3245 src, src_offset,
3246 dst, dst_offset,
3247 bytes,
3248 read_flags, write_flags);
3249 }
3250 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3251 tracked_request_end(&req);
3252 bdrv_dec_in_flight(dst->bs);
3253 }
3254
3255 return ret;
3256}
3257
/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
3262int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
3263 BdrvChild *dst, uint64_t dst_offset,
3264 uint64_t bytes,
3265 BdrvRequestFlags read_flags,
3266 BdrvRequestFlags write_flags)
3267{
3268 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3269 read_flags, write_flags);
3270 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3271 bytes, read_flags, write_flags, true);
3272}
3273
/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
3278int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
3279 BdrvChild *dst, uint64_t dst_offset,
3280 uint64_t bytes,
3281 BdrvRequestFlags read_flags,
3282 BdrvRequestFlags write_flags)
3283{
3284 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3285 read_flags, write_flags);
3286 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3287 bytes, read_flags, write_flags, false);
3288}
3289
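/*
 * Copy @bytes from @src at @src_offset to @dst at @dst_offset using copy
 * offloading when both drivers support it; this is the copy-from entry
 * point, see bdrv_co_copy_range_internal() for the details.
 */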
3290int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
3291 BdrvChild *dst, uint64_t dst_offset,
3292 uint64_t bytes, BdrvRequestFlags read_flags,
3293 BdrvRequestFlags write_flags)
3294{
3295 return bdrv_co_copy_range_from(src, src_offset,
3296 dst, dst_offset,
3297 bytes, read_flags, write_flags);
3298}
3299
3300static void bdrv_parent_cb_resize(BlockDriverState *bs)
3301{
3302 BdrvChild *c;
3303 QLIST_FOREACH(c, &bs->parents, next_parent) {
3304 if (c->role->resize) {
3305 c->role->resize(c);
3306 }
3307 }
3308}
3309
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as the given 'offset'.
 */
3317int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
3318 PreallocMode prealloc, Error **errp)
3319{
3320 BlockDriverState *bs = child->bs;
3321 BlockDriver *drv = bs->drv;
3322 BdrvTrackedRequest req;
3323 int64_t old_size, new_bytes;
3324 int ret;
3325
3326
    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
3328 if (!drv) {
3329 error_setg(errp, "No medium inserted");
3330 return -ENOMEDIUM;
3331 }
3332 if (offset < 0) {
3333 error_setg(errp, "Image size cannot be negative");
3334 return -EINVAL;
3335 }
3336
3337 old_size = bdrv_getlength(bs);
3338 if (old_size < 0) {
3339 error_setg_errno(errp, -old_size, "Failed to get old image size");
3340 return old_size;
3341 }
3342
3343 if (offset > old_size) {
3344 new_bytes = offset - old_size;
3345 } else {
3346 new_bytes = 0;
3347 }
3348
3349 bdrv_inc_in_flight(bs);
3350 tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
3351 BDRV_TRACKED_TRUNCATE);
3352
    /* If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
3356 if (new_bytes) {
3357 bdrv_mark_request_serialising(&req, 1);
3358 }
3359 if (bs->read_only) {
3360 error_setg(errp, "Image is read-only");
3361 ret = -EACCES;
3362 goto out;
3363 }
3364 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
3365 0);
3366 if (ret < 0) {
3367 error_setg_errno(errp, -ret,
3368 "Failed to prepare request for truncation");
3369 goto out;
3370 }
3371
3372 if (drv->bdrv_co_truncate) {
3373 ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
3374 } else if (bs->file && drv->is_filter) {
3375 ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
3376 } else {
3377 error_setg(errp, "Image format driver does not support resize");
3378 ret = -ENOTSUP;
3379 goto out;
3380 }
3381 if (ret < 0) {
3382 goto out;
3383 }
3384
3385 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3386 if (ret < 0) {
3387 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3388 } else {
3389 offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3390 }
3391
    /* It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. */
3394 bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
3395
3396out:
3397 tracked_request_end(&req);
3398 bdrv_dec_in_flight(bs);
3399
3400 return ret;
3401}
3402
3403typedef struct TruncateCo {
3404 BdrvChild *child;
3405 int64_t offset;
3406 bool exact;
3407 PreallocMode prealloc;
3408 Error **errp;
3409 int ret;
3410} TruncateCo;
3411
3412static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
3413{
3414 TruncateCo *tco = opaque;
3415 tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
3416 tco->prealloc, tco->errp);
3417 aio_wait_kick();
3418}
3419
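/*
 * Synchronous wrapper around bdrv_co_truncate(); usable both inside and
 * outside coroutine context.
 */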
3420int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
3421 PreallocMode prealloc, Error **errp)
3422{
3423 Coroutine *co;
3424 TruncateCo tco = {
3425 .child = child,
3426 .offset = offset,
3427 .exact = exact,
3428 .prealloc = prealloc,
3429 .errp = errp,
3430 .ret = NOT_DONE,
3431 };
3432
3433 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
3435 bdrv_truncate_co_entry(&tco);
3436 } else {
3437 co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
3438 bdrv_coroutine_enter(child->bs, co);
3439 BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
3440 }
3441
3442 return tco.ret;
3443}
3444