#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

#define NOT_DONE 0x7fffffff

#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);

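/*
 * Quiesce all parents of @bs except @ignore: each parent's ->drained_begin()
 * callback is invoked so it stops submitting new requests to @bs.  With
 * @ignore_bds_parents set, parents that are themselves BlockDriverStates are
 * skipped (they are covered by the recursive drain instead).
 */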
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->role->drained_end) {
        c->role->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    BDRV_POLL_WHILE(c->bs, atomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->role->drained_poll) {
        return c->role->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    c->parent_quiesce_counter++;
    if (c->role->drained_begin) {
        c->role->drained_begin(c);
    }
    if (poll) {
        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
    }
}

static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv) ? 1 : 512;

    if (bs->file) {
        bdrv_refresh_limits(bs->file->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
    } else {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = getpagesize();

        bs->bl.max_iov = IOV_MAX;
    }

    if (bs->backing) {
        bdrv_refresh_limits(bs->backing->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
    }

    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

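/*
 * The copy-on-read flag is a counter rather than a plain bool, so several
 * users can enable it independently; it stays enabled until every user has
 * disabled it again.
 */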
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    atomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = atomic_fetch_dec(&bs->copy_on_read);
    assert(old >= 1);
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    atomic_mb_set(&data->done, true);
    if (!data->begin) {
        atomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}

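/*
 * Invoke the driver's bdrv_co_drain_begin/end callback in coroutine context;
 * completion is tracked through bs->in_flight and, for the end case, through
 * *drained_end_counter.
 */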
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        atomic_inc(drained_end_counter);
    }

    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}

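/*
 * Returns true if the node (or, with @recursive, any of its children) still
 * has requests in flight or parents that report activity, i.e. the drained
 * section has to keep polling.
 */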
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (atomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);

        if (ctx == co_ctx) {
            aio_context_acquire(ctx);
        }
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        if (ctx == co_ctx) {
            aio_context_release(ctx);
        }
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;

    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = qemu_coroutine_self(),
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                            bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();

    assert(data.done);
}

void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    assert(!qemu_in_coroutine());

    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}

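/*
 * End a quiesced section started by bdrv_do_drained_begin().
 *
 * This function does not poll on its own: every deferred ->drained_end()
 * callback increments *drained_end_counter and decrements it again once it
 * has completed, so the caller must poll until the counter reaches zero
 * before relying on the node being fully un-quiesced.
 */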
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, atomic_read(&drained_end_counter) > 0);
}
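/*
 * bdrv_co_drain() / bdrv_drain() wait for all pending requests on @bs to
 * complete by entering and immediately leaving a drained section.
 *
 * Illustrative sketch (not part of the original file): a caller that needs
 * the node quiescent while it rewrites metadata could use the begin/end
 * pair directly, e.g.
 *
 *     bdrv_drained_begin(bs);   // stop new requests, wait for in-flight ones
 *     update_metadata(bs);      // hypothetical helper, safe while drained
 *     bdrv_drained_end(bs);     // resume request processing
 */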
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
    assert(qemu_in_coroutine());
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

void bdrv_drain(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(atomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}
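/*
 * Quiesce every BlockDriverState in every AioContext.  New external requests
 * are blocked and the function only returns once all in-flight requests have
 * completed; bdrv_drain_all_end() lifts the quiesced state again.
 */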
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, atomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

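/*
 * Tracked requests: every in-flight read, write, discard or truncate is
 * registered in bs->tracked_requests so that overlapping serialising
 * requests can wait for each other.  tracked_request_end() removes the
 * request again and wakes up any waiters.
 */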
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        atomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  uint64_t bytes,
                                  enum BdrvTrackedRequestType type)
{
    assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

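/*
 * Widen @req's overlap range to @align and mark it serialising, so that any
 * overlapping request started afterwards has to wait for it to complete.
 */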
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                             - overlap_offset;

    if (!req->serialising) {
        atomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}
711
712static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
713{
714
715
716
717
718
719
720 return req->serialising && (req->offset == req->overlap_offset) &&
721 (req->bytes == req->overlap_bytes);
722}
723
724
725
726
727
728BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
729{
730 BdrvTrackedRequest *req;
731 Coroutine *self = qemu_coroutine_self();
732
733 QLIST_FOREACH(req, &bs->tracked_requests, list) {
734 if (req->co == self) {
735 return req;
736 }
737 }
738
739 return NULL;
740}
741
742
743
744
745void bdrv_round_to_clusters(BlockDriverState *bs,
746 int64_t offset, int64_t bytes,
747 int64_t *cluster_offset,
748 int64_t *cluster_bytes)
749{
750 BlockDriverInfo bdi;
751
752 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
753 *cluster_offset = offset;
754 *cluster_bytes = bytes;
755 } else {
756 int64_t c = bdi.cluster_size;
757 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
758 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
759 }
760}
761
762static int bdrv_get_cluster_size(BlockDriverState *bs)
763{
764 BlockDriverInfo bdi;
765 int ret;
766
767 ret = bdrv_get_info(bs, &bdi);
768 if (ret < 0 || bdi.cluster_size == 0) {
769 return bs->bl.request_alignment;
770 } else {
771 return bdi.cluster_size;
772 }
773}
774
775static bool tracked_request_overlaps(BdrvTrackedRequest *req,
776 int64_t offset, uint64_t bytes)
777{
778
779 if (offset >= req->overlap_offset + req->overlap_bytes) {
780 return false;
781 }
782
783 if (req->overlap_offset >= offset + bytes) {
784 return false;
785 }
786 return true;
787}
788
789void bdrv_inc_in_flight(BlockDriverState *bs)
790{
791 atomic_inc(&bs->in_flight);
792}
793
794void bdrv_wakeup(BlockDriverState *bs)
795{
796 aio_wait_kick();
797}
798
799void bdrv_dec_in_flight(BlockDriverState *bs)
800{
801 atomic_dec(&bs->in_flight);
802 bdrv_wakeup(bs);
803}
804
805bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
806{
807 BlockDriverState *bs = self->bs;
808 BdrvTrackedRequest *req;
809 bool retry;
810 bool waited = false;
811
812 if (!atomic_read(&bs->serialising_in_flight)) {
813 return false;
814 }
815
816 do {
817 retry = false;
818 qemu_co_mutex_lock(&bs->reqs_lock);
819 QLIST_FOREACH(req, &bs->tracked_requests, list) {
820 if (req == self || (!req->serialising && !self->serialising)) {
821 continue;
822 }
823 if (tracked_request_overlaps(req, self->overlap_offset,
824 self->overlap_bytes))
825 {
826
827
828
829
830 assert(qemu_coroutine_self() != req->co);
831
832
833
834
835 if (!req->waiting_for) {
836 self->waiting_for = req;
837 qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
838 self->waiting_for = NULL;
839 retry = true;
840 waited = true;
841 break;
842 }
843 }
844 }
845 qemu_co_mutex_unlock(&bs->reqs_lock);
846 } while (retry);
847
848 return waited;
849}
850
851static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
852 size_t size)
853{
854 if (size > BDRV_REQUEST_MAX_BYTES) {
855 return -EIO;
856 }
857
858 if (!bdrv_is_inserted(bs)) {
859 return -ENOMEDIUM;
860 }
861
862 if (offset < 0) {
863 return -EIO;
864 }
865
866 return 0;
867}
868
869typedef struct RwCo {
870 BdrvChild *child;
871 int64_t offset;
872 QEMUIOVector *qiov;
873 bool is_write;
874 int ret;
875 BdrvRequestFlags flags;
876} RwCo;
877
878static void coroutine_fn bdrv_rw_co_entry(void *opaque)
879{
880 RwCo *rwco = opaque;
881
882 if (!rwco->is_write) {
883 rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
884 rwco->qiov->size, rwco->qiov,
885 rwco->flags);
886 } else {
887 rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
888 rwco->qiov->size, rwco->qiov,
889 rwco->flags);
890 }
891 aio_wait_kick();
892}
893
894
895
896
897static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
898 QEMUIOVector *qiov, bool is_write,
899 BdrvRequestFlags flags)
900{
901 Coroutine *co;
902 RwCo rwco = {
903 .child = child,
904 .offset = offset,
905 .qiov = qiov,
906 .is_write = is_write,
907 .ret = NOT_DONE,
908 .flags = flags,
909 };
910
911 if (qemu_in_coroutine()) {
912
913 bdrv_rw_co_entry(&rwco);
914 } else {
915 co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
916 bdrv_coroutine_enter(child->bs, co);
917 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
918 }
919 return rwco.ret;
920}
921
922int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
923 int bytes, BdrvRequestFlags flags)
924{
925 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
926
927 return bdrv_prwv_co(child, offset, &qiov, true,
928 BDRV_REQ_ZERO_WRITE | flags);
929}
930
931
932
933
934
935
936
937
938
939
940int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
941{
942 int ret;
943 int64_t target_size, bytes, offset = 0;
944 BlockDriverState *bs = child->bs;
945
946 target_size = bdrv_getlength(bs);
947 if (target_size < 0) {
948 return target_size;
949 }
950
951 for (;;) {
952 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
953 if (bytes <= 0) {
954 return 0;
955 }
956 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
957 if (ret < 0) {
958 return ret;
959 }
960 if (ret & BDRV_BLOCK_ZERO) {
961 offset += bytes;
962 continue;
963 }
964 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
965 if (ret < 0) {
966 return ret;
967 }
968 offset += bytes;
969 }
970}
971
972int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
973{
974 int ret;
975
976 ret = bdrv_prwv_co(child, offset, qiov, false, 0);
977 if (ret < 0) {
978 return ret;
979 }
980
981 return qiov->size;
982}
983
984
985int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
986{
987 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
988
989 if (bytes < 0) {
990 return -EINVAL;
991 }
992
993 return bdrv_preadv(child, offset, &qiov);
994}
995
996int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
997{
998 int ret;
999
1000 ret = bdrv_prwv_co(child, offset, qiov, true, 0);
1001 if (ret < 0) {
1002 return ret;
1003 }
1004
1005 return qiov->size;
1006}
1007
1008
1009
1010
1011
1012
1013
1014int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
1015{
1016 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1017
1018 if (bytes < 0) {
1019 return -EINVAL;
1020 }
1021
1022 return bdrv_pwritev(child, offset, &qiov);
1023}
1024
1025
1026
1027
1028
1029
1030
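/*
 * Write and then flush: returns 0 only once the data has been written and
 * bdrv_flush() has succeeded, so the caller may assume the update is stable
 * on the underlying storage.
 */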
1031int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
1032 const void *buf, int count)
1033{
1034 int ret;
1035
1036 ret = bdrv_pwrite(child, offset, buf, count);
1037 if (ret < 0) {
1038 return ret;
1039 }
1040
1041 ret = bdrv_flush(child->bs);
1042 if (ret < 0) {
1043 return ret;
1044 }
1045
1046 return 0;
1047}
1048
1049typedef struct CoroutineIOCompletion {
1050 Coroutine *coroutine;
1051 int ret;
1052} CoroutineIOCompletion;
1053
1054static void bdrv_co_io_em_complete(void *opaque, int ret)
1055{
1056 CoroutineIOCompletion *co = opaque;
1057
1058 co->ret = ret;
1059 aio_co_wake(co->coroutine);
1060}
1061
1062static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1063 uint64_t offset, uint64_t bytes,
1064 QEMUIOVector *qiov, int flags)
1065{
1066 BlockDriver *drv = bs->drv;
1067 int64_t sector_num;
1068 unsigned int nb_sectors;
1069
1070 assert(!(flags & ~BDRV_REQ_MASK));
1071 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1072
1073 if (!drv) {
1074 return -ENOMEDIUM;
1075 }
1076
1077 if (drv->bdrv_co_preadv) {
1078 return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1079 }
1080
1081 if (drv->bdrv_aio_preadv) {
1082 BlockAIOCB *acb;
1083 CoroutineIOCompletion co = {
1084 .coroutine = qemu_coroutine_self(),
1085 };
1086
1087 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1088 bdrv_co_io_em_complete, &co);
1089 if (acb == NULL) {
1090 return -EIO;
1091 } else {
1092 qemu_coroutine_yield();
1093 return co.ret;
1094 }
1095 }
1096
1097 sector_num = offset >> BDRV_SECTOR_BITS;
1098 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1099
1100 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1101 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1102 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1103 assert(drv->bdrv_co_readv);
1104
1105 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1106}
1107
1108static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1109 uint64_t offset, uint64_t bytes,
1110 QEMUIOVector *qiov, int flags)
1111{
1112 BlockDriver *drv = bs->drv;
1113 int64_t sector_num;
1114 unsigned int nb_sectors;
1115 int ret;
1116
1117 assert(!(flags & ~BDRV_REQ_MASK));
1118 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1119
1120 if (!drv) {
1121 return -ENOMEDIUM;
1122 }
1123
1124 if (drv->bdrv_co_pwritev) {
1125 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1126 flags & bs->supported_write_flags);
1127 flags &= ~bs->supported_write_flags;
1128 goto emulate_flags;
1129 }
1130
1131 if (drv->bdrv_aio_pwritev) {
1132 BlockAIOCB *acb;
1133 CoroutineIOCompletion co = {
1134 .coroutine = qemu_coroutine_self(),
1135 };
1136
1137 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1138 flags & bs->supported_write_flags,
1139 bdrv_co_io_em_complete, &co);
1140 flags &= ~bs->supported_write_flags;
1141 if (acb == NULL) {
1142 ret = -EIO;
1143 } else {
1144 qemu_coroutine_yield();
1145 ret = co.ret;
1146 }
1147 goto emulate_flags;
1148 }
1149
1150 sector_num = offset >> BDRV_SECTOR_BITS;
1151 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1152
1153 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1154 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1155 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1156
1157 assert(drv->bdrv_co_writev);
1158 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1159 flags & bs->supported_write_flags);
1160 flags &= ~bs->supported_write_flags;
1161
1162emulate_flags:
1163 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1164 ret = bdrv_co_flush(bs);
1165 }
1166
1167 return ret;
1168}
1169
1170static int coroutine_fn
1171bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
1172 uint64_t bytes, QEMUIOVector *qiov)
1173{
1174 BlockDriver *drv = bs->drv;
1175
1176 if (!drv) {
1177 return -ENOMEDIUM;
1178 }
1179
1180 if (!drv->bdrv_co_pwritev_compressed) {
1181 return -ENOTSUP;
1182 }
1183
1184 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1185}
1186
1187static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1188 int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
1189{
1190 BlockDriverState *bs = child->bs;
1191
1192
1193
1194
1195
1196
1197 void *bounce_buffer;
1198
1199 BlockDriver *drv = bs->drv;
1200 QEMUIOVector local_qiov;
1201 int64_t cluster_offset;
1202 int64_t cluster_bytes;
1203 size_t skip_bytes;
1204 int ret;
1205 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1206 BDRV_REQUEST_MAX_BYTES);
1207 unsigned int progress = 0;
1208
1209 if (!drv) {
1210 return -ENOMEDIUM;
1211 }
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1228 skip_bytes = offset - cluster_offset;
1229
1230 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1231 cluster_offset, cluster_bytes);
1232
1233 bounce_buffer = qemu_try_blockalign(bs,
1234 MIN(MIN(max_transfer, cluster_bytes),
1235 MAX_BOUNCE_BUFFER));
1236 if (bounce_buffer == NULL) {
1237 ret = -ENOMEM;
1238 goto err;
1239 }
1240
1241 while (cluster_bytes) {
1242 int64_t pnum;
1243
1244 ret = bdrv_is_allocated(bs, cluster_offset,
1245 MIN(cluster_bytes, max_transfer), &pnum);
1246 if (ret < 0) {
1247
1248
1249
1250
1251 pnum = MIN(cluster_bytes, max_transfer);
1252 }
1253
1254
1255 if (ret == 0 && pnum == 0) {
1256 assert(progress >= bytes);
1257 break;
1258 }
1259
1260 assert(skip_bytes < pnum);
1261
1262 if (ret <= 0) {
1263
1264 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1265 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1266
1267 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1268 &local_qiov, 0);
1269 if (ret < 0) {
1270 goto err;
1271 }
1272
1273 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1274 if (drv->bdrv_co_pwrite_zeroes &&
1275 buffer_is_zero(bounce_buffer, pnum)) {
1276
1277
1278
1279 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1280 BDRV_REQ_WRITE_UNCHANGED);
1281 } else {
1282
1283
1284
1285 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1286 &local_qiov,
1287 BDRV_REQ_WRITE_UNCHANGED);
1288 }
1289
1290 if (ret < 0) {
1291
1292
1293
1294
1295
1296 goto err;
1297 }
1298
1299 qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
1300 pnum - skip_bytes);
1301 } else {
1302
1303 qemu_iovec_init(&local_qiov, qiov->niov);
1304 qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
1305 ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
1306 &local_qiov, 0);
1307 qemu_iovec_destroy(&local_qiov);
1308 if (ret < 0) {
1309 goto err;
1310 }
1311 }
1312
1313 cluster_offset += pnum;
1314 cluster_bytes -= pnum;
1315 progress += pnum - skip_bytes;
1316 skip_bytes = 0;
1317 }
1318 ret = 0;
1319
1320err:
1321 qemu_vfree(bounce_buffer);
1322 return ret;
1323}
1324
1325
1326
1327
1328
1329
1330static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1331 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1332 int64_t align, QEMUIOVector *qiov, int flags)
1333{
1334 BlockDriverState *bs = child->bs;
1335 int64_t total_bytes, max_bytes;
1336 int ret = 0;
1337 uint64_t bytes_remaining = bytes;
1338 int max_transfer;
1339
1340 assert(is_power_of_2(align));
1341 assert((offset & (align - 1)) == 0);
1342 assert((bytes & (align - 1)) == 0);
1343 assert(!qiov || bytes == qiov->size);
1344 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1345 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1346 align);
1347
1348
1349
1350
1351
1352 assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
1353
1354
1355 if (flags & BDRV_REQ_COPY_ON_READ) {
1356
1357
1358
1359
1360
1361 bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
1362 }
1363
1364
1365 assert(!(flags & BDRV_REQ_SERIALISING));
1366
1367 if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1368 bdrv_wait_serialising_requests(req);
1369 }
1370
1371 if (flags & BDRV_REQ_COPY_ON_READ) {
1372 int64_t pnum;
1373
1374 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1375 if (ret < 0) {
1376 goto out;
1377 }
1378
1379 if (!ret || pnum != bytes) {
1380 ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
1381 goto out;
1382 }
1383 }
1384
1385
1386 total_bytes = bdrv_getlength(bs);
1387 if (total_bytes < 0) {
1388 ret = total_bytes;
1389 goto out;
1390 }
1391
1392 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1393 if (bytes <= max_bytes && bytes <= max_transfer) {
1394 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
1395 goto out;
1396 }
1397
1398 while (bytes_remaining) {
1399 int num;
1400
1401 if (max_bytes) {
1402 QEMUIOVector local_qiov;
1403
1404 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1405 assert(num);
1406 qemu_iovec_init(&local_qiov, qiov->niov);
1407 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1408
1409 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1410 num, &local_qiov, 0);
1411 max_bytes -= num;
1412 qemu_iovec_destroy(&local_qiov);
1413 } else {
1414 num = bytes_remaining;
1415 ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
1416 bytes_remaining);
1417 }
1418 if (ret < 0) {
1419 goto out;
1420 }
1421 bytes_remaining -= num;
1422 }
1423
1424out:
1425 return ret < 0 ? ret : 0;
1426}
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
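/*
 * Request padding: unaligned requests are grown to the node's
 * request_alignment.  @buf covers the aligned head and/or tail area, @head
 * and @tail give how many padding bytes were added before and after the
 * caller's range, and @merge_reads is set when head and tail fit into a
 * single buffer so one read-modify-write read suffices.  @local_qiov is the
 * padded vector handed to the driver.
 */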
1450typedef struct BdrvRequestPadding {
1451 uint8_t *buf;
1452 size_t buf_len;
1453 uint8_t *tail_buf;
1454 size_t head;
1455 size_t tail;
1456 bool merge_reads;
1457 QEMUIOVector local_qiov;
1458} BdrvRequestPadding;
1459
1460static bool bdrv_init_padding(BlockDriverState *bs,
1461 int64_t offset, int64_t bytes,
1462 BdrvRequestPadding *pad)
1463{
1464 uint64_t align = bs->bl.request_alignment;
1465 size_t sum;
1466
1467 memset(pad, 0, sizeof(*pad));
1468
1469 pad->head = offset & (align - 1);
1470 pad->tail = ((offset + bytes) & (align - 1));
1471 if (pad->tail) {
1472 pad->tail = align - pad->tail;
1473 }
1474
1475 if ((!pad->head && !pad->tail) || !bytes) {
1476 return false;
1477 }
1478
1479 sum = pad->head + bytes + pad->tail;
1480 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1481 pad->buf = qemu_blockalign(bs, pad->buf_len);
1482 pad->merge_reads = sum == pad->buf_len;
1483 if (pad->tail) {
1484 pad->tail_buf = pad->buf + pad->buf_len - align;
1485 }
1486
1487 return true;
1488}
1489
1490static int bdrv_padding_rmw_read(BdrvChild *child,
1491 BdrvTrackedRequest *req,
1492 BdrvRequestPadding *pad,
1493 bool zero_middle)
1494{
1495 QEMUIOVector local_qiov;
1496 BlockDriverState *bs = child->bs;
1497 uint64_t align = bs->bl.request_alignment;
1498 int ret;
1499
1500 assert(req->serialising && pad->buf);
1501
1502 if (pad->head || pad->merge_reads) {
1503 uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
1504
1505 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1506
1507 if (pad->head) {
1508 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1509 }
1510 if (pad->merge_reads && pad->tail) {
1511 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1512 }
1513 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1514 align, &local_qiov, 0);
1515 if (ret < 0) {
1516 return ret;
1517 }
1518 if (pad->head) {
1519 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1520 }
1521 if (pad->merge_reads && pad->tail) {
1522 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1523 }
1524
1525 if (pad->merge_reads) {
1526 goto zero_mem;
1527 }
1528 }
1529
1530 if (pad->tail) {
1531 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1532
1533 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1534 ret = bdrv_aligned_preadv(
1535 child, req,
1536 req->overlap_offset + req->overlap_bytes - align,
1537 align, align, &local_qiov, 0);
1538 if (ret < 0) {
1539 return ret;
1540 }
1541 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1542 }
1543
1544zero_mem:
1545 if (zero_middle) {
1546 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1547 }
1548
1549 return 0;
1550}
1551
1552static void bdrv_padding_destroy(BdrvRequestPadding *pad)
1553{
1554 if (pad->buf) {
1555 qemu_vfree(pad->buf);
1556 qemu_iovec_destroy(&pad->local_qiov);
1557 }
1558}
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
1573 int64_t *offset, unsigned int *bytes,
1574 BdrvRequestPadding *pad)
1575{
1576 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
1577 return false;
1578 }
1579
1580 qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
1581 *qiov, 0, *bytes,
1582 pad->buf + pad->buf_len - pad->tail, pad->tail);
1583 *bytes += pad->head + pad->tail;
1584 *offset -= pad->head;
1585 *qiov = &pad->local_qiov;
1586
1587 return true;
1588}
1589
1590int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1591 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1592 BdrvRequestFlags flags)
1593{
1594 BlockDriverState *bs = child->bs;
1595 BdrvTrackedRequest req;
1596 BdrvRequestPadding pad;
1597 int ret;
1598
1599 trace_bdrv_co_preadv(bs, offset, bytes, flags);
1600
1601 ret = bdrv_check_byte_request(bs, offset, bytes);
1602 if (ret < 0) {
1603 return ret;
1604 }
1605
1606 bdrv_inc_in_flight(bs);
1607
1608
1609 if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
1610 flags |= BDRV_REQ_COPY_ON_READ;
1611 }
1612
1613 bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
1614
1615 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1616 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1617 bs->bl.request_alignment,
1618 qiov, flags);
1619 tracked_request_end(&req);
1620 bdrv_dec_in_flight(bs);
1621
1622 bdrv_padding_destroy(&pad);
1623
1624 return ret;
1625}
1626
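/*
 * Zero a byte range, preferring the driver's .bdrv_co_pwrite_zeroes callback
 * and falling back to writing an explicit zeroed bounce buffer.  Unaligned
 * head and tail fragments are carved off so the bulk of the request can use
 * the (potentially much larger) pwrite_zeroes alignment, and a final flush
 * is issued when FUA was requested but the fallback path cannot honour it
 * natively.
 */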
1627static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1628 int64_t offset, int bytes, BdrvRequestFlags flags)
1629{
1630 BlockDriver *drv = bs->drv;
1631 QEMUIOVector qiov;
1632 void *buf = NULL;
1633 int ret = 0;
1634 bool need_flush = false;
1635 int head = 0;
1636 int tail = 0;
1637
1638 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
1639 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1640 bs->bl.request_alignment);
1641 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1642
1643 if (!drv) {
1644 return -ENOMEDIUM;
1645 }
1646
1647 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1648 return -ENOTSUP;
1649 }
1650
1651 assert(alignment % bs->bl.request_alignment == 0);
1652 head = offset % alignment;
1653 tail = (offset + bytes) % alignment;
1654 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1655 assert(max_write_zeroes >= bs->bl.request_alignment);
1656
1657 while (bytes > 0 && !ret) {
1658 int num = bytes;
1659
1660
1661
1662
1663
1664 if (head) {
1665
1666
1667
1668 num = MIN(MIN(bytes, max_transfer), alignment - head);
1669 head = (head + num) % alignment;
1670 assert(num < max_write_zeroes);
1671 } else if (tail && num > alignment) {
1672
1673 num -= tail;
1674 }
1675
1676
1677 if (num > max_write_zeroes) {
1678 num = max_write_zeroes;
1679 }
1680
1681 ret = -ENOTSUP;
1682
1683 if (drv->bdrv_co_pwrite_zeroes) {
1684 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1685 flags & bs->supported_zero_flags);
1686 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1687 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1688 need_flush = true;
1689 }
1690 } else {
1691 assert(!bs->supported_zero_flags);
1692 }
1693
1694 if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
1695
1696 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1697
1698 if ((flags & BDRV_REQ_FUA) &&
1699 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1700
1701
1702 write_flags &= ~BDRV_REQ_FUA;
1703 need_flush = true;
1704 }
1705 num = MIN(num, max_transfer);
1706 if (buf == NULL) {
1707 buf = qemu_try_blockalign0(bs, num);
1708 if (buf == NULL) {
1709 ret = -ENOMEM;
1710 goto fail;
1711 }
1712 }
1713 qemu_iovec_init_buf(&qiov, buf, num);
1714
1715 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
1716
1717
1718
1719
1720 if (num < max_transfer) {
1721 qemu_vfree(buf);
1722 buf = NULL;
1723 }
1724 }
1725
1726 offset += num;
1727 bytes -= num;
1728 }
1729
1730fail:
1731 if (ret == 0 && need_flush) {
1732 ret = bdrv_co_flush(bs);
1733 }
1734 qemu_vfree(buf);
1735 return ret;
1736}
1737
1738static inline int coroutine_fn
1739bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
1740 BdrvTrackedRequest *req, int flags)
1741{
1742 BlockDriverState *bs = child->bs;
1743 bool waited;
1744 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1745
1746 if (bs->read_only) {
1747 return -EPERM;
1748 }
1749
1750
1751 assert(!(flags & BDRV_REQ_NO_SERIALISING));
1752 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1753 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1754 assert(!(flags & ~BDRV_REQ_MASK));
1755
1756 if (flags & BDRV_REQ_SERIALISING) {
1757 bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
1758 }
1759
1760 waited = bdrv_wait_serialising_requests(req);
1761
1762 assert(!waited || !req->serialising ||
1763 is_request_serialising_and_aligned(req));
1764 assert(req->overlap_offset <= offset);
1765 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1766 assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
1767
1768 switch (req->type) {
1769 case BDRV_TRACKED_WRITE:
1770 case BDRV_TRACKED_DISCARD:
1771 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1772 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1773 } else {
1774 assert(child->perm & BLK_PERM_WRITE);
1775 }
1776 return notifier_with_return_list_notify(&bs->before_write_notifiers,
1777 req);
1778 case BDRV_TRACKED_TRUNCATE:
1779 assert(child->perm & BLK_PERM_RESIZE);
1780 return 0;
1781 default:
1782 abort();
1783 }
1784}
1785
1786static inline void coroutine_fn
1787bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
1788 BdrvTrackedRequest *req, int ret)
1789{
1790 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1791 BlockDriverState *bs = child->bs;
1792
1793 atomic_inc(&bs->write_gen);
1794
1795
1796
1797
1798
1799
1800
1801
1802 if (ret == 0 &&
1803 (req->type == BDRV_TRACKED_TRUNCATE ||
1804 end_sector > bs->total_sectors) &&
1805 req->type != BDRV_TRACKED_DISCARD) {
1806 bs->total_sectors = end_sector;
1807 bdrv_parent_cb_resize(bs);
1808 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
1809 }
1810 if (req->bytes) {
1811 switch (req->type) {
1812 case BDRV_TRACKED_WRITE:
1813 stat64_max(&bs->wr_highest_offset, offset + bytes);
1814
1815 case BDRV_TRACKED_DISCARD:
1816 bdrv_set_dirty(bs, offset, bytes);
1817 break;
1818 default:
1819 break;
1820 }
1821 }
1822}
1823
1824
1825
1826
1827
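/*
 * Forward an already aligned write to the driver, splitting it into
 * max_transfer sized pieces where necessary and applying zero detection
 * (bs->detect_zeroes) before dispatch.
 */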
1828static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
1829 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1830 int64_t align, QEMUIOVector *qiov, int flags)
1831{
1832 BlockDriverState *bs = child->bs;
1833 BlockDriver *drv = bs->drv;
1834 int ret;
1835
1836 uint64_t bytes_remaining = bytes;
1837 int max_transfer;
1838
1839 if (!drv) {
1840 return -ENOMEDIUM;
1841 }
1842
1843 if (bdrv_has_readonly_bitmaps(bs)) {
1844 return -EPERM;
1845 }
1846
1847 assert(is_power_of_2(align));
1848 assert((offset & (align - 1)) == 0);
1849 assert((bytes & (align - 1)) == 0);
1850 assert(!qiov || bytes == qiov->size);
1851 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1852 align);
1853
1854 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
1855
1856 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
1857 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
1858 qemu_iovec_is_zero(qiov, 0, qiov->size)) {
1859 flags |= BDRV_REQ_ZERO_WRITE;
1860 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
1861 flags |= BDRV_REQ_MAY_UNMAP;
1862 }
1863 }
1864
1865 if (ret < 0) {
1866
1867 } else if (flags & BDRV_REQ_ZERO_WRITE) {
1868 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
1869 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
1870 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
1871 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
1872 } else if (bytes <= max_transfer) {
1873 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1874 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
1875 } else {
1876 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1877 while (bytes_remaining) {
1878 int num = MIN(bytes_remaining, max_transfer);
1879 QEMUIOVector local_qiov;
1880 int local_flags = flags;
1881
1882 assert(num);
1883 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
1884 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1885
1886
1887 local_flags &= ~BDRV_REQ_FUA;
1888 }
1889 qemu_iovec_init(&local_qiov, qiov->niov);
1890 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1891
1892 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
1893 num, &local_qiov, local_flags);
1894 qemu_iovec_destroy(&local_qiov);
1895 if (ret < 0) {
1896 break;
1897 }
1898 bytes_remaining -= num;
1899 }
1900 }
1901 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
1902
1903 if (ret >= 0) {
1904 ret = 0;
1905 }
1906 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
1907
1908 return ret;
1909}
1910
1911static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
1912 int64_t offset,
1913 unsigned int bytes,
1914 BdrvRequestFlags flags,
1915 BdrvTrackedRequest *req)
1916{
1917 BlockDriverState *bs = child->bs;
1918 QEMUIOVector local_qiov;
1919 uint64_t align = bs->bl.request_alignment;
1920 int ret = 0;
1921 bool padding;
1922 BdrvRequestPadding pad;
1923
1924 padding = bdrv_init_padding(bs, offset, bytes, &pad);
1925 if (padding) {
1926 bdrv_mark_request_serialising(req, align);
1927 bdrv_wait_serialising_requests(req);
1928
1929 bdrv_padding_rmw_read(child, req, &pad, true);
1930
1931 if (pad.head || pad.merge_reads) {
1932 int64_t aligned_offset = offset & ~(align - 1);
1933 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
1934
1935 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
1936 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
1937 align, &local_qiov,
1938 flags & ~BDRV_REQ_ZERO_WRITE);
1939 if (ret < 0 || pad.merge_reads) {
1940
1941 goto out;
1942 }
1943 offset += write_bytes - pad.head;
1944 bytes -= write_bytes - pad.head;
1945 }
1946 }
1947
1948 assert(!bytes || (offset & (align - 1)) == 0);
1949 if (bytes >= align) {
1950
1951 uint64_t aligned_bytes = bytes & ~(align - 1);
1952 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
1953 NULL, flags);
1954 if (ret < 0) {
1955 goto out;
1956 }
1957 bytes -= aligned_bytes;
1958 offset += aligned_bytes;
1959 }
1960
1961 assert(!bytes || (offset & (align - 1)) == 0);
1962 if (bytes) {
1963 assert(align == pad.tail + bytes);
1964
1965 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
1966 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
1967 &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
1968 }
1969
1970out:
1971 bdrv_padding_destroy(&pad);
1972
1973 return ret;
1974}
1975
1976
1977
1978
1979int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
1980 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1981 BdrvRequestFlags flags)
1982{
1983 BlockDriverState *bs = child->bs;
1984 BdrvTrackedRequest req;
1985 uint64_t align = bs->bl.request_alignment;
1986 BdrvRequestPadding pad;
1987 int ret;
1988
1989 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
1990
1991 if (!bs->drv) {
1992 return -ENOMEDIUM;
1993 }
1994
1995 ret = bdrv_check_byte_request(bs, offset, bytes);
1996 if (ret < 0) {
1997 return ret;
1998 }
1999
2000 bdrv_inc_in_flight(bs);
2001
2002
2003
2004
2005
2006 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2007
2008 if (flags & BDRV_REQ_ZERO_WRITE) {
2009 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2010 goto out;
2011 }
2012
2013 if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
2014 bdrv_mark_request_serialising(&req, align);
2015 bdrv_wait_serialising_requests(&req);
2016 bdrv_padding_rmw_read(child, &req, &pad, false);
2017 }
2018
2019 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2020 qiov, flags);
2021
2022 bdrv_padding_destroy(&pad);
2023
2024out:
2025 tracked_request_end(&req);
2026 bdrv_dec_in_flight(bs);
2027
2028 return ret;
2029}
2030
2031int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2032 int bytes, BdrvRequestFlags flags)
2033{
2034 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2035
2036 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2037 flags &= ~BDRV_REQ_MAY_UNMAP;
2038 }
2039
2040 return bdrv_co_pwritev(child, offset, bytes, NULL,
2041 BDRV_REQ_ZERO_WRITE | flags);
2042}
2043
2044
2045
2046
2047int bdrv_flush_all(void)
2048{
2049 BdrvNextIterator it;
2050 BlockDriverState *bs = NULL;
2051 int result = 0;
2052
2053 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2054 AioContext *aio_context = bdrv_get_aio_context(bs);
2055 int ret;
2056
2057 aio_context_acquire(aio_context);
2058 ret = bdrv_flush(bs);
2059 if (ret < 0 && !result) {
2060 result = ret;
2061 }
2062 aio_context_release(aio_context);
2063 }
2064
2065 return result;
2066}
2067
2068
2069typedef struct BdrvCoBlockStatusData {
2070 BlockDriverState *bs;
2071 BlockDriverState *base;
2072 bool want_zero;
2073 int64_t offset;
2074 int64_t bytes;
2075 int64_t *pnum;
2076 int64_t *map;
2077 BlockDriverState **file;
2078 int ret;
2079 bool done;
2080} BdrvCoBlockStatusData;
2081
2082int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
2083 bool want_zero,
2084 int64_t offset,
2085 int64_t bytes,
2086 int64_t *pnum,
2087 int64_t *map,
2088 BlockDriverState **file)
2089{
2090 assert(bs->file && bs->file->bs);
2091 *pnum = bytes;
2092 *map = offset;
2093 *file = bs->file->bs;
2094 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2095}
2096
2097int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
2098 bool want_zero,
2099 int64_t offset,
2100 int64_t bytes,
2101 int64_t *pnum,
2102 int64_t *map,
2103 BlockDriverState **file)
2104{
2105 assert(bs->backing && bs->backing->bs);
2106 *pnum = bytes;
2107 *map = offset;
2108 *file = bs->backing->bs;
2109 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2110}
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
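/*
 * Returns a BDRV_BLOCK_* bitmask describing how the range starting at
 * @offset is allocated in @bs.  *pnum is set to the number of bytes (clamped
 * to @bytes) for which the returned status holds; if BDRV_BLOCK_OFFSET_VALID
 * is set, *map and *file describe where that data can be read directly.
 * With @want_zero false the function may take shortcuts and report fewer
 * BDRV_BLOCK_ZERO ranges than actually exist.
 */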
2139static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2140 bool want_zero,
2141 int64_t offset, int64_t bytes,
2142 int64_t *pnum, int64_t *map,
2143 BlockDriverState **file)
2144{
2145 int64_t total_size;
2146 int64_t n;
2147 int ret;
2148 int64_t local_map = 0;
2149 BlockDriverState *local_file = NULL;
2150 int64_t aligned_offset, aligned_bytes;
2151 uint32_t align;
2152
2153 assert(pnum);
2154 *pnum = 0;
2155 total_size = bdrv_getlength(bs);
2156 if (total_size < 0) {
2157 ret = total_size;
2158 goto early_out;
2159 }
2160
2161 if (offset >= total_size) {
2162 ret = BDRV_BLOCK_EOF;
2163 goto early_out;
2164 }
2165 if (!bytes) {
2166 ret = 0;
2167 goto early_out;
2168 }
2169
2170 n = total_size - offset;
2171 if (n < bytes) {
2172 bytes = n;
2173 }
2174
2175
2176 assert(bs->drv);
2177 if (!bs->drv->bdrv_co_block_status) {
2178 *pnum = bytes;
2179 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2180 if (offset + bytes == total_size) {
2181 ret |= BDRV_BLOCK_EOF;
2182 }
2183 if (bs->drv->protocol_name) {
2184 ret |= BDRV_BLOCK_OFFSET_VALID;
2185 local_map = offset;
2186 local_file = bs;
2187 }
2188 goto early_out;
2189 }
2190
2191 bdrv_inc_in_flight(bs);
2192
2193
2194 align = bs->bl.request_alignment;
2195 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2196 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2197
2198 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2199 aligned_bytes, pnum, &local_map,
2200 &local_file);
2201 if (ret < 0) {
2202 *pnum = 0;
2203 goto out;
2204 }
2205
2206
2207
2208
2209
2210 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2211 align > offset - aligned_offset);
2212 if (ret & BDRV_BLOCK_RECURSE) {
2213 assert(ret & BDRV_BLOCK_DATA);
2214 assert(ret & BDRV_BLOCK_OFFSET_VALID);
2215 assert(!(ret & BDRV_BLOCK_ZERO));
2216 }
2217
2218 *pnum -= offset - aligned_offset;
2219 if (*pnum > bytes) {
2220 *pnum = bytes;
2221 }
2222 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2223 local_map += offset - aligned_offset;
2224 }
2225
2226 if (ret & BDRV_BLOCK_RAW) {
2227 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2228 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2229 *pnum, pnum, &local_map, &local_file);
2230 goto out;
2231 }
2232
2233 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2234 ret |= BDRV_BLOCK_ALLOCATED;
2235 } else if (want_zero) {
2236 if (bdrv_unallocated_blocks_are_zero(bs)) {
2237 ret |= BDRV_BLOCK_ZERO;
2238 } else if (bs->backing) {
2239 BlockDriverState *bs2 = bs->backing->bs;
2240 int64_t size2 = bdrv_getlength(bs2);
2241
2242 if (size2 >= 0 && offset >= size2) {
2243 ret |= BDRV_BLOCK_ZERO;
2244 }
2245 }
2246 }
2247
2248 if (want_zero && ret & BDRV_BLOCK_RECURSE &&
2249 local_file && local_file != bs &&
2250 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2251 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2252 int64_t file_pnum;
2253 int ret2;
2254
2255 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2256 *pnum, &file_pnum, NULL, NULL);
2257 if (ret2 >= 0) {
2258
2259
2260
2261 if (ret2 & BDRV_BLOCK_EOF &&
2262 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
2263
2264
2265
2266
2267
2268 ret |= BDRV_BLOCK_ZERO;
2269 } else {
2270
2271 *pnum = file_pnum;
2272 ret |= (ret2 & BDRV_BLOCK_ZERO);
2273 }
2274 }
2275 }
2276
2277out:
2278 bdrv_dec_in_flight(bs);
2279 if (ret >= 0 && offset + *pnum == total_size) {
2280 ret |= BDRV_BLOCK_EOF;
2281 }
2282early_out:
2283 if (file) {
2284 *file = local_file;
2285 }
2286 if (map) {
2287 *map = local_map;
2288 }
2289 return ret;
2290}
2291
2292static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
2293 BlockDriverState *base,
2294 bool want_zero,
2295 int64_t offset,
2296 int64_t bytes,
2297 int64_t *pnum,
2298 int64_t *map,
2299 BlockDriverState **file)
2300{
2301 BlockDriverState *p;
2302 int ret = 0;
2303 bool first = true;
2304
2305 assert(bs != base);
2306 for (p = bs; p != base; p = backing_bs(p)) {
2307 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2308 file);
2309 if (ret < 0) {
2310 break;
2311 }
2312 if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
2313
2314
2315
2316
2317
2318
2319 *pnum = bytes;
2320 }
2321 if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
2322 break;
2323 }
2324
2325
2326 bytes = MIN(bytes, *pnum);
2327 first = false;
2328 }
2329 return ret;
2330}
2331
2332
2333static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
2334{
2335 BdrvCoBlockStatusData *data = opaque;
2336
2337 data->ret = bdrv_co_block_status_above(data->bs, data->base,
2338 data->want_zero,
2339 data->offset, data->bytes,
2340 data->pnum, data->map, data->file);
2341 data->done = true;
2342 aio_wait_kick();
2343}
2344
2345
2346
2347
2348
2349
2350static int bdrv_common_block_status_above(BlockDriverState *bs,
2351 BlockDriverState *base,
2352 bool want_zero, int64_t offset,
2353 int64_t bytes, int64_t *pnum,
2354 int64_t *map,
2355 BlockDriverState **file)
2356{
2357 Coroutine *co;
2358 BdrvCoBlockStatusData data = {
2359 .bs = bs,
2360 .base = base,
2361 .want_zero = want_zero,
2362 .offset = offset,
2363 .bytes = bytes,
2364 .pnum = pnum,
2365 .map = map,
2366 .file = file,
2367 .done = false,
2368 };
2369
2370 if (qemu_in_coroutine()) {
2371
2372 bdrv_block_status_above_co_entry(&data);
2373 } else {
2374 co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
2375 bdrv_coroutine_enter(bs, co);
2376 BDRV_POLL_WHILE(bs, !data.done);
2377 }
2378 return data.ret;
2379}
2380
2381int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2382 int64_t offset, int64_t bytes, int64_t *pnum,
2383 int64_t *map, BlockDriverState **file)
2384{
2385 return bdrv_common_block_status_above(bs, base, true, offset, bytes,
2386 pnum, map, file);
2387}
2388
2389int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2390 int64_t *pnum, int64_t *map, BlockDriverState **file)
2391{
2392 return bdrv_block_status_above(bs, backing_bs(bs),
2393 offset, bytes, pnum, map, file);
2394}
2395
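/*
 * Illustrative sketch (not from the original source): scanning a whole image
 * with bdrv_block_status() typically loops over the returned *pnum, e.g.
 *
 *     int64_t offset = 0, len = bdrv_getlength(bs);   // assumes len >= 0
 *     while (offset < len) {
 *         int64_t pnum;
 *         int ret = bdrv_block_status(bs, offset, len - offset, &pnum,
 *                                     NULL, NULL);
 *         if (ret < 0) {
 *             break;                  // error handling elided
 *         }
 *         // ret carries BDRV_BLOCK_DATA / BDRV_BLOCK_ZERO etc. for pnum bytes
 *         offset += pnum;
 *     }
 */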
2396int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2397 int64_t bytes, int64_t *pnum)
2398{
2399 int ret;
2400 int64_t dummy;
2401
2402 ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
2403 bytes, pnum ? pnum : &dummy, NULL,
2404 NULL);
2405 if (ret < 0) {
2406 return ret;
2407 }
2408 return !!(ret & BDRV_BLOCK_ALLOCATED);
2409}
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
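/*
 * Returns 1 (and sets *pnum) if any image in the chain between @top and
 * @base (@base itself only when @include_base is true) allocates data for
 * the start of the range, 0 if the range is unallocated throughout the
 * chain, and a negative errno on failure.
 */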
2428int bdrv_is_allocated_above(BlockDriverState *top,
2429 BlockDriverState *base,
2430 bool include_base, int64_t offset,
2431 int64_t bytes, int64_t *pnum)
2432{
2433 BlockDriverState *intermediate;
2434 int ret;
2435 int64_t n = bytes;
2436
2437 assert(base || !include_base);
2438
2439 intermediate = top;
2440 while (include_base || intermediate != base) {
2441 int64_t pnum_inter;
2442 int64_t size_inter;
2443
2444 assert(intermediate);
2445 ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
2446 if (ret < 0) {
2447 return ret;
2448 }
2449 if (ret) {
2450 *pnum = pnum_inter;
2451 return 1;
2452 }
2453
2454 size_inter = bdrv_getlength(intermediate);
2455 if (size_inter < 0) {
2456 return size_inter;
2457 }
2458 if (n > pnum_inter &&
2459 (intermediate == top || offset + pnum_inter < size_inter)) {
2460 n = pnum_inter;
2461 }
2462
2463 if (intermediate == base) {
2464 break;
2465 }
2466
2467 intermediate = backing_bs(intermediate);
2468 }
2469
2470 *pnum = n;
2471 return 0;
2472}
2473
2474typedef struct BdrvVmstateCo {
2475 BlockDriverState *bs;
2476 QEMUIOVector *qiov;
2477 int64_t pos;
2478 bool is_read;
2479 int ret;
2480} BdrvVmstateCo;
2481
2482static int coroutine_fn
2483bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2484 bool is_read)
2485{
2486 BlockDriver *drv = bs->drv;
2487 int ret = -ENOTSUP;
2488
2489 bdrv_inc_in_flight(bs);
2490
2491 if (!drv) {
2492 ret = -ENOMEDIUM;
2493 } else if (drv->bdrv_load_vmstate) {
2494 if (is_read) {
2495 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2496 } else {
2497 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2498 }
2499 } else if (bs->file) {
2500 ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
2501 }
2502
2503 bdrv_dec_in_flight(bs);
2504 return ret;
2505}
2506
2507static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
2508{
2509 BdrvVmstateCo *co = opaque;
2510 co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
2511 aio_wait_kick();
2512}
2513
2514static inline int
2515bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2516 bool is_read)
2517{
2518 if (qemu_in_coroutine()) {
2519 return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
2520 } else {
2521 BdrvVmstateCo data = {
2522 .bs = bs,
2523 .qiov = qiov,
2524 .pos = pos,
2525 .is_read = is_read,
2526 .ret = -EINPROGRESS,
2527 };
2528 Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
2529
2530 bdrv_coroutine_enter(bs, co);
2531 BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
2532 return data.ret;
2533 }
2534}
2535
2536int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2537 int64_t pos, int size)
2538{
2539 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2540 int ret;
2541
2542 ret = bdrv_writev_vmstate(bs, &qiov, pos);
2543 if (ret < 0) {
2544 return ret;
2545 }
2546
2547 return size;
2548}
2549
2550int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2551{
2552 return bdrv_rw_vmstate(bs, qiov, pos, false);
2553}
2554
2555int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2556 int64_t pos, int size)
2557{
2558 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2559 int ret;
2560
2561 ret = bdrv_readv_vmstate(bs, &qiov, pos);
2562 if (ret < 0) {
2563 return ret;
2564 }
2565
2566 return size;
2567}
2568
2569int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2570{
2571 return bdrv_rw_vmstate(bs, qiov, pos, true);
2572}
2573
2574
2575
2576
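/**************************************************************/
/* async I/Os */

/* Cancel 'acb' and poll its AioContext until the request has completed
 * or been cancelled; the completion callback has run by the time this
 * returns. */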
2577void bdrv_aio_cancel(BlockAIOCB *acb)
2578{
2579 qemu_aio_ref(acb);
2580 bdrv_aio_cancel_async(acb);
2581 while (acb->refcnt > 1) {
2582 if (acb->aiocb_info->get_aio_context) {
2583 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2584 } else if (acb->bs) {
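 /* Without a get_aio_context() callback we can only safely poll the
  * main loop context here, so assert that the BDS is not in an
  * iothread. */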
2589 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2590 aio_poll(bdrv_get_aio_context(acb->bs), true);
2591 } else {
2592 abort();
2593 }
2594 }
2595 qemu_aio_unref(acb);
2596}
2597
2598
2599
2600
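/* Async version of aio cancel.  The caller is not blocked if the acb
 * implements cancel_async; otherwise nothing is done and the request is
 * left to complete normally.  In either case the completion callback is
 * eventually invoked. */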
2601void bdrv_aio_cancel_async(BlockAIOCB *acb)
2602{
2603 if (acb->aiocb_info->cancel_async) {
2604 acb->aiocb_info->cancel_async(acb);
2605 }
2606}
2607
2608
2609
2610
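/**************************************************************/
/* Coroutine block device emulation */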
2611typedef struct FlushCo {
2612 BlockDriverState *bs;
2613 int ret;
2614} FlushCo;
2615
2616
2617static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2618{
2619 FlushCo *rwco = opaque;
2620
2621 rwco->ret = bdrv_co_flush(rwco->bs);
2622 aio_wait_kick();
2623}
2624
2625int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2626{
2627 int current_gen;
2628 int ret = 0;
2629
2630 bdrv_inc_in_flight(bs);
2631
2632 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2633 bdrv_is_sg(bs)) {
2634 goto early_exit;
2635 }
2636
2637 qemu_co_mutex_lock(&bs->reqs_lock);
2638 current_gen = atomic_read(&bs->write_gen);
2639
2640
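 /* Wait until any previous flushes are completed */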
2641 while (bs->active_flush_req) {
2642 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
2643 }
2644
2645
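 /* Become the active flush; later flushes queue behind us */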
2646 bs->active_flush_req = true;
2647 qemu_co_mutex_unlock(&bs->reqs_lock);
2648
2649
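 /* Write back all layers by calling one driver function */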
2650 if (bs->drv->bdrv_co_flush) {
2651 ret = bs->drv->bdrv_co_flush(bs);
2652 goto out;
2653 }
2654
2655
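 /* Write back cached data to the OS even with cache=unsafe */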
2656 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
2657 if (bs->drv->bdrv_co_flush_to_os) {
2658 ret = bs->drv->bdrv_co_flush_to_os(bs);
2659 if (ret < 0) {
2660 goto out;
2661 }
2662 }
2663
2664
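 /* But don't actually force it to the disk with cache=unsafe */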
2665 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2666 goto flush_parent;
2667 }
2668
2669
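 /* Check if we really need to flush anything */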
2670 if (bs->flushed_gen == current_gen) {
2671 goto flush_parent;
2672 }
2673
2674 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
2675 if (!bs->drv) {
2676
2677
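 /* bs->drv may have been ejected by one of the callbacks above (even
  * in case of apparent success), so there is nothing left to flush */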
2678 ret = -ENOMEDIUM;
2679 goto out;
2680 }
2681 if (bs->drv->bdrv_co_flush_to_disk) {
2682 ret = bs->drv->bdrv_co_flush_to_disk(bs);
2683 } else if (bs->drv->bdrv_aio_flush) {
2684 BlockAIOCB *acb;
2685 CoroutineIOCompletion co = {
2686 .coroutine = qemu_coroutine_self(),
2687 };
2688
2689 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2690 if (acb == NULL) {
2691 ret = -EIO;
2692 } else {
2693 qemu_coroutine_yield();
2694 ret = co.ret;
2695 }
2696 } else {
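 /*
  * Some block drivers always operate in either writethrough or unsafe
  * mode and do not support bdrv_flush at all.  QEMU usually cannot know
  * whether such a backend is safe on disk, and returning an error here
  * would break guests even when the backend operates in writethrough
  * mode, so report success instead.
  */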
2708 ret = 0;
2709 }
2710
2711 if (ret < 0) {
2712 goto out;
2713 }
2717
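 /* Finally, flush the underlying protocol (bs->file), if any */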
2718flush_parent:
2719 ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
2720out:
2721
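 /* Record the generation that has been flushed on success */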
2722 if (ret == 0) {
2723 bs->flushed_gen = current_gen;
2724 }
2725
2726 qemu_co_mutex_lock(&bs->reqs_lock);
2727 bs->active_flush_req = false;
2728
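 /* Return value is ignored - it's ok if wait queue is empty */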
2729 qemu_co_queue_next(&bs->flush_queue);
2730 qemu_co_mutex_unlock(&bs->reqs_lock);
2731
2732early_exit:
2733 bdrv_dec_in_flight(bs);
2734 return ret;
2735}
2736
2737int bdrv_flush(BlockDriverState *bs)
2738{
2739 Coroutine *co;
2740 FlushCo flush_co = {
2741 .bs = bs,
2742 .ret = NOT_DONE,
2743 };
2744
2745 if (qemu_in_coroutine()) {
2746
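 /* Fast-path if already in coroutine context */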
2747 bdrv_flush_co_entry(&flush_co);
2748 } else {
2749 co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
2750 bdrv_coroutine_enter(bs, co);
2751 BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
2752 }
2753
2754 return flush_co.ret;
2755}
2756
2757typedef struct DiscardCo {
2758 BdrvChild *child;
2759 int64_t offset;
2760 int64_t bytes;
2761 int ret;
2762} DiscardCo;
2763static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
2764{
2765 DiscardCo *rwco = opaque;
2766
2767 rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
2768 aio_wait_kick();
2769}
2770
2771int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
2772 int64_t bytes)
2773{
2774 BdrvTrackedRequest req;
2775 int max_pdiscard, ret;
2776 int head, tail, align;
2777 BlockDriverState *bs = child->bs;
2778
2779 if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
2780 return -ENOMEDIUM;
2781 }
2782
2783 if (bdrv_has_readonly_bitmaps(bs)) {
2784 return -EPERM;
2785 }
2786
2787 if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
2788 return -EIO;
2789 }
2790
2791
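 /* Do nothing if discard was disabled at open time */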
2792 if (!(bs->open_flags & BDRV_O_UNMAP)) {
2793 return 0;
2794 }
2795
2796 if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
2797 return 0;
2798 }
2804
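 /* Discard is advisory, but some devices track and coalesce
  * unaligned requests, so we must pass everything down rather than
  * round here.  Still, most devices will just silently ignore
  * unaligned requests (by returning -ENOTSUP), so we must fragment
  * the request accordingly. */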
2805 align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
2806 assert(align % bs->bl.request_alignment == 0);
2807 head = offset % align;
2808 tail = (offset + bytes) % align;
2809
2810 bdrv_inc_in_flight(bs);
2811 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
2812
2813 ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
2814 if (ret < 0) {
2815 goto out;
2816 }
2817
2818 max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
2819 align);
2820 assert(max_pdiscard >= bs->bl.request_alignment);
2821
2822 while (bytes > 0) {
2823 int64_t num = bytes;
2824
2825 if (head) {
2826
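 /* Make small requests to get to alignment boundaries. */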
2827 num = MIN(bytes, align - head);
2828 if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
2829 num %= bs->bl.request_alignment;
2830 }
2831 head = (head + num) % align;
2832 assert(num < max_pdiscard);
2833 } else if (tail) {
2834 if (num > align) {
2835
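 /* Shorten the request so that it ends at the last alignment
  * boundary. */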
2836 num -= tail;
2837 } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
2838 tail > bs->bl.request_alignment) {
2839 tail %= bs->bl.request_alignment;
2840 num -= tail;
2841 }
2842 }
2843
2844 if (num > max_pdiscard) {
2845 num = max_pdiscard;
2846 }
2847
2848 if (!bs->drv) {
2849 ret = -ENOMEDIUM;
2850 goto out;
2851 }
2852 if (bs->drv->bdrv_co_pdiscard) {
2853 ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
2854 } else {
2855 BlockAIOCB *acb;
2856 CoroutineIOCompletion co = {
2857 .coroutine = qemu_coroutine_self(),
2858 };
2859
2860 acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
2861 bdrv_co_io_em_complete, &co);
2862 if (acb == NULL) {
2863 ret = -EIO;
2864 goto out;
2865 } else {
2866 qemu_coroutine_yield();
2867 ret = co.ret;
2868 }
2869 }
2870 if (ret && ret != -ENOTSUP) {
2871 goto out;
2872 }
2873
2874 offset += num;
2875 bytes -= num;
2876 }
2877 ret = 0;
2878out:
2879 bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
2880 tracked_request_end(&req);
2881 bdrv_dec_in_flight(bs);
2882 return ret;
2883}
2884
2885int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
2886{
2887 Coroutine *co;
2888 DiscardCo rwco = {
2889 .child = child,
2890 .offset = offset,
2891 .bytes = bytes,
2892 .ret = NOT_DONE,
2893 };
2894
2895 if (qemu_in_coroutine()) {
2896
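 /* Fast-path if already in coroutine context */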
2897 bdrv_pdiscard_co_entry(&rwco);
2898 } else {
2899 co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
2900 bdrv_coroutine_enter(child->bs, co);
2901 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
2902 }
2903
2904 return rwco.ret;
2905}
2906
2907int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
2908{
2909 BlockDriver *drv = bs->drv;
2910 CoroutineIOCompletion co = {
2911 .coroutine = qemu_coroutine_self(),
2912 };
2913 BlockAIOCB *acb;
2914
2915 bdrv_inc_in_flight(bs);
2916 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
2917 co.ret = -ENOTSUP;
2918 goto out;
2919 }
2920
2921 if (drv->bdrv_co_ioctl) {
2922 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
2923 } else {
2924 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
2925 if (!acb) {
2926 co.ret = -ENOTSUP;
2927 goto out;
2928 }
2929 qemu_coroutine_yield();
2930 }
2931out:
2932 bdrv_dec_in_flight(bs);
2933 return co.ret;
2934}
2935
2936void *qemu_blockalign(BlockDriverState *bs, size_t size)
2937{
2938 return qemu_memalign(bdrv_opt_mem_align(bs), size);
2939}
2940
2941void *qemu_blockalign0(BlockDriverState *bs, size_t size)
2942{
2943 return memset(qemu_blockalign(bs, size), 0, size);
2944}
2945
2946void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
2947{
2948 size_t align = bdrv_opt_mem_align(bs);
2949
2950
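 /* Ensure that NULL is never returned on success */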
2951 assert(align > 0);
2952 if (size == 0) {
2953 size = align;
2954 }
2955
2956 return qemu_try_memalign(align, size);
2957}
2958
2959void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
2960{
2961 void *mem = qemu_try_blockalign(bs, size);
2962
2963 if (mem) {
2964 memset(mem, 0, size);
2965 }
2966
2967 return mem;
2968}
2969
2970
2971
2972
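/*
 * Check if all memory in this vector is aligned to the minimum memory
 * alignment of @bs.
 */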
2973bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
2974{
2975 int i;
2976 size_t alignment = bdrv_min_mem_align(bs);
2977
2978 for (i = 0; i < qiov->niov; i++) {
2979 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
2980 return false;
2981 }
2982 if (qiov->iov[i].iov_len % alignment) {
2983 return false;
2984 }
2985 }
2986
2987 return true;
2988}
2989
2990void bdrv_add_before_write_notifier(BlockDriverState *bs,
2991 NotifierWithReturn *notifier)
2992{
2993 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
2994}
2995
2996void bdrv_io_plug(BlockDriverState *bs)
2997{
2998 BdrvChild *child;
2999
3000 QLIST_FOREACH(child, &bs->children, next) {
3001 bdrv_io_plug(child->bs);
3002 }
3003
3004 if (atomic_fetch_inc(&bs->io_plugged) == 0) {
3005 BlockDriver *drv = bs->drv;
3006 if (drv && drv->bdrv_io_plug) {
3007 drv->bdrv_io_plug(bs);
3008 }
3009 }
3010}
3011
3012void bdrv_io_unplug(BlockDriverState *bs)
3013{
3014 BdrvChild *child;
3015
3016 assert(bs->io_plugged);
3017 if (atomic_fetch_dec(&bs->io_plugged) == 1) {
3018 BlockDriver *drv = bs->drv;
3019 if (drv && drv->bdrv_io_unplug) {
3020 drv->bdrv_io_unplug(bs);
3021 }
3022 }
3023
3024 QLIST_FOREACH(child, &bs->children, next) {
3025 bdrv_io_unplug(child->bs);
3026 }
3027}
3028
3029void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
3030{
3031 BdrvChild *child;
3032
3033 if (bs->drv && bs->drv->bdrv_register_buf) {
3034 bs->drv->bdrv_register_buf(bs, host, size);
3035 }
3036 QLIST_FOREACH(child, &bs->children, next) {
3037 bdrv_register_buf(child->bs, host, size);
3038 }
3039}
3040
3041void bdrv_unregister_buf(BlockDriverState *bs, void *host)
3042{
3043 BdrvChild *child;
3044
3045 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3046 bs->drv->bdrv_unregister_buf(bs, host);
3047 }
3048 QLIST_FOREACH(child, &bs->children, next) {
3049 bdrv_unregister_buf(child->bs, host);
3050 }
3051}
3052
3053static int coroutine_fn bdrv_co_copy_range_internal(
3054 BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
3055 uint64_t dst_offset, uint64_t bytes,
3056 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
3057 bool recurse_src)
3058{
3059 BdrvTrackedRequest req;
3060 int ret;
3061
3062
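 /* BDRV_REQ_NO_FALLBACK is not (yet) supported by copy offloading */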
3063 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
3064 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
3065
3066 if (!dst || !dst->bs) {
3067 return -ENOMEDIUM;
3068 }
3069 ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
3070 if (ret) {
3071 return ret;
3072 }
3073 if (write_flags & BDRV_REQ_ZERO_WRITE) {
3074 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
3075 }
3076
3077 if (!src || !src->bs) {
3078 return -ENOMEDIUM;
3079 }
3080 ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
3081 if (ret) {
3082 return ret;
3083 }
3084
3085 if (!src->bs->drv->bdrv_co_copy_range_from
3086 || !dst->bs->drv->bdrv_co_copy_range_to
3087 || src->bs->encrypted || dst->bs->encrypted) {
3088 return -ENOTSUP;
3089 }
3090
3091 if (recurse_src) {
3092 bdrv_inc_in_flight(src->bs);
3093 tracked_request_begin(&req, src->bs, src_offset, bytes,
3094 BDRV_TRACKED_READ);
3095
3096
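 /* BDRV_REQ_SERIALISING is only for write operations */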
3097 assert(!(read_flags & BDRV_REQ_SERIALISING));
3098 if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
3099 bdrv_wait_serialising_requests(&req);
3100 }
3101
3102 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
3103 src, src_offset,
3104 dst, dst_offset,
3105 bytes,
3106 read_flags, write_flags);
3107
3108 tracked_request_end(&req);
3109 bdrv_dec_in_flight(src->bs);
3110 } else {
3111 bdrv_inc_in_flight(dst->bs);
3112 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3113 BDRV_TRACKED_WRITE);
3114 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3115 write_flags);
3116 if (!ret) {
3117 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3118 src, src_offset,
3119 dst, dst_offset,
3120 bytes,
3121 read_flags, write_flags);
3122 }
3123 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3124 tracked_request_end(&req);
3125 bdrv_dec_in_flight(dst->bs);
3126 }
3127
3128 return ret;
3129}
3130
3134
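/*
 * Copy a range from @src to @dst, calling the copy_range driver callback
 * on the source child (tracked as a read request on @src).
 */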
3135int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
3136 BdrvChild *dst, uint64_t dst_offset,
3137 uint64_t bytes,
3138 BdrvRequestFlags read_flags,
3139 BdrvRequestFlags write_flags)
3140{
3141 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3142 read_flags, write_flags);
3143 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3144 bytes, read_flags, write_flags, true);
3145}
3146
3150
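/*
 * Copy a range from @src to @dst, calling the copy_range driver callback
 * on the destination child (tracked as a write request on @dst).
 */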
3151int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
3152 BdrvChild *dst, uint64_t dst_offset,
3153 uint64_t bytes,
3154 BdrvRequestFlags read_flags,
3155 BdrvRequestFlags write_flags)
3156{
3157 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3158 read_flags, write_flags);
3159 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3160 bytes, read_flags, write_flags, false);
3161}
3162
3163int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
3164 BdrvChild *dst, uint64_t dst_offset,
3165 uint64_t bytes, BdrvRequestFlags read_flags,
3166 BdrvRequestFlags write_flags)
3167{
3168 return bdrv_co_copy_range_from(src, src_offset,
3169 dst, dst_offset,
3170 bytes, read_flags, write_flags);
3171}
3172
3173static void bdrv_parent_cb_resize(BlockDriverState *bs)
3174{
3175 BdrvChild *c;
3176 QLIST_FOREACH(c, &bs->parents, next_parent) {
3177 if (c->role->resize) {
3178 c->role->resize(c);
3179 }
3180 }
3181}
3182
3183
3184
3185
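/*
 * Truncate the image backing @child to @offset bytes (growing or
 * shrinking), using preallocation mode @prealloc for newly added space.
 */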
3186int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
3187 PreallocMode prealloc, Error **errp)
3188{
3189 BlockDriverState *bs = child->bs;
3190 BlockDriver *drv = bs->drv;
3191 BdrvTrackedRequest req;
3192 int64_t old_size, new_bytes;
3193 int ret;
3194
3195
3196
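 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */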
3197 if (!drv) {
3198 error_setg(errp, "No medium inserted");
3199 return -ENOMEDIUM;
3200 }
3201 if (offset < 0) {
3202 error_setg(errp, "Image size cannot be negative");
3203 return -EINVAL;
3204 }
3205
3206 old_size = bdrv_getlength(bs);
3207 if (old_size < 0) {
3208 error_setg_errno(errp, -old_size, "Failed to get old image size");
3209 return old_size;
3210 }
3211
3212 if (offset > old_size) {
3213 new_bytes = offset - old_size;
3214 } else {
3215 new_bytes = 0;
3216 }
3217
3218 bdrv_inc_in_flight(bs);
3219 tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
3220 BDRV_TRACKED_TRUNCATE);
3224
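 /* If we are growing the image and potentially using preallocation for
  * the new area, we need to make sure that no write requests are made to
  * it concurrently or they might be overwritten by preallocation. */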
3225 if (new_bytes) {
3226 bdrv_mark_request_serialising(&req, 1);
3227 }
3228 if (bs->read_only) {
3229 error_setg(errp, "Image is read-only");
3230 ret = -EACCES;
3231 goto out;
3232 }
3233 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
3234 0);
3235 if (ret < 0) {
3236 error_setg_errno(errp, -ret,
3237 "Failed to prepare request for truncation");
3238 goto out;
3239 }
3240
3241 if (!drv->bdrv_co_truncate) {
3242 if (bs->file && drv->is_filter) {
3243 ret = bdrv_co_truncate(bs->file, offset, prealloc, errp);
3244 goto out;
3245 }
3246 error_setg(errp, "Image format driver does not support resize");
3247 ret = -ENOTSUP;
3248 goto out;
3249 }
3250
3251 ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp);
3252 if (ret < 0) {
3253 goto out;
3254 }
3255 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3256 if (ret < 0) {
3257 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3258 } else {
3259 offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3260 }
3261
3262
3263
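 /* Truncation may have succeeded even if refresh_total_sectors failed;
  * that does not change how the request should be finished, so pass 0 as
  * the last parameter so that dirty bitmaps etc. are handled. */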
3264 bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
3265
3266out:
3267 tracked_request_end(&req);
3268 bdrv_dec_in_flight(bs);
3269
3270 return ret;
3271}
3272
3273typedef struct TruncateCo {
3274 BdrvChild *child;
3275 int64_t offset;
3276 PreallocMode prealloc;
3277 Error **errp;
3278 int ret;
3279} TruncateCo;
3280
3281static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
3282{
3283 TruncateCo *tco = opaque;
3284 tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc,
3285 tco->errp);
3286 aio_wait_kick();
3287}
3288
3289int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
3290 Error **errp)
3291{
3292 Coroutine *co;
3293 TruncateCo tco = {
3294 .child = child,
3295 .offset = offset,
3296 .prealloc = prealloc,
3297 .errp = errp,
3298 .ret = NOT_DONE,
3299 };
3300
3301 if (qemu_in_coroutine()) {
3302
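 /* Fast-path if already in coroutine context */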
3303 bdrv_truncate_co_entry(&tco);
3304 } else {
3305 co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
3306 bdrv_coroutine_enter(child->bs, co);
3307 BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
3308 }
3309
3310 return tco.ret;
3311}
3312