/*
 * Block layer I/O functions
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);

static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->role->drained_end) {
        c->role->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    BDRV_POLL_WHILE(c->bs, atomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->role->drained_poll) {
        return c->role->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    c->parent_quiesce_counter++;
    if (c->role->drained_begin) {
        c->role->drained_begin(c);
    }
    if (poll) {
        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
    }
}

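/*
 * Combine the limits of a parent and a child node: transfer sizes and
 * iov counts take the stricter (smaller non-zero) value, alignments
 * take the larger one.
 */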
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv ||
                                drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
    } else {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = qemu_real_host_page_size;

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    if (bs->backing) {
        bdrv_refresh_limits(bs->backing->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    atomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = atomic_fetch_dec(&bs->copy_on_read);
    assert(old >= 1);
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
    atomic_mb_set(&data->done, true);
    if (!data->begin) {
        atomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}

/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        atomic_inc(drained_end_counter);
    }

    /* Make sure the driver callback completes during the polling phase for
     * drain_begin. */
    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}

/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (atomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);

        /*
         * bdrv_do_drained_begin/end() must run with the AioContext lock
         * held.  If the coroutine that yielded lives in the same
         * AioContext that this BH runs in, the lock is not held here
         * yet, so take it around the drain calls below.
         */
        if (ctx == co_ctx) {
            aio_context_acquire(ctx);
        }
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        if (ctx == co_ctx) {
            aio_context_release(ctx);
        }
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter(). */

    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = qemu_coroutine_self(),
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }
    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
                                     bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);
}

void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

    /*
     * Wait for drained requests to finish.
     *
     * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The
     * call is needed so things in this AioContext can make progress even
     * though we don't return to the main AioContext loop - this automatically
     * includes other nodes in the same AioContext and therefore all child
     * nodes.
     */
    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}

/**
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee will be incremented
 * once for every background operation scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must remain valid
 * until the pointee reaches 0.  That implies that whoever sets up the
 * pointee has to poll until it is 0.
 *
 * We use atomic operations to access *drained_end_counter, because
 * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
 *     @bs may contain nodes in different AioContexts,
 * (2) bdrv_drain_all_end() uses the same counter for all nodes,
 *     so we have to use atomic operations too.
 */
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, atomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, atomic_read(&drained_end_counter) > 0);
}

/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
 * AioContext.
 */
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
    assert(qemu_in_coroutine());
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

void bdrv_drain(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(atomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be endless
     */
    if (replay_events_enabled()) {
        return;
    }

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, atomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        atomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  uint64_t bytes,
                                  enum BdrvTrackedRequestType type)
{
    assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

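/*
 * Mark @req as serialising and widen its overlap range to @align
 * boundaries, so overlapping requests are detected at the granularity
 * the caller needs (e.g. cluster size for copy-on-read).
 */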
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                             - overlap_offset;

    if (!req->serialising) {
        atomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
{
    /*
     * If the request is serialising, overlap_offset and overlap_bytes are set,
     * so we can check if the request is aligned. Otherwise, don't care and
     * return false.
     */

    return req->serialising && (req->offset == req->overlap_offset) &&
           (req->bytes == req->overlap_bytes);
}

/**
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
    BdrvTrackedRequest *req;
    Coroutine *self = qemu_coroutine_self();

    QLIST_FOREACH(req, &bs->tracked_requests, list) {
        if (req->co == self) {
            return req;
        }
    }

    return NULL;
}

/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}

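/*
 * Return the cluster size reported by the driver, falling back to the
 * request alignment when no cluster size is available.
 */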
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, uint64_t bytes)
{
    /*        aaaa   bbbb */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}

void bdrv_inc_in_flight(BlockDriverState *bs)
{
    atomic_inc(&bs->in_flight);
}

void bdrv_wakeup(BlockDriverState *bs)
{
    aio_wait_kick();
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    atomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

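/*
 * Block until all tracked requests that overlap @self's range and
 * require serialisation have completed.  Returns true if this
 * coroutine actually had to wait.
 */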
bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    if (!atomic_read(&bs->serialising_in_flight)) {
        return false;
    }

    do {
        retry = false;
        qemu_co_mutex_lock(&bs->reqs_lock);
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
        qemu_co_mutex_unlock(&bs->reqs_lock);
    } while (retry);

    return waited;
}

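/* Basic sanity checks that apply to every byte-granularity request */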
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    if (size > BDRV_REQUEST_MAX_BYTES) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    return 0;
}

typedef struct RwCo {
    BdrvChild *child;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
    BdrvRequestFlags flags;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
                                   rwco->qiov->size, rwco->qiov,
                                   rwco->flags);
    } else {
        rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
                                    rwco->qiov->size, rwco->qiov,
                                    rwco->flags);
    }
    aio_wait_kick();
}

/*
 * Process a vectored synchronous request using coroutines
 */
static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .child = child,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
}

int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
                       int bytes, BdrvRequestFlags flags)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);

    return bdrv_prwv_co(child, offset, &qiov, true,
                        BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_pwrite().
 */
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            return ret;
        }
        offset += bytes;
    }
}

int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

/* See bdrv_pwrite() for the return codes */
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);

    if (bytes < 0) {
        return -EINVAL;
    }

    return bdrv_preadv(child, offset, &qiov);
}

int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

/* Return no. of bytes on success or < 0 on error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid offset or number of bytes
  -EACCES      Trying to write a read-only device
*/
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);

    if (bytes < 0) {
        return -EINVAL;
    }

    return bdrv_pwritev(child, offset, &qiov);
}


/*
 * Write @count bytes at @offset and then flush the BlockDriverState,
 * so the data is committed to stable storage before this returns.
 * Returns 0 on success, < 0 on error.
 */
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
                     const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(child, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

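/*
 * Dispatch an aligned read to the driver, preferring the byte-based
 * interfaces and falling back to the legacy sector-based bdrv_co_readv.
 */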
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           uint64_t offset, uint64_t bytes,
                                           QEMUIOVector *qiov,
                                           size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv_part) {
        return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags);
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_preadv) {
        ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
        goto out;
    }

    if (drv->bdrv_aio_preadv) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
                                   bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
            goto out;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
            goto out;
        }
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);
    assert(drv->bdrv_co_readv);

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

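/*
 * Dispatch an aligned write to the driver.  Flags the driver does not
 * support are emulated here; in particular, BDRV_REQ_FUA turns into a
 * flush after a successful write.
 */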
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            uint64_t offset, uint64_t bytes,
                                            QEMUIOVector *qiov,
                                            size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_pwritev_part) {
        ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                   flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (drv->bdrv_aio_pwritev) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
                                    flags & bs->supported_write_flags,
                                    bdrv_co_io_em_complete, &co);
        flags &= ~bs->supported_write_flags;
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);

    assert(drv->bdrv_co_writev);
    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
                              flags & bs->supported_write_flags);
    flags &= ~bs->supported_write_flags;

emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
        ret = bdrv_co_flush(bs);
    }

    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                               uint64_t bytes, QEMUIOVector *qiov,
                               size_t qiov_offset)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector local_qiov;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!block_driver_can_compress(drv)) {
        return -ENOTSUP;
    }

    if (drv->bdrv_co_pwritev_compressed_part) {
        return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
                                                    qiov, qiov_offset);
    }

    if (qiov_offset == 0) {
        return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
    }

    qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
    ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
    qemu_iovec_destroy(&local_qiov);

    return ret;
}

static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
        size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;

    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer = NULL;

    BlockDriver *drv = bs->drv;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    size_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    unsigned int progress = 0;
    bool skip_write;

    if (!drv) {
        return -ENOMEDIUM;
    }

    /*
     * Do not write anything when the BDS is inactive.  That is not
     * allowed, and it would not help.
     */
    skip_write = (bs->open_flags & BDRV_O_INACTIVE);

    /* FIXME We cannot require callers to have write permissions when all they
     * are doing is a read request. If we did things right, write permissions
     * would be obtained anyway, but internally by the copy-on-read code. As
     * long as it is implemented here rather than in a separate filter driver,
     * the copy-on-read code doesn't have its own BdrvChild, however, for which
     * it can't do any permission checks.
     *
     * Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
     * is one reason we loop rather than doing it all at once.
     */
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    while (cluster_bytes) {
        int64_t pnum;

        if (skip_write) {
            ret = 1; /* "already allocated", so nothing will be copied */
            pnum = MIN(cluster_bytes, max_transfer);
        } else {
            ret = bdrv_is_allocated(bs, cluster_offset,
                                    MIN(cluster_bytes, max_transfer), &pnum);
            if (ret < 0) {
                /*
                 * Safe to treat errors in querying allocation as if
                 * unallocated; we'll probably fail again soon on the
                 * read, but at least that will set a decent errno.
                 */
                pnum = MIN(cluster_bytes, max_transfer);
            }

            /* Stop at EOF if the image ends in the middle of the cluster */
            if (ret == 0 && pnum == 0) {
                assert(progress >= bytes);
                break;
            }

            assert(skip_bytes < pnum);
        }

        if (ret <= 0) {
            QEMUIOVector local_qiov;

            /* Must copy-on-read; use the bounce buffer */
            pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            if (!bounce_buffer) {
                int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
                int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
                int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);

                bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
                if (!bounce_buffer) {
                    ret = -ENOMEM;
                    goto err;
                }
            }
            qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * of that data?
                 */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
                                               BDRV_REQ_WRITE_UNCHANGED);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0,
                                          BDRV_REQ_WRITE_UNCHANGED);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            if (!(flags & BDRV_REQ_PREFETCH)) {
                qemu_iovec_from_buf(qiov, qiov_offset + progress,
                                    bounce_buffer + skip_bytes,
                                    MIN(pnum - skip_bytes, bytes - progress));
            }
        } else if (!(flags & BDRV_REQ_PREFETCH)) {
            /* Read directly into the destination */
            ret = bdrv_driver_preadv(bs, offset + progress,
                                     MIN(pnum - skip_bytes, bytes - progress),
                                     qiov, qiov_offset + progress, 0);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}


/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    uint64_t bytes_remaining = bytes;
    int max_transfer;

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /* TODO: We would need a per-BDS .supported_read_flags and
     * potential fallback support, if we ever implement any read flags
     * to pass through to drivers.  For now, there aren't any
     * passthrough flags.  */
    assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ |
                       BDRV_REQ_PREFETCH)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    /* BDRV_REQ_SERIALISING is only for write operation */
    assert(!(flags & BDRV_REQ_SERIALISING));

    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
        bdrv_wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
                                           qiov, qiov_offset, flags);
            goto out;
        } else if (flags & BDRV_REQ_PREFETCH) {
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
        goto out;
    }

    while (bytes_remaining) {
        int num;

        if (max_bytes) {
            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, qiov, bytes - bytes_remaining, 0);
            max_bytes -= num;
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
                                    bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}


/*
 * Request padding
 *
 *  |<---- align ----->|                     |<----- align ---->|
 *  |<- head ->|<------------- bytes ------------->|<-- tail -->|
 *  |          |                                   |            |
 * -*----------$-------*-------- ... --------*-----$------------*---
 *  |          |                                   |            |
 *  |          offset                              |            |
 *  |                                              end          |
 *  ALIGN_DOWN(offset)                             ALIGN_UP(end)
 *
 * @buf is an aligned allocation that stores the head and tail padding;
 * @tail_buf points at the final @align bytes of @buf.  If the whole
 * padded request fits into @buf, @merge_reads is true and head and
 * tail are read together in one operation.  @local_qiov is the padded
 * I/O vector that is handed down to the driver.
 */
typedef struct BdrvRequestPadding {
    uint8_t *buf;
    size_t buf_len;
    uint8_t *tail_buf;
    size_t head;
    size_t tail;
    bool merge_reads;
    QEMUIOVector local_qiov;
} BdrvRequestPadding;

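/*
 * Compute head/tail padding for an unaligned request and allocate the
 * bounce buffer.  Returns false (leaving @pad zeroed) if the request
 * is already aligned.
 */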
static bool bdrv_init_padding(BlockDriverState *bs,
                              int64_t offset, int64_t bytes,
                              BdrvRequestPadding *pad)
{
    uint64_t align = bs->bl.request_alignment;
    size_t sum;

    memset(pad, 0, sizeof(*pad));

    pad->head = offset & (align - 1);
    pad->tail = ((offset + bytes) & (align - 1));
    if (pad->tail) {
        pad->tail = align - pad->tail;
    }

    if (!pad->head && !pad->tail) {
        return false;
    }

    assert(bytes);

    sum = pad->head + bytes + pad->tail;
    pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
    pad->buf = qemu_blockalign(bs, pad->buf_len);
    pad->merge_reads = sum == pad->buf_len;
    if (pad->tail) {
        pad->tail_buf = pad->buf + pad->buf_len - align;
    }

    return true;
}

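/*
 * Read the aligned chunks that contain the head and tail padding so an
 * unaligned write can be carried out as a read-modify-write cycle.
 */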
static int bdrv_padding_rmw_read(BdrvChild *child,
                                 BdrvTrackedRequest *req,
                                 BdrvRequestPadding *pad,
                                 bool zero_middle)
{
    QEMUIOVector local_qiov;
    BlockDriverState *bs = child->bs;
    uint64_t align = bs->bl.request_alignment;
    int ret;

    assert(req->serialising && pad->buf);

    if (pad->head || pad->merge_reads) {
        uint64_t bytes = pad->merge_reads ? pad->buf_len : align;

        qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        }
        ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                  align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        }

        if (pad->merge_reads) {
            goto zero_mem;
        }
    }

    if (pad->tail) {
        qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(
                child, req,
                req->overlap_offset + req->overlap_bytes - align,
                align, align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
    }

zero_mem:
    if (zero_middle) {
        memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
    }

    return 0;
}

static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
    if (pad->buf) {
        qemu_vfree(pad->buf);
        qemu_iovec_destroy(&pad->local_qiov);
    }
}

/*
 * bdrv_pad_request
 *
 * Exchange request parameters with padded request if padding is needed.
 * Don't include RMW read of padding, bdrv_padding_rmw_read() should be
 * called separately if needed.
 *
 * All parameters except @bs are in-out: they represent original request at
 * function call and padded (if padding needed) at function finish.
 *
 * Function always succeeds.
 */
static bool bdrv_pad_request(BlockDriverState *bs,
                             QEMUIOVector **qiov, size_t *qiov_offset,
                             int64_t *offset, unsigned int *bytes,
                             BdrvRequestPadding *pad)
{
    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
        return false;
    }

    qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
                             *qiov, *qiov_offset, *bytes,
                             pad->buf + pad->buf_len - pad->tail, pad->tail);
    *bytes += pad->head + pad->tail;
    *offset -= pad->head;
    *qiov = &pad->local_qiov;
    *qiov_offset = 0;

    return true;
}

int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
    int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    BdrvRequestPadding pad;
    int ret;

    trace_bdrv_co_preadv(bs, offset, bytes, flags);

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * Aligning zero request is nonsense. Even if driver has special
         * meaning of zero-length (like qcow2_co_pwritev_compressed_part),
         * we can't pass it to the driver due to request_alignment.
         *
         * Still, no reason to return an error if someone does an unaligned
         * zero-length read occasionally.
         */
        return 0;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                              bs->bl.request_alignment,
                              qiov, qiov_offset, flags);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    bdrv_padding_destroy(&pad);

    return ret;
}

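/*
 * Write zeroes, preferring the driver's efficient zero-write callback
 * and falling back to writing an explicit buffer of zeroes unless
 * BDRV_REQ_NO_FALLBACK forbids it.
 */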
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    void *buf = NULL;
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
        return -ENOTSUP;
    }

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush on each chunk; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            if (buf == NULL) {
                buf = qemu_try_blockalign0(bs, num);
                if (buf == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }
            qemu_iovec_init_buf(&qiov, buf, num);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(buf);
                buf = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(buf);
    return ret;
}

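/*
 * Per-request setup common to all write paths: permission checks,
 * serialisation, and notifying before-write listeners.
 */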
static inline int coroutine_fn
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
                          BdrvTrackedRequest *req, int flags)
{
    BlockDriverState *bs = child->bs;
    bool waited;
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);

    if (bs->read_only) {
        return -EPERM;
    }

    /* BDRV_REQ_NO_SERIALISING is only for read operation */
    assert(!(flags & BDRV_REQ_NO_SERIALISING));
    assert(!(bs->open_flags & BDRV_O_INACTIVE));
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));

    if (flags & BDRV_REQ_SERIALISING) {
        bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    waited = bdrv_wait_serialising_requests(req);

    assert(!waited || !req->serialising ||
           is_request_serialising_and_aligned(req));
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    switch (req->type) {
    case BDRV_TRACKED_WRITE:
    case BDRV_TRACKED_DISCARD:
        if (flags & BDRV_REQ_WRITE_UNCHANGED) {
            assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
        } else {
            assert(child->perm & BLK_PERM_WRITE);
        }
        return notifier_with_return_list_notify(&bs->before_write_notifiers,
                                                req);
    case BDRV_TRACKED_TRUNCATE:
        assert(child->perm & BLK_PERM_RESIZE);
        return 0;
    default:
        abort();
    }
}

static inline void coroutine_fn
bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
                         BdrvTrackedRequest *req, int ret)
{
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    BlockDriverState *bs = child->bs;

    atomic_inc(&bs->write_gen);

    /*
     * Successful truncation and writes past the old end of file grow
     * the image.  Discard must not, even though in error handling
     * cases (such as when reverting a qcow2 cluster allocation) the
     * discarded range may reach past the end of the image, so
     * BDRV_TRACKED_DISCARD is excluded explicitly below.
     */
    if (ret == 0 &&
        (req->type == BDRV_TRACKED_TRUNCATE ||
         end_sector > bs->total_sectors) &&
        req->type != BDRV_TRACKED_DISCARD) {
        bs->total_sectors = end_sector;
        bdrv_parent_cb_resize(bs);
        bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
    }
    if (req->bytes) {
        switch (req->type) {
        case BDRV_TRACKED_WRITE:
            stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
        case BDRV_TRACKED_DISCARD:
            bdrv_set_dirty(bs, offset, bytes);
            break;
        default:
            break;
        }
    }
}


/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

    uint64_t bytes_remaining = bytes;
    int max_transfer;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert(!qiov || qiov_offset + bytes <= qiov->size);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                             qiov, qiov_offset);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, qiov, bytes - bytes_remaining,
                                      local_flags);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    if (ret >= 0) {
        ret = 0;
    }
    bdrv_co_write_req_finish(child, offset, bytes, req, ret);

    return ret;
}

static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                unsigned int bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    QEMUIOVector local_qiov;
    uint64_t align = bs->bl.request_alignment;
    int ret = 0;
    bool padding;
    BdrvRequestPadding pad;

    padding = bdrv_init_padding(bs, offset, bytes, &pad);
    if (padding) {
        bdrv_mark_request_serialising(req, align);
        bdrv_wait_serialising_requests(req);

        bdrv_padding_rmw_read(child, req, &pad, true);

        if (pad.head || pad.merge_reads) {
            int64_t aligned_offset = offset & ~(align - 1);
            int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;

            qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
            ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
                                       align, &local_qiov, 0,
                                       flags & ~BDRV_REQ_ZERO_WRITE);
            if (ret < 0 || pad.merge_reads) {
                /* Error or all work is done */
                goto out;
            }
            offset += write_bytes - pad.head;
            bytes -= write_bytes - pad.head;
        }
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        uint64_t aligned_bytes = bytes & ~(align - 1);
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, 0, flags);
        if (ret < 0) {
            goto out;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == pad.tail + bytes);

        qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, 0,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
    }

out:
    bdrv_padding_destroy(&pad);

    return ret;
}

/*
 * Handle a write request in coroutine context
 */
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    BdrvRequestPadding pad;
    int ret;

    trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* If the request is misaligned then we can't make it efficient */
    if ((flags & BDRV_REQ_NO_FALLBACK) &&
        !QEMU_IS_ALIGNED(offset | bytes, align))
    {
        return -ENOTSUP;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * Aligning zero request is nonsense. Even if driver has special
         * meaning of zero-length (like qcow2_co_pwritev_compressed_part),
         * we can't pass it to the driver due to request_alignment.
         *
         * Still, no reason to return an error if someone does an unaligned
         * zero-length write occasionally.
         */
        return 0;
    }

    bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

    if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
        bdrv_mark_request_serialising(&req, align);
        bdrv_wait_serialising_requests(&req);
        bdrv_padding_rmw_read(child, &req, &pad, false);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               qiov, qiov_offset, flags);

    bdrv_padding_destroy(&pad);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                       int bytes, BdrvRequestFlags flags)
{
    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
}

int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;
    int result = 0;

    /*
     * bdrv queue is managed by record/replay,
     * creating new flush request for stopping
     * the VM may break the determinism
     */
    if (replay_events_enabled()) {
        return result;
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}

typedef struct BdrvCoBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    bool want_zero;
    int64_t offset;
    int64_t bytes;
    int64_t *pnum;
    int64_t *map;
    BlockDriverState **file;
    int ret;
    bool done;
} BdrvCoBlockStatusData;

int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
                                                bool want_zero,
                                                int64_t offset,
                                                int64_t bytes,
                                                int64_t *pnum,
                                                int64_t *map,
                                                BlockDriverState **file)
{
    assert(bs->file && bs->file->bs);
    *pnum = bytes;
    *map = offset;
    *file = bs->file->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}

int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file)
{
    assert(bs->backing && bs->backing->bs);
    *pnum = bytes;
    *map = offset;
    *file = bs->backing->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}

/*
 * Returns the allocation status of the specified sectors.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'want_zero' is true, the caller is querying for mapping
 * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
 * _ZERO where possible; otherwise, the result favors larger 'pnum',
 * with a focus on accurate BDRV_BLOCK_ALLOCATED.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the max value 'pnum' should be set to.  If bytes goes
 * beyond the end of the disk image it will be clamped; if 'pnum' is set to
 * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are easily known to be in the
 * same allocated/unallocated state.  Note that a second call starting
 * at the original offset plus returned pnum may have the same status.
 * The returned value is non-zero on success except at end-of-file.
 *
 * Returns negative errno on failure.  Otherwise, if the
 * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
 * set to the host mapping and BDS corresponding to the guest offset.
 */
static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
                                             bool want_zero,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum, int64_t *map,
                                             BlockDriverState **file)
{
    int64_t total_size;
    int64_t n; /* bytes */
    int ret;
    int64_t local_map = 0;
    BlockDriverState *local_file = NULL;
    int64_t aligned_offset, aligned_bytes;
    uint32_t align;

    assert(pnum);
    *pnum = 0;
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        goto early_out;
    }

    if (offset >= total_size) {
        ret = BDRV_BLOCK_EOF;
        goto early_out;
    }
    if (!bytes) {
        ret = 0;
        goto early_out;
    }

    n = total_size - offset;
    if (n < bytes) {
        bytes = n;
    }

    /* Must be non-NULL or bdrv_getlength() would have failed */
    assert(bs->drv);
    if (!bs->drv->bdrv_co_block_status) {
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
    align = bs->bl.request_alignment;
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
                                        aligned_bytes, pnum, &local_map,
                                        &local_file);
    if (ret < 0) {
        *pnum = 0;
        goto out;
    }

    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
           align > offset - aligned_offset);
    if (ret & BDRV_BLOCK_RECURSE) {
        assert(ret & BDRV_BLOCK_DATA);
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        assert(!(ret & BDRV_BLOCK_ZERO));
    }

    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (want_zero) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing) {
            BlockDriverState *bs2 = bs->backing->bs;
            int64_t size2 = bdrv_getlength(bs2);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
        local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
                                                   BlockDriverState *base,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file)
{
    BlockDriverState *p;
    int ret = 0;
    bool first = true;

    assert(bs != base);
    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        if (ret < 0) {
            break;
        }
        if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read
             * zeroes, but we can only widen the result to the
             * unallocated length we learned from an earlier
             * iteration.
             */
            *pnum = bytes;
        }
        if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }
        /* [offset, pnum] unallocated on this layer, which could be only
         * the first part of [offset, bytes].  */
        bytes = MIN(bytes, *pnum);
        first = false;
    }
    return ret;
}

/* Coroutine wrapper for bdrv_block_status_above() */
static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
{
    BdrvCoBlockStatusData *data = opaque;

    data->ret = bdrv_co_block_status_above(data->bs, data->base,
                                           data->want_zero,
                                           data->offset, data->bytes,
                                           data->pnum, data->map, data->file);
    data->done = true;
    aio_wait_kick();
}

2498
2499
2500
2501
2502
2503static int bdrv_common_block_status_above(BlockDriverState *bs,
2504 BlockDriverState *base,
2505 bool want_zero, int64_t offset,
2506 int64_t bytes, int64_t *pnum,
2507 int64_t *map,
2508 BlockDriverState **file)
2509{
2510 Coroutine *co;
2511 BdrvCoBlockStatusData data = {
2512 .bs = bs,
2513 .base = base,
2514 .want_zero = want_zero,
2515 .offset = offset,
2516 .bytes = bytes,
2517 .pnum = pnum,
2518 .map = map,
2519 .file = file,
2520 .done = false,
2521 };
2522
2523 if (qemu_in_coroutine()) {
2524
2525 bdrv_block_status_above_co_entry(&data);
2526 } else {
2527 co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
2528 bdrv_coroutine_enter(bs, co);
2529 BDRV_POLL_WHILE(bs, !data.done);
2530 }
2531 return data.ret;
2532}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    return bdrv_common_block_status_above(bs, base, true, offset, bytes,
                                          pnum, map, file);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    return bdrv_block_status_above(bs, backing_bs(bs),
                                   offset, bytes, pnum, map, file);
}
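
/*
 * Illustrative sketch (not part of the original source): one way a caller
 * might walk an image with bdrv_block_status().  The names 'bs' and 'len'
 * are hypothetical caller-provided values.
 *
 *     int64_t offset = 0, pnum;
 *     while (offset < len) {
 *         int ret = bdrv_block_status(bs, offset, len - offset,
 *                                     &pnum, NULL, NULL);
 *         if (ret < 0) {
 *             break;                      // negative errno on failure
 *         }
 *         if (ret & BDRV_BLOCK_ZERO) {
 *             // [offset, offset + pnum) reads as zeroes
 *         }
 *         offset += pnum;                 // pnum > 0 except at EOF
 *     }
 */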

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int ret;
    int64_t dummy;

    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}
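
/*
 * Illustrative sketch (not part of the original source): checking whether
 * a range is allocated in the top layer before deciding to copy it.
 * 'bs', 'offset' and 'bytes' are hypothetical caller-provided values.
 *
 *     int64_t pnum;
 *     int ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
 *     if (ret < 0) {
 *         return ret;    // error
 *     } else if (ret) {
 *         // the first pnum bytes at offset are allocated in bs itself
 *     } else {
 *         // the first pnum bytes come from the backing chain (or read zero)
 *     }
 */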

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return 1 if (a prefix of) the given range is allocated in any image
 * between BASE and TOP (BASE is only included if include_base is set).
 * BASE can be NULL to check if the given offset is allocated in any
 * image of the chain.  Return 0 otherwise, or negative errno on
 * failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            bool include_base, int64_t offset,
                            int64_t bytes, int64_t *pnum)
{
    BlockDriverState *intermediate;
    int ret;
    int64_t n = bytes;

    assert(base || !include_base);

    intermediate = top;
    while (include_base || intermediate != base) {
        int64_t pnum_inter;
        int64_t size_inter;

        assert(intermediate);
        ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
        if (ret < 0) {
            return ret;
        }
        if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        size_inter = bdrv_getlength(intermediate);
        if (size_inter < 0) {
            return size_inter;
        }
        if (n > pnum_inter &&
            (intermediate == top || offset + pnum_inter < size_inter)) {
            n = pnum_inter;
        }

        if (intermediate == base) {
            break;
        }

        intermediate = backing_bs(intermediate);
    }

    *pnum = n;
    return 0;
}
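
/*
 * Illustrative sketch (not part of the original source): block-job style
 * code often asks whether data must be copied from the chain above 'base'.
 * 'top', 'base', 'offset' and 'bytes' are hypothetical caller values.
 *
 *     int64_t pnum;
 *     int ret = bdrv_is_allocated_above(top, base, false, offset, bytes,
 *                                       &pnum);
 *     if (ret > 0) {
 *         // the first pnum bytes live somewhere in (base, top]
 *     }
 */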

typedef struct BdrvVmstateCo {
    BlockDriverState *bs;
    QEMUIOVector *qiov;
    int64_t pos;
    bool is_read;
    int ret;
} BdrvVmstateCo;

static int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                   bool is_read)
{
    BlockDriver *drv = bs->drv;
    int ret = -ENOTSUP;

    bdrv_inc_in_flight(bs);

    if (!drv) {
        ret = -ENOMEDIUM;
    } else if (drv->bdrv_load_vmstate) {
        if (is_read) {
            ret = drv->bdrv_load_vmstate(bs, qiov, pos);
        } else {
            ret = drv->bdrv_save_vmstate(bs, qiov, pos);
        }
    } else if (bs->file) {
        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
    }

    bdrv_dec_in_flight(bs);
    return ret;
}

static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
    BdrvVmstateCo *co = opaque;
    co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
    aio_wait_kick();
}

static inline int
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                bool is_read)
{
    if (qemu_in_coroutine()) {
        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
    } else {
        BdrvVmstateCo data = {
            .bs = bs,
            .qiov = qiov,
            .pos = pos,
            .is_read = is_read,
            .ret = -EINPROGRESS,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
        return data.ret;
    }
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret;

    ret = bdrv_writev_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, false);
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret;

    ret = bdrv_readv_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, true);
}
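
/*
 * Illustrative sketch (not part of the original source): saving and then
 * restoring a blob of VM state at position 0.  'bs' is a hypothetical
 * node whose format driver (or one of its children) supports vmstate
 * access; on success both helpers return the byte count.
 *
 *     uint8_t buf[512];
 *     int ret = bdrv_save_vmstate(bs, buf, 0, sizeof(buf));
 *     if (ret == sizeof(buf)) {
 *         ret = bdrv_load_vmstate(bs, buf, 0, sizeof(buf));
 *     }
 */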

/**************************************************************/
/* async I/Os */

void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

/**************************************************************/
/* Coroutine block device emulation */

typedef struct FlushCo {
    BlockDriverState *bs;
    int ret;
} FlushCo;

static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    FlushCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
    aio_wait_kick();
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int current_gen;
    int ret = 0;

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = atomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work in
         * that case.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    FlushCo flush_co = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&flush_co);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

    return flush_co.ret;
}
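
/*
 * Illustrative sketch (not part of the original source): flushing a node
 * after a batch of writes so the data is stable on disk.  'bs' is a
 * hypothetical caller-provided node.
 *
 *     int ret = bdrv_flush(bs);
 *     if (ret < 0) {
 *         error_report("flush failed: %s", strerror(-ret));
 *     }
 */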

typedef struct DiscardCo {
    BdrvChild *child;
    int64_t offset;
    int64_t bytes;
    int ret;
} DiscardCo;
static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
    aio_wait_kick();
}

int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;
    BlockDriverState *bs = child->bs;

    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
        return -EIO;
    }

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
{
    Coroutine *co;
    DiscardCo rwco = {
        .child = child,
        .offset = offset,
        .bytes = bytes,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }

    return rwco.ret;
}
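
/*
 * Illustrative sketch (not part of the original source): discarding a
 * range the guest no longer needs.  'child' and 'bytes' are hypothetical;
 * note that unless the node was opened with BDRV_O_UNMAP, the call
 * silently does nothing and returns 0.
 *
 *     int ret = bdrv_pdiscard(child, 0, bytes);
 *     if (ret < 0) {
 *         // discard failed; the range may still hold the old data
 *     }
 */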

int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}
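
/*
 * Illustrative sketch (not part of the original source): allocating a
 * bounce buffer aligned for direct I/O against 'bs' (a hypothetical
 * node) and releasing it with qemu_vfree().
 *
 *     void *buf = qemu_try_blockalign(bs, 65536);
 *     if (!buf) {
 *         return -ENOMEM;
 *     }
 *     // use buf for aligned reads/writes here
 *     qemu_vfree(buf);
 */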

/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_min_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}

void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}

void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (atomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;

    assert(bs->io_plugged);
    if (atomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}
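
/*
 * Illustrative sketch (not part of the original source): batching a burst
 * of submissions between plug/unplug so drivers that support it can issue
 * them in one go.  'bs' and the submission loop are hypothetical.
 *
 *     bdrv_io_plug(bs);
 *     // queue several asynchronous requests on bs here
 *     bdrv_io_unplug(bs);   // must balance the plug call
 */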

void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
    BdrvChild *child;

    if (bs->drv && bs->drv->bdrv_register_buf) {
        bs->drv->bdrv_register_buf(bs, host, size);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_register_buf(child->bs, host, size);
    }
}

void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
    BdrvChild *child;

    if (bs->drv && bs->drv->bdrv_unregister_buf) {
        bs->drv->bdrv_unregister_buf(bs, host);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_unregister_buf(child->bs, host);
    }
}

static int coroutine_fn bdrv_co_copy_range_internal(
        BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
        uint64_t dst_offset, uint64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;

    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
    assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(write_flags & BDRV_REQ_NO_FALLBACK));

    if (!dst || !dst->bs) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
    if (ret) {
        return ret;
    }
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
    if (ret) {
        return ret;
    }

    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

    if (recurse_src) {
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING is only for write operation */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
            bdrv_wait_serialising_requests(&req);
        }

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}

/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
                                         BdrvChild *dst, uint64_t dst_offset,
                                         uint64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags)
{
    trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, true);
}

/* Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics. */
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
                                       BdrvChild *dst, uint64_t dst_offset,
                                       uint64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags)
{
    trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, false);
}

int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
                                    BdrvChild *dst, uint64_t dst_offset,
                                    uint64_t bytes, BdrvRequestFlags read_flags,
                                    BdrvRequestFlags write_flags)
{
    return bdrv_co_copy_range_from(src, src_offset,
                                   dst, dst_offset,
                                   bytes, read_flags, write_flags);
}
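
/*
 * Illustrative sketch (not part of the original source): offloaded copy
 * between two children, falling back when the drivers lack copy
 * offloading.  'src', 'dst', 'off' and 'len' are hypothetical.
 *
 *     int ret = bdrv_co_copy_range(src, off, dst, off, len, 0, 0);
 *     if (ret == -ENOTSUP) {
 *         // fall back to a bounce-buffer read/write path
 *     }
 */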

static void bdrv_parent_cb_resize(BlockDriverState *bs)
{
    BdrvChild *c;
    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->role->resize) {
            c->role->resize(c);
        }
    }
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as the given 'offset'.
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, Error **errp)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;

    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /* If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
    if (new_bytes) {
        bdrv_mark_request_serialising(&req, 1);
    }
    if (bs->read_only) {
        error_setg(errp, "Image is read-only");
        ret = -EACCES;
        goto out;
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    if (drv->bdrv_co_truncate) {
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
    } else if (bs->file && drv->is_filter) {
        ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /* It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so the request is finished as successful. */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

typedef struct TruncateCo {
    BdrvChild *child;
    int64_t offset;
    bool exact;
    PreallocMode prealloc;
    Error **errp;
    int ret;
} TruncateCo;

static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
{
    TruncateCo *tco = opaque;
    tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
                                tco->prealloc, tco->errp);
    aio_wait_kick();
}

int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
                  PreallocMode prealloc, Error **errp)
{
    Coroutine *co;
    TruncateCo tco = {
        .child = child,
        .offset = offset,
        .exact = exact,
        .prealloc = prealloc,
        .errp = errp,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_truncate_co_entry(&tco);
    } else {
        co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
    }

    return tco.ret;
}
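
/*
 * Illustrative sketch (not part of the original source): growing an image
 * to 'new_size' without preallocation.  'child' and 'new_size' are
 * hypothetical; exact=false means a larger-than-requested node is OK.
 *
 *     Error *local_err = NULL;
 *     int ret = bdrv_truncate(child, new_size, false, PREALLOC_MODE_OFF,
 *                             &local_err);
 *     if (ret < 0) {
 *         error_report_err(local_err);
 *     }
 */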