#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

/* Maximum bounce buffer for copy-on-read and write-zeroes fallback, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags);
47
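/*
 * Begin a quiesced section for all parents of @bs except @ignore; parents
 * that are themselves block nodes are skipped when @ignore_bds_parents is
 * true.
 */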
48static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
49 bool ignore_bds_parents)
50{
51 BdrvChild *c, *next;
52
53 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
54 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
55 continue;
56 }
57 bdrv_parent_drained_begin_single(c, false);
58 }
59}
60
61static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
62 int *drained_end_counter)
63{
64 assert(c->parent_quiesce_counter > 0);
65 c->parent_quiesce_counter--;
66 if (c->klass->drained_end) {
67 c->klass->drained_end(c, drained_end_counter);
68 }
69}
70
71void bdrv_parent_drained_end_single(BdrvChild *c)
72{
73 int drained_end_counter = 0;
74 IO_OR_GS_CODE();
75 bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
76 BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
77}
78
79static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
80 bool ignore_bds_parents,
81 int *drained_end_counter)
82{
83 BdrvChild *c;
84
85 QLIST_FOREACH(c, &bs->parents, next_parent) {
86 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
87 continue;
88 }
89 bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
90 }
91}
92
93static bool bdrv_parent_drained_poll_single(BdrvChild *c)
94{
95 if (c->klass->drained_poll) {
96 return c->klass->drained_poll(c);
97 }
98 return false;
99}
100
101static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
102 bool ignore_bds_parents)
103{
104 BdrvChild *c, *next;
105 bool busy = false;
106
107 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
108 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
109 continue;
110 }
111 busy |= bdrv_parent_drained_poll_single(c);
112 }
113
114 return busy;
115}
116
117void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
118{
119 IO_OR_GS_CODE();
120 c->parent_quiesce_counter++;
121 if (c->klass->drained_begin) {
122 c->klass->drained_begin(c);
123 }
124 if (poll) {
125 BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
126 }
127}
128
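/*
 * Merge @src into @dst, keeping the stricter limit for each field: the larger
 * alignment/optimal-transfer values and the smaller (non-zero) maximum
 * transfer and iovec counts.
 */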
129static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
130{
131 dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
132 src->pdiscard_alignment);
133 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
134 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
135 dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
136 src->max_hw_transfer);
137 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
138 src->opt_mem_alignment);
139 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
140 src->min_mem_alignment);
141 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
142 dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
143}
144
145typedef struct BdrvRefreshLimitsState {
146 BlockDriverState *bs;
147 BlockLimits old_bl;
148} BdrvRefreshLimitsState;
149
150static void bdrv_refresh_limits_abort(void *opaque)
151{
152 BdrvRefreshLimitsState *s = opaque;
153
154 s->bs->bl = s->old_bl;
155}
156
157static TransactionActionDrv bdrv_refresh_limits_drv = {
158 .abort = bdrv_refresh_limits_abort,
159 .clean = g_free,
160};
161
162
163void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
164{
165 ERRP_GUARD();
166 BlockDriver *drv = bs->drv;
167 BdrvChild *c;
168 bool have_limits;
169
170 GLOBAL_STATE_CODE();
171
172 if (tran) {
173 BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
174 *s = (BdrvRefreshLimitsState) {
175 .bs = bs,
176 .old_bl = bs->bl,
177 };
178 tran_add(tran, &bdrv_refresh_limits_drv, s);
179 }
180
181 memset(&bs->bl, 0, sizeof(bs->bl));
182
183 if (!drv) {
184 return;
185 }
186
187
188 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
189 drv->bdrv_aio_preadv ||
190 drv->bdrv_co_preadv_part) ? 1 : 512;
191
192
193 have_limits = false;
194 QLIST_FOREACH(c, &bs->children, next) {
195 if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
196 {
197 bdrv_merge_limits(&bs->bl, &c->bs->bl);
198 have_limits = true;
199 }
200 }
201
202 if (!have_limits) {
203 bs->bl.min_mem_alignment = 512;
204 bs->bl.opt_mem_alignment = qemu_real_host_page_size;
205
206
207 bs->bl.max_iov = IOV_MAX;
208 }
209
210
211 if (drv->bdrv_refresh_limits) {
212 drv->bdrv_refresh_limits(bs, errp);
213 if (*errp) {
214 return;
215 }
216 }
217
218 if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
219 error_setg(errp, "Driver requires too large request alignment");
220 }
221}
222
/*
 * Copy-on-read is enabled while at least one user has requested it; the
 * counter allows nested bdrv_enable_copy_on_read()/bdrv_disable_copy_on_read()
 * calls.
 */
228void bdrv_enable_copy_on_read(BlockDriverState *bs)
229{
230 IO_CODE();
231 qatomic_inc(&bs->copy_on_read);
232}
233
234void bdrv_disable_copy_on_read(BlockDriverState *bs)
235{
236 int old = qatomic_fetch_dec(&bs->copy_on_read);
237 IO_CODE();
238 assert(old >= 1);
239}
240
241typedef struct {
242 Coroutine *co;
243 BlockDriverState *bs;
244 bool done;
245 bool begin;
246 bool recursive;
247 bool poll;
248 BdrvChild *parent;
249 bool ignore_bds_parents;
250 int *drained_end_counter;
251} BdrvCoDrainData;
252
253static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
254{
255 BdrvCoDrainData *data = opaque;
256 BlockDriverState *bs = data->bs;
257
258 if (data->begin) {
259 bs->drv->bdrv_co_drain_begin(bs);
260 } else {
261 bs->drv->bdrv_co_drain_end(bs);
262 }
263
264
265 qatomic_mb_set(&data->done, true);
266 if (!data->begin) {
267 qatomic_dec(data->drained_end_counter);
268 }
269 bdrv_dec_in_flight(bs);
270
271 g_free(data);
272}
273
274
275static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
276 int *drained_end_counter)
277{
278 BdrvCoDrainData *data;
279
280 if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
281 (!begin && !bs->drv->bdrv_co_drain_end)) {
282 return;
283 }
284
285 data = g_new(BdrvCoDrainData, 1);
286 *data = (BdrvCoDrainData) {
287 .bs = bs,
288 .done = false,
289 .begin = begin,
290 .drained_end_counter = drained_end_counter,
291 };
292
293 if (!begin) {
294 qatomic_inc(drained_end_counter);
295 }
296
297
298
299 bdrv_inc_in_flight(bs);
300 data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
301 aio_co_schedule(bdrv_get_aio_context(bs), data->co);
302}
303
304
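/*
 * Poll helper for drained sections: returns true if there is still activity
 * to wait for, i.e. a parent or this node (and, with @recursive, any child)
 * has requests in flight or reports unfinished work via drained_poll.
 */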
305bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
306 BdrvChild *ignore_parent, bool ignore_bds_parents)
307{
308 BdrvChild *child, *next;
309 IO_OR_GS_CODE();
310
311 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
312 return true;
313 }
314
315 if (qatomic_read(&bs->in_flight)) {
316 return true;
317 }
318
319 if (recursive) {
320 assert(!ignore_bds_parents);
321 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
322 if (bdrv_drain_poll(child->bs, recursive, child, false)) {
323 return true;
324 }
325 }
326 }
327
328 return false;
329}
330
331static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
332 BdrvChild *ignore_parent)
333{
334 return bdrv_drain_poll(bs, recursive, ignore_parent, false);
335}
336
337static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
338 BdrvChild *parent, bool ignore_bds_parents,
339 bool poll);
340static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
341 BdrvChild *parent, bool ignore_bds_parents,
342 int *drained_end_counter);
343
344static void bdrv_co_drain_bh_cb(void *opaque)
345{
346 BdrvCoDrainData *data = opaque;
347 Coroutine *co = data->co;
348 BlockDriverState *bs = data->bs;
349
350 if (bs) {
351 AioContext *ctx = bdrv_get_aio_context(bs);
352 aio_context_acquire(ctx);
353 bdrv_dec_in_flight(bs);
354 if (data->begin) {
355 assert(!data->drained_end_counter);
356 bdrv_do_drained_begin(bs, data->recursive, data->parent,
357 data->ignore_bds_parents, data->poll);
358 } else {
359 assert(!data->poll);
360 bdrv_do_drained_end(bs, data->recursive, data->parent,
361 data->ignore_bds_parents,
362 data->drained_end_counter);
363 }
364 aio_context_release(ctx);
365 } else {
366 assert(data->begin);
367 bdrv_drain_all_begin();
368 }
369
370 data->done = true;
371 aio_co_wake(co);
372}
373
374static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
375 bool begin, bool recursive,
376 BdrvChild *parent,
377 bool ignore_bds_parents,
378 bool poll,
379 int *drained_end_counter)
380{
381 BdrvCoDrainData data;
382 Coroutine *self = qemu_coroutine_self();
383 AioContext *ctx = bdrv_get_aio_context(bs);
384 AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
385
386
387
388
389 assert(qemu_in_coroutine());
390 data = (BdrvCoDrainData) {
391 .co = self,
392 .bs = bs,
393 .done = false,
394 .begin = begin,
395 .recursive = recursive,
396 .parent = parent,
397 .ignore_bds_parents = ignore_bds_parents,
398 .poll = poll,
399 .drained_end_counter = drained_end_counter,
400 };
401
402 if (bs) {
403 bdrv_inc_in_flight(bs);
404 }
405
406
407
408
409
410
411
412
413
414 if (ctx != co_ctx) {
415 aio_context_release(ctx);
416 }
417 replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
418
419 qemu_coroutine_yield();
420
421
422 assert(data.done);
423
424
425 if (ctx != co_ctx) {
426 aio_context_acquire(ctx);
427 }
428}
429
430void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
431 BdrvChild *parent, bool ignore_bds_parents)
432{
433 IO_OR_GS_CODE();
434 assert(!qemu_in_coroutine());
435
436
437 if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
438 aio_disable_external(bdrv_get_aio_context(bs));
439 }
440
441 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
442 bdrv_drain_invoke(bs, true, NULL);
443}
444
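/*
 * Begin a drained section for @bs and, if @recursive, for all of its
 * children. From coroutine context the work is deferred to a bottom half in
 * the node's AioContext so that polling does not happen inside a coroutine.
 */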
445static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
446 BdrvChild *parent, bool ignore_bds_parents,
447 bool poll)
448{
449 BdrvChild *child, *next;
450
451 if (qemu_in_coroutine()) {
452 bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
453 poll, NULL);
454 return;
455 }
456
457 bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
458
459 if (recursive) {
460 assert(!ignore_bds_parents);
461 bs->recursive_quiesce_counter++;
462 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
463 bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
464 false);
465 }
466 }
467
468
469
470
471
472
473
474
475
476
477 if (poll) {
478 assert(!ignore_bds_parents);
479 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
480 }
481}
482
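/*
 * Quiesce @bs: stop accepting new external requests and wait for all
 * in-flight requests to complete. Must be paired with bdrv_drained_end().
 *
 * A typical caller (sketch only, not taken from this file) looks like:
 *
 *     bdrv_drained_begin(bs);
 *     ... reconfigure the node or the graph while no I/O is in flight ...
 *     bdrv_drained_end(bs);
 */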
483void bdrv_drained_begin(BlockDriverState *bs)
484{
485 IO_OR_GS_CODE();
486 bdrv_do_drained_begin(bs, false, NULL, false, true);
487}
488
489void bdrv_subtree_drained_begin(BlockDriverState *bs)
490{
491 IO_OR_GS_CODE();
492 bdrv_do_drained_begin(bs, true, NULL, false, true);
493}
494
/*
 * This function does not poll, nor may any of the functions it calls
 * recursively.  *drained_end_counter is incremented once for every background
 * operation that is scheduled and decremented once that operation settles, so
 * the pointer must stay valid until the counter reaches zero again; whoever
 * provides the counter is responsible for polling until then.
 *
 * *drained_end_counter is accessed atomically because the subtree of @bs may
 * span multiple AioContexts and bdrv_drain_all_end() shares one counter
 * between all nodes.
 */
509static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
510 BdrvChild *parent, bool ignore_bds_parents,
511 int *drained_end_counter)
512{
513 BdrvChild *child;
514 int old_quiesce_counter;
515
516 assert(drained_end_counter != NULL);
517
518 if (qemu_in_coroutine()) {
519 bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
520 false, drained_end_counter);
521 return;
522 }
523 assert(bs->quiesce_counter > 0);
524
525
526 bdrv_drain_invoke(bs, false, drained_end_counter);
527 bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
528 drained_end_counter);
529
530 old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
531 if (old_quiesce_counter == 1) {
532 aio_enable_external(bdrv_get_aio_context(bs));
533 }
534
535 if (recursive) {
536 assert(!ignore_bds_parents);
537 bs->recursive_quiesce_counter--;
538 QLIST_FOREACH(child, &bs->children, next) {
539 bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
540 drained_end_counter);
541 }
542 }
543}
544
545void bdrv_drained_end(BlockDriverState *bs)
546{
547 int drained_end_counter = 0;
548 IO_OR_GS_CODE();
549 bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
550 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
551}
552
553void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
554{
555 IO_CODE();
556 bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
557}
558
559void bdrv_subtree_drained_end(BlockDriverState *bs)
560{
561 int drained_end_counter = 0;
562 IO_OR_GS_CODE();
563 bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
564 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
565}
566
567void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
568{
569 int i;
570 IO_OR_GS_CODE();
571
572 for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
573 bdrv_do_drained_begin(child->bs, true, child, false, true);
574 }
575}
576
577void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
578{
579 int drained_end_counter = 0;
580 int i;
581 IO_OR_GS_CODE();
582
583 for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
584 bdrv_do_drained_end(child->bs, true, child, false,
585 &drained_end_counter);
586 }
587
588 BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
589}
590
/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until the next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the AioContext of
 * the BlockDriverState that is drained.
 */
598void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
599{
600 IO_OR_GS_CODE();
601 assert(qemu_in_coroutine());
602 bdrv_drained_begin(bs);
603 bdrv_drained_end(bs);
604}
605
606void bdrv_drain(BlockDriverState *bs)
607{
608 IO_OR_GS_CODE();
609 bdrv_drained_begin(bs);
610 bdrv_drained_end(bs);
611}
612
613static void bdrv_drain_assert_idle(BlockDriverState *bs)
614{
615 BdrvChild *child, *next;
616
617 assert(qatomic_read(&bs->in_flight) == 0);
618 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
619 bdrv_drain_assert_idle(child->bs);
620 }
621}
622
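/* Number of currently nested bdrv_drain_all_begin() sections */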
623unsigned int bdrv_drain_all_count = 0;
624
625static bool bdrv_drain_all_poll(void)
626{
627 BlockDriverState *bs = NULL;
628 bool result = false;
629 GLOBAL_STATE_CODE();
630
631
632
633 while ((bs = bdrv_next_all_states(bs))) {
634 AioContext *aio_context = bdrv_get_aio_context(bs);
635 aio_context_acquire(aio_context);
636 result |= bdrv_drain_poll(bs, false, NULL, true);
637 aio_context_release(aio_context);
638 }
639
640 return result;
641}
642
/*
 * Wait for pending requests to complete across all BlockDriverStates.
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must be
 * paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between the
 * bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
655void bdrv_drain_all_begin(void)
656{
657 BlockDriverState *bs = NULL;
658 GLOBAL_STATE_CODE();
659
660 if (qemu_in_coroutine()) {
661 bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
662 return;
663 }
664
665
666
667
668
669
670 if (replay_events_enabled()) {
671 return;
672 }
673
674
675
676 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
677 assert(bdrv_drain_all_count < INT_MAX);
678 bdrv_drain_all_count++;
679
680
681
682 while ((bs = bdrv_next_all_states(bs))) {
683 AioContext *aio_context = bdrv_get_aio_context(bs);
684
685 aio_context_acquire(aio_context);
686 bdrv_do_drained_begin(bs, false, NULL, true, false);
687 aio_context_release(aio_context);
688 }
689
690
691 AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
692
693 while ((bs = bdrv_next_all_states(bs))) {
694 bdrv_drain_assert_idle(bs);
695 }
696}
697
698void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
699{
700 int drained_end_counter = 0;
701 GLOBAL_STATE_CODE();
702
703 g_assert(bs->quiesce_counter > 0);
704 g_assert(!bs->refcnt);
705
706 while (bs->quiesce_counter) {
707 bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
708 }
709 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
710}
711
712void bdrv_drain_all_end(void)
713{
714 BlockDriverState *bs = NULL;
715 int drained_end_counter = 0;
716 GLOBAL_STATE_CODE();
717
718
719
720
721
722
723 if (replay_events_enabled()) {
724 return;
725 }
726
727 while ((bs = bdrv_next_all_states(bs))) {
728 AioContext *aio_context = bdrv_get_aio_context(bs);
729
730 aio_context_acquire(aio_context);
731 bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
732 aio_context_release(aio_context);
733 }
734
735 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
736 AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
737
738 assert(bdrv_drain_all_count > 0);
739 bdrv_drain_all_count--;
740}
741
742void bdrv_drain_all(void)
743{
744 GLOBAL_STATE_CODE();
745 bdrv_drain_all_begin();
746 bdrv_drain_all_end();
747}
748
/*
 * Remove an active request from the tracked requests list.
 *
 * This function should be called when a tracked request is completing.
 */
754static void tracked_request_end(BdrvTrackedRequest *req)
755{
756 if (req->serialising) {
757 qatomic_dec(&req->bs->serialising_in_flight);
758 }
759
760 qemu_co_mutex_lock(&req->bs->reqs_lock);
761 QLIST_REMOVE(req, list);
762 qemu_co_queue_restart_all(&req->wait_queue);
763 qemu_co_mutex_unlock(&req->bs->reqs_lock);
764}
765
/*
 * Add an active request to the tracked requests list.
 */
769static void tracked_request_begin(BdrvTrackedRequest *req,
770 BlockDriverState *bs,
771 int64_t offset,
772 int64_t bytes,
773 enum BdrvTrackedRequestType type)
774{
775 bdrv_check_request(offset, bytes, &error_abort);
776
777 *req = (BdrvTrackedRequest){
778 .bs = bs,
779 .offset = offset,
780 .bytes = bytes,
781 .type = type,
782 .co = qemu_coroutine_self(),
783 .serialising = false,
784 .overlap_offset = offset,
785 .overlap_bytes = bytes,
786 };
787
788 qemu_co_queue_init(&req->wait_queue);
789
790 qemu_co_mutex_lock(&bs->reqs_lock);
791 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
792 qemu_co_mutex_unlock(&bs->reqs_lock);
793}
794
795static bool tracked_request_overlaps(BdrvTrackedRequest *req,
796 int64_t offset, int64_t bytes)
797{
798 bdrv_check_request(offset, bytes, &error_abort);
799
800
801 if (offset >= req->overlap_offset + req->overlap_bytes) {
802 return false;
803 }
804
805 if (req->overlap_offset >= offset + bytes) {
806 return false;
807 }
808 return true;
809}
810
811
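/*
 * Return a tracked request that overlaps @self, where at least one of the two
 * requests is serialising and the found request is not itself waiting for
 * another request; NULL if there is no such conflict.
 */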
812static BdrvTrackedRequest *
813bdrv_find_conflicting_request(BdrvTrackedRequest *self)
814{
815 BdrvTrackedRequest *req;
816
817 QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
818 if (req == self || (!req->serialising && !self->serialising)) {
819 continue;
820 }
821 if (tracked_request_overlaps(req, self->overlap_offset,
822 self->overlap_bytes))
823 {
824
825
826
827
828
829 assert(qemu_coroutine_self() != req->co);
830
831
832
833
834
835
836 if (!req->waiting_for) {
837 return req;
838 }
839 }
840 }
841
842 return NULL;
843}
844
845
846static bool coroutine_fn
847bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
848{
849 BdrvTrackedRequest *req;
850 bool waited = false;
851
852 while ((req = bdrv_find_conflicting_request(self))) {
853 self->waiting_for = req;
854 qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
855 self->waiting_for = NULL;
856 waited = true;
857 }
858
859 return waited;
860}
861
862
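/*
 * Mark @req as serialising and widen its overlap range to @align boundaries,
 * so that overlap checks against other requests use the aligned range.
 */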
863static void tracked_request_set_serialising(BdrvTrackedRequest *req,
864 uint64_t align)
865{
866 int64_t overlap_offset = req->offset & ~(align - 1);
867 int64_t overlap_bytes =
868 ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
869
870 bdrv_check_request(req->offset, req->bytes, &error_abort);
871
872 if (!req->serialising) {
873 qatomic_inc(&req->bs->serialising_in_flight);
874 req->serialising = true;
875 }
876
877 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
878 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
879}
880
/*
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */
885BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
886{
887 BdrvTrackedRequest *req;
888 Coroutine *self = qemu_coroutine_self();
889 IO_CODE();
890
891 QLIST_FOREACH(req, &bs->tracked_requests, list) {
892 if (req->co == self) {
893 return req;
894 }
895 }
896
897 return NULL;
898}
899
/*
 * Round a region to cluster boundaries.
 */
903void bdrv_round_to_clusters(BlockDriverState *bs,
904 int64_t offset, int64_t bytes,
905 int64_t *cluster_offset,
906 int64_t *cluster_bytes)
907{
908 BlockDriverInfo bdi;
909 IO_CODE();
910 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
911 *cluster_offset = offset;
912 *cluster_bytes = bytes;
913 } else {
914 int64_t c = bdi.cluster_size;
915 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
916 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
917 }
918}
919
920static int bdrv_get_cluster_size(BlockDriverState *bs)
921{
922 BlockDriverInfo bdi;
923 int ret;
924
925 ret = bdrv_get_info(bs, &bdi);
926 if (ret < 0 || bdi.cluster_size == 0) {
927 return bs->bl.request_alignment;
928 } else {
929 return bdi.cluster_size;
930 }
931}
932
933void bdrv_inc_in_flight(BlockDriverState *bs)
934{
935 IO_CODE();
936 qatomic_inc(&bs->in_flight);
937}
938
939void bdrv_wakeup(BlockDriverState *bs)
940{
941 IO_CODE();
942 aio_wait_kick();
943}
944
945void bdrv_dec_in_flight(BlockDriverState *bs)
946{
947 IO_CODE();
948 qatomic_dec(&bs->in_flight);
949 bdrv_wakeup(bs);
950}
951
952static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
953{
954 BlockDriverState *bs = self->bs;
955 bool waited = false;
956
957 if (!qatomic_read(&bs->serialising_in_flight)) {
958 return false;
959 }
960
961 qemu_co_mutex_lock(&bs->reqs_lock);
962 waited = bdrv_wait_serialising_requests_locked(self);
963 qemu_co_mutex_unlock(&bs->reqs_lock);
964
965 return waited;
966}
967
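/*
 * Mark @req serialising with the given alignment and wait for any already
 * conflicting requests to complete. Returns true if waiting was necessary.
 */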
968bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
969 uint64_t align)
970{
971 bool waited;
972 IO_CODE();
973
974 qemu_co_mutex_lock(&req->bs->reqs_lock);
975
976 tracked_request_set_serialising(req, align);
977 waited = bdrv_wait_serialising_requests_locked(req);
978
979 qemu_co_mutex_unlock(&req->bs->reqs_lock);
980
981 return waited;
982}
983
984int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
985 QEMUIOVector *qiov, size_t qiov_offset,
986 Error **errp)
987{
988
989
990
991
992 if (offset < 0) {
993 error_setg(errp, "offset is negative: %" PRIi64, offset);
994 return -EIO;
995 }
996
997 if (bytes < 0) {
998 error_setg(errp, "bytes is negative: %" PRIi64, bytes);
999 return -EIO;
1000 }
1001
1002 if (bytes > BDRV_MAX_LENGTH) {
1003 error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
1004 bytes, BDRV_MAX_LENGTH);
1005 return -EIO;
1006 }
1007
1008 if (offset > BDRV_MAX_LENGTH) {
1009 error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
1010 offset, BDRV_MAX_LENGTH);
1011 return -EIO;
1012 }
1013
1014 if (offset > BDRV_MAX_LENGTH - bytes) {
1015 error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
1016 "exceeds maximum(%" PRIi64 ")", offset, bytes,
1017 BDRV_MAX_LENGTH);
1018 return -EIO;
1019 }
1020
1021 if (!qiov) {
1022 return 0;
1023 }
1024
1025
1026
1027
1028
1029 if (qiov_offset > qiov->size) {
1030 error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
1031 qiov_offset, qiov->size);
1032 return -EIO;
1033 }
1034
1035 if (bytes > qiov->size - qiov_offset) {
1036 error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
1037 "vector size(%zu)", bytes, qiov_offset, qiov->size);
1038 return -EIO;
1039 }
1040
1041 return 0;
1042}
1043
1044int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
1045{
1046 return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
1047}
1048
1049static int bdrv_check_request32(int64_t offset, int64_t bytes,
1050 QEMUIOVector *qiov, size_t qiov_offset)
1051{
1052 int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
1053 if (ret < 0) {
1054 return ret;
1055 }
1056
1057 if (bytes > BDRV_REQUEST_MAX_BYTES) {
1058 return -EIO;
1059 }
1060
1061 return 0;
1062}
1063
1064int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
1065 int64_t bytes, BdrvRequestFlags flags)
1066{
1067 IO_CODE();
1068 return bdrv_pwritev(child, offset, bytes, NULL,
1069 BDRV_REQ_ZERO_WRITE | flags);
1070}
1071
/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success.
 */
1081int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
1082{
1083 int ret;
1084 int64_t target_size, bytes, offset = 0;
1085 BlockDriverState *bs = child->bs;
1086 IO_CODE();
1087
1088 target_size = bdrv_getlength(bs);
1089 if (target_size < 0) {
1090 return target_size;
1091 }
1092
1093 for (;;) {
1094 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
1095 if (bytes <= 0) {
1096 return 0;
1097 }
1098 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
1099 if (ret < 0) {
1100 return ret;
1101 }
1102 if (ret & BDRV_BLOCK_ZERO) {
1103 offset += bytes;
1104 continue;
1105 }
1106 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
1107 if (ret < 0) {
1108 return ret;
1109 }
1110 offset += bytes;
1111 }
1112}
1113
1114
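/*
 * Synchronous read wrapper around bdrv_preadv(): returns the number of bytes
 * read on success or a negative errno on failure.
 */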
1115int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
1116{
1117 int ret;
1118 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1119 IO_CODE();
1120
1121 if (bytes < 0) {
1122 return -EINVAL;
1123 }
1124
1125 ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
1126
1127 return ret < 0 ? ret : bytes;
1128}
1129
/*
 * Return the number of bytes written on success or a negative errno on
 * failure. Important error codes include -EIO (generic I/O error),
 * -ENOMEDIUM (no medium inserted), -EINVAL (invalid offset or byte count)
 * and -EACCES (attempt to write to a read-only device).
 */
1136int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
1137 int64_t bytes)
1138{
1139 int ret;
1140 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1141 IO_CODE();
1142
1143 if (bytes < 0) {
1144 return -EINVAL;
1145 }
1146
1147 ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
1148
1149 return ret < 0 ? ret : bytes;
1150}
1151
/*
 * Writes to the file and ensures that no writes are reordered across this
 * point (acts as a barrier).
 *
 * Returns 0 on success, -errno in error cases.
 */
1158int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
1159 const void *buf, int64_t count)
1160{
1161 int ret;
1162 IO_CODE();
1163
1164 ret = bdrv_pwrite(child, offset, buf, count);
1165 if (ret < 0) {
1166 return ret;
1167 }
1168
1169 ret = bdrv_flush(child->bs);
1170 if (ret < 0) {
1171 return ret;
1172 }
1173
1174 return 0;
1175}
1176
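/*
 * Adapter between the callback-based AIO driver interface and coroutines:
 * the completion callback stores the result and re-enters the coroutine that
 * issued the request.
 */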
1177typedef struct CoroutineIOCompletion {
1178 Coroutine *coroutine;
1179 int ret;
1180} CoroutineIOCompletion;
1181
1182static void bdrv_co_io_em_complete(void *opaque, int ret)
1183{
1184 CoroutineIOCompletion *co = opaque;
1185
1186 co->ret = ret;
1187 aio_co_wake(co->coroutine);
1188}
1189
1190static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1191 int64_t offset, int64_t bytes,
1192 QEMUIOVector *qiov,
1193 size_t qiov_offset, int flags)
1194{
1195 BlockDriver *drv = bs->drv;
1196 int64_t sector_num;
1197 unsigned int nb_sectors;
1198 QEMUIOVector local_qiov;
1199 int ret;
1200
1201 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1202 assert(!(flags & ~BDRV_REQ_MASK));
1203 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1204
1205 if (!drv) {
1206 return -ENOMEDIUM;
1207 }
1208
1209 if (drv->bdrv_co_preadv_part) {
1210 return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
1211 flags);
1212 }
1213
1214 if (qiov_offset > 0 || bytes != qiov->size) {
1215 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1216 qiov = &local_qiov;
1217 }
1218
1219 if (drv->bdrv_co_preadv) {
1220 ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1221 goto out;
1222 }
1223
1224 if (drv->bdrv_aio_preadv) {
1225 BlockAIOCB *acb;
1226 CoroutineIOCompletion co = {
1227 .coroutine = qemu_coroutine_self(),
1228 };
1229
1230 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1231 bdrv_co_io_em_complete, &co);
1232 if (acb == NULL) {
1233 ret = -EIO;
1234 goto out;
1235 } else {
1236 qemu_coroutine_yield();
1237 ret = co.ret;
1238 goto out;
1239 }
1240 }
1241
1242 sector_num = offset >> BDRV_SECTOR_BITS;
1243 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1244
1245 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1246 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1247 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1248 assert(drv->bdrv_co_readv);
1249
1250 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1251
1252out:
1253 if (qiov == &local_qiov) {
1254 qemu_iovec_destroy(&local_qiov);
1255 }
1256
1257 return ret;
1258}
1259
1260static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1261 int64_t offset, int64_t bytes,
1262 QEMUIOVector *qiov,
1263 size_t qiov_offset,
1264 BdrvRequestFlags flags)
1265{
1266 BlockDriver *drv = bs->drv;
1267 int64_t sector_num;
1268 unsigned int nb_sectors;
1269 QEMUIOVector local_qiov;
1270 int ret;
1271
1272 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1273 assert(!(flags & ~BDRV_REQ_MASK));
1274 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1275
1276 if (!drv) {
1277 return -ENOMEDIUM;
1278 }
1279
1280 if (drv->bdrv_co_pwritev_part) {
1281 ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
1282 flags & bs->supported_write_flags);
1283 flags &= ~bs->supported_write_flags;
1284 goto emulate_flags;
1285 }
1286
1287 if (qiov_offset > 0 || bytes != qiov->size) {
1288 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1289 qiov = &local_qiov;
1290 }
1291
1292 if (drv->bdrv_co_pwritev) {
1293 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1294 flags & bs->supported_write_flags);
1295 flags &= ~bs->supported_write_flags;
1296 goto emulate_flags;
1297 }
1298
1299 if (drv->bdrv_aio_pwritev) {
1300 BlockAIOCB *acb;
1301 CoroutineIOCompletion co = {
1302 .coroutine = qemu_coroutine_self(),
1303 };
1304
1305 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1306 flags & bs->supported_write_flags,
1307 bdrv_co_io_em_complete, &co);
1308 flags &= ~bs->supported_write_flags;
1309 if (acb == NULL) {
1310 ret = -EIO;
1311 } else {
1312 qemu_coroutine_yield();
1313 ret = co.ret;
1314 }
1315 goto emulate_flags;
1316 }
1317
1318 sector_num = offset >> BDRV_SECTOR_BITS;
1319 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1320
1321 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1322 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1323 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1324
1325 assert(drv->bdrv_co_writev);
1326 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1327 flags & bs->supported_write_flags);
1328 flags &= ~bs->supported_write_flags;
1329
1330emulate_flags:
1331 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1332 ret = bdrv_co_flush(bs);
1333 }
1334
1335 if (qiov == &local_qiov) {
1336 qemu_iovec_destroy(&local_qiov);
1337 }
1338
1339 return ret;
1340}
1341
1342static int coroutine_fn
1343bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
1344 int64_t bytes, QEMUIOVector *qiov,
1345 size_t qiov_offset)
1346{
1347 BlockDriver *drv = bs->drv;
1348 QEMUIOVector local_qiov;
1349 int ret;
1350
1351 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1352
1353 if (!drv) {
1354 return -ENOMEDIUM;
1355 }
1356
1357 if (!block_driver_can_compress(drv)) {
1358 return -ENOTSUP;
1359 }
1360
1361 if (drv->bdrv_co_pwritev_compressed_part) {
1362 return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
1363 qiov, qiov_offset);
1364 }
1365
1366 if (qiov_offset == 0) {
1367 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1368 }
1369
1370 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1371 ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
1372 qemu_iovec_destroy(&local_qiov);
1373
1374 return ret;
1375}
1376
1377static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1378 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1379 size_t qiov_offset, int flags)
1380{
1381 BlockDriverState *bs = child->bs;
1382
1383
1384
1385
1386
1387
1388 void *bounce_buffer = NULL;
1389
1390 BlockDriver *drv = bs->drv;
1391 int64_t cluster_offset;
1392 int64_t cluster_bytes;
1393 int64_t skip_bytes;
1394 int ret;
1395 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1396 BDRV_REQUEST_MAX_BYTES);
1397 int64_t progress = 0;
1398 bool skip_write;
1399
1400 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1401
1402 if (!drv) {
1403 return -ENOMEDIUM;
1404 }
1405
1406
1407
1408
1409
1410 skip_write = (bs->open_flags & BDRV_O_INACTIVE);
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1427 skip_bytes = offset - cluster_offset;
1428
1429 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1430 cluster_offset, cluster_bytes);
1431
1432 while (cluster_bytes) {
1433 int64_t pnum;
1434
1435 if (skip_write) {
1436 ret = 1;
1437 pnum = MIN(cluster_bytes, max_transfer);
1438 } else {
1439 ret = bdrv_is_allocated(bs, cluster_offset,
1440 MIN(cluster_bytes, max_transfer), &pnum);
1441 if (ret < 0) {
1442
1443
1444
1445
1446
1447 pnum = MIN(cluster_bytes, max_transfer);
1448 }
1449
1450
1451 if (ret == 0 && pnum == 0) {
1452 assert(progress >= bytes);
1453 break;
1454 }
1455
1456 assert(skip_bytes < pnum);
1457 }
1458
1459 if (ret <= 0) {
1460 QEMUIOVector local_qiov;
1461
1462
1463 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1464 if (!bounce_buffer) {
1465 int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
1466 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
1467 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
1468
1469 bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
1470 if (!bounce_buffer) {
1471 ret = -ENOMEM;
1472 goto err;
1473 }
1474 }
1475 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1476
1477 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1478 &local_qiov, 0, 0);
1479 if (ret < 0) {
1480 goto err;
1481 }
1482
1483 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1484 if (drv->bdrv_co_pwrite_zeroes &&
1485 buffer_is_zero(bounce_buffer, pnum)) {
1486
1487
1488
1489 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1490 BDRV_REQ_WRITE_UNCHANGED);
1491 } else {
1492
1493
1494
1495 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1496 &local_qiov, 0,
1497 BDRV_REQ_WRITE_UNCHANGED);
1498 }
1499
1500 if (ret < 0) {
1501
1502
1503
1504
1505
1506 goto err;
1507 }
1508
1509 if (!(flags & BDRV_REQ_PREFETCH)) {
1510 qemu_iovec_from_buf(qiov, qiov_offset + progress,
1511 bounce_buffer + skip_bytes,
1512 MIN(pnum - skip_bytes, bytes - progress));
1513 }
1514 } else if (!(flags & BDRV_REQ_PREFETCH)) {
1515
1516 ret = bdrv_driver_preadv(bs, offset + progress,
1517 MIN(pnum - skip_bytes, bytes - progress),
1518 qiov, qiov_offset + progress, 0);
1519 if (ret < 0) {
1520 goto err;
1521 }
1522 }
1523
1524 cluster_offset += pnum;
1525 cluster_bytes -= pnum;
1526 progress += pnum - skip_bytes;
1527 skip_bytes = 0;
1528 }
1529 ret = 0;
1530
1531err:
1532 qemu_vfree(bounce_buffer);
1533 return ret;
1534}
1535
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
1541static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1542 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
1543 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
1544{
1545 BlockDriverState *bs = child->bs;
1546 int64_t total_bytes, max_bytes;
1547 int ret = 0;
1548 int64_t bytes_remaining = bytes;
1549 int max_transfer;
1550
1551 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1552 assert(is_power_of_2(align));
1553 assert((offset & (align - 1)) == 0);
1554 assert((bytes & (align - 1)) == 0);
1555 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1556 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1557 align);
1558
1559
1560
1561
1562
1563 assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
1564
1565
1566 if (flags & BDRV_REQ_COPY_ON_READ) {
1567
1568
1569
1570
1571
1572 bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
1573 } else {
1574 bdrv_wait_serialising_requests(req);
1575 }
1576
1577 if (flags & BDRV_REQ_COPY_ON_READ) {
1578 int64_t pnum;
1579
1580
1581 flags &= ~BDRV_REQ_COPY_ON_READ;
1582
1583 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1584 if (ret < 0) {
1585 goto out;
1586 }
1587
1588 if (!ret || pnum != bytes) {
1589 ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
1590 qiov, qiov_offset, flags);
1591 goto out;
1592 } else if (flags & BDRV_REQ_PREFETCH) {
1593 goto out;
1594 }
1595 }
1596
1597
1598 total_bytes = bdrv_getlength(bs);
1599 if (total_bytes < 0) {
1600 ret = total_bytes;
1601 goto out;
1602 }
1603
1604 assert(!(flags & ~bs->supported_read_flags));
1605
1606 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1607 if (bytes <= max_bytes && bytes <= max_transfer) {
1608 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
1609 goto out;
1610 }
1611
1612 while (bytes_remaining) {
1613 int64_t num;
1614
1615 if (max_bytes) {
1616 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1617 assert(num);
1618
1619 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1620 num, qiov,
1621 qiov_offset + bytes - bytes_remaining,
1622 flags);
1623 max_bytes -= num;
1624 } else {
1625 num = bytes_remaining;
1626 ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
1627 0, bytes_remaining);
1628 }
1629 if (ret < 0) {
1630 goto out;
1631 }
1632 bytes_remaining -= num;
1633 }
1634
1635out:
1636 return ret < 0 ? ret : 0;
1637}
1638
/*
 * Request padding
 *
 * When a request is not aligned to bs->bl.request_alignment, it is widened to
 * the surrounding aligned boundaries: @head bytes of padding are added in
 * front of the request and @tail bytes after it. @buf is an aligned bounce
 * buffer that holds the head padding; @tail_buf points at the last
 * request_alignment bytes of @buf. @merge_reads is true when the whole padded
 * request fits in @buf, so that a single RMW read covers both the head and
 * the tail padding. @local_qiov is the padded I/O vector built from @buf and
 * the caller's qiov.
 */
1661typedef struct BdrvRequestPadding {
1662 uint8_t *buf;
1663 size_t buf_len;
1664 uint8_t *tail_buf;
1665 size_t head;
1666 size_t tail;
1667 bool merge_reads;
1668 QEMUIOVector local_qiov;
1669} BdrvRequestPadding;
1670
1671static bool bdrv_init_padding(BlockDriverState *bs,
1672 int64_t offset, int64_t bytes,
1673 BdrvRequestPadding *pad)
1674{
1675 int64_t align = bs->bl.request_alignment;
1676 int64_t sum;
1677
1678 bdrv_check_request(offset, bytes, &error_abort);
1679 assert(align <= INT_MAX);
1680 assert(align <= SIZE_MAX / 2);
1681
1682 memset(pad, 0, sizeof(*pad));
1683
1684 pad->head = offset & (align - 1);
1685 pad->tail = ((offset + bytes) & (align - 1));
1686 if (pad->tail) {
1687 pad->tail = align - pad->tail;
1688 }
1689
1690 if (!pad->head && !pad->tail) {
1691 return false;
1692 }
1693
1694 assert(bytes);
1695
1696 sum = pad->head + bytes + pad->tail;
1697 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1698 pad->buf = qemu_blockalign(bs, pad->buf_len);
1699 pad->merge_reads = sum == pad->buf_len;
1700 if (pad->tail) {
1701 pad->tail_buf = pad->buf + pad->buf_len - align;
1702 }
1703
1704 return true;
1705}
1706
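/*
 * Perform the read part of read-modify-write for a padded request: read the
 * aligned head and/or tail areas into the bounce buffer and, if @zero_middle
 * is set, zero the buffer region between the head and tail padding (used for
 * zero writes).
 */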
1707static int bdrv_padding_rmw_read(BdrvChild *child,
1708 BdrvTrackedRequest *req,
1709 BdrvRequestPadding *pad,
1710 bool zero_middle)
1711{
1712 QEMUIOVector local_qiov;
1713 BlockDriverState *bs = child->bs;
1714 uint64_t align = bs->bl.request_alignment;
1715 int ret;
1716
1717 assert(req->serialising && pad->buf);
1718
1719 if (pad->head || pad->merge_reads) {
1720 int64_t bytes = pad->merge_reads ? pad->buf_len : align;
1721
1722 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1723
1724 if (pad->head) {
1725 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1726 }
1727 if (pad->merge_reads && pad->tail) {
1728 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1729 }
1730 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1731 align, &local_qiov, 0, 0);
1732 if (ret < 0) {
1733 return ret;
1734 }
1735 if (pad->head) {
1736 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1737 }
1738 if (pad->merge_reads && pad->tail) {
1739 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1740 }
1741
1742 if (pad->merge_reads) {
1743 goto zero_mem;
1744 }
1745 }
1746
1747 if (pad->tail) {
1748 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1749
1750 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1751 ret = bdrv_aligned_preadv(
1752 child, req,
1753 req->overlap_offset + req->overlap_bytes - align,
1754 align, align, &local_qiov, 0, 0);
1755 if (ret < 0) {
1756 return ret;
1757 }
1758 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1759 }
1760
1761zero_mem:
1762 if (zero_middle) {
1763 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1764 }
1765
1766 return 0;
1767}
1768
1769static void bdrv_padding_destroy(BdrvRequestPadding *pad)
1770{
1771 if (pad->buf) {
1772 qemu_vfree(pad->buf);
1773 qemu_iovec_destroy(&pad->local_qiov);
1774 }
1775 memset(pad, 0, sizeof(*pad));
1776}
1777
/*
 * bdrv_pad_request
 *
 * Exchange request parameters with a padded request if needed. The RMW read
 * of the padding areas is not performed here; call bdrv_padding_rmw_read()
 * separately if it is required.
 *
 * Request parameters (@qiov, @qiov_offset, @offset, @bytes) are in-out:
 *  - on entry they describe the original request
 *  - on failure, or when no padding is needed, they are left unchanged
 *  - on success with padding they describe the padded request
 *
 * @padded, if non-NULL, is set to whether padding was applied.
 */
1790static int bdrv_pad_request(BlockDriverState *bs,
1791 QEMUIOVector **qiov, size_t *qiov_offset,
1792 int64_t *offset, int64_t *bytes,
1793 BdrvRequestPadding *pad, bool *padded)
1794{
1795 int ret;
1796
1797 bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
1798
1799 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
1800 if (padded) {
1801 *padded = false;
1802 }
1803 return 0;
1804 }
1805
1806 ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
1807 *qiov, *qiov_offset, *bytes,
1808 pad->buf + pad->buf_len - pad->tail,
1809 pad->tail);
1810 if (ret < 0) {
1811 bdrv_padding_destroy(pad);
1812 return ret;
1813 }
1814 *bytes += pad->head + pad->tail;
1815 *offset -= pad->head;
1816 *qiov = &pad->local_qiov;
1817 *qiov_offset = 0;
1818 if (padded) {
1819 *padded = true;
1820 }
1821
1822 return 0;
1823}
1824
1825int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1826 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1827 BdrvRequestFlags flags)
1828{
1829 IO_CODE();
1830 return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
1831}
1832
1833int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
1834 int64_t offset, int64_t bytes,
1835 QEMUIOVector *qiov, size_t qiov_offset,
1836 BdrvRequestFlags flags)
1837{
1838 BlockDriverState *bs = child->bs;
1839 BdrvTrackedRequest req;
1840 BdrvRequestPadding pad;
1841 int ret;
1842 IO_CODE();
1843
1844 trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
1845
1846 if (!bdrv_is_inserted(bs)) {
1847 return -ENOMEDIUM;
1848 }
1849
1850 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
1851 if (ret < 0) {
1852 return ret;
1853 }
1854
1855 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
1856
1857
1858
1859
1860
1861
1862
1863
1864 return 0;
1865 }
1866
1867 bdrv_inc_in_flight(bs);
1868
1869
1870 if (qatomic_read(&bs->copy_on_read)) {
1871 flags |= BDRV_REQ_COPY_ON_READ;
1872 }
1873
1874 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
1875 NULL);
1876 if (ret < 0) {
1877 goto fail;
1878 }
1879
1880 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1881 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1882 bs->bl.request_alignment,
1883 qiov, qiov_offset, flags);
1884 tracked_request_end(&req);
1885 bdrv_padding_destroy(&pad);
1886
1887fail:
1888 bdrv_dec_in_flight(bs);
1889
1890 return ret;
1891}
1892
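/*
 * Write zeroes to [offset, offset + bytes): unaligned head and tail pieces
 * are limited so they reach the next pwrite_zeroes_alignment boundary, the
 * driver's bdrv_co_pwrite_zeroes callback is used where available, and
 * otherwise (unless BDRV_REQ_NO_FALLBACK is set) an explicit write of a
 * zeroed bounce buffer is performed. A final flush is issued if FUA was
 * requested but had to be emulated.
 */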
1893static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1894 int64_t offset, int64_t bytes, BdrvRequestFlags flags)
1895{
1896 BlockDriver *drv = bs->drv;
1897 QEMUIOVector qiov;
1898 void *buf = NULL;
1899 int ret = 0;
1900 bool need_flush = false;
1901 int head = 0;
1902 int tail = 0;
1903
1904 int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
1905 INT64_MAX);
1906 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1907 bs->bl.request_alignment);
1908 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1909
1910 bdrv_check_request(offset, bytes, &error_abort);
1911
1912 if (!drv) {
1913 return -ENOMEDIUM;
1914 }
1915
1916 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1917 return -ENOTSUP;
1918 }
1919
1920
1921 bdrv_bsc_invalidate_range(bs, offset, bytes);
1922
1923 assert(alignment % bs->bl.request_alignment == 0);
1924 head = offset % alignment;
1925 tail = (offset + bytes) % alignment;
1926 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1927 assert(max_write_zeroes >= bs->bl.request_alignment);
1928
1929 while (bytes > 0 && !ret) {
1930 int64_t num = bytes;
1931
1932
1933
1934
1935
1936 if (head) {
1937
1938
1939
1940 num = MIN(MIN(bytes, max_transfer), alignment - head);
1941 head = (head + num) % alignment;
1942 assert(num < max_write_zeroes);
1943 } else if (tail && num > alignment) {
1944
1945 num -= tail;
1946 }
1947
1948
1949 if (num > max_write_zeroes) {
1950 num = max_write_zeroes;
1951 }
1952
1953 ret = -ENOTSUP;
1954
1955 if (drv->bdrv_co_pwrite_zeroes) {
1956 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1957 flags & bs->supported_zero_flags);
1958 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1959 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1960 need_flush = true;
1961 }
1962 } else {
1963 assert(!bs->supported_zero_flags);
1964 }
1965
1966 if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
1967
1968 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1969
1970 if ((flags & BDRV_REQ_FUA) &&
1971 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1972
1973
1974 write_flags &= ~BDRV_REQ_FUA;
1975 need_flush = true;
1976 }
1977 num = MIN(num, max_transfer);
1978 if (buf == NULL) {
1979 buf = qemu_try_blockalign0(bs, num);
1980 if (buf == NULL) {
1981 ret = -ENOMEM;
1982 goto fail;
1983 }
1984 }
1985 qemu_iovec_init_buf(&qiov, buf, num);
1986
1987 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
1988
1989
1990
1991
1992 if (num < max_transfer) {
1993 qemu_vfree(buf);
1994 buf = NULL;
1995 }
1996 }
1997
1998 offset += num;
1999 bytes -= num;
2000 }
2001
2002fail:
2003 if (ret == 0 && need_flush) {
2004 ret = bdrv_co_flush(bs);
2005 }
2006 qemu_vfree(buf);
2007 return ret;
2008}
2009
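/*
 * Common checks and serialisation before a write-style request (write,
 * discard or truncate): fails read-only nodes, optionally makes the request
 * serialising (BDRV_REQ_SERIALISING, returning -EBUSY for BDRV_REQ_NO_WAIT
 * when a conflict exists), waits for conflicting serialising requests and
 * checks the child permissions required for the request type.
 */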
2010static inline int coroutine_fn
2011bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
2012 BdrvTrackedRequest *req, int flags)
2013{
2014 BlockDriverState *bs = child->bs;
2015
2016 bdrv_check_request(offset, bytes, &error_abort);
2017
2018 if (bdrv_is_read_only(bs)) {
2019 return -EPERM;
2020 }
2021
2022 assert(!(bs->open_flags & BDRV_O_INACTIVE));
2023 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
2024 assert(!(flags & ~BDRV_REQ_MASK));
2025 assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
2026
2027 if (flags & BDRV_REQ_SERIALISING) {
2028 QEMU_LOCK_GUARD(&bs->reqs_lock);
2029
2030 tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
2031
2032 if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
2033 return -EBUSY;
2034 }
2035
2036 bdrv_wait_serialising_requests_locked(req);
2037 } else {
2038 bdrv_wait_serialising_requests(req);
2039 }
2040
2041 assert(req->overlap_offset <= offset);
2042 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
2043 assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
2044 child->perm & BLK_PERM_RESIZE);
2045
2046 switch (req->type) {
2047 case BDRV_TRACKED_WRITE:
2048 case BDRV_TRACKED_DISCARD:
2049 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
2050 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
2051 } else {
2052 assert(child->perm & BLK_PERM_WRITE);
2053 }
2054 bdrv_write_threshold_check_write(bs, offset, bytes);
2055 return 0;
2056 case BDRV_TRACKED_TRUNCATE:
2057 assert(child->perm & BLK_PERM_RESIZE);
2058 return 0;
2059 default:
2060 abort();
2061 }
2062}
2063
2064static inline void coroutine_fn
2065bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
2066 BdrvTrackedRequest *req, int ret)
2067{
2068 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
2069 BlockDriverState *bs = child->bs;
2070
2071 bdrv_check_request(offset, bytes, &error_abort);
2072
2073 qatomic_inc(&bs->write_gen);
2074
2075
2076
2077
2078
2079
2080
2081
2082 if (ret == 0 &&
2083 (req->type == BDRV_TRACKED_TRUNCATE ||
2084 end_sector > bs->total_sectors) &&
2085 req->type != BDRV_TRACKED_DISCARD) {
2086 bs->total_sectors = end_sector;
2087 bdrv_parent_cb_resize(bs);
2088 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
2089 }
2090 if (req->bytes) {
2091 switch (req->type) {
2092 case BDRV_TRACKED_WRITE:
2093 stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
2095 case BDRV_TRACKED_DISCARD:
2096 bdrv_set_dirty(bs, offset, bytes);
2097 break;
2098 default:
2099 break;
2100 }
2101 }
2102}
2103
/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
2108static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
2109 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
2110 int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
2111 BdrvRequestFlags flags)
2112{
2113 BlockDriverState *bs = child->bs;
2114 BlockDriver *drv = bs->drv;
2115 int ret;
2116
2117 int64_t bytes_remaining = bytes;
2118 int max_transfer;
2119
2120 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
2121
2122 if (!drv) {
2123 return -ENOMEDIUM;
2124 }
2125
2126 if (bdrv_has_readonly_bitmaps(bs)) {
2127 return -EPERM;
2128 }
2129
2130 assert(is_power_of_2(align));
2131 assert((offset & (align - 1)) == 0);
2132 assert((bytes & (align - 1)) == 0);
2133 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
2134 align);
2135
2136 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
2137
2138 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
2139 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
2140 qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
2141 flags |= BDRV_REQ_ZERO_WRITE;
2142 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
2143 flags |= BDRV_REQ_MAY_UNMAP;
2144 }
2145 }
2146
2147 if (ret < 0) {
2148
2149 } else if (flags & BDRV_REQ_ZERO_WRITE) {
2150 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
2151 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
2152 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
2153 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
2154 qiov, qiov_offset);
2155 } else if (bytes <= max_transfer) {
2156 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2157 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
2158 } else {
2159 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2160 while (bytes_remaining) {
2161 int num = MIN(bytes_remaining, max_transfer);
2162 int local_flags = flags;
2163
2164 assert(num);
2165 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
2166 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
2167
2168
2169 local_flags &= ~BDRV_REQ_FUA;
2170 }
2171
2172 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
2173 num, qiov,
2174 qiov_offset + bytes - bytes_remaining,
2175 local_flags);
2176 if (ret < 0) {
2177 break;
2178 }
2179 bytes_remaining -= num;
2180 }
2181 }
2182 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
2183
2184 if (ret >= 0) {
2185 ret = 0;
2186 }
2187 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
2188
2189 return ret;
2190}
2191
2192static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
2193 int64_t offset,
2194 int64_t bytes,
2195 BdrvRequestFlags flags,
2196 BdrvTrackedRequest *req)
2197{
2198 BlockDriverState *bs = child->bs;
2199 QEMUIOVector local_qiov;
2200 uint64_t align = bs->bl.request_alignment;
2201 int ret = 0;
2202 bool padding;
2203 BdrvRequestPadding pad;
2204
2205 padding = bdrv_init_padding(bs, offset, bytes, &pad);
2206 if (padding) {
2207 assert(!(flags & BDRV_REQ_NO_WAIT));
2208 bdrv_make_request_serialising(req, align);
2209
2210 bdrv_padding_rmw_read(child, req, &pad, true);
2211
2212 if (pad.head || pad.merge_reads) {
2213 int64_t aligned_offset = offset & ~(align - 1);
2214 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2215
2216 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2217 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2218 align, &local_qiov, 0,
2219 flags & ~BDRV_REQ_ZERO_WRITE);
2220 if (ret < 0 || pad.merge_reads) {
2221
2222 goto out;
2223 }
2224 offset += write_bytes - pad.head;
2225 bytes -= write_bytes - pad.head;
2226 }
2227 }
2228
2229 assert(!bytes || (offset & (align - 1)) == 0);
2230 if (bytes >= align) {
2231
2232 int64_t aligned_bytes = bytes & ~(align - 1);
2233 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2234 NULL, 0, flags);
2235 if (ret < 0) {
2236 goto out;
2237 }
2238 bytes -= aligned_bytes;
2239 offset += aligned_bytes;
2240 }
2241
2242 assert(!bytes || (offset & (align - 1)) == 0);
2243 if (bytes) {
2244 assert(align == pad.tail + bytes);
2245
2246 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2247 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2248 &local_qiov, 0,
2249 flags & ~BDRV_REQ_ZERO_WRITE);
2250 }
2251
2252out:
2253 bdrv_padding_destroy(&pad);
2254
2255 return ret;
2256}
2257
2258
2259
2260
2261int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2262 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
2263 BdrvRequestFlags flags)
2264{
2265 IO_CODE();
2266 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2267}
2268
2269int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2270 int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
2271 BdrvRequestFlags flags)
2272{
2273 BlockDriverState *bs = child->bs;
2274 BdrvTrackedRequest req;
2275 uint64_t align = bs->bl.request_alignment;
2276 BdrvRequestPadding pad;
2277 int ret;
2278 bool padded = false;
2279 IO_CODE();
2280
2281 trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
2282
2283 if (!bdrv_is_inserted(bs)) {
2284 return -ENOMEDIUM;
2285 }
2286
2287 if (flags & BDRV_REQ_ZERO_WRITE) {
2288 ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
2289 } else {
2290 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
2291 }
2292 if (ret < 0) {
2293 return ret;
2294 }
2295
2296
2297 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2298 !QEMU_IS_ALIGNED(offset | bytes, align))
2299 {
2300 return -ENOTSUP;
2301 }
2302
2303 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
2304
2305
2306
2307
2308
2309
2310
2311
2312 return 0;
2313 }
2314
2315 if (!(flags & BDRV_REQ_ZERO_WRITE)) {
2316
2317
2318
2319
2320
2321 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
2322 &padded);
2323 if (ret < 0) {
2324 return ret;
2325 }
2326 }
2327
2328 bdrv_inc_in_flight(bs);
2329 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2330
2331 if (flags & BDRV_REQ_ZERO_WRITE) {
2332 assert(!padded);
2333 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2334 goto out;
2335 }
2336
2337 if (padded) {
2338
2339
2340
2341
2342
2343
2344 assert(!(flags & BDRV_REQ_NO_WAIT));
2345 bdrv_make_request_serialising(&req, align);
2346 bdrv_padding_rmw_read(child, &req, &pad, false);
2347 }
2348
2349 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2350 qiov, qiov_offset, flags);
2351
2352 bdrv_padding_destroy(&pad);
2353
2354out:
2355 tracked_request_end(&req);
2356 bdrv_dec_in_flight(bs);
2357
2358 return ret;
2359}
2360
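/*
 * Public zero-write entry point: drops BDRV_REQ_MAY_UNMAP when the node was
 * not opened with BDRV_O_UNMAP and forwards to bdrv_co_pwritev() with
 * BDRV_REQ_ZERO_WRITE set.
 */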
2361int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2362 int64_t bytes, BdrvRequestFlags flags)
2363{
2364 IO_CODE();
2365 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2366
2367 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2368 flags &= ~BDRV_REQ_MAY_UNMAP;
2369 }
2370
2371 return bdrv_co_pwritev(child, offset, bytes, NULL,
2372 BDRV_REQ_ZERO_WRITE | flags);
2373}
2374
2375
2376
2377
2378int bdrv_flush_all(void)
2379{
2380 BdrvNextIterator it;
2381 BlockDriverState *bs = NULL;
2382 int result = 0;
2383
2384 GLOBAL_STATE_CODE();
2385
2386
2387
2388
2389
2390
2391 if (replay_events_enabled()) {
2392 return result;
2393 }
2394
2395 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2396 AioContext *aio_context = bdrv_get_aio_context(bs);
2397 int ret;
2398
2399 aio_context_acquire(aio_context);
2400 ret = bdrv_flush(bs);
2401 if (ret < 0 && !result) {
2402 result = ret;
2403 }
2404 aio_context_release(aio_context);
2405 }
2406
2407 return result;
2408}
2409
/*
 * Returns the allocation status of the specified bytes.
 *
 * Drivers that do not implement bdrv_co_block_status() and have no filtered
 * child are assumed to be fully allocated (BDRV_BLOCK_DATA).
 *
 * If @want_zero is true, the caller is querying for mapping purposes and the
 * result should be as precise as possible; otherwise a cheaper answer that
 * only distinguishes allocated from unallocated ranges is sufficient.
 *
 * If @offset is beyond the end of the image, BDRV_BLOCK_EOF is returned and
 * *pnum is set to 0.
 *
 * @bytes is the maximum value *pnum may be set to; it is clamped at the end
 * of the image, and BDRV_BLOCK_EOF is added to the result when the returned
 * range ends there.
 *
 * *pnum is set to the number of bytes, starting at @offset, that share the
 * same allocation state. @map and @file, if non-NULL, receive the offset and
 * node where the data can be found when BDRV_BLOCK_OFFSET_VALID is set.
 */
2437static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2438 bool want_zero,
2439 int64_t offset, int64_t bytes,
2440 int64_t *pnum, int64_t *map,
2441 BlockDriverState **file)
2442{
2443 int64_t total_size;
2444 int64_t n;
2445 int ret;
2446 int64_t local_map = 0;
2447 BlockDriverState *local_file = NULL;
2448 int64_t aligned_offset, aligned_bytes;
2449 uint32_t align;
2450 bool has_filtered_child;
2451
2452 assert(pnum);
2453 *pnum = 0;
2454 total_size = bdrv_getlength(bs);
2455 if (total_size < 0) {
2456 ret = total_size;
2457 goto early_out;
2458 }
2459
2460 if (offset >= total_size) {
2461 ret = BDRV_BLOCK_EOF;
2462 goto early_out;
2463 }
2464 if (!bytes) {
2465 ret = 0;
2466 goto early_out;
2467 }
2468
2469 n = total_size - offset;
2470 if (n < bytes) {
2471 bytes = n;
2472 }
2473
2474 /* Must be non-NULL or bdrv_getlength() would have failed */
2475 assert(bs->drv);
2476 has_filtered_child = bdrv_filter_child(bs);
2477 if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
2478 *pnum = bytes;
2479 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2480 if (offset + bytes == total_size) {
2481 ret |= BDRV_BLOCK_EOF;
2482 }
2483 if (bs->drv->protocol_name) {
2484 ret |= BDRV_BLOCK_OFFSET_VALID;
2485 local_map = offset;
2486 local_file = bs;
2487 }
2488 goto early_out;
2489 }
2490
2491 bdrv_inc_in_flight(bs);
2492
2493 /* Round out to request_alignment boundaries */
2494 align = bs->bl.request_alignment;
2495 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2496 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2497
2498 if (bs->drv->bdrv_co_block_status) {
2499 /*
2500 * Use the block-status cache only for protocol nodes: Format
2501 * drivers are generally quick to inquire the status, but protocol
2502 * drivers often need to get information from outside of qemu, so
2503 * we do not have control over the actual implementation.  There
2504 * have been cases where inquiring the status took an unreasonably
2505 * long time, and we can do nothing in qemu to fix it.
2506 * This is especially problematic for images with large data areas,
2507 * because finding the few holes in them and giving them special
2508 * treatment does not gain much performance.  Therefore, we try to
2509 * cache the last-identified data region.
2510 *
2511 * Second, limiting ourselves to protocol nodes allows us to assume
2512 * the block status for data regions to be DATA | OFFSET_VALID, and
2513 * that the host offset is the same as the guest offset.
2514 *
2515 * Note that it is possible that external writers zero parts of
2516 * the cached regions without the cache being invalidated, and so
2517 * we may report zeroes as data.  This is not catastrophic,
2518 * however, because reporting zeroes as data is fine.
2519 */
2520 if (QLIST_EMPTY(&bs->children) &&
2521 bdrv_bsc_is_data(bs, aligned_offset, pnum))
2522 {
2523 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
2524 local_file = bs;
2525 local_map = aligned_offset;
2526 } else {
2527 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2528 aligned_bytes, pnum, &local_map,
2529 &local_file);
2530 /*
2531 * Note that checking QLIST_EMPTY(&bs->children) is also done when
2532 * the cache is queried above.  Technically, we do not need to
2533 * check it here; the worst that can happen is that we fill the
2534 * cache for non-protocol nodes, and then it is never used.
2535 * However, filling the cache requires an RCU update, so double
2536 * check here to avoid such an update if possible.
2537 *
2538 * Check want_zero, because we only want to update the cache when
2539 * we have accurate information about what is zero and what is
2540 * data.
2541 */
2542 if (want_zero &&
2543 ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
2544 QLIST_EMPTY(&bs->children))
2545 {
2546 /*
2547 * This node is a protocol node (it has no children) and the
2548 * driver reported the queried region as DATA | OFFSET_VALID,
2549 * so the mapping must be the identity function: local_file is
2550 * bs itself and local_map equals the offset we asked about.
2551 * The assertions below verify this.
2552 *
2553 * Because of that, the region can be stored in the
2554 * block-status cache, so later queries for this data area can
2555 * be answered without calling into the driver again.
2556 */
2557 assert(local_file == bs);
2558 assert(local_map == aligned_offset);
2559 bdrv_bsc_fill(bs, aligned_offset, *pnum);
2560 }
2561 }
2562 } else {
2563 /* Default code for filters */
2564
2565 local_file = bdrv_filter_bs(bs);
2566 assert(local_file);
2567
2568 *pnum = aligned_bytes;
2569 local_map = aligned_offset;
2570 ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2571 }
2572 if (ret < 0) {
2573 *pnum = 0;
2574 goto out;
2575 }
2576
2577 /*
2578 * The driver's result must be a non-zero multiple of request_alignment.
2579 * Clamp pnum and adjust map to the original request.
2580 */
2581 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2582 align > offset - aligned_offset);
2583 if (ret & BDRV_BLOCK_RECURSE) {
2584 assert(ret & BDRV_BLOCK_DATA);
2585 assert(ret & BDRV_BLOCK_OFFSET_VALID);
2586 assert(!(ret & BDRV_BLOCK_ZERO));
2587 }
2588
2589 *pnum -= offset - aligned_offset;
2590 if (*pnum > bytes) {
2591 *pnum = bytes;
2592 }
2593 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2594 local_map += offset - aligned_offset;
2595 }
2596
2597 if (ret & BDRV_BLOCK_RAW) {
2598 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2599 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2600 *pnum, pnum, &local_map, &local_file);
2601 goto out;
2602 }
2603
2604 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2605 ret |= BDRV_BLOCK_ALLOCATED;
2606 } else if (bs->drv->supports_backing) {
2607 BlockDriverState *cow_bs = bdrv_cow_bs(bs);
2608
2609 if (!cow_bs) {
2610 ret |= BDRV_BLOCK_ZERO;
2611 } else if (want_zero) {
2612 int64_t size2 = bdrv_getlength(cow_bs);
2613
2614 if (size2 >= 0 && offset >= size2) {
2615 ret |= BDRV_BLOCK_ZERO;
2616 }
2617 }
2618 }
2619
2620 if (want_zero && ret & BDRV_BLOCK_RECURSE &&
2621 local_file && local_file != bs &&
2622 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2623 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2624 int64_t file_pnum;
2625 int ret2;
2626
2627 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2628 *pnum, &file_pnum, NULL, NULL);
2629 if (ret2 >= 0) {
2630 /*
2631 * Ignore errors here: this only provides extra information.
2632 */
2633 if (ret2 & BDRV_BLOCK_EOF &&
2634 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
2635 /*
2636 * It is perfectly valid for a format driver to point at
2637 * offsets beyond the end of the protocol file; such areas
2638 * simply read back as zeroes, so mark everything as zero.
2639 */
2640 ret |= BDRV_BLOCK_ZERO;
2641 } else {
2642 /* Limit the request to the range reported by the protocol driver */
2643 *pnum = file_pnum;
2644 ret |= (ret2 & BDRV_BLOCK_ZERO);
2645 }
2646 }
2647 }
2648
2649out:
2650 bdrv_dec_in_flight(bs);
2651 if (ret >= 0 && offset + *pnum == total_size) {
2652 ret |= BDRV_BLOCK_EOF;
2653 }
2654early_out:
2655 if (file) {
2656 *file = local_file;
2657 }
2658 if (map) {
2659 *map = local_map;
2660 }
2661 return ret;
2662}
2663
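/*
 * Block-status query over a chain of nodes: walk from @bs towards @base
 * (inclusive iff @include_base), stopping at the first layer that reports
 * the range as allocated.  @depth, if non-NULL, is set to the number of
 * nodes queried.
 */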
2664int coroutine_fn
2665bdrv_co_common_block_status_above(BlockDriverState *bs,
2666 BlockDriverState *base,
2667 bool include_base,
2668 bool want_zero,
2669 int64_t offset,
2670 int64_t bytes,
2671 int64_t *pnum,
2672 int64_t *map,
2673 BlockDriverState **file,
2674 int *depth)
2675{
2676 int ret;
2677 BlockDriverState *p;
2678 int64_t eof = 0;
2679 int dummy;
2680 IO_CODE();
2681
2682 assert(!include_base || base);
2683
2684 if (!depth) {
2685 depth = &dummy;
2686 }
2687 *depth = 0;
2688
2689 if (!include_base && bs == base) {
2690 *pnum = bytes;
2691 return 0;
2692 }
2693
2694 ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
2695 ++*depth;
2696 if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
2697 return ret;
2698 }
2699
2700 if (ret & BDRV_BLOCK_EOF) {
2701 eof = offset + *pnum;
2702 }
2703
2704 assert(*pnum <= bytes);
2705 bytes = *pnum;
2706
2707 for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
2708 p = bdrv_filter_or_cow_bs(p))
2709 {
2710 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2711 file);
2712 ++*depth;
2713 if (ret < 0) {
2714 return ret;
2715 }
2716 if (*pnum == 0) {
2717 /*
2718 * The top layer deferred to this layer, and because this layer is
2719 * short, any zeroes that we synthesize beyond EOF behave as if they
2720 * were allocated at this layer.
2721 *
2722 * We don't include BDRV_BLOCK_EOF into ret, as the upper layer may
2723 * be larger.  We'll re-add BDRV_BLOCK_EOF if needed at the end of
2724 * the function, see below.
2725 */
2726 assert(ret & BDRV_BLOCK_EOF);
2727 *pnum = bytes;
2728 if (file) {
2729 *file = p;
2730 }
2731 ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
2732 break;
2733 }
2734 if (ret & BDRV_BLOCK_ALLOCATED) {
2735 /*
2736 * We've found the node and the status, we must break.
2737 *
2738 * Drop BDRV_BLOCK_EOF, as it's not for the upper layer, which may
2739 * be larger.  We'll re-add BDRV_BLOCK_EOF if needed at the end of
2740 * the function, see below.
2741 */
2742 ret &= ~BDRV_BLOCK_EOF;
2743 break;
2744 }
2745
2746 if (p == base) {
2747 assert(include_base);
2748 break;
2749 }
2750
2751 /*
2752 * OK, the [offset, offset + *pnum) region is unallocated on this
2753 * layer, let's continue diving down the chain.
2754 */
2755 assert(*pnum <= bytes);
2756 bytes = *pnum;
2757 }
2758
2759 if (offset + *pnum == eof) {
2760 ret |= BDRV_BLOCK_EOF;
2761 }
2762
2763 return ret;
2764}
2765
2766int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2767 int64_t offset, int64_t bytes, int64_t *pnum,
2768 int64_t *map, BlockDriverState **file)
2769{
2770 IO_CODE();
2771 return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
2772 pnum, map, file, NULL);
2773}
2774
2775int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2776 int64_t *pnum, int64_t *map, BlockDriverState **file)
2777{
2778 IO_CODE();
2779 return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
2780 offset, bytes, pnum, map, file);
2781}
2782
2783/*
2784 * Check @bs (and its backing chain) to see if the range defined
2785 * by @offset and @bytes is known to read as zeroes.
2786 * Return 1 if that is the case, 0 otherwise and -errno on error.
2787 * This test is meant to be fast rather than accurate, so returning 0
2788 * does not necessarily mean non-zero data.
2789 */
2790int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
2791 int64_t bytes)
2792{
2793 int ret;
2794 int64_t pnum = bytes;
2795 IO_CODE();
2796
2797 if (!bytes) {
2798 return 1;
2799 }
2800
2801 ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
2802 bytes, &pnum, NULL, NULL, NULL);
2803
2804 if (ret < 0) {
2805 return ret;
2806 }
2807
2808 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
2809}
2810
2811int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2812 int64_t bytes, int64_t *pnum)
2813{
2814 int ret;
2815 int64_t dummy;
2816 IO_CODE();
2817
2818 ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
2819 bytes, pnum ? pnum : &dummy, NULL,
2820 NULL, NULL);
2821 if (ret < 0) {
2822 return ret;
2823 }
2824 return !!(ret & BDRV_BLOCK_ALLOCATED);
2825}
2826
2827/*
2828 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2829 *
2830 * Return a positive depth if (a prefix of) the given range is allocated
2831 * in any image between BASE and TOP (BASE is only included if include_base
2832 * is set).  Depth 1 is TOP, 2 is the first backing layer, and so forth.
2833 *
2834 * Return 0 if the given range is known to be unallocated in any image of
2835 * the chain, or a negative errno on error.
2836 *
2837 * 'pnum' is set to the number of bytes (including and immediately
2838 * following the specified offset) that are known to be in the same
2839 * allocated/unallocated state.  Note that a subsequent call starting at
2840 * 'offset + *pnum' may return the same allocation status (in other
2841 * words, the result is not necessarily the maximum possible range);
2842 * but 'pnum' will only be 0 when end of file is reached.
2843 */
2844int bdrv_is_allocated_above(BlockDriverState *top,
2845 BlockDriverState *base,
2846 bool include_base, int64_t offset,
2847 int64_t bytes, int64_t *pnum)
2848{
2849 int depth;
2850 int ret = bdrv_common_block_status_above(top, base, include_base, false,
2851 offset, bytes, pnum, NULL, NULL,
2852 &depth);
2853 IO_CODE();
2854 if (ret < 0) {
2855 return ret;
2856 }
2857
2858 if (ret & BDRV_BLOCK_ALLOCATED) {
2859 return depth;
2860 }
2861 return 0;
2862}
2863
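/*
 * Read VM state starting at @pos into @qiov.  Handled by the driver's
 * bdrv_load_vmstate callback if present, otherwise forwarded to the
 * primary child.
 */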
2864int coroutine_fn
2865bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2866{
2867 BlockDriver *drv = bs->drv;
2868 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2869 int ret;
2870 IO_CODE();
2871
2872 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2873 if (ret < 0) {
2874 return ret;
2875 }
2876
2877 if (!drv) {
2878 return -ENOMEDIUM;
2879 }
2880
2881 bdrv_inc_in_flight(bs);
2882
2883 if (drv->bdrv_load_vmstate) {
2884 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2885 } else if (child_bs) {
2886 ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
2887 } else {
2888 ret = -ENOTSUP;
2889 }
2890
2891 bdrv_dec_in_flight(bs);
2892
2893 return ret;
2894}
2895
2896int coroutine_fn
2897bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2898{
2899 BlockDriver *drv = bs->drv;
2900 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2901 int ret;
2902 IO_CODE();
2903
2904 ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
2905 if (ret < 0) {
2906 return ret;
2907 }
2908
2909 if (!drv) {
2910 return -ENOMEDIUM;
2911 }
2912
2913 bdrv_inc_in_flight(bs);
2914
2915 if (drv->bdrv_save_vmstate) {
2916 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2917 } else if (child_bs) {
2918 ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
2919 } else {
2920 ret = -ENOTSUP;
2921 }
2922
2923 bdrv_dec_in_flight(bs);
2924
2925 return ret;
2926}
2927
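/*
 * Buffer-based wrappers around the vectored vmstate helpers; they return
 * @size on success and a negative errno on failure.
 */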
2928int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2929 int64_t pos, int size)
2930{
2931 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2932 int ret = bdrv_writev_vmstate(bs, &qiov, pos);
2933 IO_CODE();
2934
2935 return ret < 0 ? ret : size;
2936}
2937
2938int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2939 int64_t pos, int size)
2940{
2941 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2942 int ret = bdrv_readv_vmstate(bs, &qiov, pos);
2943 IO_CODE();
2944
2945 return ret < 0 ? ret : size;
2946}
2947
2948/**************************************************************/
2949/* async I/Os */
2950
2951void bdrv_aio_cancel(BlockAIOCB *acb)
2952{
2953 IO_CODE();
2954 qemu_aio_ref(acb);
2955 bdrv_aio_cancel_async(acb);
2956 while (acb->refcnt > 1) {
2957 if (acb->aiocb_info->get_aio_context) {
2958 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2959 } else if (acb->bs) {
2960 /* The acb provides no way to look up its AioContext, so we can
2961 * only poll the main loop context here; the assertion below
2962 * checks that the BDS actually lives in the main context.
2963 */
2964 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2965 aio_poll(bdrv_get_aio_context(acb->bs), true);
2966 } else {
2967 abort();
2968 }
2969 }
2970 qemu_aio_unref(acb);
2971}
2972
2973/* Async version of aio cancel. The caller is not blocked if the acb implements
2974 * cancel_async, otherwise we do nothing and let the request normally complete.
2975 * In either case the completion callback must be called. */
2976void bdrv_aio_cancel_async(BlockAIOCB *acb)
2977{
2978 IO_CODE();
2979 if (acb->aiocb_info->cancel_async) {
2980 acb->aiocb_info->cancel_async(acb);
2981 }
2982}
2983
2984/**************************************************************/
2985/* Coroutine block device emulation */
2986
2987int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2988{
2989 BdrvChild *primary_child = bdrv_primary_child(bs);
2990 BdrvChild *child;
2991 int current_gen;
2992 int ret = 0;
2993 IO_CODE();
2994
2995 bdrv_inc_in_flight(bs);
2996
2997 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2998 bdrv_is_sg(bs)) {
2999 goto early_exit;
3000 }
3001
3002 qemu_co_mutex_lock(&bs->reqs_lock);
3003 current_gen = qatomic_read(&bs->write_gen);
3004
3005 /* Wait until any previous flushes are completed */
3006 while (bs->active_flush_req) {
3007 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
3008 }
3009
3010 /* Flushes reach this point in nondecreasing current_gen order.  */
3011 bs->active_flush_req = true;
3012 qemu_co_mutex_unlock(&bs->reqs_lock);
3013
3014 /* Write back all layers by calling one driver function */
3015 if (bs->drv->bdrv_co_flush) {
3016 ret = bs->drv->bdrv_co_flush(bs);
3017 goto out;
3018 }
3019
3020 /* Write back cached data to the OS even with cache=unsafe */
3021 BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
3022 if (bs->drv->bdrv_co_flush_to_os) {
3023 ret = bs->drv->bdrv_co_flush_to_os(bs);
3024 if (ret < 0) {
3025 goto out;
3026 }
3027 }
3028
3029 /* But don't actually force it to the disk with cache=unsafe */
3030 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3031 goto flush_children;
3032 }
3033
3034 /* Check if we really need to flush anything */
3035 if (bs->flushed_gen == current_gen) {
3036 goto flush_children;
3037 }
3038
3039 BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
3040 if (!bs->drv) {
3041 /* The flush-to-OS call above might have ejected the BDS (even in
3042 * case of apparent success), so re-check bs->drv here. */
3043 ret = -ENOMEDIUM;
3044 goto out;
3045 }
3046 if (bs->drv->bdrv_co_flush_to_disk) {
3047 ret = bs->drv->bdrv_co_flush_to_disk(bs);
3048 } else if (bs->drv->bdrv_aio_flush) {
3049 BlockAIOCB *acb;
3050 CoroutineIOCompletion co = {
3051 .coroutine = qemu_coroutine_self(),
3052 };
3053
3054 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3055 if (acb == NULL) {
3056 ret = -EIO;
3057 } else {
3058 qemu_coroutine_yield();
3059 ret = co.ret;
3060 }
3061 } else {
3062 /*
3063 * Some block drivers always operate in either writethrough or unsafe
3064 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3065 * know how the server works (because the behaviour is hardcoded or
3066 * depends on server-side configuration), so we can't ensure that
3067 * everything is safe on disk. Returning an error doesn't work because
3068 * that would break guests even if the server operates in writethrough
3069 * mode.
3070 *
3071 * Let's hope the user knows what he's doing.
3072 */
3073 ret = 0;
3074 }
3075
3076 if (ret < 0) {
3077 goto out;
3078 }
3079
3080 /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
3081 * in the case of cache=unsafe, so there are no useless flushes.
3082 */
3083flush_children:
3084 ret = 0;
3085 QLIST_FOREACH(child, &bs->children, next) {
3086 if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
3087 int this_child_ret = bdrv_co_flush(child->bs);
3088 if (!ret) {
3089 ret = this_child_ret;
3090 }
3091 }
3092 }
3093
3094out:
3095 /* Notify any pending flushes that we have completed */
3096 if (ret == 0) {
3097 bs->flushed_gen = current_gen;
3098 }
3099
3100 qemu_co_mutex_lock(&bs->reqs_lock);
3101 bs->active_flush_req = false;
3102 /* Return value is ignored - it's ok if wait queue is empty */
3103 qemu_co_queue_next(&bs->flush_queue);
3104 qemu_co_mutex_unlock(&bs->reqs_lock);
3105
3106early_exit:
3107 bdrv_dec_in_flight(bs);
3108 return ret;
3109}
3110
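/*
 * Discard @bytes at @offset.  The request is fragmented so that unaligned
 * head and tail pieces are submitted separately from the aligned middle.
 * Returns 0 when discarding is disabled (no BDRV_O_UNMAP) or the driver
 * implements no pdiscard callback; -ENOTSUP from the driver is ignored.
 */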
3111int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
3112 int64_t bytes)
3113{
3114 BdrvTrackedRequest req;
3115 int ret;
3116 int64_t max_pdiscard;
3117 int head, tail, align;
3118 BlockDriverState *bs = child->bs;
3119 IO_CODE();
3120
3121 if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
3122 return -ENOMEDIUM;
3123 }
3124
3125 if (bdrv_has_readonly_bitmaps(bs)) {
3126 return -EPERM;
3127 }
3128
3129 ret = bdrv_check_request(offset, bytes, NULL);
3130 if (ret < 0) {
3131 return ret;
3132 }
3133
3134 /* Do nothing if disabled.  */
3135 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3136 return 0;
3137 }
3138
3139 if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
3140 return 0;
3141 }
3142
3143 /* Invalidate the block-status cache for any region overlapping this discard */
3144 bdrv_bsc_invalidate_range(bs, offset, bytes);
3145
3146 /* Discard is advisory, but some devices track and coalesce
3147 * unaligned requests, so we must pass everything down rather than
3148 * round here.  Still, most devices will just silently ignore
3149 * unaligned requests (by returning -ENOTSUP), so we must fragment
3150 * the request accordingly.  */
3151 align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
3152 assert(align % bs->bl.request_alignment == 0);
3153 head = offset % align;
3154 tail = (offset + bytes) % align;
3155
3156 bdrv_inc_in_flight(bs);
3157 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
3158
3159 ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
3160 if (ret < 0) {
3161 goto out;
3162 }
3163
3164 max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
3165 align);
3166 assert(max_pdiscard >= bs->bl.request_alignment);
3167
3168 while (bytes > 0) {
3169 int64_t num = bytes;
3170
3171 if (head) {
3172 /* Make small requests to get to alignment boundaries. */
3173 num = MIN(bytes, align - head);
3174 if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
3175 num %= bs->bl.request_alignment;
3176 }
3177 head = (head + num) % align;
3178 assert(num < max_pdiscard);
3179 } else if (tail) {
3180 if (num > align) {
3181 /* Shorten the request so the unaligned tail is handled later. */
3182 num -= tail;
3183 } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
3184 tail > bs->bl.request_alignment) {
3185 tail %= bs->bl.request_alignment;
3186 num -= tail;
3187 }
3188 }
3189
3190 if (num > max_pdiscard) {
3191 num = max_pdiscard;
3192 }
3193
3194 if (!bs->drv) {
3195 ret = -ENOMEDIUM;
3196 goto out;
3197 }
3198 if (bs->drv->bdrv_co_pdiscard) {
3199 ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
3200 } else {
3201 BlockAIOCB *acb;
3202 CoroutineIOCompletion co = {
3203 .coroutine = qemu_coroutine_self(),
3204 };
3205
3206 acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
3207 bdrv_co_io_em_complete, &co);
3208 if (acb == NULL) {
3209 ret = -EIO;
3210 goto out;
3211 } else {
3212 qemu_coroutine_yield();
3213 ret = co.ret;
3214 }
3215 }
3216 if (ret && ret != -ENOTSUP) {
3217 goto out;
3218 }
3219
3220 offset += num;
3221 bytes -= num;
3222 }
3223 ret = 0;
3224out:
3225 bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
3226 tracked_request_end(&req);
3227 bdrv_dec_in_flight(bs);
3228 return ret;
3229}
3230
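/* Forward an ioctl to the driver, via bdrv_co_ioctl or the AIO variant. */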
3231int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
3232{
3233 BlockDriver *drv = bs->drv;
3234 CoroutineIOCompletion co = {
3235 .coroutine = qemu_coroutine_self(),
3236 };
3237 BlockAIOCB *acb;
3238 IO_CODE();
3239
3240 bdrv_inc_in_flight(bs);
3241 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
3242 co.ret = -ENOTSUP;
3243 goto out;
3244 }
3245
3246 if (drv->bdrv_co_ioctl) {
3247 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
3248 } else {
3249 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
3250 if (!acb) {
3251 co.ret = -ENOTSUP;
3252 goto out;
3253 }
3254 qemu_coroutine_yield();
3255 }
3256out:
3257 bdrv_dec_in_flight(bs);
3258 return co.ret;
3259}
3260
3261void *qemu_blockalign(BlockDriverState *bs, size_t size)
3262{
3263 IO_CODE();
3264 return qemu_memalign(bdrv_opt_mem_align(bs), size);
3265}
3266
3267void *qemu_blockalign0(BlockDriverState *bs, size_t size)
3268{
3269 IO_CODE();
3270 return memset(qemu_blockalign(bs, size), 0, size);
3271}
3272
3273void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
3274{
3275 size_t align = bdrv_opt_mem_align(bs);
3276 IO_CODE();
3277
3278 /* Ensure that NULL is never returned on success */
3279 assert(align > 0);
3280 if (size == 0) {
3281 size = align;
3282 }
3283
3284 return qemu_try_memalign(align, size);
3285}
3286
3287void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
3288{
3289 void *mem = qemu_try_blockalign(bs, size);
3290 IO_CODE();
3291
3292 if (mem) {
3293 memset(mem, 0, size);
3294 }
3295
3296 return mem;
3297}
3298
3299/*
3300 * Check if all memory in this vector meets the node's minimum memory alignment.
3301 */
3302bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
3303{
3304 int i;
3305 size_t alignment = bdrv_min_mem_align(bs);
3306 IO_CODE();
3307
3308 for (i = 0; i < qiov->niov; i++) {
3309 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
3310 return false;
3311 }
3312 if (qiov->iov[i].iov_len % alignment) {
3313 return false;
3314 }
3315 }
3316
3317 return true;
3318}
3319
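/*
 * Batch request submission hints: plug/unplug are propagated to all
 * children; only the outermost plug and the matching final unplug reach
 * the driver callbacks.
 */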
3320void bdrv_io_plug(BlockDriverState *bs)
3321{
3322 BdrvChild *child;
3323 IO_CODE();
3324
3325 QLIST_FOREACH(child, &bs->children, next) {
3326 bdrv_io_plug(child->bs);
3327 }
3328
3329 if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
3330 BlockDriver *drv = bs->drv;
3331 if (drv && drv->bdrv_io_plug) {
3332 drv->bdrv_io_plug(bs);
3333 }
3334 }
3335}
3336
3337void bdrv_io_unplug(BlockDriverState *bs)
3338{
3339 BdrvChild *child;
3340 IO_CODE();
3341
3342 assert(bs->io_plugged);
3343 if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
3344 BlockDriver *drv = bs->drv;
3345 if (drv && drv->bdrv_io_unplug) {
3346 drv->bdrv_io_unplug(bs);
3347 }
3348 }
3349
3350 QLIST_FOREACH(child, &bs->children, next) {
3351 bdrv_io_unplug(child->bs);
3352 }
3353}
3354
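/*
 * Register/unregister a host buffer with every driver in the subtree so
 * that drivers which can pre-map memory for faster I/O may do so.
 */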
3355void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
3356{
3357 BdrvChild *child;
3358
3359 GLOBAL_STATE_CODE();
3360 if (bs->drv && bs->drv->bdrv_register_buf) {
3361 bs->drv->bdrv_register_buf(bs, host, size);
3362 }
3363 QLIST_FOREACH(child, &bs->children, next) {
3364 bdrv_register_buf(child->bs, host, size);
3365 }
3366}
3367
3368void bdrv_unregister_buf(BlockDriverState *bs, void *host)
3369{
3370 BdrvChild *child;
3371
3372 GLOBAL_STATE_CODE();
3373 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3374 bs->drv->bdrv_unregister_buf(bs, host);
3375 }
3376 QLIST_FOREACH(child, &bs->children, next) {
3377 bdrv_unregister_buf(child->bs, host);
3378 }
3379}
3380
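/*
 * Common implementation for copy_range_from/_to: validate both ends,
 * turn zero writes into bdrv_co_pwrite_zeroes(), then call either the
 * source driver's copy_range_from (@recurse_src true) or the destination
 * driver's copy_range_to, tracking the request on the respective node.
 */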
3381static int coroutine_fn bdrv_co_copy_range_internal(
3382 BdrvChild *src, int64_t src_offset, BdrvChild *dst,
3383 int64_t dst_offset, int64_t bytes,
3384 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
3385 bool recurse_src)
3386{
3387 BdrvTrackedRequest req;
3388 int ret;
3389
3390 /* TODO We can support BDRV_REQ_NO_FALLBACK here */
3391 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
3392 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
3393 assert(!(read_flags & BDRV_REQ_NO_WAIT));
3394 assert(!(write_flags & BDRV_REQ_NO_WAIT));
3395
3396 if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
3397 return -ENOMEDIUM;
3398 }
3399 ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
3400 if (ret) {
3401 return ret;
3402 }
3403 if (write_flags & BDRV_REQ_ZERO_WRITE) {
3404 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
3405 }
3406
3407 if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
3408 return -ENOMEDIUM;
3409 }
3410 ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
3411 if (ret) {
3412 return ret;
3413 }
3414
3415 if (!src->bs->drv->bdrv_co_copy_range_from
3416 || !dst->bs->drv->bdrv_co_copy_range_to
3417 || src->bs->encrypted || dst->bs->encrypted) {
3418 return -ENOTSUP;
3419 }
3420
3421 if (recurse_src) {
3422 bdrv_inc_in_flight(src->bs);
3423 tracked_request_begin(&req, src->bs, src_offset, bytes,
3424 BDRV_TRACKED_READ);
3425
3426 /* BDRV_REQ_SERIALISING is only for write operations */
3427 assert(!(read_flags & BDRV_REQ_SERIALISING));
3428 bdrv_wait_serialising_requests(&req);
3429
3430 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
3431 src, src_offset,
3432 dst, dst_offset,
3433 bytes,
3434 read_flags, write_flags);
3435
3436 tracked_request_end(&req);
3437 bdrv_dec_in_flight(src->bs);
3438 } else {
3439 bdrv_inc_in_flight(dst->bs);
3440 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3441 BDRV_TRACKED_WRITE);
3442 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3443 write_flags);
3444 if (!ret) {
3445 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3446 src, src_offset,
3447 dst, dst_offset,
3448 bytes,
3449 read_flags, write_flags);
3450 }
3451 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3452 tracked_request_end(&req);
3453 bdrv_dec_in_flight(dst->bs);
3454 }
3455
3456 return ret;
3457}
3458
3459/* Copy range from @src to @dst.
3460 *
3461 * See the comment of bdrv_co_copy_range for the parameter and return value
3462 * semantics. */
3463int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
3464 BdrvChild *dst, int64_t dst_offset,
3465 int64_t bytes,
3466 BdrvRequestFlags read_flags,
3467 BdrvRequestFlags write_flags)
3468{
3469 IO_CODE();
3470 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3471 read_flags, write_flags);
3472 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3473 bytes, read_flags, write_flags, true);
3474}
3475
3476/* Copy range from @src to @dst.
3477 *
3478 * See the comment of bdrv_co_copy_range for the parameter and return value
3479 * semantics. */
3480int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
3481 BdrvChild *dst, int64_t dst_offset,
3482 int64_t bytes,
3483 BdrvRequestFlags read_flags,
3484 BdrvRequestFlags write_flags)
3485{
3486 IO_CODE();
3487 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3488 read_flags, write_flags);
3489 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3490 bytes, read_flags, write_flags, false);
3491}
3492
3493int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
3494 BdrvChild *dst, int64_t dst_offset,
3495 int64_t bytes, BdrvRequestFlags read_flags,
3496 BdrvRequestFlags write_flags)
3497{
3498 IO_CODE();
3499 return bdrv_co_copy_range_from(src, src_offset,
3500 dst, dst_offset,
3501 bytes, read_flags, write_flags);
3502}
3503
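/* Notify all parents that this node has been resized. */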
3504static void bdrv_parent_cb_resize(BlockDriverState *bs)
3505{
3506 BdrvChild *c;
3507 QLIST_FOREACH(c, &bs->parents, next_parent) {
3508 if (c->klass->resize) {
3509 c->klass->resize(c);
3510 }
3511 }
3512}
3513
3514/**
3515 * Truncate file to 'offset' bytes (needed only for file protocols)
3516 *
3517 * If 'exact' is true, the file must be resized to exactly the given
3518 * 'offset'.  Otherwise, it is sufficient for the node to be at least
3519 * as large as 'offset'.
3520 */
3521int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
3522 PreallocMode prealloc, BdrvRequestFlags flags,
3523 Error **errp)
3524{
3525 BlockDriverState *bs = child->bs;
3526 BdrvChild *filtered, *backing;
3527 BlockDriver *drv = bs->drv;
3528 BdrvTrackedRequest req;
3529 int64_t old_size, new_bytes;
3530 int ret;
3531 IO_CODE();
3532
3533 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
3534 if (!drv) {
3535 error_setg(errp, "No medium inserted");
3536 return -ENOMEDIUM;
3537 }
3538 if (offset < 0) {
3539 error_setg(errp, "Image size cannot be negative");
3540 return -EINVAL;
3541 }
3542
3543 ret = bdrv_check_request(offset, 0, errp);
3544 if (ret < 0) {
3545 return ret;
3546 }
3547
3548 old_size = bdrv_getlength(bs);
3549 if (old_size < 0) {
3550 error_setg_errno(errp, -old_size, "Failed to get old image size");
3551 return old_size;
3552 }
3553
3554 if (bdrv_is_read_only(bs)) {
3555 error_setg(errp, "Image is read-only");
3556 return -EACCES;
3557 }
3558
3559 if (offset > old_size) {
3560 new_bytes = offset - old_size;
3561 } else {
3562 new_bytes = 0;
3563 }
3564
3565 bdrv_inc_in_flight(bs);
3566 tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
3567 BDRV_TRACKED_TRUNCATE);
3568
3569 /* If we are growing the image and potentially using preallocation for the
3570 * new area, we need to make sure that no write requests are made to it
3571 * concurrently or they might be overwritten by preallocation. */
3572 if (new_bytes) {
3573 bdrv_make_request_serialising(&req, 1);
3574 }
3575 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
3576 0);
3577 if (ret < 0) {
3578 error_setg_errno(errp, -ret,
3579 "Failed to prepare request for truncation");
3580 goto out;
3581 }
3582
3583 filtered = bdrv_filter_child(bs);
3584 backing = bdrv_cow_child(bs);
3585
3586 /*
3587 * If the image has a backing file that is large enough that it would
3588 * provide data for the new area, we cannot leave it unallocated because
3589 * then the backing file content would become visible. Instead, zero-fill
3590 * the new area.
3591 *
3592 * Note that if the image has a backing file, but was opened without the
3593 * backing file, taking care of keeping things consistent with that backing
3594 * file is the user's responsibility.
3595 */
3596 if (new_bytes && backing) {
3597 int64_t backing_len;
3598
3599 backing_len = bdrv_getlength(backing->bs);
3600 if (backing_len < 0) {
3601 ret = backing_len;
3602 error_setg_errno(errp, -ret, "Could not get backing file size");
3603 goto out;
3604 }
3605
3606 if (backing_len > old_size) {
3607 flags |= BDRV_REQ_ZERO_WRITE;
3608 }
3609 }
3610
3611 if (drv->bdrv_co_truncate) {
3612 if (flags & ~bs->supported_truncate_flags) {
3613 error_setg(errp, "Block driver does not support requested flags");
3614 ret = -ENOTSUP;
3615 goto out;
3616 }
3617 ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
3618 } else if (filtered) {
3619 ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
3620 } else {
3621 error_setg(errp, "Image format driver does not support resize");
3622 ret = -ENOTSUP;
3623 goto out;
3624 }
3625 if (ret < 0) {
3626 goto out;
3627 }
3628
3629 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3630 if (ret < 0) {
3631 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3632 } else {
3633 offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3634 }
3635
3636 /* Truncation may have succeeded even if refreshing the total sector count
3637 * failed, so finish the request with ret = 0 regardless. */
3638 bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
3639
3640out:
3641 tracked_request_end(&req);
3642 bdrv_dec_in_flight(bs);
3643
3644 return ret;
3645}
3646
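/* Ask the driver to cancel all of its in-flight requests on @bs, if it
 * implements bdrv_cancel_in_flight. */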
3647void bdrv_cancel_in_flight(BlockDriverState *bs)
3648{
3649 GLOBAL_STATE_CODE();
3650 if (!bs || !bs->drv) {
3651 return;
3652 }
3653
3654 if (bs->drv->bdrv_cancel_in_flight) {
3655 bs->drv->bdrv_cancel_in_flight(bs);
3656 }
3657}
3658
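/*
 * Snapshot-access helpers: each simply forwards to the corresponding
 * driver callback and returns -ENOTSUP when the driver does not
 * implement it.
 */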
3659int coroutine_fn
3660bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
3661 QEMUIOVector *qiov, size_t qiov_offset)
3662{
3663 BlockDriverState *bs = child->bs;
3664 BlockDriver *drv = bs->drv;
3665 int ret;
3666 IO_CODE();
3667
3668 if (!drv) {
3669 return -ENOMEDIUM;
3670 }
3671
3672 if (!drv->bdrv_co_preadv_snapshot) {
3673 return -ENOTSUP;
3674 }
3675
3676 bdrv_inc_in_flight(bs);
3677 ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
3678 bdrv_dec_in_flight(bs);
3679
3680 return ret;
3681}
3682
3683int coroutine_fn
3684bdrv_co_snapshot_block_status(BlockDriverState *bs,
3685 bool want_zero, int64_t offset, int64_t bytes,
3686 int64_t *pnum, int64_t *map,
3687 BlockDriverState **file)
3688{
3689 BlockDriver *drv = bs->drv;
3690 int ret;
3691 IO_CODE();
3692
3693 if (!drv) {
3694 return -ENOMEDIUM;
3695 }
3696
3697 if (!drv->bdrv_co_snapshot_block_status) {
3698 return -ENOTSUP;
3699 }
3700
3701 bdrv_inc_in_flight(bs);
3702 ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
3703 pnum, map, file);
3704 bdrv_dec_in_flight(bs);
3705
3706 return ret;
3707}
3708
3709int coroutine_fn
3710bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
3711{
3712 BlockDriver *drv = bs->drv;
3713 int ret;
3714 IO_CODE();
3715
3716 if (!drv) {
3717 return -ENOMEDIUM;
3718 }
3719
3720 if (!drv->bdrv_co_pdiscard_snapshot) {
3721 return -ENOTSUP;
3722 }
3723
3724 bdrv_inc_in_flight(bs);
3725 ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
3726 bdrv_dec_in_flight(bs);
3727
3728 return ret;
3729}
3730