/*
 * Block layer I/O functions
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags);

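/*
 * Notify all parents of @bs (except @ignore, and except BDS-level parents
 * when @ignore_bds_parents is true) that @bs is about to be drained, so
 * that they stop issuing new requests to it.
 */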
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->klass->drained_end) {
        c->klass->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->klass->drained_poll) {
        return c->klass->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    IO_OR_GS_CODE();
    c->parent_quiesce_counter++;
    if (c->klass->drained_begin) {
        c->klass->drained_begin(c);
    }
    if (poll) {
        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
    }
}

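/*
 * Fold the BlockLimits of @src into @dst: alignments are maximised, and
 * transfer/iov limits are minimised while treating zero as "unlimited",
 * so that @dst ends up satisfying both sets of constraints.
 */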
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
                                  src->pdiscard_alignment);
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
                                        src->max_hw_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
    dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
}

typedef struct BdrvRefreshLimitsState {
    BlockDriverState *bs;
    BlockLimits old_bl;
} BdrvRefreshLimitsState;

static void bdrv_refresh_limits_abort(void *opaque)
{
    BdrvRefreshLimitsState *s = opaque;

    s->bs->bl = s->old_bl;
}

static TransactionActionDrv bdrv_refresh_limits_drv = {
    .abort = bdrv_refresh_limits_abort,
    .clean = g_free,
};

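/*
 * Recompute bs->bl from the driver and the child limits.  When @tran is
 * given, the previous limits are restored if the transaction is aborted.
 */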
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
    ERRP_GUARD();
    BlockDriver *drv = bs->drv;
    BdrvChild *c;
    bool have_limits;

    GLOBAL_STATE_CODE();

    if (tran) {
        BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
        *s = (BdrvRefreshLimitsState) {
            .bs = bs,
            .old_bl = bs->bl,
        };
        tran_add(tran, &bdrv_refresh_limits_drv, s);
    }

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv ||
                                drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
    have_limits = false;
    QLIST_FOREACH(c, &bs->children, next) {
        if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
        {
            bdrv_merge_limits(&bs->bl, &c->bs->bl);
            have_limits = true;
        }
    }

    if (!have_limits) {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = qemu_real_host_page_size();

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
        if (*errp) {
            return;
        }
    }

    if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
        error_setg(errp, "Driver requires too large request alignment");
    }
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = qatomic_fetch_dec(&bs->copy_on_read);
    IO_CODE();
    assert(old >= 1);
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
    qatomic_mb_set(&data->done, true);
    if (!data->begin) {
        qatomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}

/* Invoke the BlockDriver's bdrv_co_drain_begin/end callback, if present */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        qatomic_inc(drained_end_counter);
    }

    /* Make sure the driver callback completes during the polling phase for
     * drain_begin */
    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}

/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;
    IO_OR_GS_CODE();

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (qatomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        aio_context_acquire(ctx);
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        aio_context_release(ctx);
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;
    Coroutine *self = qemu_coroutine_self();
    AioContext *ctx = bdrv_get_aio_context(bs);
    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);

    /*
     * Polling is not allowed in coroutine context, so the drain has to be
     * delegated to a bottom half that runs outside of coroutine context;
     * this coroutine yields until the bottom half has finished.
     */
    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = self,
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }

    /*
     * Temporarily drop the lock across yield or we would get deadlocks.
     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
     *
     * When we yield below, the lock for the current context will be
     * released, so if this is actually the lock that protects bs, don't drop
     * it a second time.
     */
    if (ctx != co_ctx) {
        aio_context_release(ctx);
    }
    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);

    /* Reacquire the AioContext of bs if we dropped it */
    if (ctx != co_ctx) {
        aio_context_acquire(ctx);
    }
}

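/*
 * Quiesce @bs: disable external events, notify its parents and invoke the
 * driver's drain_begin callback.  This function does not poll; the caller
 * is responsible for waiting until in-flight requests have settled.
 */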
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    IO_OR_GS_CODE();
    assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

    /*
     * Wait for drained requests to finish.
     *
     * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: the
     * call is needed so things in this AioContext can make progress even
     * though we don't return to the main AioContext loop - this automatically
     * includes other nodes in the same AioContext and therefore all child
     * nodes.
     */
    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}

/*
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee is incremented once for
 * every background operation that is scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must stay valid (and
 * its owner must poll) until the pointee reaches 0.
 *
 * *drained_end_counter is accessed atomically because the nodes it
 * covers may live in different AioContexts.
 */
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    IO_CODE();
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    IO_OR_GS_CODE();
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;
    IO_OR_GS_CODE();

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}

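/*
 * Drain @bs: quiesce it and wait until all of its in-flight requests have
 * completed.  Equivalent to an empty drained section:
 *
 *     bdrv_drained_begin(bs);
 *     bdrv_drained_end(bs);
 */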
void bdrv_drain(BlockDriverState *bs)
{
    IO_OR_GS_CODE();
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(qatomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;
    GLOBAL_STATE_CODE();

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;
    GLOBAL_STATE_CODE();

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    g_assert(bs->quiesce_counter > 0);
    g_assert(!bs->refcnt);

    while (bs->quiesce_counter) {
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
    }
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;
    GLOBAL_STATE_CODE();

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    GLOBAL_STATE_CODE();
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        qatomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  int64_t bytes,
                                  enum BdrvTrackedRequestType type)
{
    bdrv_check_request(offset, bytes, &error_abort);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, int64_t bytes)
{
    bdrv_check_request(offset, bytes, &error_abort);

    /* request region (aaaa) entirely after tracked region (bbbb) */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* tracked region (bbbb) entirely after request region (aaaa) */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}

/* Called with self->bs->reqs_lock held */
static BdrvTrackedRequest *
bdrv_find_conflicting_request(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;

    QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
        if (req == self || (!req->serialising && !self->serialising)) {
            continue;
        }
        if (tracked_request_overlaps(req, self->overlap_offset,
                                     self->overlap_bytes))
        {
            /*
             * Hitting this means there was a reentrant request, for
             * example, a block driver issuing nested requests.  This must
             * never happen since it means deadlock.
             */
            assert(qemu_coroutine_self() != req->co);

            /*
             * If the request is already (indirectly) waiting for us, or
             * will wait for us as soon as it wakes up, then just go on
             * (instead of producing a deadlock in the former case).
             */
            if (!req->waiting_for) {
                return req;
            }
        }
    }

    return NULL;
}

/* Called with self->bs->reqs_lock held */
static bool coroutine_fn
bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;
    bool waited = false;

    while ((req = bdrv_find_conflicting_request(self))) {
        self->waiting_for = req;
        qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
        self->waiting_for = NULL;
        waited = true;
    }

    return waited;
}

/* Called with req->bs->reqs_lock held */
static void tracked_request_set_serialising(BdrvTrackedRequest *req,
                                            uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    int64_t overlap_bytes =
        ROUND_UP(req->offset + req->bytes, align) - overlap_offset;

    bdrv_check_request(req->offset, req->bytes, &error_abort);

    if (!req->serialising) {
        qatomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

/**
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
    BdrvTrackedRequest *req;
    Coroutine *self = qemu_coroutine_self();
    IO_CODE();

    QLIST_FOREACH(req, &bs->tracked_requests, list) {
        if (req->co == self) {
            return req;
        }
    }

    return NULL;
}

/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;
    IO_CODE();
    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}

static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

void bdrv_inc_in_flight(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_inc(&bs->in_flight);
}

void bdrv_wakeup(BlockDriverState *bs)
{
    IO_CODE();
    aio_wait_kick();
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    IO_CODE();
    qatomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    bool waited = false;

    if (!qatomic_read(&bs->serialising_in_flight)) {
        return false;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    waited = bdrv_wait_serialising_requests_locked(self);
    qemu_co_mutex_unlock(&bs->reqs_lock);

    return waited;
}

bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
                                                uint64_t align)
{
    bool waited;
    IO_CODE();

    qemu_co_mutex_lock(&req->bs->reqs_lock);

    tracked_request_set_serialising(req, align);
    waited = bdrv_wait_serialising_requests_locked(req);

    qemu_co_mutex_unlock(&req->bs->reqs_lock);

    return waited;
}

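/*
 * Validate an (offset, bytes) range and, if @qiov is given, check that the
 * range starting at @qiov_offset fits into the I/O vector.  Returns 0 on
 * success, or -EIO (setting @errp) if any bound is violated.
 */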
int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
                            QEMUIOVector *qiov, size_t qiov_offset,
                            Error **errp)
{
    /*
     * Check generic offset/bytes correctness
     */
    if (offset < 0) {
        error_setg(errp, "offset is negative: %" PRIi64, offset);
        return -EIO;
    }

    if (bytes < 0) {
        error_setg(errp, "bytes is negative: %" PRIi64, bytes);
        return -EIO;
    }

    if (bytes > BDRV_MAX_LENGTH) {
        error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   bytes, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH) {
        error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   offset, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH - bytes) {
        error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
                   "exceeds maximum(%" PRIi64 ")", offset, bytes,
                   BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (!qiov) {
        return 0;
    }

    /*
     * Check qiov and qiov_offset
     */
    if (qiov_offset > qiov->size) {
        error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
                   qiov_offset, qiov->size);
        return -EIO;
    }

    if (bytes > qiov->size - qiov_offset) {
        error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
                   "vector size(%zu)", bytes, qiov_offset, qiov->size);
        return -EIO;
    }

    return 0;
}

int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
{
    return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
}

static int bdrv_check_request32(int64_t offset, int64_t bytes,
                                QEMUIOVector *qiov, size_t qiov_offset)
{
    int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
    if (ret < 0) {
        return ret;
    }

    if (bytes > BDRV_REQUEST_MAX_BYTES) {
        return -EIO;
    }

    return 0;
}

/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_pwrite().
 */
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;
    IO_CODE();

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            return ret;
        }
        offset += bytes;
    }
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
                                     int64_t bytes, const void *buf,
                                     BdrvRequestFlags flags)
{
    int ret;
    IO_CODE();

    ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_co_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

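/*
 * Forward a read to the driver, preferring the most capable interface:
 * .bdrv_co_preadv_part, then .bdrv_co_preadv, then .bdrv_aio_preadv, and
 * finally the legacy sector-based .bdrv_co_readv.  For the non-_part
 * interfaces a local qiov slice is built when @qiov_offset is non-zero.
 */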
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           int64_t offset, int64_t bytes,
                                           QEMUIOVector *qiov,
                                           size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv_part) {
        return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags);
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_preadv) {
        ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
        goto out;
    }

    if (drv->bdrv_aio_preadv) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
                                   bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
            goto out;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
            goto out;
        }
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);
    assert(drv->bdrv_co_readv);

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

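/*
 * Forward a write to the driver, using the same interface preference as
 * bdrv_driver_preadv().  Flags the driver does not support are emulated
 * here; in particular, an unsupported BDRV_REQ_FUA is emulated by a full
 * flush after a successful write.
 */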
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            int64_t offset, int64_t bytes,
                                            QEMUIOVector *qiov,
                                            size_t qiov_offset,
                                            BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_pwritev_part) {
        ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                   flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (drv->bdrv_aio_pwritev) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
                                    flags & bs->supported_write_flags,
                                    bdrv_co_io_em_complete, &co);
        flags &= ~bs->supported_write_flags;
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);

    assert(drv->bdrv_co_writev);
    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
                              flags & bs->supported_write_flags);
    flags &= ~bs->supported_write_flags;

emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
        ret = bdrv_co_flush(bs);
    }

    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
                               int64_t bytes, QEMUIOVector *qiov,
                               size_t qiov_offset)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!block_driver_can_compress(drv)) {
        return -ENOTSUP;
    }

    if (drv->bdrv_co_pwritev_compressed_part) {
        return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
                                                    qiov, qiov_offset);
    }

    if (qiov_offset == 0) {
        return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
    }

    qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
    ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
    qemu_iovec_destroy(&local_qiov);

    return ret;
}

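/*
 * Perform a copy-on-read: read whole clusters through a bounce buffer and
 * write the data back before handing it to the caller, so that subsequent
 * reads of the same range are served from this layer.  With
 * BDRV_REQ_PREFETCH, only the write-back happens and nothing is copied
 * into @qiov.
 */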
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
        size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;

    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer = NULL;

    BlockDriver *drv = bs->drv;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    int64_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    int64_t progress = 0;
    bool skip_write;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    /*
     * Do not write anything when the BDS is inactive.  That is not
     * allowed, and it would not help.
     */
    skip_write = (bs->open_flags & BDRV_O_INACTIVE);

    /* FIXME We cannot require callers to have write permissions when all they
     * are doing is a read request. If we did things right, write permissions
     * would be obtained anyway, but internally by the copy-on-read code. As
     * long as it is implemented here rather than in a separate filter driver,
     * the copy-on-read code doesn't have its own BdrvChild, however, for which
     * it might claim write permissions.
     *
     * assert(child->perm & BLK_PERM_WRITE);
     */

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
     * is one reason we loop rather than doing it all at once.
     */
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    while (cluster_bytes) {
        int64_t pnum;

        if (skip_write) {
            ret = 1; /* "already allocated", so nothing will be copied */
            pnum = MIN(cluster_bytes, max_transfer);
        } else {
            ret = bdrv_is_allocated(bs, cluster_offset,
                                    MIN(cluster_bytes, max_transfer), &pnum);
            if (ret < 0) {
                /*
                 * Safe to treat errors in querying allocation as if
                 * unallocated; we'll probably fail again soon on the
                 * read, but at least that will set a decent errno.
                 */
                pnum = MIN(cluster_bytes, max_transfer);
            }

            /* Stop at EOF if the image ends in the middle of the cluster */
            if (ret == 0 && pnum == 0) {
                assert(progress >= bytes);
                break;
            }

            assert(skip_bytes < pnum);
        }

        if (ret <= 0) {
            QEMUIOVector local_qiov;

            /* Must copy-on-read; use the bounce buffer */
            pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            if (!bounce_buffer) {
                int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
                int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
                int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);

                bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
                if (!bounce_buffer) {
                    ret = -ENOMEM;
                    goto err;
                }
            }
            qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * of that sparseness */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
                                               BDRV_REQ_WRITE_UNCHANGED);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0,
                                          BDRV_REQ_WRITE_UNCHANGED);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            if (!(flags & BDRV_REQ_PREFETCH)) {
                qemu_iovec_from_buf(qiov, qiov_offset + progress,
                                    bounce_buffer + skip_bytes,
                                    MIN(pnum - skip_bytes, bytes - progress));
            }
        } else if (!(flags & BDRV_REQ_PREFETCH)) {
            /* Read directly into the destination */
            ret = bdrv_driver_preadv(bs, offset + progress,
                                     MIN(pnum - skip_bytes, bytes - progress),
                                     qiov, qiov_offset + progress, 0);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /* TODO: We would need a per-BDS .supported_read_flags and
     * potential fallback support, if we ever implement any read flags
     * to pass through to drivers.  For now, there aren't any
     * passthrough flags.  */
    assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
    } else {
        bdrv_wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
        flags &= ~BDRV_REQ_COPY_ON_READ;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
                                           qiov, qiov_offset, flags);
            goto out;
        } else if (flags & BDRV_REQ_PREFETCH) {
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    assert(!(flags & ~bs->supported_read_flags));

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
        goto out;
    }

    while (bytes_remaining) {
        int64_t num;

        if (max_bytes) {
            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, qiov,
                                     qiov_offset + bytes - bytes_remaining,
                                     flags);
            max_bytes -= num;
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
                                    0, bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}

/*
 * Request padding
 *
 *  |<---- align ----->|                     |<----- align ---->|
 *  |<- head ->|<------------- bytes ------------->|<-- tail -->|
 *  |          |       |                     |     |            |
 * -*----------$-------*-------- ... --------*-----$------------*---
 *  |          |       |                     |     |            |
 *  |          offset  |                     |     end          |
 *  ALIGN_DOWN(offset) ALIGN_UP(offset)      ALIGN_DOWN(end)   ALIGN_UP(end)
 *  [buf ... )                               [tail_buf          )
 *
 * @buf is an aligned allocation needed to store @head and @tail paddings.
 * @head is included in @buf_len.
 *
 * @merge_reads is true for small requests,
 * if @buf_len == @align
 */
typedef struct BdrvRequestPadding {
    uint8_t *buf;
    size_t buf_len;
    uint8_t *tail_buf;
    size_t head;
    size_t tail;
    bool merge_reads;
    QEMUIOVector local_qiov;
} BdrvRequestPadding;

static bool bdrv_init_padding(BlockDriverState *bs,
                              int64_t offset, int64_t bytes,
                              BdrvRequestPadding *pad)
{
    int64_t align = bs->bl.request_alignment;
    int64_t sum;

    bdrv_check_request(offset, bytes, &error_abort);
    assert(align <= INT_MAX);
    assert(align <= SIZE_MAX / 2); /* so we can allocate the buffer */

    memset(pad, 0, sizeof(*pad));

    pad->head = offset & (align - 1);
    pad->tail = ((offset + bytes) & (align - 1));
    if (pad->tail) {
        pad->tail = align - pad->tail;
    }

    if (!pad->head && !pad->tail) {
        return false;
    }

    assert(bytes); /* Nothing good in aligning zero-length requests */

    sum = pad->head + bytes + pad->tail;
    pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
    pad->buf = qemu_blockalign(bs, pad->buf_len);
    pad->merge_reads = sum == pad->buf_len;
    if (pad->tail) {
        pad->tail_buf = pad->buf + pad->buf_len - align;
    }

    return true;
}

static int bdrv_padding_rmw_read(BdrvChild *child,
                                 BdrvTrackedRequest *req,
                                 BdrvRequestPadding *pad,
                                 bool zero_middle)
{
    QEMUIOVector local_qiov;
    BlockDriverState *bs = child->bs;
    uint64_t align = bs->bl.request_alignment;
    int ret;

    assert(req->serialising && pad->buf);

    if (pad->head || pad->merge_reads) {
        int64_t bytes = pad->merge_reads ? pad->buf_len : align;

        qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        }
        ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                  align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        }

        if (pad->merge_reads) {
            goto zero_mem;
        }
    }

    if (pad->tail) {
        qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(
                child, req,
                req->overlap_offset + req->overlap_bytes - align,
                align, align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
    }

zero_mem:
    if (zero_middle) {
        memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
    }

    return 0;
}

static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
    if (pad->buf) {
        qemu_vfree(pad->buf);
        qemu_iovec_destroy(&pad->local_qiov);
    }
    memset(pad, 0, sizeof(*pad));
}

/*
 * bdrv_pad_request
 *
 * Exchange request parameters with padded request parameters, so that the
 * caller may pass the padded request on to the underlying function.
 *
 * All parameters except @bs are in-out: they represent the original request
 * at function call and the padded request (if padding is needed) on return.
 *
 * If @padded is non-NULL, *padded is set to whether padding was applied.
 *
 * Returns 0 on success (possibly with no padding needed), or a negative
 * errno if qemu_iovec_init_extended() fails (e.g. on qiov_offset overflow).
 */
static int bdrv_pad_request(BlockDriverState *bs,
                            QEMUIOVector **qiov, size_t *qiov_offset,
                            int64_t *offset, int64_t *bytes,
                            BdrvRequestPadding *pad, bool *padded)
{
    int ret;

    bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);

    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
        if (padded) {
            *padded = false;
        }
        return 0;
    }

    ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
                                   *qiov, *qiov_offset, *bytes,
                                   pad->buf + pad->buf_len - pad->tail,
                                   pad->tail);
    if (ret < 0) {
        bdrv_padding_destroy(pad);
        return ret;
    }
    *bytes += pad->head + pad->tail;
    *offset -= pad->head;
    *qiov = &pad->local_qiov;
    *qiov_offset = 0;
    if (padded) {
        *padded = true;
    }

    return 0;
}

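/*
 * Handle a read request in coroutine context.  bdrv_co_preadv() is a thin
 * wrapper around bdrv_co_preadv_part() with a zero qiov offset.
 */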
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    IO_CODE();
    return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
    int64_t offset, int64_t bytes,
    QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    BdrvRequestPadding pad;
    int ret;
    IO_CODE();

    trace_bdrv_co_preadv_part(bs, offset, bytes, flags);

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    if (ret < 0) {
        return ret;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * A zero-length read needs no alignment and no I/O; padding it to
         * the request alignment would be pointless, so complete it here.
         */
        return 0;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (qatomic_read(&bs->copy_on_read)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
                           NULL);
    if (ret < 0) {
        goto fail;
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                              bs->bl.request_alignment,
                              qiov, qiov_offset, flags);
    tracked_request_end(&req);
    bdrv_padding_destroy(&pad);

fail:
    bdrv_dec_in_flight(bs);

    return ret;
}

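/*
 * Write zeroes, fragmenting the request so that the unaligned head and
 * tail stay below the zeroing alignment and the bulk respects
 * max_pwrite_zeroes.  Falls back to writing an allocated buffer of zeroes
 * when the driver lacks .bdrv_co_pwrite_zeroes and BDRV_REQ_NO_FALLBACK
 * is not set.
 */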
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    void *buf = NULL;
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
                                            INT64_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    bdrv_check_request(offset, bytes, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
        return -ENOTSUP;
    }

    /* Invalidate the cached block-status data range if this write overlaps */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int64_t num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush on each chunk; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            if (buf == NULL) {
                buf = qemu_try_blockalign0(bs, num);
                if (buf == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }
            qemu_iovec_init_buf(&qiov, buf, num);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(buf);
                buf = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(buf);
    return ret;
}

static inline int coroutine_fn
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
                          BdrvTrackedRequest *req, int flags)
{
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    if (bdrv_is_read_only(bs)) {
        return -EPERM;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));

    if (flags & BDRV_REQ_SERIALISING) {
        QEMU_LOCK_GUARD(&bs->reqs_lock);

        tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));

        if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
            return -EBUSY;
        }

        bdrv_wait_serialising_requests_locked(req);
    } else {
        bdrv_wait_serialising_requests(req);
    }

    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
           child->perm & BLK_PERM_RESIZE);

    switch (req->type) {
    case BDRV_TRACKED_WRITE:
    case BDRV_TRACKED_DISCARD:
        if (flags & BDRV_REQ_WRITE_UNCHANGED) {
            assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
        } else {
            assert(child->perm & BLK_PERM_WRITE);
        }
        bdrv_write_threshold_check_write(bs, offset, bytes);
        return 0;
    case BDRV_TRACKED_TRUNCATE:
        assert(child->perm & BLK_PERM_RESIZE);
        return 0;
    default:
        abort();
    }
}

static inline void coroutine_fn
bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
                         BdrvTrackedRequest *req, int ret)
{
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    qatomic_inc(&bs->write_gen);

    /*
     * Discard cannot extend the image, but in error handling cases, such as
     * when reverting a qcow2 cluster allocation, the discarded range can pass
     * the end of image file, so we cannot assert about BDRV_TRACKED_DISCARD
     * here. Instead, just skip it, since semantically a discard request
     * beyond EOF cannot expand the image anyway.
     */
    if (ret == 0 &&
        (req->type == BDRV_TRACKED_TRUNCATE ||
         end_sector > bs->total_sectors) &&
        req->type != BDRV_TRACKED_DISCARD) {
        bs->total_sectors = end_sector;
        bdrv_parent_cb_resize(bs);
        bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
    }
    if (req->bytes) {
        switch (req->type) {
        case BDRV_TRACKED_WRITE:
            stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
        case BDRV_TRACKED_DISCARD:
            bdrv_set_dirty(bs, offset, bytes);
            break;
        default:
            break;
        }
    }
}

/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                             qiov, qiov_offset);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, qiov,
                                      qiov_offset + bytes - bytes_remaining,
                                      local_flags);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    if (ret >= 0) {
        ret = 0;
    }
    bdrv_co_write_req_finish(child, offset, bytes, req, ret);

    return ret;
}

static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                int64_t bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    QEMUIOVector local_qiov;
    uint64_t align = bs->bl.request_alignment;
    int ret = 0;
    bool padding;
    BdrvRequestPadding pad;

    padding = bdrv_init_padding(bs, offset, bytes, &pad);
    if (padding) {
        assert(!(flags & BDRV_REQ_NO_WAIT));
        bdrv_make_request_serialising(req, align);

        bdrv_padding_rmw_read(child, req, &pad, true);

        if (pad.head || pad.merge_reads) {
            int64_t aligned_offset = offset & ~(align - 1);
            int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;

            qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
            ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
                                       align, &local_qiov, 0,
                                       flags & ~BDRV_REQ_ZERO_WRITE);
            if (ret < 0 || pad.merge_reads) {
                /* Error, or the whole request was handled by the RMW cycle */
                goto out;
            }
            offset += write_bytes - pad.head;
            bytes -= write_bytes - pad.head;
        }
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes >= align) {
        /* Write the aligned part in the middle of the request */
        int64_t aligned_bytes = bytes & ~(align - 1);
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, 0, flags);
        if (ret < 0) {
            goto out;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == pad.tail + bytes);

        qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, 0,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
    }

out:
    bdrv_padding_destroy(&pad);

    return ret;
}

/*
 * Handle a write request in coroutine context
 */
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    IO_CODE();
    return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    BdrvRequestPadding pad;
    int ret;
    bool padded = false;
    IO_CODE();

    trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
    } else {
        ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    }
    if (ret < 0) {
        return ret;
    }

    /* If the request is misaligned then we can't make it efficient */
    if ((flags & BDRV_REQ_NO_FALLBACK) &&
        !QEMU_IS_ALIGNED(offset | bytes, align))
    {
        return -ENOTSUP;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * A zero-length write cannot usefully be padded to the request
         * alignment, and padding it would make the request non-empty;
         * complete it here without performing any I/O.
         */
        return 0;
    }

    if (!(flags & BDRV_REQ_ZERO_WRITE)) {
        /*
         * Pad request for following read-modify-write cycle.
         * bdrv_co_do_zero_pwritev() does aligning by itself, so we need
         * alignment only if there is no ZERO flag.
         */
        ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
                               &padded);
        if (ret < 0) {
            return ret;
        }
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        assert(!padded);
        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

    if (padded) {
        /*
         * Request was unaligned to request_alignment and therefore
         * padded.  We are going to do read-modify-write, and must
         * serialize the request to prevent interactions of the
         * widened region with other transactions.
         */
        assert(!(flags & BDRV_REQ_NO_WAIT));
        bdrv_make_request_serialising(&req, align);
        bdrv_padding_rmw_read(child, &req, &pad, false);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               qiov, qiov_offset, flags);

    bdrv_padding_destroy(&pad);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

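/*
 * Zero a byte range by issuing a write with BDRV_REQ_ZERO_WRITE and no
 * payload.  BDRV_REQ_MAY_UNMAP is honoured only when the node was opened
 * with BDRV_O_UNMAP.
 */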
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                       int64_t bytes, BdrvRequestFlags flags)
{
    IO_CODE();
    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
 */
int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;
    int result = 0;

    GLOBAL_STATE_CODE();

    /*
     * bdrv queue is managed by record/replay,
     * creating new flush request for stopping
     * the VM may break the determinism
     */
    if (replay_events_enabled()) {
        return result;
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2378 bool want_zero,
2379 int64_t offset, int64_t bytes,
2380 int64_t *pnum, int64_t *map,
2381 BlockDriverState **file)
2382{
2383 int64_t total_size;
2384 int64_t n;
2385 int ret;
2386 int64_t local_map = 0;
2387 BlockDriverState *local_file = NULL;
2388 int64_t aligned_offset, aligned_bytes;
2389 uint32_t align;
2390 bool has_filtered_child;
2391
2392 assert(pnum);
2393 *pnum = 0;
2394 total_size = bdrv_getlength(bs);
2395 if (total_size < 0) {
2396 ret = total_size;
2397 goto early_out;
2398 }
2399
2400 if (offset >= total_size) {
2401 ret = BDRV_BLOCK_EOF;
2402 goto early_out;
2403 }
2404 if (!bytes) {
2405 ret = 0;
2406 goto early_out;
2407 }
2408
2409 n = total_size - offset;
2410 if (n < bytes) {
2411 bytes = n;
2412 }
2413
2414
2415 assert(bs->drv);
2416 has_filtered_child = bdrv_filter_child(bs);
2417 if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
2418 *pnum = bytes;
2419 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2420 if (offset + bytes == total_size) {
2421 ret |= BDRV_BLOCK_EOF;
2422 }
2423 if (bs->drv->protocol_name) {
2424 ret |= BDRV_BLOCK_OFFSET_VALID;
2425 local_map = offset;
2426 local_file = bs;
2427 }
2428 goto early_out;
2429 }
2430
2431 bdrv_inc_in_flight(bs);
2432
2433
2434 align = bs->bl.request_alignment;
2435 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2436 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2437
2438 if (bs->drv->bdrv_co_block_status) {
        /*
         * Use the block-status cache only for protocol nodes: Format
         * drivers are generally quick to inquire the status, but protocol
         * drivers often need to get information from outside of qemu, so
         * we do not have control over the actual implementation.  There
         * have been cases where inquiring the status took an unreasonably
         * long time, and we can do nothing in qemu to not hurt device
         * guest performance when this is the case.
         *
         * Therefore, we only use the cache for protocol nodes.  A protocol
         * node is a leaf of the graph, so checking that the node has no
         * children is a sufficient test.
         */
        if (QLIST_EMPTY(&bs->children) &&
            bdrv_bsc_is_data(bs, aligned_offset, pnum))
        {
            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
            local_file = bs;
            local_map = aligned_offset;
        } else {
            ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
                                                aligned_bytes, pnum, &local_map,
                                                &local_file);

            /*
             * Note that checking QLIST_EMPTY(&bs->children) is also done when
             * the cache is queried above.  Technically, we do not need to
             * check it here; the worst that can happen is that we fill the
             * cache for non-protocol nodes, and then it is never used.
             * However, filling the cache requires an RCU update, so double
             * check here to avoid such an update if possible.
             *
             * Check want_zero, because we only want to update the cache when
             * we have accurate information about what is zero and what is
             * data.
             */
            if (want_zero &&
                ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
                QLIST_EMPTY(&bs->children))
            {
                /*
                 * When a protocol driver reports BLOCK_OFFSET_VALID, the
                 * returned local_map value must be the same as the offset we
                 * have passed (aligned_offset), and local_file must be the
                 * node itself.
                 *
                 * Assert this, because the block-status cache relies on it
                 * when it returns entries (see the cache hit branch above).
                 */
                assert(local_file == bs);
                assert(local_map == aligned_offset);
                bdrv_bsc_fill(bs, aligned_offset, *pnum);
            }
        }
    } else {
        /* Default code for filters */

        local_file = bdrv_filter_bs(bs);
        assert(local_file);

        *pnum = aligned_bytes;
        local_map = aligned_offset;
        ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
    }
    if (ret < 0) {
        *pnum = 0;
        goto out;
    }

    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to original request.
     */
    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
           align > offset - aligned_offset);
    if (ret & BDRV_BLOCK_RECURSE) {
        assert(ret & BDRV_BLOCK_DATA);
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        assert(!(ret & BDRV_BLOCK_ZERO));
    }

    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

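    /*
     * BDRV_BLOCK_RAW means the status was deferred to local_file (as filters
     * do); resolve it by recursing on that node at the mapped offset.
     */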
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (bs->drv->supports_backing) {
        BlockDriverState *cow_bs = bdrv_cow_bs(bs);

        if (!cow_bs) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (want_zero) {
            int64_t size2 = bdrv_getlength(cow_bs);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
        local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /*
             * Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
                                  BlockDriverState *base,
                                  bool include_base,
                                  bool want_zero,
                                  int64_t offset,
                                  int64_t bytes,
                                  int64_t *pnum,
                                  int64_t *map,
                                  BlockDriverState **file,
                                  int *depth)
{
    int ret;
    BlockDriverState *p;
    int64_t eof = 0;
    int dummy;
    IO_CODE();

    assert(!include_base || base);

    if (!depth) {
        depth = &dummy;
    }
    *depth = 0;

    if (!include_base && bs == base) {
        *pnum = bytes;
        return 0;
    }

    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
    ++*depth;
    if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
        return ret;
    }

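    /* Remember where the top layer ends so the EOF flag can be restored */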
    if (ret & BDRV_BLOCK_EOF) {
        eof = offset + *pnum;
    }

    assert(*pnum <= bytes);
    bytes = *pnum;

    for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
         p = bdrv_filter_or_cow_bs(p))
    {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        ++*depth;
        if (ret < 0) {
            return ret;
        }
        if (*pnum == 0) {
            /*
             * The top layer deferred to this layer, and because this layer is
             * short, any zeroes that we synthesize beyond EOF behave as if
             * they were allocated at this layer.
             *
             * We don't include BDRV_BLOCK_EOF into ret, as the upper layer
             * may be larger.  The correct EOF flag is set at the end of the
             * function, based on the eof variable.
             */
            assert(ret & BDRV_BLOCK_EOF);
            *pnum = bytes;
            if (file) {
                *file = p;
            }
            ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
            break;
        }
        if (ret & BDRV_BLOCK_ALLOCATED) {
            /*
             * We've found the node and the status, we must break.
             *
             * Drop BDRV_BLOCK_EOF, as it's not for the upper layer, which may
             * be larger.  The correct EOF flag is set at the end of the
             * function, based on the eof variable.
             */
            ret &= ~BDRV_BLOCK_EOF;
            break;
        }

        if (p == base) {
            assert(include_base);
            break;
        }

        /*
         * OK, [offset, offset + *pnum) region is unallocated on this layer,
         * let's continue the diving.
         */
        assert(*pnum <= bytes);
        bytes = *pnum;
    }

    if (offset + *pnum == eof) {
        ret |= BDRV_BLOCK_EOF;
    }

    return ret;
}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    IO_CODE();
    return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
                                          pnum, map, file, NULL);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    IO_CODE();
    return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
                                   offset, bytes, pnum, map, file);
}

/*
 * Check @bs (and its backing chain) to see if the range defined
 * by @offset and @bytes is known to read as zeroes.
 * Return 1 if that is the case, 0 otherwise and -errno on error.
 * This test is meant to be fast rather than accurate so returning 0
 * does not necessarily mean non-zero data.
 */
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes)
{
    int ret;
    int64_t pnum = bytes;
    IO_CODE();

    if (!bytes) {
        return 1;
    }

    ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
                                         bytes, &pnum, NULL, NULL, NULL);

    if (ret < 0) {
        return ret;
    }

    return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int ret;
    int64_t dummy;
    IO_CODE();

    ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL, NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return a positive depth if (a prefix of) the given range is allocated
 * in any image between BASE and TOP (BASE is only included if include_base
 * is set).  Depth 1 is TOP, 2 is the first backing layer, and so forth.
 * BASE can be NULL to check if the given offset is allocated in any
 * image of the chain.  Return 0 otherwise, or negative errno on
 * failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            bool include_base, int64_t offset,
                            int64_t bytes, int64_t *pnum)
{
    int depth;
    int ret = bdrv_common_block_status_above(top, base, include_base, false,
                                             offset, bytes, pnum, NULL, NULL,
                                             &depth);
    IO_CODE();
    if (ret < 0) {
        return ret;
    }

    if (ret & BDRV_BLOCK_ALLOCATED) {
        return depth;
    }
    return 0;
}

int coroutine_fn
bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret;
    IO_CODE();

    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
    if (ret < 0) {
        return ret;
    }

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

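    /* Use the driver's implementation if it has one, else ask the primary child */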
    if (drv->bdrv_load_vmstate) {
        ret = drv->bdrv_load_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
    } else {
        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret;
    IO_CODE();

    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
    if (ret < 0) {
        return ret;
    }

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

    if (drv->bdrv_save_vmstate) {
        ret = drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
    } else {
        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_writev_vmstate(bs, &qiov, pos);
    IO_CODE();

    return ret < 0 ? ret : size;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_readv_vmstate(bs, &qiov, pos);
    IO_CODE();

    return ret < 0 ? ret : size;
}

/**************************************************************/
/* async I/Os */

void bdrv_aio_cancel(BlockAIOCB *acb)
{
    IO_CODE();
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
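    /* Poll until our reference is the only one left, i.e. the request is done */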
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /*
             * qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

/*
 * Async version of aio cancel.  The caller is not blocked if the acb
 * implements cancel_async, otherwise we do nothing and let the request
 * normally complete.  In either case the completion callback must be called.
 */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    IO_CODE();
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

/**************************************************************/
/* Coroutine block device emulation */

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);
    BdrvChild *child;
    int current_gen;
    int ret = 0;
    IO_CODE();

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_children;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_children;
    }

    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_children:
    ret = 0;
    QLIST_FOREACH(child, &bs->children, next) {
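        /* Only children that may have been written to need to be flushed */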
        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
            int this_child_ret = bdrv_co_flush(child->bs);
            if (!ret) {
                ret = this_child_ret;
            }
        }
    }

out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int ret;
    int64_t max_pdiscard;
    int head, tail, align;
    BlockDriverState *bs = child->bs;
    IO_CODE();

    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_request(offset, bytes, NULL);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Invalidate the cached block-status data range if this discard overlaps */
    bdrv_bsc_invalidate_range(bs, offset, bytes);

    /*
     * Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.
     */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

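    /* Fragment the request so that every piece respects alignment and limits */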
    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;
    IO_CODE();

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    IO_CODE();
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    IO_CODE();
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);
    IO_CODE();

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);
    IO_CODE();

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}

/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_min_mem_align(bs);
    IO_CODE();

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}

void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;
    IO_CODE();

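    /* Plug all children first; the counter plugs the driver only once */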
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;
    IO_CODE();

    assert(bs->io_plugged);
    if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}

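/* Propagate buffer registration to every node in the subtree */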
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
    BdrvChild *child;

    GLOBAL_STATE_CODE();
    if (bs->drv && bs->drv->bdrv_register_buf) {
        bs->drv->bdrv_register_buf(bs, host, size);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_register_buf(child->bs, host, size);
    }
}

void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
    BdrvChild *child;

    GLOBAL_STATE_CODE();
    if (bs->drv && bs->drv->bdrv_unregister_buf) {
        bs->drv->bdrv_unregister_buf(bs, host);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_unregister_buf(child->bs, host);
    }
}

static int coroutine_fn bdrv_co_copy_range_internal(
        BdrvChild *src, int64_t src_offset, BdrvChild *dst,
        int64_t dst_offset, int64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;

    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
    assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(read_flags & BDRV_REQ_NO_WAIT));
    assert(!(write_flags & BDRV_REQ_NO_WAIT));

    if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }
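    /* A zero write needs no source at all, so handle it before checking src */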
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }

    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

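    /*
     * Track the request on the node that drives the copy: as a read on the
     * source when recursing into bdrv_co_copy_range_from, as a write on the
     * destination otherwise.
     */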
    if (recurse_src) {
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING is only for write operation */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        bdrv_wait_serialising_requests(&req);

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}

/*
 * Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics.
 */
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
                                         BdrvChild *dst, int64_t dst_offset,
                                         int64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags)
{
    IO_CODE();
    trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, true);
}

/*
 * Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics.
 */
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
                                       BdrvChild *dst, int64_t dst_offset,
                                       int64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags)
{
    IO_CODE();
    trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, false);
}

int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                    BdrvChild *dst, int64_t dst_offset,
                                    int64_t bytes, BdrvRequestFlags read_flags,
                                    BdrvRequestFlags write_flags)
{
    IO_CODE();
    return bdrv_co_copy_range_from(src, src_offset,
                                   dst, dst_offset,
                                   bytes, read_flags, write_flags);
}

static void bdrv_parent_cb_resize(BlockDriverState *bs)
{
    BdrvChild *c;
    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->klass->resize) {
            c->klass->resize(c);
        }
    }
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as the given 'offset' (if it is already larger, it must not
 * be shrunk to match the given 'offset').
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, BdrvRequestFlags flags,
                                  Error **errp)
{
    BlockDriverState *bs = child->bs;
    BdrvChild *filtered, *backing;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;
    IO_CODE();

    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    ret = bdrv_check_request(offset, 0, errp);
    if (ret < 0) {
        return ret;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    if (bdrv_is_read_only(bs)) {
        error_setg(errp, "Image is read-only");
        return -EACCES;
    }

    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /*
     * If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation.
     */
    if (new_bytes) {
        bdrv_make_request_serialising(&req, 1);
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    filtered = bdrv_filter_child(bs);
    backing = bdrv_cow_child(bs);

    /*
     * If the image has a backing file that is large enough that it would
     * provide data for the new area, we cannot leave it unallocated because
     * then the backing file content would become visible. Instead, zero-fill
     * the new area.
     *
     * Note that if the image has a backing file, but was opened without the
     * backing file, taking care of keeping things consistent with that backing
     * file is the user's responsibility.
     */
    if (new_bytes && backing) {
        int64_t backing_len;

        backing_len = bdrv_getlength(backing->bs);
        if (backing_len < 0) {
            ret = backing_len;
            error_setg_errno(errp, -ret, "Could not get backing file size");
            goto out;
        }

        if (backing_len > old_size) {
            flags |= BDRV_REQ_ZERO_WRITE;
        }
    }
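    /*
     * Prefer the driver's own truncate implementation; otherwise forward
     * the request to the filter child, if any.
     */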
    if (drv->bdrv_co_truncate) {
        if (flags & ~bs->supported_truncate_flags) {
            error_setg(errp, "Block driver does not support requested flags");
            ret = -ENOTSUP;
            goto out;
        }
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
    } else if (filtered) {
        ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /*
     * It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}
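/* If the driver supports it, cancel all in-flight requests on @bs */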
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();
    if (!bs || !bs->drv) {
        return;
    }

    if (bs->drv->bdrv_cancel_in_flight) {
        bs->drv->bdrv_cancel_in_flight(bs);
    }
}

int coroutine_fn
bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
                        QEMUIOVector *qiov, size_t qiov_offset)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_preadv_snapshot) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn
bdrv_co_snapshot_block_status(BlockDriverState *bs,
                              bool want_zero, int64_t offset, int64_t bytes,
                              int64_t *pnum, int64_t *map,
                              BlockDriverState **file)
{
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_snapshot_block_status) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
                                             pnum, map, file);
    bdrv_dec_in_flight(bs);

    return ret;
}

int coroutine_fn
bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    BlockDriver *drv = bs->drv;
    int ret;
    IO_CODE();

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_pdiscard_snapshot) {
        return -ENOTSUP;
    }

    bdrv_inc_in_flight(bs);
    ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
    bdrv_dec_in_flight(bs);

    return ret;
}