1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "trace.h"
27#include "sysemu/block-backend.h"
28#include "block/aio-wait.h"
29#include "block/blockjob.h"
30#include "block/blockjob_int.h"
31#include "block/block_int.h"
32#include "block/coroutines.h"
33#include "qemu/cutils.h"
34#include "qapi/error.h"
35#include "qemu/error-report.h"
36#include "qemu/main-loop.h"
37#include "sysemu/replay.h"
38
39
40#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
41
42static void bdrv_parent_cb_resize(BlockDriverState *bs);
43static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
44 int64_t offset, int64_t bytes, BdrvRequestFlags flags);
45
46static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
47 bool ignore_bds_parents)
48{
49 BdrvChild *c, *next;
50
51 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
52 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
53 continue;
54 }
55 bdrv_parent_drained_begin_single(c, false);
56 }
57}
58
59static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
60 int *drained_end_counter)
61{
62 assert(c->parent_quiesce_counter > 0);
63 c->parent_quiesce_counter--;
64 if (c->klass->drained_end) {
65 c->klass->drained_end(c, drained_end_counter);
66 }
67}
68
69void bdrv_parent_drained_end_single(BdrvChild *c)
70{
71 int drained_end_counter = 0;
72 bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
73 BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
74}
75
76static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
77 bool ignore_bds_parents,
78 int *drained_end_counter)
79{
80 BdrvChild *c;
81
82 QLIST_FOREACH(c, &bs->parents, next_parent) {
83 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
84 continue;
85 }
86 bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
87 }
88}
89
90static bool bdrv_parent_drained_poll_single(BdrvChild *c)
91{
92 if (c->klass->drained_poll) {
93 return c->klass->drained_poll(c);
94 }
95 return false;
96}
97
98static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
99 bool ignore_bds_parents)
100{
101 BdrvChild *c, *next;
102 bool busy = false;
103
104 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
105 if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
106 continue;
107 }
108 busy |= bdrv_parent_drained_poll_single(c);
109 }
110
111 return busy;
112}
113
114void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
115{
116 c->parent_quiesce_counter++;
117 if (c->klass->drained_begin) {
118 c->klass->drained_begin(c);
119 }
120 if (poll) {
121 BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
122 }
123}
124
125static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
126{
127 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
128 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
129 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
130 src->opt_mem_alignment);
131 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
132 src->min_mem_alignment);
133 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
134}
135
136void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
137{
138 ERRP_GUARD();
139 BlockDriver *drv = bs->drv;
140 BdrvChild *c;
141 bool have_limits;
142
143 memset(&bs->bl, 0, sizeof(bs->bl));
144
145 if (!drv) {
146 return;
147 }
148
149
150 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
151 drv->bdrv_aio_preadv ||
152 drv->bdrv_co_preadv_part) ? 1 : 512;
153
154
155 have_limits = false;
156 QLIST_FOREACH(c, &bs->children, next) {
157 if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
158 {
159 bdrv_refresh_limits(c->bs, errp);
160 if (*errp) {
161 return;
162 }
163 bdrv_merge_limits(&bs->bl, &c->bs->bl);
164 have_limits = true;
165 }
166 }
167
168 if (!have_limits) {
169 bs->bl.min_mem_alignment = 512;
170 bs->bl.opt_mem_alignment = qemu_real_host_page_size;
171
172
173 bs->bl.max_iov = IOV_MAX;
174 }
175
176
177 if (drv->bdrv_refresh_limits) {
178 drv->bdrv_refresh_limits(bs, errp);
179 if (*errp) {
180 return;
181 }
182 }
183
184 if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
185 error_setg(errp, "Driver requires too large request alignment");
186 }
187}
188
189
190
191
192
193
194void bdrv_enable_copy_on_read(BlockDriverState *bs)
195{
196 qatomic_inc(&bs->copy_on_read);
197}
198
199void bdrv_disable_copy_on_read(BlockDriverState *bs)
200{
201 int old = qatomic_fetch_dec(&bs->copy_on_read);
202 assert(old >= 1);
203}
204
205typedef struct {
206 Coroutine *co;
207 BlockDriverState *bs;
208 bool done;
209 bool begin;
210 bool recursive;
211 bool poll;
212 BdrvChild *parent;
213 bool ignore_bds_parents;
214 int *drained_end_counter;
215} BdrvCoDrainData;
216
217static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
218{
219 BdrvCoDrainData *data = opaque;
220 BlockDriverState *bs = data->bs;
221
222 if (data->begin) {
223 bs->drv->bdrv_co_drain_begin(bs);
224 } else {
225 bs->drv->bdrv_co_drain_end(bs);
226 }
227
228
229 qatomic_mb_set(&data->done, true);
230 if (!data->begin) {
231 qatomic_dec(data->drained_end_counter);
232 }
233 bdrv_dec_in_flight(bs);
234
235 g_free(data);
236}
237
238
239static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
240 int *drained_end_counter)
241{
242 BdrvCoDrainData *data;
243
244 if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
245 (!begin && !bs->drv->bdrv_co_drain_end)) {
246 return;
247 }
248
249 data = g_new(BdrvCoDrainData, 1);
250 *data = (BdrvCoDrainData) {
251 .bs = bs,
252 .done = false,
253 .begin = begin,
254 .drained_end_counter = drained_end_counter,
255 };
256
257 if (!begin) {
258 qatomic_inc(drained_end_counter);
259 }
260
261
262
263 bdrv_inc_in_flight(bs);
264 data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
265 aio_co_schedule(bdrv_get_aio_context(bs), data->co);
266}
267
268
269bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
270 BdrvChild *ignore_parent, bool ignore_bds_parents)
271{
272 BdrvChild *child, *next;
273
274 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
275 return true;
276 }
277
278 if (qatomic_read(&bs->in_flight)) {
279 return true;
280 }
281
282 if (recursive) {
283 assert(!ignore_bds_parents);
284 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
285 if (bdrv_drain_poll(child->bs, recursive, child, false)) {
286 return true;
287 }
288 }
289 }
290
291 return false;
292}
293
294static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
295 BdrvChild *ignore_parent)
296{
297 return bdrv_drain_poll(bs, recursive, ignore_parent, false);
298}
299
300static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
301 BdrvChild *parent, bool ignore_bds_parents,
302 bool poll);
303static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
304 BdrvChild *parent, bool ignore_bds_parents,
305 int *drained_end_counter);
306
307static void bdrv_co_drain_bh_cb(void *opaque)
308{
309 BdrvCoDrainData *data = opaque;
310 Coroutine *co = data->co;
311 BlockDriverState *bs = data->bs;
312
313 if (bs) {
314 AioContext *ctx = bdrv_get_aio_context(bs);
315 aio_context_acquire(ctx);
316 bdrv_dec_in_flight(bs);
317 if (data->begin) {
318 assert(!data->drained_end_counter);
319 bdrv_do_drained_begin(bs, data->recursive, data->parent,
320 data->ignore_bds_parents, data->poll);
321 } else {
322 assert(!data->poll);
323 bdrv_do_drained_end(bs, data->recursive, data->parent,
324 data->ignore_bds_parents,
325 data->drained_end_counter);
326 }
327 aio_context_release(ctx);
328 } else {
329 assert(data->begin);
330 bdrv_drain_all_begin();
331 }
332
333 data->done = true;
334 aio_co_wake(co);
335}
336
337static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
338 bool begin, bool recursive,
339 BdrvChild *parent,
340 bool ignore_bds_parents,
341 bool poll,
342 int *drained_end_counter)
343{
344 BdrvCoDrainData data;
345 Coroutine *self = qemu_coroutine_self();
346 AioContext *ctx = bdrv_get_aio_context(bs);
347 AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
348
349
350
351
352 assert(qemu_in_coroutine());
353 data = (BdrvCoDrainData) {
354 .co = self,
355 .bs = bs,
356 .done = false,
357 .begin = begin,
358 .recursive = recursive,
359 .parent = parent,
360 .ignore_bds_parents = ignore_bds_parents,
361 .poll = poll,
362 .drained_end_counter = drained_end_counter,
363 };
364
365 if (bs) {
366 bdrv_inc_in_flight(bs);
367 }
368
369
370
371
372
373
374
375
376
377 if (ctx != co_ctx) {
378 aio_context_release(ctx);
379 }
380 replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
381
382 qemu_coroutine_yield();
383
384
385 assert(data.done);
386
387
388 if (ctx != co_ctx) {
389 aio_context_acquire(ctx);
390 }
391}
392
393void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
394 BdrvChild *parent, bool ignore_bds_parents)
395{
396 assert(!qemu_in_coroutine());
397
398
399 if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
400 aio_disable_external(bdrv_get_aio_context(bs));
401 }
402
403 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
404 bdrv_drain_invoke(bs, true, NULL);
405}
406
407static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
408 BdrvChild *parent, bool ignore_bds_parents,
409 bool poll)
410{
411 BdrvChild *child, *next;
412
413 if (qemu_in_coroutine()) {
414 bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
415 poll, NULL);
416 return;
417 }
418
419 bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
420
421 if (recursive) {
422 assert(!ignore_bds_parents);
423 bs->recursive_quiesce_counter++;
424 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
425 bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
426 false);
427 }
428 }
429
430
431
432
433
434
435
436
437
438
439 if (poll) {
440 assert(!ignore_bds_parents);
441 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
442 }
443}
444
445void bdrv_drained_begin(BlockDriverState *bs)
446{
447 bdrv_do_drained_begin(bs, false, NULL, false, true);
448}
449
450void bdrv_subtree_drained_begin(BlockDriverState *bs)
451{
452 bdrv_do_drained_begin(bs, true, NULL, false, true);
453}
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
470 BdrvChild *parent, bool ignore_bds_parents,
471 int *drained_end_counter)
472{
473 BdrvChild *child;
474 int old_quiesce_counter;
475
476 assert(drained_end_counter != NULL);
477
478 if (qemu_in_coroutine()) {
479 bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
480 false, drained_end_counter);
481 return;
482 }
483 assert(bs->quiesce_counter > 0);
484
485
486 bdrv_drain_invoke(bs, false, drained_end_counter);
487 bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
488 drained_end_counter);
489
490 old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
491 if (old_quiesce_counter == 1) {
492 aio_enable_external(bdrv_get_aio_context(bs));
493 }
494
495 if (recursive) {
496 assert(!ignore_bds_parents);
497 bs->recursive_quiesce_counter--;
498 QLIST_FOREACH(child, &bs->children, next) {
499 bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
500 drained_end_counter);
501 }
502 }
503}
504
505void bdrv_drained_end(BlockDriverState *bs)
506{
507 int drained_end_counter = 0;
508 bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
509 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
510}
511
512void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
513{
514 bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
515}
516
517void bdrv_subtree_drained_end(BlockDriverState *bs)
518{
519 int drained_end_counter = 0;
520 bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
521 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
522}
523
524void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
525{
526 int i;
527
528 for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
529 bdrv_do_drained_begin(child->bs, true, child, false, true);
530 }
531}
532
533void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
534{
535 int drained_end_counter = 0;
536 int i;
537
538 for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
539 bdrv_do_drained_end(child->bs, true, child, false,
540 &drained_end_counter);
541 }
542
543 BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
544}
545
546
547
548
549
550
551
552
553void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
554{
555 assert(qemu_in_coroutine());
556 bdrv_drained_begin(bs);
557 bdrv_drained_end(bs);
558}
559
560void bdrv_drain(BlockDriverState *bs)
561{
562 bdrv_drained_begin(bs);
563 bdrv_drained_end(bs);
564}
565
566static void bdrv_drain_assert_idle(BlockDriverState *bs)
567{
568 BdrvChild *child, *next;
569
570 assert(qatomic_read(&bs->in_flight) == 0);
571 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
572 bdrv_drain_assert_idle(child->bs);
573 }
574}
575
/*
 * Nesting depth of drain-all sections: raised by bdrv_drain_all_begin()
 * and lowered by bdrv_drain_all_end().
 */
unsigned int bdrv_drain_all_count = 0;
577
578static bool bdrv_drain_all_poll(void)
579{
580 BlockDriverState *bs = NULL;
581 bool result = false;
582
583
584
585 while ((bs = bdrv_next_all_states(bs))) {
586 AioContext *aio_context = bdrv_get_aio_context(bs);
587 aio_context_acquire(aio_context);
588 result |= bdrv_drain_poll(bs, false, NULL, true);
589 aio_context_release(aio_context);
590 }
591
592 return result;
593}
594
595
596
597
598
599
600
601
602
603
604
605
606
607void bdrv_drain_all_begin(void)
608{
609 BlockDriverState *bs = NULL;
610
611 if (qemu_in_coroutine()) {
612 bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
613 return;
614 }
615
616
617
618
619
620
621 if (replay_events_enabled()) {
622 return;
623 }
624
625
626
627 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
628 assert(bdrv_drain_all_count < INT_MAX);
629 bdrv_drain_all_count++;
630
631
632
633 while ((bs = bdrv_next_all_states(bs))) {
634 AioContext *aio_context = bdrv_get_aio_context(bs);
635
636 aio_context_acquire(aio_context);
637 bdrv_do_drained_begin(bs, false, NULL, true, false);
638 aio_context_release(aio_context);
639 }
640
641
642 AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
643
644 while ((bs = bdrv_next_all_states(bs))) {
645 bdrv_drain_assert_idle(bs);
646 }
647}
648
649void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
650{
651 int drained_end_counter = 0;
652
653 g_assert(bs->quiesce_counter > 0);
654 g_assert(!bs->refcnt);
655
656 while (bs->quiesce_counter) {
657 bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
658 }
659 BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
660}
661
662void bdrv_drain_all_end(void)
663{
664 BlockDriverState *bs = NULL;
665 int drained_end_counter = 0;
666
667
668
669
670
671
672 if (replay_events_enabled()) {
673 return;
674 }
675
676 while ((bs = bdrv_next_all_states(bs))) {
677 AioContext *aio_context = bdrv_get_aio_context(bs);
678
679 aio_context_acquire(aio_context);
680 bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
681 aio_context_release(aio_context);
682 }
683
684 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
685 AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
686
687 assert(bdrv_drain_all_count > 0);
688 bdrv_drain_all_count--;
689}
690
/* Open a drain-all section and close it again right away. */
void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();

    bdrv_drain_all_end();
}
696
697
698
699
700
701
702static void tracked_request_end(BdrvTrackedRequest *req)
703{
704 if (req->serialising) {
705 qatomic_dec(&req->bs->serialising_in_flight);
706 }
707
708 qemu_co_mutex_lock(&req->bs->reqs_lock);
709 QLIST_REMOVE(req, list);
710 qemu_co_queue_restart_all(&req->wait_queue);
711 qemu_co_mutex_unlock(&req->bs->reqs_lock);
712}
713
714
715
716
717static void tracked_request_begin(BdrvTrackedRequest *req,
718 BlockDriverState *bs,
719 int64_t offset,
720 int64_t bytes,
721 enum BdrvTrackedRequestType type)
722{
723 bdrv_check_request(offset, bytes, &error_abort);
724
725 *req = (BdrvTrackedRequest){
726 .bs = bs,
727 .offset = offset,
728 .bytes = bytes,
729 .type = type,
730 .co = qemu_coroutine_self(),
731 .serialising = false,
732 .overlap_offset = offset,
733 .overlap_bytes = bytes,
734 };
735
736 qemu_co_queue_init(&req->wait_queue);
737
738 qemu_co_mutex_lock(&bs->reqs_lock);
739 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
740 qemu_co_mutex_unlock(&bs->reqs_lock);
741}
742
743static bool tracked_request_overlaps(BdrvTrackedRequest *req,
744 int64_t offset, int64_t bytes)
745{
746 bdrv_check_request(offset, bytes, &error_abort);
747
748
749 if (offset >= req->overlap_offset + req->overlap_bytes) {
750 return false;
751 }
752
753 if (req->overlap_offset >= offset + bytes) {
754 return false;
755 }
756 return true;
757}
758
759
760static BdrvTrackedRequest *
761bdrv_find_conflicting_request(BdrvTrackedRequest *self)
762{
763 BdrvTrackedRequest *req;
764
765 QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
766 if (req == self || (!req->serialising && !self->serialising)) {
767 continue;
768 }
769 if (tracked_request_overlaps(req, self->overlap_offset,
770 self->overlap_bytes))
771 {
772
773
774
775
776
777 assert(qemu_coroutine_self() != req->co);
778
779
780
781
782
783
784 if (!req->waiting_for) {
785 return req;
786 }
787 }
788 }
789
790 return NULL;
791}
792
793
794static bool coroutine_fn
795bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
796{
797 BdrvTrackedRequest *req;
798 bool waited = false;
799
800 while ((req = bdrv_find_conflicting_request(self))) {
801 self->waiting_for = req;
802 qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
803 self->waiting_for = NULL;
804 waited = true;
805 }
806
807 return waited;
808}
809
810
811static void tracked_request_set_serialising(BdrvTrackedRequest *req,
812 uint64_t align)
813{
814 int64_t overlap_offset = req->offset & ~(align - 1);
815 int64_t overlap_bytes =
816 ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
817
818 bdrv_check_request(req->offset, req->bytes, &error_abort);
819
820 if (!req->serialising) {
821 qatomic_inc(&req->bs->serialising_in_flight);
822 req->serialising = true;
823 }
824
825 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
826 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
827}
828
829
830
831
832
833BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
834{
835 BdrvTrackedRequest *req;
836 Coroutine *self = qemu_coroutine_self();
837
838 QLIST_FOREACH(req, &bs->tracked_requests, list) {
839 if (req->co == self) {
840 return req;
841 }
842 }
843
844 return NULL;
845}
846
847
848
849
850void bdrv_round_to_clusters(BlockDriverState *bs,
851 int64_t offset, int64_t bytes,
852 int64_t *cluster_offset,
853 int64_t *cluster_bytes)
854{
855 BlockDriverInfo bdi;
856
857 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
858 *cluster_offset = offset;
859 *cluster_bytes = bytes;
860 } else {
861 int64_t c = bdi.cluster_size;
862 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
863 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
864 }
865}
866
867static int bdrv_get_cluster_size(BlockDriverState *bs)
868{
869 BlockDriverInfo bdi;
870 int ret;
871
872 ret = bdrv_get_info(bs, &bdi);
873 if (ret < 0 || bdi.cluster_size == 0) {
874 return bs->bl.request_alignment;
875 } else {
876 return bdi.cluster_size;
877 }
878}
879
880void bdrv_inc_in_flight(BlockDriverState *bs)
881{
882 qatomic_inc(&bs->in_flight);
883}
884
885void bdrv_wakeup(BlockDriverState *bs)
886{
887 aio_wait_kick();
888}
889
890void bdrv_dec_in_flight(BlockDriverState *bs)
891{
892 qatomic_dec(&bs->in_flight);
893 bdrv_wakeup(bs);
894}
895
896static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
897{
898 BlockDriverState *bs = self->bs;
899 bool waited = false;
900
901 if (!qatomic_read(&bs->serialising_in_flight)) {
902 return false;
903 }
904
905 qemu_co_mutex_lock(&bs->reqs_lock);
906 waited = bdrv_wait_serialising_requests_locked(self);
907 qemu_co_mutex_unlock(&bs->reqs_lock);
908
909 return waited;
910}
911
912bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
913 uint64_t align)
914{
915 bool waited;
916
917 qemu_co_mutex_lock(&req->bs->reqs_lock);
918
919 tracked_request_set_serialising(req, align);
920 waited = bdrv_wait_serialising_requests_locked(req);
921
922 qemu_co_mutex_unlock(&req->bs->reqs_lock);
923
924 return waited;
925}
926
927static int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
928 QEMUIOVector *qiov, size_t qiov_offset,
929 Error **errp)
930{
931
932
933
934
935 if (offset < 0) {
936 error_setg(errp, "offset is negative: %" PRIi64, offset);
937 return -EIO;
938 }
939
940 if (bytes < 0) {
941 error_setg(errp, "bytes is negative: %" PRIi64, bytes);
942 return -EIO;
943 }
944
945 if (bytes > BDRV_MAX_LENGTH) {
946 error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
947 bytes, BDRV_MAX_LENGTH);
948 return -EIO;
949 }
950
951 if (offset > BDRV_MAX_LENGTH) {
952 error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
953 offset, BDRV_MAX_LENGTH);
954 return -EIO;
955 }
956
957 if (offset > BDRV_MAX_LENGTH - bytes) {
958 error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
959 "exceeds maximum(%" PRIi64 ")", offset, bytes,
960 BDRV_MAX_LENGTH);
961 return -EIO;
962 }
963
964 if (!qiov) {
965 return 0;
966 }
967
968
969
970
971
972 if (qiov_offset > qiov->size) {
973 error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
974 qiov_offset, qiov->size);
975 return -EIO;
976 }
977
978 if (bytes > qiov->size - qiov_offset) {
979 error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
980 "vector size(%zu)", bytes, qiov_offset, qiov->size);
981 return -EIO;
982 }
983
984 return 0;
985}
986
987int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
988{
989 return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
990}
991
992static int bdrv_check_request32(int64_t offset, int64_t bytes,
993 QEMUIOVector *qiov, size_t qiov_offset)
994{
995 int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
996 if (ret < 0) {
997 return ret;
998 }
999
1000 if (bytes > BDRV_REQUEST_MAX_BYTES) {
1001 return -EIO;
1002 }
1003
1004 return 0;
1005}
1006
1007int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
1008 int64_t bytes, BdrvRequestFlags flags)
1009{
1010 return bdrv_pwritev(child, offset, bytes, NULL,
1011 BDRV_REQ_ZERO_WRITE | flags);
1012}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
1024{
1025 int ret;
1026 int64_t target_size, bytes, offset = 0;
1027 BlockDriverState *bs = child->bs;
1028
1029 target_size = bdrv_getlength(bs);
1030 if (target_size < 0) {
1031 return target_size;
1032 }
1033
1034 for (;;) {
1035 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
1036 if (bytes <= 0) {
1037 return 0;
1038 }
1039 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
1040 if (ret < 0) {
1041 return ret;
1042 }
1043 if (ret & BDRV_BLOCK_ZERO) {
1044 offset += bytes;
1045 continue;
1046 }
1047 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
1048 if (ret < 0) {
1049 return ret;
1050 }
1051 offset += bytes;
1052 }
1053}
1054
1055
1056int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
1057{
1058 int ret;
1059 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1060
1061 if (bytes < 0) {
1062 return -EINVAL;
1063 }
1064
1065 ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
1066
1067 return ret < 0 ? ret : bytes;
1068}
1069
1070
1071
1072
1073
1074
1075
1076int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
1077 int64_t bytes)
1078{
1079 int ret;
1080 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1081
1082 if (bytes < 0) {
1083 return -EINVAL;
1084 }
1085
1086 ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
1087
1088 return ret < 0 ? ret : bytes;
1089}
1090
1091
1092
1093
1094
1095
1096
1097int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
1098 const void *buf, int64_t count)
1099{
1100 int ret;
1101
1102 ret = bdrv_pwrite(child, offset, buf, count);
1103 if (ret < 0) {
1104 return ret;
1105 }
1106
1107 ret = bdrv_flush(child->bs);
1108 if (ret < 0) {
1109 return ret;
1110 }
1111
1112 return 0;
1113}
1114
1115typedef struct CoroutineIOCompletion {
1116 Coroutine *coroutine;
1117 int ret;
1118} CoroutineIOCompletion;
1119
1120static void bdrv_co_io_em_complete(void *opaque, int ret)
1121{
1122 CoroutineIOCompletion *co = opaque;
1123
1124 co->ret = ret;
1125 aio_co_wake(co->coroutine);
1126}
1127
1128static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1129 int64_t offset, int64_t bytes,
1130 QEMUIOVector *qiov,
1131 size_t qiov_offset, int flags)
1132{
1133 BlockDriver *drv = bs->drv;
1134 int64_t sector_num;
1135 unsigned int nb_sectors;
1136 QEMUIOVector local_qiov;
1137 int ret;
1138
1139 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1140 assert(!(flags & ~BDRV_REQ_MASK));
1141 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1142
1143 if (!drv) {
1144 return -ENOMEDIUM;
1145 }
1146
1147 if (drv->bdrv_co_preadv_part) {
1148 return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
1149 flags);
1150 }
1151
1152 if (qiov_offset > 0 || bytes != qiov->size) {
1153 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1154 qiov = &local_qiov;
1155 }
1156
1157 if (drv->bdrv_co_preadv) {
1158 ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1159 goto out;
1160 }
1161
1162 if (drv->bdrv_aio_preadv) {
1163 BlockAIOCB *acb;
1164 CoroutineIOCompletion co = {
1165 .coroutine = qemu_coroutine_self(),
1166 };
1167
1168 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1169 bdrv_co_io_em_complete, &co);
1170 if (acb == NULL) {
1171 ret = -EIO;
1172 goto out;
1173 } else {
1174 qemu_coroutine_yield();
1175 ret = co.ret;
1176 goto out;
1177 }
1178 }
1179
1180 sector_num = offset >> BDRV_SECTOR_BITS;
1181 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1182
1183 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1184 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1185 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1186 assert(drv->bdrv_co_readv);
1187
1188 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1189
1190out:
1191 if (qiov == &local_qiov) {
1192 qemu_iovec_destroy(&local_qiov);
1193 }
1194
1195 return ret;
1196}
1197
1198static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1199 int64_t offset, int64_t bytes,
1200 QEMUIOVector *qiov,
1201 size_t qiov_offset, int flags)
1202{
1203 BlockDriver *drv = bs->drv;
1204 int64_t sector_num;
1205 unsigned int nb_sectors;
1206 QEMUIOVector local_qiov;
1207 int ret;
1208
1209 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1210 assert(!(flags & ~BDRV_REQ_MASK));
1211 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1212
1213 if (!drv) {
1214 return -ENOMEDIUM;
1215 }
1216
1217 if (drv->bdrv_co_pwritev_part) {
1218 ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
1219 flags & bs->supported_write_flags);
1220 flags &= ~bs->supported_write_flags;
1221 goto emulate_flags;
1222 }
1223
1224 if (qiov_offset > 0 || bytes != qiov->size) {
1225 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1226 qiov = &local_qiov;
1227 }
1228
1229 if (drv->bdrv_co_pwritev) {
1230 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1231 flags & bs->supported_write_flags);
1232 flags &= ~bs->supported_write_flags;
1233 goto emulate_flags;
1234 }
1235
1236 if (drv->bdrv_aio_pwritev) {
1237 BlockAIOCB *acb;
1238 CoroutineIOCompletion co = {
1239 .coroutine = qemu_coroutine_self(),
1240 };
1241
1242 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1243 flags & bs->supported_write_flags,
1244 bdrv_co_io_em_complete, &co);
1245 flags &= ~bs->supported_write_flags;
1246 if (acb == NULL) {
1247 ret = -EIO;
1248 } else {
1249 qemu_coroutine_yield();
1250 ret = co.ret;
1251 }
1252 goto emulate_flags;
1253 }
1254
1255 sector_num = offset >> BDRV_SECTOR_BITS;
1256 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1257
1258 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
1259 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
1260 assert(bytes <= BDRV_REQUEST_MAX_BYTES);
1261
1262 assert(drv->bdrv_co_writev);
1263 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1264 flags & bs->supported_write_flags);
1265 flags &= ~bs->supported_write_flags;
1266
1267emulate_flags:
1268 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1269 ret = bdrv_co_flush(bs);
1270 }
1271
1272 if (qiov == &local_qiov) {
1273 qemu_iovec_destroy(&local_qiov);
1274 }
1275
1276 return ret;
1277}
1278
1279static int coroutine_fn
1280bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
1281 int64_t bytes, QEMUIOVector *qiov,
1282 size_t qiov_offset)
1283{
1284 BlockDriver *drv = bs->drv;
1285 QEMUIOVector local_qiov;
1286 int ret;
1287
1288 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1289
1290 if (!drv) {
1291 return -ENOMEDIUM;
1292 }
1293
1294 if (!block_driver_can_compress(drv)) {
1295 return -ENOTSUP;
1296 }
1297
1298 if (drv->bdrv_co_pwritev_compressed_part) {
1299 return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
1300 qiov, qiov_offset);
1301 }
1302
1303 if (qiov_offset == 0) {
1304 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1305 }
1306
1307 qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
1308 ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
1309 qemu_iovec_destroy(&local_qiov);
1310
1311 return ret;
1312}
1313
1314static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1315 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1316 size_t qiov_offset, int flags)
1317{
1318 BlockDriverState *bs = child->bs;
1319
1320
1321
1322
1323
1324
1325 void *bounce_buffer = NULL;
1326
1327 BlockDriver *drv = bs->drv;
1328 int64_t cluster_offset;
1329 int64_t cluster_bytes;
1330 int64_t skip_bytes;
1331 int ret;
1332 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1333 BDRV_REQUEST_MAX_BYTES);
1334 int64_t progress = 0;
1335 bool skip_write;
1336
1337 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1338
1339 if (!drv) {
1340 return -ENOMEDIUM;
1341 }
1342
1343
1344
1345
1346
1347 skip_write = (bs->open_flags & BDRV_O_INACTIVE);
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1364 skip_bytes = offset - cluster_offset;
1365
1366 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1367 cluster_offset, cluster_bytes);
1368
1369 while (cluster_bytes) {
1370 int64_t pnum;
1371
1372 if (skip_write) {
1373 ret = 1;
1374 pnum = MIN(cluster_bytes, max_transfer);
1375 } else {
1376 ret = bdrv_is_allocated(bs, cluster_offset,
1377 MIN(cluster_bytes, max_transfer), &pnum);
1378 if (ret < 0) {
1379
1380
1381
1382
1383
1384 pnum = MIN(cluster_bytes, max_transfer);
1385 }
1386
1387
1388 if (ret == 0 && pnum == 0) {
1389 assert(progress >= bytes);
1390 break;
1391 }
1392
1393 assert(skip_bytes < pnum);
1394 }
1395
1396 if (ret <= 0) {
1397 QEMUIOVector local_qiov;
1398
1399
1400 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1401 if (!bounce_buffer) {
1402 int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
1403 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
1404 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
1405
1406 bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
1407 if (!bounce_buffer) {
1408 ret = -ENOMEM;
1409 goto err;
1410 }
1411 }
1412 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1413
1414 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1415 &local_qiov, 0, 0);
1416 if (ret < 0) {
1417 goto err;
1418 }
1419
1420 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1421 if (drv->bdrv_co_pwrite_zeroes &&
1422 buffer_is_zero(bounce_buffer, pnum)) {
1423
1424
1425
1426 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1427 BDRV_REQ_WRITE_UNCHANGED);
1428 } else {
1429
1430
1431
1432 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1433 &local_qiov, 0,
1434 BDRV_REQ_WRITE_UNCHANGED);
1435 }
1436
1437 if (ret < 0) {
1438
1439
1440
1441
1442
1443 goto err;
1444 }
1445
1446 if (!(flags & BDRV_REQ_PREFETCH)) {
1447 qemu_iovec_from_buf(qiov, qiov_offset + progress,
1448 bounce_buffer + skip_bytes,
1449 MIN(pnum - skip_bytes, bytes - progress));
1450 }
1451 } else if (!(flags & BDRV_REQ_PREFETCH)) {
1452
1453 ret = bdrv_driver_preadv(bs, offset + progress,
1454 MIN(pnum - skip_bytes, bytes - progress),
1455 qiov, qiov_offset + progress, 0);
1456 if (ret < 0) {
1457 goto err;
1458 }
1459 }
1460
1461 cluster_offset += pnum;
1462 cluster_bytes -= pnum;
1463 progress += pnum - skip_bytes;
1464 skip_bytes = 0;
1465 }
1466 ret = 0;
1467
1468err:
1469 qemu_vfree(bounce_buffer);
1470 return ret;
1471}
1472
1473
1474
1475
1476
1477
1478static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1479 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
1480 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
1481{
1482 BlockDriverState *bs = child->bs;
1483 int64_t total_bytes, max_bytes;
1484 int ret = 0;
1485 int64_t bytes_remaining = bytes;
1486 int max_transfer;
1487
1488 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
1489 assert(is_power_of_2(align));
1490 assert((offset & (align - 1)) == 0);
1491 assert((bytes & (align - 1)) == 0);
1492 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1493 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1494 align);
1495
1496
1497
1498
1499
1500 assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
1501
1502
1503 if (flags & BDRV_REQ_COPY_ON_READ) {
1504
1505
1506
1507
1508
1509 bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
1510 } else {
1511 bdrv_wait_serialising_requests(req);
1512 }
1513
1514 if (flags & BDRV_REQ_COPY_ON_READ) {
1515 int64_t pnum;
1516
1517
1518 flags &= ~BDRV_REQ_COPY_ON_READ;
1519
1520 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1521 if (ret < 0) {
1522 goto out;
1523 }
1524
1525 if (!ret || pnum != bytes) {
1526 ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
1527 qiov, qiov_offset, flags);
1528 goto out;
1529 } else if (flags & BDRV_REQ_PREFETCH) {
1530 goto out;
1531 }
1532 }
1533
1534
1535 total_bytes = bdrv_getlength(bs);
1536 if (total_bytes < 0) {
1537 ret = total_bytes;
1538 goto out;
1539 }
1540
1541 assert(!(flags & ~bs->supported_read_flags));
1542
1543 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1544 if (bytes <= max_bytes && bytes <= max_transfer) {
1545 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
1546 goto out;
1547 }
1548
1549 while (bytes_remaining) {
1550 int64_t num;
1551
1552 if (max_bytes) {
1553 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1554 assert(num);
1555
1556 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1557 num, qiov,
1558 qiov_offset + bytes - bytes_remaining,
1559 flags);
1560 max_bytes -= num;
1561 } else {
1562 num = bytes_remaining;
1563 ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
1564 0, bytes_remaining);
1565 }
1566 if (ret < 0) {
1567 goto out;
1568 }
1569 bytes_remaining -= num;
1570 }
1571
1572out:
1573 return ret < 0 ? ret : 0;
1574}
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598typedef struct BdrvRequestPadding {
1599 uint8_t *buf;
1600 size_t buf_len;
1601 uint8_t *tail_buf;
1602 size_t head;
1603 size_t tail;
1604 bool merge_reads;
1605 QEMUIOVector local_qiov;
1606} BdrvRequestPadding;
1607
1608static bool bdrv_init_padding(BlockDriverState *bs,
1609 int64_t offset, int64_t bytes,
1610 BdrvRequestPadding *pad)
1611{
1612 int64_t align = bs->bl.request_alignment;
1613 int64_t sum;
1614
1615 bdrv_check_request(offset, bytes, &error_abort);
1616 assert(align <= INT_MAX);
1617 assert(align <= SIZE_MAX / 2);
1618
1619 memset(pad, 0, sizeof(*pad));
1620
1621 pad->head = offset & (align - 1);
1622 pad->tail = ((offset + bytes) & (align - 1));
1623 if (pad->tail) {
1624 pad->tail = align - pad->tail;
1625 }
1626
1627 if (!pad->head && !pad->tail) {
1628 return false;
1629 }
1630
1631 assert(bytes);
1632
1633 sum = pad->head + bytes + pad->tail;
1634 pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
1635 pad->buf = qemu_blockalign(bs, pad->buf_len);
1636 pad->merge_reads = sum == pad->buf_len;
1637 if (pad->tail) {
1638 pad->tail_buf = pad->buf + pad->buf_len - align;
1639 }
1640
1641 return true;
1642}
1643
1644static int bdrv_padding_rmw_read(BdrvChild *child,
1645 BdrvTrackedRequest *req,
1646 BdrvRequestPadding *pad,
1647 bool zero_middle)
1648{
1649 QEMUIOVector local_qiov;
1650 BlockDriverState *bs = child->bs;
1651 uint64_t align = bs->bl.request_alignment;
1652 int ret;
1653
1654 assert(req->serialising && pad->buf);
1655
1656 if (pad->head || pad->merge_reads) {
1657 int64_t bytes = pad->merge_reads ? pad->buf_len : align;
1658
1659 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
1660
1661 if (pad->head) {
1662 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1663 }
1664 if (pad->merge_reads && pad->tail) {
1665 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1666 }
1667 ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
1668 align, &local_qiov, 0, 0);
1669 if (ret < 0) {
1670 return ret;
1671 }
1672 if (pad->head) {
1673 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1674 }
1675 if (pad->merge_reads && pad->tail) {
1676 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1677 }
1678
1679 if (pad->merge_reads) {
1680 goto zero_mem;
1681 }
1682 }
1683
1684 if (pad->tail) {
1685 qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
1686
1687 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1688 ret = bdrv_aligned_preadv(
1689 child, req,
1690 req->overlap_offset + req->overlap_bytes - align,
1691 align, align, &local_qiov, 0, 0);
1692 if (ret < 0) {
1693 return ret;
1694 }
1695 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1696 }
1697
1698zero_mem:
1699 if (zero_middle) {
1700 memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
1701 }
1702
1703 return 0;
1704}
1705
1706static void bdrv_padding_destroy(BdrvRequestPadding *pad)
1707{
1708 if (pad->buf) {
1709 qemu_vfree(pad->buf);
1710 qemu_iovec_destroy(&pad->local_qiov);
1711 }
1712 memset(pad, 0, sizeof(*pad));
1713}
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727static int bdrv_pad_request(BlockDriverState *bs,
1728 QEMUIOVector **qiov, size_t *qiov_offset,
1729 int64_t *offset, int64_t *bytes,
1730 BdrvRequestPadding *pad, bool *padded)
1731{
1732 int ret;
1733
1734 bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
1735
1736 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
1737 if (padded) {
1738 *padded = false;
1739 }
1740 return 0;
1741 }
1742
1743 ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
1744 *qiov, *qiov_offset, *bytes,
1745 pad->buf + pad->buf_len - pad->tail,
1746 pad->tail);
1747 if (ret < 0) {
1748 bdrv_padding_destroy(pad);
1749 return ret;
1750 }
1751 *bytes += pad->head + pad->tail;
1752 *offset -= pad->head;
1753 *qiov = &pad->local_qiov;
1754 *qiov_offset = 0;
1755 if (padded) {
1756 *padded = true;
1757 }
1758
1759 return 0;
1760}
1761
1762int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1763 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
1764 BdrvRequestFlags flags)
1765{
1766 return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
1767}
1768
1769int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
1770 int64_t offset, int64_t bytes,
1771 QEMUIOVector *qiov, size_t qiov_offset,
1772 BdrvRequestFlags flags)
1773{
1774 BlockDriverState *bs = child->bs;
1775 BdrvTrackedRequest req;
1776 BdrvRequestPadding pad;
1777 int ret;
1778
1779 trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
1780
1781 if (!bdrv_is_inserted(bs)) {
1782 return -ENOMEDIUM;
1783 }
1784
1785 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
1786 if (ret < 0) {
1787 return ret;
1788 }
1789
1790 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
1791
1792
1793
1794
1795
1796
1797
1798
1799 return 0;
1800 }
1801
1802 bdrv_inc_in_flight(bs);
1803
1804
1805 if (qatomic_read(&bs->copy_on_read)) {
1806 flags |= BDRV_REQ_COPY_ON_READ;
1807 }
1808
1809 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
1810 NULL);
1811 if (ret < 0) {
1812 return ret;
1813 }
1814
1815 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1816 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
1817 bs->bl.request_alignment,
1818 qiov, qiov_offset, flags);
1819 tracked_request_end(&req);
1820 bdrv_dec_in_flight(bs);
1821
1822 bdrv_padding_destroy(&pad);
1823
1824 return ret;
1825}
1826
1827static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1828 int64_t offset, int64_t bytes, BdrvRequestFlags flags)
1829{
1830 BlockDriver *drv = bs->drv;
1831 QEMUIOVector qiov;
1832 void *buf = NULL;
1833 int ret = 0;
1834 bool need_flush = false;
1835 int head = 0;
1836 int tail = 0;
1837
1838 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
1839 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1840 bs->bl.request_alignment);
1841 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1842
1843 bdrv_check_request(offset, bytes, &error_abort);
1844
1845 if (!drv) {
1846 return -ENOMEDIUM;
1847 }
1848
1849 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1850 return -ENOTSUP;
1851 }
1852
1853 assert(alignment % bs->bl.request_alignment == 0);
1854 head = offset % alignment;
1855 tail = (offset + bytes) % alignment;
1856 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1857 assert(max_write_zeroes >= bs->bl.request_alignment);
1858
1859 while (bytes > 0 && !ret) {
1860 int64_t num = bytes;
1861
1862
1863
1864
1865
1866 if (head) {
1867
1868
1869
1870 num = MIN(MIN(bytes, max_transfer), alignment - head);
1871 head = (head + num) % alignment;
1872 assert(num < max_write_zeroes);
1873 } else if (tail && num > alignment) {
1874
1875 num -= tail;
1876 }
1877
1878
1879 if (num > max_write_zeroes) {
1880 num = max_write_zeroes;
1881 }
1882
1883 ret = -ENOTSUP;
1884
1885 if (drv->bdrv_co_pwrite_zeroes) {
1886 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1887 flags & bs->supported_zero_flags);
1888 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1889 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1890 need_flush = true;
1891 }
1892 } else {
1893 assert(!bs->supported_zero_flags);
1894 }
1895
1896 if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
1897
1898 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1899
1900 if ((flags & BDRV_REQ_FUA) &&
1901 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1902
1903
1904 write_flags &= ~BDRV_REQ_FUA;
1905 need_flush = true;
1906 }
1907 num = MIN(num, max_transfer);
1908 if (buf == NULL) {
1909 buf = qemu_try_blockalign0(bs, num);
1910 if (buf == NULL) {
1911 ret = -ENOMEM;
1912 goto fail;
1913 }
1914 }
1915 qemu_iovec_init_buf(&qiov, buf, num);
1916
1917 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
1918
1919
1920
1921
1922 if (num < max_transfer) {
1923 qemu_vfree(buf);
1924 buf = NULL;
1925 }
1926 }
1927
1928 offset += num;
1929 bytes -= num;
1930 }
1931
1932fail:
1933 if (ret == 0 && need_flush) {
1934 ret = bdrv_co_flush(bs);
1935 }
1936 qemu_vfree(buf);
1937 return ret;
1938}
1939
1940static inline int coroutine_fn
1941bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
1942 BdrvTrackedRequest *req, int flags)
1943{
1944 BlockDriverState *bs = child->bs;
1945
1946 bdrv_check_request(offset, bytes, &error_abort);
1947
1948 if (bs->read_only) {
1949 return -EPERM;
1950 }
1951
1952 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1953 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1954 assert(!(flags & ~BDRV_REQ_MASK));
1955 assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
1956
1957 if (flags & BDRV_REQ_SERIALISING) {
1958 QEMU_LOCK_GUARD(&bs->reqs_lock);
1959
1960 tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
1961
1962 if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
1963 return -EBUSY;
1964 }
1965
1966 bdrv_wait_serialising_requests_locked(req);
1967 } else {
1968 bdrv_wait_serialising_requests(req);
1969 }
1970
1971 assert(req->overlap_offset <= offset);
1972 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1973 assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
1974 child->perm & BLK_PERM_RESIZE);
1975
1976 switch (req->type) {
1977 case BDRV_TRACKED_WRITE:
1978 case BDRV_TRACKED_DISCARD:
1979 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1980 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1981 } else {
1982 assert(child->perm & BLK_PERM_WRITE);
1983 }
1984 return notifier_with_return_list_notify(&bs->before_write_notifiers,
1985 req);
1986 case BDRV_TRACKED_TRUNCATE:
1987 assert(child->perm & BLK_PERM_RESIZE);
1988 return 0;
1989 default:
1990 abort();
1991 }
1992}
1993
1994static inline void coroutine_fn
1995bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
1996 BdrvTrackedRequest *req, int ret)
1997{
1998 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1999 BlockDriverState *bs = child->bs;
2000
2001 bdrv_check_request(offset, bytes, &error_abort);
2002
2003 qatomic_inc(&bs->write_gen);
2004
2005
2006
2007
2008
2009
2010
2011
2012 if (ret == 0 &&
2013 (req->type == BDRV_TRACKED_TRUNCATE ||
2014 end_sector > bs->total_sectors) &&
2015 req->type != BDRV_TRACKED_DISCARD) {
2016 bs->total_sectors = end_sector;
2017 bdrv_parent_cb_resize(bs);
2018 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
2019 }
2020 if (req->bytes) {
2021 switch (req->type) {
2022 case BDRV_TRACKED_WRITE:
2023 stat64_max(&bs->wr_highest_offset, offset + bytes);
2024
2025 case BDRV_TRACKED_DISCARD:
2026 bdrv_set_dirty(bs, offset, bytes);
2027 break;
2028 default:
2029 break;
2030 }
2031 }
2032}
2033
2034
2035
2036
2037
2038static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
2039 BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
2040 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
2041{
2042 BlockDriverState *bs = child->bs;
2043 BlockDriver *drv = bs->drv;
2044 int ret;
2045
2046 int64_t bytes_remaining = bytes;
2047 int max_transfer;
2048
2049 bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
2050
2051 if (!drv) {
2052 return -ENOMEDIUM;
2053 }
2054
2055 if (bdrv_has_readonly_bitmaps(bs)) {
2056 return -EPERM;
2057 }
2058
2059 assert(is_power_of_2(align));
2060 assert((offset & (align - 1)) == 0);
2061 assert((bytes & (align - 1)) == 0);
2062 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
2063 align);
2064
2065 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
2066
2067 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
2068 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
2069 qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
2070 flags |= BDRV_REQ_ZERO_WRITE;
2071 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
2072 flags |= BDRV_REQ_MAY_UNMAP;
2073 }
2074 }
2075
2076 if (ret < 0) {
2077
2078 } else if (flags & BDRV_REQ_ZERO_WRITE) {
2079 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
2080 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
2081 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
2082 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
2083 qiov, qiov_offset);
2084 } else if (bytes <= max_transfer) {
2085 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2086 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
2087 } else {
2088 bdrv_debug_event(bs, BLKDBG_PWRITEV);
2089 while (bytes_remaining) {
2090 int num = MIN(bytes_remaining, max_transfer);
2091 int local_flags = flags;
2092
2093 assert(num);
2094 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
2095 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
2096
2097
2098 local_flags &= ~BDRV_REQ_FUA;
2099 }
2100
2101 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
2102 num, qiov,
2103 qiov_offset + bytes - bytes_remaining,
2104 local_flags);
2105 if (ret < 0) {
2106 break;
2107 }
2108 bytes_remaining -= num;
2109 }
2110 }
2111 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
2112
2113 if (ret >= 0) {
2114 ret = 0;
2115 }
2116 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
2117
2118 return ret;
2119}
2120
2121static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
2122 int64_t offset,
2123 int64_t bytes,
2124 BdrvRequestFlags flags,
2125 BdrvTrackedRequest *req)
2126{
2127 BlockDriverState *bs = child->bs;
2128 QEMUIOVector local_qiov;
2129 uint64_t align = bs->bl.request_alignment;
2130 int ret = 0;
2131 bool padding;
2132 BdrvRequestPadding pad;
2133
2134 padding = bdrv_init_padding(bs, offset, bytes, &pad);
2135 if (padding) {
2136 bdrv_make_request_serialising(req, align);
2137
2138 bdrv_padding_rmw_read(child, req, &pad, true);
2139
2140 if (pad.head || pad.merge_reads) {
2141 int64_t aligned_offset = offset & ~(align - 1);
2142 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2143
2144 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2145 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2146 align, &local_qiov, 0,
2147 flags & ~BDRV_REQ_ZERO_WRITE);
2148 if (ret < 0 || pad.merge_reads) {
2149
2150 goto out;
2151 }
2152 offset += write_bytes - pad.head;
2153 bytes -= write_bytes - pad.head;
2154 }
2155 }
2156
2157 assert(!bytes || (offset & (align - 1)) == 0);
2158 if (bytes >= align) {
2159
2160 int64_t aligned_bytes = bytes & ~(align - 1);
2161 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2162 NULL, 0, flags);
2163 if (ret < 0) {
2164 goto out;
2165 }
2166 bytes -= aligned_bytes;
2167 offset += aligned_bytes;
2168 }
2169
2170 assert(!bytes || (offset & (align - 1)) == 0);
2171 if (bytes) {
2172 assert(align == pad.tail + bytes);
2173
2174 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2175 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2176 &local_qiov, 0,
2177 flags & ~BDRV_REQ_ZERO_WRITE);
2178 }
2179
2180out:
2181 bdrv_padding_destroy(&pad);
2182
2183 return ret;
2184}
2185
2186
2187
2188
2189int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2190 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
2191 BdrvRequestFlags flags)
2192{
2193 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2194}
2195
2196int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2197 int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
2198 BdrvRequestFlags flags)
2199{
2200 BlockDriverState *bs = child->bs;
2201 BdrvTrackedRequest req;
2202 uint64_t align = bs->bl.request_alignment;
2203 BdrvRequestPadding pad;
2204 int ret;
2205 bool padded = false;
2206
2207 trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
2208
2209 if (!bdrv_is_inserted(bs)) {
2210 return -ENOMEDIUM;
2211 }
2212
2213 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
2214 if (ret < 0) {
2215 return ret;
2216 }
2217
2218
2219 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2220 !QEMU_IS_ALIGNED(offset | bytes, align))
2221 {
2222 return -ENOTSUP;
2223 }
2224
2225 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
2226
2227
2228
2229
2230
2231
2232
2233
2234 return 0;
2235 }
2236
2237 if (!(flags & BDRV_REQ_ZERO_WRITE)) {
2238
2239
2240
2241
2242
2243 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
2244 &padded);
2245 if (ret < 0) {
2246 return ret;
2247 }
2248 }
2249
2250 bdrv_inc_in_flight(bs);
2251 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2252
2253 if (flags & BDRV_REQ_ZERO_WRITE) {
2254 assert(!padded);
2255 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2256 goto out;
2257 }
2258
2259 if (padded) {
2260
2261
2262
2263
2264
2265
2266 bdrv_make_request_serialising(&req, align);
2267 bdrv_padding_rmw_read(child, &req, &pad, false);
2268 }
2269
2270 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2271 qiov, qiov_offset, flags);
2272
2273 bdrv_padding_destroy(&pad);
2274
2275out:
2276 tracked_request_end(&req);
2277 bdrv_dec_in_flight(bs);
2278
2279 return ret;
2280}
2281
2282int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2283 int64_t bytes, BdrvRequestFlags flags)
2284{
2285 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2286
2287 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2288 flags &= ~BDRV_REQ_MAY_UNMAP;
2289 }
2290
2291 return bdrv_co_pwritev(child, offset, bytes, NULL,
2292 BDRV_REQ_ZERO_WRITE | flags);
2293}
2294
2295
2296
2297
2298int bdrv_flush_all(void)
2299{
2300 BdrvNextIterator it;
2301 BlockDriverState *bs = NULL;
2302 int result = 0;
2303
2304
2305
2306
2307
2308
2309 if (replay_events_enabled()) {
2310 return result;
2311 }
2312
2313 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2314 AioContext *aio_context = bdrv_get_aio_context(bs);
2315 int ret;
2316
2317 aio_context_acquire(aio_context);
2318 ret = bdrv_flush(bs);
2319 if (ret < 0 && !result) {
2320 result = ret;
2321 }
2322 aio_context_release(aio_context);
2323 }
2324
2325 return result;
2326}
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2356 bool want_zero,
2357 int64_t offset, int64_t bytes,
2358 int64_t *pnum, int64_t *map,
2359 BlockDriverState **file)
2360{
2361 int64_t total_size;
2362 int64_t n;
2363 int ret;
2364 int64_t local_map = 0;
2365 BlockDriverState *local_file = NULL;
2366 int64_t aligned_offset, aligned_bytes;
2367 uint32_t align;
2368 bool has_filtered_child;
2369
2370 assert(pnum);
2371 *pnum = 0;
2372 total_size = bdrv_getlength(bs);
2373 if (total_size < 0) {
2374 ret = total_size;
2375 goto early_out;
2376 }
2377
2378 if (offset >= total_size) {
2379 ret = BDRV_BLOCK_EOF;
2380 goto early_out;
2381 }
2382 if (!bytes) {
2383 ret = 0;
2384 goto early_out;
2385 }
2386
2387 n = total_size - offset;
2388 if (n < bytes) {
2389 bytes = n;
2390 }
2391
2392
2393 assert(bs->drv);
2394 has_filtered_child = bdrv_filter_child(bs);
2395 if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
2396 *pnum = bytes;
2397 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2398 if (offset + bytes == total_size) {
2399 ret |= BDRV_BLOCK_EOF;
2400 }
2401 if (bs->drv->protocol_name) {
2402 ret |= BDRV_BLOCK_OFFSET_VALID;
2403 local_map = offset;
2404 local_file = bs;
2405 }
2406 goto early_out;
2407 }
2408
2409 bdrv_inc_in_flight(bs);
2410
2411
2412 align = bs->bl.request_alignment;
2413 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2414 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2415
2416 if (bs->drv->bdrv_co_block_status) {
2417 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2418 aligned_bytes, pnum, &local_map,
2419 &local_file);
2420 } else {
2421
2422
2423 local_file = bdrv_filter_bs(bs);
2424 assert(local_file);
2425
2426 *pnum = aligned_bytes;
2427 local_map = aligned_offset;
2428 ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2429 }
2430 if (ret < 0) {
2431 *pnum = 0;
2432 goto out;
2433 }
2434
2435
2436
2437
2438
2439 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2440 align > offset - aligned_offset);
2441 if (ret & BDRV_BLOCK_RECURSE) {
2442 assert(ret & BDRV_BLOCK_DATA);
2443 assert(ret & BDRV_BLOCK_OFFSET_VALID);
2444 assert(!(ret & BDRV_BLOCK_ZERO));
2445 }
2446
2447 *pnum -= offset - aligned_offset;
2448 if (*pnum > bytes) {
2449 *pnum = bytes;
2450 }
2451 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2452 local_map += offset - aligned_offset;
2453 }
2454
2455 if (ret & BDRV_BLOCK_RAW) {
2456 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2457 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2458 *pnum, pnum, &local_map, &local_file);
2459 goto out;
2460 }
2461
2462 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2463 ret |= BDRV_BLOCK_ALLOCATED;
2464 } else if (bs->drv->supports_backing) {
2465 BlockDriverState *cow_bs = bdrv_cow_bs(bs);
2466
2467 if (!cow_bs) {
2468 ret |= BDRV_BLOCK_ZERO;
2469 } else if (want_zero) {
2470 int64_t size2 = bdrv_getlength(cow_bs);
2471
2472 if (size2 >= 0 && offset >= size2) {
2473 ret |= BDRV_BLOCK_ZERO;
2474 }
2475 }
2476 }
2477
2478 if (want_zero && ret & BDRV_BLOCK_RECURSE &&
2479 local_file && local_file != bs &&
2480 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2481 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2482 int64_t file_pnum;
2483 int ret2;
2484
2485 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2486 *pnum, &file_pnum, NULL, NULL);
2487 if (ret2 >= 0) {
2488
2489
2490
2491 if (ret2 & BDRV_BLOCK_EOF &&
2492 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
2493
2494
2495
2496
2497
2498 ret |= BDRV_BLOCK_ZERO;
2499 } else {
2500
2501 *pnum = file_pnum;
2502 ret |= (ret2 & BDRV_BLOCK_ZERO);
2503 }
2504 }
2505 }
2506
2507out:
2508 bdrv_dec_in_flight(bs);
2509 if (ret >= 0 && offset + *pnum == total_size) {
2510 ret |= BDRV_BLOCK_EOF;
2511 }
2512early_out:
2513 if (file) {
2514 *file = local_file;
2515 }
2516 if (map) {
2517 *map = local_map;
2518 }
2519 return ret;
2520}
2521
2522int coroutine_fn
2523bdrv_co_common_block_status_above(BlockDriverState *bs,
2524 BlockDriverState *base,
2525 bool include_base,
2526 bool want_zero,
2527 int64_t offset,
2528 int64_t bytes,
2529 int64_t *pnum,
2530 int64_t *map,
2531 BlockDriverState **file,
2532 int *depth)
2533{
2534 int ret;
2535 BlockDriverState *p;
2536 int64_t eof = 0;
2537 int dummy;
2538
2539 assert(!include_base || base);
2540
2541 if (!depth) {
2542 depth = &dummy;
2543 }
2544 *depth = 0;
2545
2546 if (!include_base && bs == base) {
2547 *pnum = bytes;
2548 return 0;
2549 }
2550
2551 ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
2552 ++*depth;
2553 if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
2554 return ret;
2555 }
2556
2557 if (ret & BDRV_BLOCK_EOF) {
2558 eof = offset + *pnum;
2559 }
2560
2561 assert(*pnum <= bytes);
2562 bytes = *pnum;
2563
2564 for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
2565 p = bdrv_filter_or_cow_bs(p))
2566 {
2567 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2568 file);
2569 ++*depth;
2570 if (ret < 0) {
2571 return ret;
2572 }
2573 if (*pnum == 0) {
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583 assert(ret & BDRV_BLOCK_EOF);
2584 *pnum = bytes;
2585 if (file) {
2586 *file = p;
2587 }
2588 ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
2589 break;
2590 }
2591 if (ret & BDRV_BLOCK_ALLOCATED) {
2592
2593
2594
2595
2596
2597
2598
2599 ret &= ~BDRV_BLOCK_EOF;
2600 break;
2601 }
2602
2603 if (p == base) {
2604 assert(include_base);
2605 break;
2606 }
2607
2608
2609
2610
2611
2612 assert(*pnum <= bytes);
2613 bytes = *pnum;
2614 }
2615
2616 if (offset + *pnum == eof) {
2617 ret |= BDRV_BLOCK_EOF;
2618 }
2619
2620 return ret;
2621}
2622
2623int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2624 int64_t offset, int64_t bytes, int64_t *pnum,
2625 int64_t *map, BlockDriverState **file)
2626{
2627 return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
2628 pnum, map, file, NULL);
2629}
2630
2631int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2632 int64_t *pnum, int64_t *map, BlockDriverState **file)
2633{
2634 return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
2635 offset, bytes, pnum, map, file);
2636}
2637
2638
2639
2640
2641
2642
2643
2644
2645int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
2646 int64_t bytes)
2647{
2648 int ret;
2649 int64_t pnum = bytes;
2650
2651 if (!bytes) {
2652 return 1;
2653 }
2654
2655 ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
2656 bytes, &pnum, NULL, NULL, NULL);
2657
2658 if (ret < 0) {
2659 return ret;
2660 }
2661
2662 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
2663}
2664
2665int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2666 int64_t bytes, int64_t *pnum)
2667{
2668 int ret;
2669 int64_t dummy;
2670
2671 ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
2672 bytes, pnum ? pnum : &dummy, NULL,
2673 NULL, NULL);
2674 if (ret < 0) {
2675 return ret;
2676 }
2677 return !!(ret & BDRV_BLOCK_ALLOCATED);
2678}
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697int bdrv_is_allocated_above(BlockDriverState *top,
2698 BlockDriverState *base,
2699 bool include_base, int64_t offset,
2700 int64_t bytes, int64_t *pnum)
2701{
2702 int depth;
2703 int ret = bdrv_common_block_status_above(top, base, include_base, false,
2704 offset, bytes, pnum, NULL, NULL,
2705 &depth);
2706 if (ret < 0) {
2707 return ret;
2708 }
2709
2710 if (ret & BDRV_BLOCK_ALLOCATED) {
2711 return depth;
2712 }
2713 return 0;
2714}
2715
2716int coroutine_fn
2717bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2718{
2719 BlockDriver *drv = bs->drv;
2720 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2721 int ret = -ENOTSUP;
2722
2723 if (!drv) {
2724 return -ENOMEDIUM;
2725 }
2726
2727 bdrv_inc_in_flight(bs);
2728
2729 if (drv->bdrv_load_vmstate) {
2730 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2731 } else if (child_bs) {
2732 ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
2733 }
2734
2735 bdrv_dec_in_flight(bs);
2736
2737 return ret;
2738}
2739
2740int coroutine_fn
2741bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2742{
2743 BlockDriver *drv = bs->drv;
2744 BlockDriverState *child_bs = bdrv_primary_bs(bs);
2745 int ret = -ENOTSUP;
2746
2747 if (!drv) {
2748 return -ENOMEDIUM;
2749 }
2750
2751 bdrv_inc_in_flight(bs);
2752
2753 if (drv->bdrv_save_vmstate) {
2754 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2755 } else if (child_bs) {
2756 ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
2757 }
2758
2759 bdrv_dec_in_flight(bs);
2760
2761 return ret;
2762}
2763
2764int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2765 int64_t pos, int size)
2766{
2767 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2768 int ret = bdrv_writev_vmstate(bs, &qiov, pos);
2769
2770 return ret < 0 ? ret : size;
2771}
2772
2773int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2774 int64_t pos, int size)
2775{
2776 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2777 int ret = bdrv_readv_vmstate(bs, &qiov, pos);
2778
2779 return ret < 0 ? ret : size;
2780}
2781
2782
2783
2784
2785void bdrv_aio_cancel(BlockAIOCB *acb)
2786{
2787 qemu_aio_ref(acb);
2788 bdrv_aio_cancel_async(acb);
2789 while (acb->refcnt > 1) {
2790 if (acb->aiocb_info->get_aio_context) {
2791 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2792 } else if (acb->bs) {
2793
2794
2795
2796
2797 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2798 aio_poll(bdrv_get_aio_context(acb->bs), true);
2799 } else {
2800 abort();
2801 }
2802 }
2803 qemu_aio_unref(acb);
2804}
2805
2806
2807
2808
2809void bdrv_aio_cancel_async(BlockAIOCB *acb)
2810{
2811 if (acb->aiocb_info->cancel_async) {
2812 acb->aiocb_info->cancel_async(acb);
2813 }
2814}
2815
2816
2817
2818
/*
 * Flush @bs: run the driver's flush callbacks and then recursively flush
 * every child that @bs holds with BLK_PERM_WRITE or BLK_PERM_WRITE_UNCHANGED.
 *
 * Only one flush is active per node at a time; concurrent callers queue on
 * bs->flush_queue until the active one finishes.
 *
 * Returns 0 on success, and also when the node is not inserted, is
 * read-only or is an SG device (nothing is done in that case).  Returns a
 * negative errno value on failure.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);
    BdrvChild *child;
    int current_gen;
    int ret = 0;

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);

    /* Wait until any previous flush has completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* From here on this is the active flush; later callers wait above */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /*
     * A driver-provided bdrv_co_flush replaces everything below, including
     * the recursion into the children.
     */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /*
     * The flush-to-OS step runs even with BDRV_O_NO_FLUSH, because that flag
     * is only checked afterwards.
     */
    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* With BDRV_O_NO_FLUSH, do not force the data to disk for this node */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_children;
    }

    /*
     * Skip the disk flush if the sampled write generation equals the one
     * recorded by the last successful flush.
     */
    if (bs->flushed_gen == current_gen) {
        goto flush_children;
    }

    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /*
         * NOTE(review): bs->drv was dereferenced above, so it can only be
         * NULL here if an earlier callback detached the driver - confirm.
         */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        /* AIO variant: submit, then yield until the completion sets co.ret */
        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * The driver provides neither bdrv_co_flush_to_disk nor
         * bdrv_aio_flush.  This is reported as success rather than as an
         * error.
         *
         * NOTE(review): presumably such drivers cannot guarantee durability
         * themselves and failing here would break guests - confirm.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /*
     * Flush the children.  ret is reset to 0 first; every eligible child is
     * flushed, and the first non-zero child result is the one returned.
     */
flush_children:
    ret = 0;
    QLIST_FOREACH(child, &bs->children, next) {
        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
            int this_child_ret = bdrv_co_flush(child->bs);
            if (!ret) {
                ret = this_child_ret;
            }
        }
    }

out:
    /* Record the flushed generation only when everything succeeded */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Wake the next queued flush; the return value is deliberately ignored */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}
2941
/*
 * Discard @bytes bytes starting at @offset on the node behind @child.
 *
 * The request is split into fragments at alignment boundaries and capped at
 * the driver's maximum discard size; each fragment goes to the driver's
 * bdrv_co_pdiscard or bdrv_aio_pdiscard callback.
 *
 * Returns 0 on success, and also when discard is disabled (BDRV_O_UNMAP not
 * set) or the driver has no discard callback.  Returns -ENOMEDIUM when there
 * is no usable node, -EPERM when the node has read-only bitmaps, or another
 * negative errno value from request checking, preparation or the driver.
 * A driver result of -ENOTSUP for a fragment is not treated as an error.
 */
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;
    BlockDriverState *bs = child->bs;

    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_request(offset, bytes, NULL);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing if discard is disabled for this node */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /*
     * The request is not rounded to the alignment.  Instead it is fragmented
     * below so that an unaligned head and tail are passed to the driver as
     * separate pieces; a driver answering -ENOTSUP for a piece is tolerated.
     * head and tail are the misalignment of the start and end of the request
     * relative to align.
     */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    /* Largest fragment size: the driver limit (or INT_MAX), aligned down */
    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /*
                 * Shorten the request so that it ends on an align boundary;
                 * the tail is handled by a later iteration.
                 */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                /* Split the tail at a request_alignment boundary as well */
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }
        /* Limit the fragment to the maximum discard size */
        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        /*
         * bs->drv is re-checked on every iteration.
         * NOTE(review): presumably the driver can be detached while this
         * coroutine is yielded in a previous fragment - confirm.
         */
        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            /* AIO variant: submit, then yield until completion sets co.ret */
            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        /* -ENOTSUP for one fragment is not fatal; keep going */
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    /*
     * Finish with req.offset/req.bytes rather than the locals, which the
     * loop above has advanced.
     */
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}
3056
3057int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
3058{
3059 BlockDriver *drv = bs->drv;
3060 CoroutineIOCompletion co = {
3061 .coroutine = qemu_coroutine_self(),
3062 };
3063 BlockAIOCB *acb;
3064
3065 bdrv_inc_in_flight(bs);
3066 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
3067 co.ret = -ENOTSUP;
3068 goto out;
3069 }
3070
3071 if (drv->bdrv_co_ioctl) {
3072 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
3073 } else {
3074 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
3075 if (!acb) {
3076 co.ret = -ENOTSUP;
3077 goto out;
3078 }
3079 qemu_coroutine_yield();
3080 }
3081out:
3082 bdrv_dec_in_flight(bs);
3083 return co.ret;
3084}
3085
3086void *qemu_blockalign(BlockDriverState *bs, size_t size)
3087{
3088 return qemu_memalign(bdrv_opt_mem_align(bs), size);
3089}
3090
3091void *qemu_blockalign0(BlockDriverState *bs, size_t size)
3092{
3093 return memset(qemu_blockalign(bs, size), 0, size);
3094}
3095
3096void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
3097{
3098 size_t align = bdrv_opt_mem_align(bs);
3099
3100
3101 assert(align > 0);
3102 if (size == 0) {
3103 size = align;
3104 }
3105
3106 return qemu_try_memalign(align, size);
3107}
3108
3109void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
3110{
3111 void *mem = qemu_try_blockalign(bs, size);
3112
3113 if (mem) {
3114 memset(mem, 0, size);
3115 }
3116
3117 return mem;
3118}
3119
3120
3121
3122
3123bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
3124{
3125 int i;
3126 size_t alignment = bdrv_min_mem_align(bs);
3127
3128 for (i = 0; i < qiov->niov; i++) {
3129 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
3130 return false;
3131 }
3132 if (qiov->iov[i].iov_len % alignment) {
3133 return false;
3134 }
3135 }
3136
3137 return true;
3138}
3139
3140void bdrv_add_before_write_notifier(BlockDriverState *bs,
3141 NotifierWithReturn *notifier)
3142{
3143 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
3144}
3145
3146void bdrv_io_plug(BlockDriverState *bs)
3147{
3148 BdrvChild *child;
3149
3150 QLIST_FOREACH(child, &bs->children, next) {
3151 bdrv_io_plug(child->bs);
3152 }
3153
3154 if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
3155 BlockDriver *drv = bs->drv;
3156 if (drv && drv->bdrv_io_plug) {
3157 drv->bdrv_io_plug(bs);
3158 }
3159 }
3160}
3161
3162void bdrv_io_unplug(BlockDriverState *bs)
3163{
3164 BdrvChild *child;
3165
3166 assert(bs->io_plugged);
3167 if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
3168 BlockDriver *drv = bs->drv;
3169 if (drv && drv->bdrv_io_unplug) {
3170 drv->bdrv_io_unplug(bs);
3171 }
3172 }
3173
3174 QLIST_FOREACH(child, &bs->children, next) {
3175 bdrv_io_unplug(child->bs);
3176 }
3177}
3178
3179void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
3180{
3181 BdrvChild *child;
3182
3183 if (bs->drv && bs->drv->bdrv_register_buf) {
3184 bs->drv->bdrv_register_buf(bs, host, size);
3185 }
3186 QLIST_FOREACH(child, &bs->children, next) {
3187 bdrv_register_buf(child->bs, host, size);
3188 }
3189}
3190
3191void bdrv_unregister_buf(BlockDriverState *bs, void *host)
3192{
3193 BdrvChild *child;
3194
3195 if (bs->drv && bs->drv->bdrv_unregister_buf) {
3196 bs->drv->bdrv_unregister_buf(bs, host);
3197 }
3198 QLIST_FOREACH(child, &bs->children, next) {
3199 bdrv_unregister_buf(child->bs, host);
3200 }
3201}
3202
3203static int coroutine_fn bdrv_co_copy_range_internal(
3204 BdrvChild *src, int64_t src_offset, BdrvChild *dst,
3205 int64_t dst_offset, int64_t bytes,
3206 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
3207 bool recurse_src)
3208{
3209 BdrvTrackedRequest req;
3210 int ret;
3211
3212
3213 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
3214 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
3215
3216 if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
3217 return -ENOMEDIUM;
3218 }
3219 ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
3220 if (ret) {
3221 return ret;
3222 }
3223 if (write_flags & BDRV_REQ_ZERO_WRITE) {
3224 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
3225 }
3226
3227 if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
3228 return -ENOMEDIUM;
3229 }
3230 ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
3231 if (ret) {
3232 return ret;
3233 }
3234
3235 if (!src->bs->drv->bdrv_co_copy_range_from
3236 || !dst->bs->drv->bdrv_co_copy_range_to
3237 || src->bs->encrypted || dst->bs->encrypted) {
3238 return -ENOTSUP;
3239 }
3240
3241 if (recurse_src) {
3242 bdrv_inc_in_flight(src->bs);
3243 tracked_request_begin(&req, src->bs, src_offset, bytes,
3244 BDRV_TRACKED_READ);
3245
3246
3247 assert(!(read_flags & BDRV_REQ_SERIALISING));
3248 bdrv_wait_serialising_requests(&req);
3249
3250 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
3251 src, src_offset,
3252 dst, dst_offset,
3253 bytes,
3254 read_flags, write_flags);
3255
3256 tracked_request_end(&req);
3257 bdrv_dec_in_flight(src->bs);
3258 } else {
3259 bdrv_inc_in_flight(dst->bs);
3260 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3261 BDRV_TRACKED_WRITE);
3262 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3263 write_flags);
3264 if (!ret) {
3265 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3266 src, src_offset,
3267 dst, dst_offset,
3268 bytes,
3269 read_flags, write_flags);
3270 }
3271 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3272 tracked_request_end(&req);
3273 bdrv_dec_in_flight(dst->bs);
3274 }
3275
3276 return ret;
3277}
3278
3279
3280
3281
3282
3283int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
3284 BdrvChild *dst, int64_t dst_offset,
3285 int64_t bytes,
3286 BdrvRequestFlags read_flags,
3287 BdrvRequestFlags write_flags)
3288{
3289 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3290 read_flags, write_flags);
3291 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3292 bytes, read_flags, write_flags, true);
3293}
3294
3295
3296
3297
3298
3299int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
3300 BdrvChild *dst, int64_t dst_offset,
3301 int64_t bytes,
3302 BdrvRequestFlags read_flags,
3303 BdrvRequestFlags write_flags)
3304{
3305 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3306 read_flags, write_flags);
3307 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3308 bytes, read_flags, write_flags, false);
3309}
3310
3311int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
3312 BdrvChild *dst, int64_t dst_offset,
3313 int64_t bytes, BdrvRequestFlags read_flags,
3314 BdrvRequestFlags write_flags)
3315{
3316 return bdrv_co_copy_range_from(src, src_offset,
3317 dst, dst_offset,
3318 bytes, read_flags, write_flags);
3319}
3320
3321static void bdrv_parent_cb_resize(BlockDriverState *bs)
3322{
3323 BdrvChild *c;
3324 QLIST_FOREACH(c, &bs->parents, next_parent) {
3325 if (c->klass->resize) {
3326 c->klass->resize(c);
3327 }
3328 }
3329}
3330
3331
3332
3333
3334
3335
3336
3337
/*
 * Resize the node behind @child to @offset bytes.
 *
 * @exact, @prealloc and @flags are forwarded to the driver's
 * bdrv_co_truncate callback, or to the filtered child when the driver has no
 * such callback.  BDRV_REQ_ZERO_WRITE is added to @flags here when the image
 * grows and its COW backing node is longer than the old size.
 *
 * Returns 0 on success and a negative errno value on failure.  Every failure
 * path in this function either sets @errp or passes it on to the callee.
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, BdrvRequestFlags flags,
                                  Error **errp)
{
    BlockDriverState *bs = child->bs;
    BdrvChild *filtered, *backing;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;


    /* Without a driver there is nothing that could be resized */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    ret = bdrv_check_request(offset, 0, errp);
    if (ret < 0) {
        return ret;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    /* new_bytes is the number of bytes added; 0 when shrinking or unchanged */
    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    /*
     * The tracked range is [old_size, offset) when growing, and an empty
     * range at @offset otherwise.
     */
    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /*
     * When growing, make the request serialising.
     * NOTE(review): presumably so that concurrent writes to the new area
     * cannot race with the driver initialising it - confirm.
     */
    if (new_bytes) {
        bdrv_make_request_serialising(&req, 1);
    }
    if (bs->read_only) {
        error_setg(errp, "Image is read-only");
        ret = -EACCES;
        goto out;
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    filtered = bdrv_filter_child(bs);
    backing = bdrv_cow_child(bs);

    /*
     * If the image grows and the backing node is longer than the old image
     * size, request zeroing of the new area by adding BDRV_REQ_ZERO_WRITE.
     * NOTE(review): presumably because the guest would otherwise see the
     * backing node's data in the new area - confirm.
     *
     * The flag is subject to the supported_truncate_flags check below, so a
     * driver that does not support it makes the truncate fail with -ENOTSUP.
     */
    if (new_bytes && backing) {
        int64_t backing_len;

        backing_len = bdrv_getlength(backing->bs);
        if (backing_len < 0) {
            ret = backing_len;
            error_setg_errno(errp, -ret, "Could not get backing file size");
            goto out;
        }

        if (backing_len > old_size) {
            flags |= BDRV_REQ_ZERO_WRITE;
        }
    }

    if (drv->bdrv_co_truncate) {
        if (flags & ~bs->supported_truncate_flags) {
            error_setg(errp, "Block driver does not support requested flags");
            ret = -ENOTSUP;
            goto out;
        }
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
    } else if (filtered) {
        /* No callback of our own: pass the request to the filtered child */
        ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        /* Use the refreshed size as the end offset from here on */
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }
    /*
     * The truncation itself succeeded at this point, so the request is
     * finished with 0 even if refresh_total_sectors() failed; that failure
     * is still reported to the caller through ret.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}
3463
3464void bdrv_cancel_in_flight(BlockDriverState *bs)
3465{
3466 if (!bs || !bs->drv) {
3467 return;
3468 }
3469
3470 if (bs->drv->bdrv_cancel_in_flight) {
3471 bs->drv->bdrv_cancel_in_flight(bs);
3472 }
3473}
3474