/*
 * Block layer I/O functions
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25#include "qemu/osdep.h"
26#include "trace.h"
27#include "sysemu/block-backend.h"
28#include "block/aio-wait.h"
29#include "block/blockjob.h"
30#include "block/blockjob_int.h"
31#include "block/block_int.h"
32#include "qemu/cutils.h"
33#include "qapi/error.h"
34#include "qemu/error-report.h"
35
36#define NOT_DONE 0x7fffffff
37
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
39#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
40
41static void bdrv_parent_cb_resize(BlockDriverState *bs);
42static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
43 int64_t offset, int bytes, BdrvRequestFlags flags);
44
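/*
 * Quiesce all parents of @bs except @ignore; when @ignore_bds_parents is
 * true, parents that are themselves block nodes are skipped as well.
 * This only starts the drained section, it does not poll for completion.
 */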
45void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
46 bool ignore_bds_parents)
47{
48 BdrvChild *c, *next;
49
50 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
51 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
52 continue;
53 }
54 bdrv_parent_drained_begin_single(c, false);
55 }
56}
57
58void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
59 bool ignore_bds_parents)
60{
61 BdrvChild *c, *next;
62
63 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
64 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
65 continue;
66 }
67 if (c->role->drained_end) {
68 c->role->drained_end(c);
69 }
70 }
71}
72
73static bool bdrv_parent_drained_poll_single(BdrvChild *c)
74{
75 if (c->role->drained_poll) {
76 return c->role->drained_poll(c);
77 }
78 return false;
79}
80
81static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
82 bool ignore_bds_parents)
83{
84 BdrvChild *c, *next;
85 bool busy = false;
86
87 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
88 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
89 continue;
90 }
91 busy |= bdrv_parent_drained_poll_single(c);
92 }
93
94 return busy;
95}
96
97void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
98{
99 if (c->role->drained_begin) {
100 c->role->drained_begin(c);
101 }
102 if (poll) {
103 BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
104 }
105}
106
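/*
 * Fold a child's limits into @dst: optimal transfer size and alignments
 * take the larger value, maximum transfer size and max_iov take the
 * smaller non-zero value.
 */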
107static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
108{
109 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
110 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
111 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
112 src->opt_mem_alignment);
113 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
114 src->min_mem_alignment);
115 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
116}
117
118void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
119{
120 BlockDriver *drv = bs->drv;
121 Error *local_err = NULL;
122
123 memset(&bs->bl, 0, sizeof(bs->bl));
124
125 if (!drv) {
126 return;
127 }
128
    /* Default alignment based on whether driver has byte interface */
130 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
131 drv->bdrv_aio_preadv) ? 1 : 512;
132
    /* Take some limits from the children as a default */
134 if (bs->file) {
135 bdrv_refresh_limits(bs->file->bs, &local_err);
136 if (local_err) {
137 error_propagate(errp, local_err);
138 return;
139 }
140 bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
141 } else {
142 bs->bl.min_mem_alignment = 512;
143 bs->bl.opt_mem_alignment = getpagesize();
144
        /* Safe default since most protocols use readv()/writev()/etc */
146 bs->bl.max_iov = IOV_MAX;
147 }
148
149 if (bs->backing) {
150 bdrv_refresh_limits(bs->backing->bs, &local_err);
151 if (local_err) {
152 error_propagate(errp, local_err);
153 return;
154 }
155 bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
156 }
157
    /* Then let the driver override it */
159 if (drv->bdrv_refresh_limits) {
160 drv->bdrv_refresh_limits(bs, errp);
161 }
162}
163
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
169void bdrv_enable_copy_on_read(BlockDriverState *bs)
170{
171 atomic_inc(&bs->copy_on_read);
172}
173
174void bdrv_disable_copy_on_read(BlockDriverState *bs)
175{
176 int old = atomic_fetch_dec(&bs->copy_on_read);
177 assert(old >= 1);
178}
179
180typedef struct {
181 Coroutine *co;
182 BlockDriverState *bs;
183 bool done;
184 bool begin;
185 bool recursive;
186 bool poll;
187 BdrvChild *parent;
188 bool ignore_bds_parents;
189} BdrvCoDrainData;
190
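/*
 * Coroutine entry point for bdrv_drain_invoke(): run the driver's
 * .bdrv_co_drain_begin or .bdrv_co_drain_end callback and signal
 * completion through data->done.
 */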
191static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
192{
193 BdrvCoDrainData *data = opaque;
194 BlockDriverState *bs = data->bs;
195
196 if (data->begin) {
197 bs->drv->bdrv_co_drain_begin(bs);
198 } else {
199 bs->drv->bdrv_co_drain_end(bs);
200 }
201
    /* Set data->done before reading bs->wakeup.  */
203 atomic_mb_set(&data->done, true);
204 bdrv_dec_in_flight(bs);
205
206 if (data->begin) {
207 g_free(data);
208 }
209}
210
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
212static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
213{
214 BdrvCoDrainData *data;
215
216 if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
217 (!begin && !bs->drv->bdrv_co_drain_end)) {
218 return;
219 }
220
221 data = g_new(BdrvCoDrainData, 1);
222 *data = (BdrvCoDrainData) {
223 .bs = bs,
224 .done = false,
225 .begin = begin
226 };
227
    /* Make sure the driver callback completes during the polling phase for
     * drain_begin. */
230 bdrv_inc_in_flight(bs);
231 data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
232 aio_co_schedule(bdrv_get_aio_context(bs), data->co);
233
234 if (!begin) {
235 BDRV_POLL_WHILE(bs, !data->done);
236 g_free(data);
237 }
238}
239
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
241bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
242 BdrvChild *ignore_parent, bool ignore_bds_parents)
243{
244 BdrvChild *child, *next;
245
246 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
247 return true;
248 }
249
250 if (atomic_read(&bs->in_flight)) {
251 return true;
252 }
253
254 if (recursive) {
255 assert(!ignore_bds_parents);
256 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
257 if (bdrv_drain_poll(child->bs, recursive, child, false)) {
258 return true;
259 }
260 }
261 }
262
263 return false;
264}
265
266static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
267 BdrvChild *ignore_parent)
268{
269 return bdrv_drain_poll(bs, recursive, ignore_parent, false);
270}
271
272static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
273 BdrvChild *parent, bool ignore_bds_parents,
274 bool poll);
275static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
276 BdrvChild *parent, bool ignore_bds_parents);
277
278static void bdrv_co_drain_bh_cb(void *opaque)
279{
280 BdrvCoDrainData *data = opaque;
281 Coroutine *co = data->co;
282 BlockDriverState *bs = data->bs;
283
284 if (bs) {
285 AioContext *ctx = bdrv_get_aio_context(bs);
286 AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
287
288
289
290
291
292
293
294 if (ctx == co_ctx) {
295 aio_context_acquire(ctx);
296 }
297 bdrv_dec_in_flight(bs);
298 if (data->begin) {
299 bdrv_do_drained_begin(bs, data->recursive, data->parent,
300 data->ignore_bds_parents, data->poll);
301 } else {
302 bdrv_do_drained_end(bs, data->recursive, data->parent,
303 data->ignore_bds_parents);
304 }
305 if (ctx == co_ctx) {
306 aio_context_release(ctx);
307 }
308 } else {
309 assert(data->begin);
310 bdrv_drain_all_begin();
311 }
312
313 data->done = true;
314 aio_co_wake(co);
315}
316
317static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
318 bool begin, bool recursive,
319 BdrvChild *parent,
320 bool ignore_bds_parents,
321 bool poll)
322{
323 BdrvCoDrainData data;
324
    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter(). */

328 assert(qemu_in_coroutine());
329 data = (BdrvCoDrainData) {
330 .co = qemu_coroutine_self(),
331 .bs = bs,
332 .done = false,
333 .begin = begin,
334 .recursive = recursive,
335 .parent = parent,
336 .ignore_bds_parents = ignore_bds_parents,
337 .poll = poll,
338 };
339 if (bs) {
340 bdrv_inc_in_flight(bs);
341 }
342 aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
343 bdrv_co_drain_bh_cb, &data);
344
345 qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
348 assert(data.done);
349}
350
351void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
352 BdrvChild *parent, bool ignore_bds_parents)
353{
354 assert(!qemu_in_coroutine());
355
    /* Stop things in parent-to-child order */
357 if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
358 aio_disable_external(bdrv_get_aio_context(bs));
359 }
360
361 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
362 bdrv_drain_invoke(bs, true);
363}
364
365static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
366 BdrvChild *parent, bool ignore_bds_parents,
367 bool poll)
368{
369 BdrvChild *child, *next;
370
371 if (qemu_in_coroutine()) {
372 bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
373 poll);
374 return;
375 }
376
377 bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
378
379 if (recursive) {
380 assert(!ignore_bds_parents);
381 bs->recursive_quiesce_counter++;
382 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
383 bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
384 false);
385 }
386 }
387
    /*
     * Wait for drained requests to finish.
     *
     * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: the
     * call is needed so things in this AioContext can make progress even
     * though we don't return to the main AioContext loop - this automatically
     * includes other nodes in the same AioContext and therefore all child
     * nodes.
     */
397 if (poll) {
398 assert(!ignore_bds_parents);
399 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
400 }
401}
402
403void bdrv_drained_begin(BlockDriverState *bs)
404{
405 bdrv_do_drained_begin(bs, false, NULL, false, true);
406}
407
408void bdrv_subtree_drained_begin(BlockDriverState *bs)
409{
410 bdrv_do_drained_begin(bs, true, NULL, false, true);
411}
412
413static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
414 BdrvChild *parent, bool ignore_bds_parents)
415{
416 BdrvChild *child, *next;
417 int old_quiesce_counter;
418
419 if (qemu_in_coroutine()) {
420 bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
421 false);
422 return;
423 }
424 assert(bs->quiesce_counter > 0);
425 old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
426
    /* Re-enable things in child-to-parent order */
428 bdrv_drain_invoke(bs, false);
429 bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
430 if (old_quiesce_counter == 1) {
431 aio_enable_external(bdrv_get_aio_context(bs));
432 }
433
434 if (recursive) {
435 assert(!ignore_bds_parents);
436 bs->recursive_quiesce_counter--;
437 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
438 bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
439 }
440 }
441}
442
443void bdrv_drained_end(BlockDriverState *bs)
444{
445 bdrv_do_drained_end(bs, false, NULL, false);
446}
447
448void bdrv_subtree_drained_end(BlockDriverState *bs)
449{
450 bdrv_do_drained_end(bs, true, NULL, false);
451}
452
453void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
454{
455 int i;
456
457 for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
458 bdrv_do_drained_begin(child->bs, true, child, false, true);
459 }
460}
461
462void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
463{
464 int i;
465
466 for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
467 bdrv_do_drained_end(child->bs, true, child, false);
468 }
469}
470
/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
 * AioContext.
 */
478void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
479{
480 assert(qemu_in_coroutine());
481 bdrv_drained_begin(bs);
482 bdrv_drained_end(bs);
483}
484
485void bdrv_drain(BlockDriverState *bs)
486{
487 bdrv_drained_begin(bs);
488 bdrv_drained_end(bs);
489}
490
491static void bdrv_drain_assert_idle(BlockDriverState *bs)
492{
493 BdrvChild *child, *next;
494
495 assert(atomic_read(&bs->in_flight) == 0);
496 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
497 bdrv_drain_assert_idle(child->bs);
498 }
499}
500
501unsigned int bdrv_drain_all_count = 0;
502
503static bool bdrv_drain_all_poll(void)
504{
505 BlockDriverState *bs = NULL;
506 bool result = false;
507
    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
510 while ((bs = bdrv_next_all_states(bs))) {
511 AioContext *aio_context = bdrv_get_aio_context(bs);
512 aio_context_acquire(aio_context);
513 result |= bdrv_drain_poll(bs, false, NULL, true);
514 aio_context_release(aio_context);
515 }
516
517 return result;
518}
519
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
532void bdrv_drain_all_begin(void)
533{
534 BlockDriverState *bs = NULL;
535
536 if (qemu_in_coroutine()) {
537 bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
538 return;
539 }
540
    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
543 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
544 assert(bdrv_drain_all_count < INT_MAX);
545 bdrv_drain_all_count++;
546
    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
549 while ((bs = bdrv_next_all_states(bs))) {
550 AioContext *aio_context = bdrv_get_aio_context(bs);
551
552 aio_context_acquire(aio_context);
553 bdrv_do_drained_begin(bs, false, NULL, true, false);
554 aio_context_release(aio_context);
555 }
556
    /* Now poll the in-flight requests */
558 AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
559
560 while ((bs = bdrv_next_all_states(bs))) {
561 bdrv_drain_assert_idle(bs);
562 }
563}
564
565void bdrv_drain_all_end(void)
566{
567 BlockDriverState *bs = NULL;
568
569 while ((bs = bdrv_next_all_states(bs))) {
570 AioContext *aio_context = bdrv_get_aio_context(bs);
571
572 aio_context_acquire(aio_context);
573 bdrv_do_drained_end(bs, false, NULL, true);
574 aio_context_release(aio_context);
575 }
576
577 assert(bdrv_drain_all_count > 0);
578 bdrv_drain_all_count--;
579}
580
581void bdrv_drain_all(void)
582{
583 bdrv_drain_all_begin();
584 bdrv_drain_all_end();
585}
586
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
592static void tracked_request_end(BdrvTrackedRequest *req)
593{
594 if (req->serialising) {
595 atomic_dec(&req->bs->serialising_in_flight);
596 }
597
598 qemu_co_mutex_lock(&req->bs->reqs_lock);
599 QLIST_REMOVE(req, list);
600 qemu_co_queue_restart_all(&req->wait_queue);
601 qemu_co_mutex_unlock(&req->bs->reqs_lock);
602}
603
/**
 * Add an active request to the tracked requests list
 */
607static void tracked_request_begin(BdrvTrackedRequest *req,
608 BlockDriverState *bs,
609 int64_t offset,
610 uint64_t bytes,
611 enum BdrvTrackedRequestType type)
612{
613 assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
614
615 *req = (BdrvTrackedRequest){
616 .bs = bs,
617 .offset = offset,
618 .bytes = bytes,
619 .type = type,
620 .co = qemu_coroutine_self(),
621 .serialising = false,
622 .overlap_offset = offset,
623 .overlap_bytes = bytes,
624 };
625
626 qemu_co_queue_init(&req->wait_queue);
627
628 qemu_co_mutex_lock(&bs->reqs_lock);
629 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
630 qemu_co_mutex_unlock(&bs->reqs_lock);
631}
632
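/*
 * Mark @req as serialising and widen its overlap window to @align
 * boundaries, so that overlapping requests are detected at that
 * granularity.
 */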
633static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
634{
635 int64_t overlap_offset = req->offset & ~(align - 1);
636 uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
637 - overlap_offset;
638
639 if (!req->serialising) {
640 atomic_inc(&req->bs->serialising_in_flight);
641 req->serialising = true;
642 }
643
644 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
645 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
646}
647
648static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
649{
650
651
652
653
654
655
656 return req->serialising && (req->offset == req->overlap_offset) &&
657 (req->bytes == req->overlap_bytes);
658}
659
/**
 * Round a region to cluster boundaries
 */
663void bdrv_round_to_clusters(BlockDriverState *bs,
664 int64_t offset, int64_t bytes,
665 int64_t *cluster_offset,
666 int64_t *cluster_bytes)
667{
668 BlockDriverInfo bdi;
669
670 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
671 *cluster_offset = offset;
672 *cluster_bytes = bytes;
673 } else {
674 int64_t c = bdi.cluster_size;
675 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
676 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
677 }
678}
679
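/*
 * Return the cluster size of @bs, falling back to the request alignment
 * when the driver does not report a cluster size.
 */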
680static int bdrv_get_cluster_size(BlockDriverState *bs)
681{
682 BlockDriverInfo bdi;
683 int ret;
684
685 ret = bdrv_get_info(bs, &bdi);
686 if (ret < 0 || bdi.cluster_size == 0) {
687 return bs->bl.request_alignment;
688 } else {
689 return bdi.cluster_size;
690 }
691}
692
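/* Return true if [offset, offset + bytes) intersects the overlap window
 * of @req. */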
693static bool tracked_request_overlaps(BdrvTrackedRequest *req,
694 int64_t offset, uint64_t bytes)
695{
    /* The queried range starts at or after the end of this request's window */
697 if (offset >= req->overlap_offset + req->overlap_bytes) {
698 return false;
699 }
    /* This request's window starts at or after the end of the queried range */
701 if (req->overlap_offset >= offset + bytes) {
702 return false;
703 }
704 return true;
705}
706
707void bdrv_inc_in_flight(BlockDriverState *bs)
708{
709 atomic_inc(&bs->in_flight);
710}
711
712void bdrv_wakeup(BlockDriverState *bs)
713{
714 aio_wait_kick();
715}
716
717void bdrv_dec_in_flight(BlockDriverState *bs)
718{
719 atomic_dec(&bs->in_flight);
720 bdrv_wakeup(bs);
721}
722
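/*
 * Wait until no serialising request overlaps with @self any more.
 * Returns true if this coroutine actually had to wait.
 */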
723static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
724{
725 BlockDriverState *bs = self->bs;
726 BdrvTrackedRequest *req;
727 bool retry;
728 bool waited = false;
729
730 if (!atomic_read(&bs->serialising_in_flight)) {
731 return false;
732 }
733
734 do {
735 retry = false;
736 qemu_co_mutex_lock(&bs->reqs_lock);
737 QLIST_FOREACH(req, &bs->tracked_requests, list) {
738 if (req == self || (!req->serialising && !self->serialising)) {
739 continue;
740 }
741 if (tracked_request_overlaps(req, self->overlap_offset,
742 self->overlap_bytes))
743 {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
748 assert(qemu_coroutine_self() != req->co);
749
                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
753 if (!req->waiting_for) {
754 self->waiting_for = req;
755 qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
756 self->waiting_for = NULL;
757 retry = true;
758 waited = true;
759 break;
760 }
761 }
762 }
763 qemu_co_mutex_unlock(&bs->reqs_lock);
764 } while (retry);
765
766 return waited;
767}
768
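/*
 * Basic sanity checks for a byte-granularity request: reject oversized or
 * negative requests, and requests against a device with no medium.
 */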
769static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
770 size_t size)
771{
772 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
773 return -EIO;
774 }
775
776 if (!bdrv_is_inserted(bs)) {
777 return -ENOMEDIUM;
778 }
779
780 if (offset < 0) {
781 return -EIO;
782 }
783
784 return 0;
785}
786
787typedef struct RwCo {
788 BdrvChild *child;
789 int64_t offset;
790 QEMUIOVector *qiov;
791 bool is_write;
792 int ret;
793 BdrvRequestFlags flags;
794} RwCo;
795
796static void coroutine_fn bdrv_rw_co_entry(void *opaque)
797{
798 RwCo *rwco = opaque;
799
800 if (!rwco->is_write) {
801 rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
802 rwco->qiov->size, rwco->qiov,
803 rwco->flags);
804 } else {
805 rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
806 rwco->qiov->size, rwco->qiov,
807 rwco->flags);
808 }
809 aio_wait_kick();
810}
811
/*
 * Process a vectored synchronous request using coroutines
 */
815static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
816 QEMUIOVector *qiov, bool is_write,
817 BdrvRequestFlags flags)
818{
819 Coroutine *co;
820 RwCo rwco = {
821 .child = child,
822 .offset = offset,
823 .qiov = qiov,
824 .is_write = is_write,
825 .ret = NOT_DONE,
826 .flags = flags,
827 };
828
829 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
831 bdrv_rw_co_entry(&rwco);
832 } else {
833 co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
834 bdrv_coroutine_enter(child->bs, co);
835 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
836 }
837 return rwco.ret;
838}
839
/*
 * Process a synchronous request using coroutines
 */
843static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
844 int nb_sectors, bool is_write, BdrvRequestFlags flags)
845{
846 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf,
847 nb_sectors * BDRV_SECTOR_SIZE);
848
849 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
850 return -EINVAL;
851 }
852
853 return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
854 &qiov, is_write, flags);
855}
856
/* return < 0 if error. See bdrv_write() for the return codes */
858int bdrv_read(BdrvChild *child, int64_t sector_num,
859 uint8_t *buf, int nb_sectors)
860{
861 return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
862}
863
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
870int bdrv_write(BdrvChild *child, int64_t sector_num,
871 const uint8_t *buf, int nb_sectors)
872{
873 return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
874}
875
876int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
877 int bytes, BdrvRequestFlags flags)
878{
879 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
880
881 return bdrv_prwv_co(child, offset, &qiov, true,
882 BDRV_REQ_ZERO_WRITE | flags);
883}
884
/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
894int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
895{
896 int ret;
897 int64_t target_size, bytes, offset = 0;
898 BlockDriverState *bs = child->bs;
899
900 target_size = bdrv_getlength(bs);
901 if (target_size < 0) {
902 return target_size;
903 }
904
905 for (;;) {
906 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
907 if (bytes <= 0) {
908 return 0;
909 }
910 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
911 if (ret < 0) {
912 return ret;
913 }
914 if (ret & BDRV_BLOCK_ZERO) {
915 offset += bytes;
916 continue;
917 }
918 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
919 if (ret < 0) {
920 return ret;
921 }
922 offset += bytes;
923 }
924}
925
926int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
927{
928 int ret;
929
930 ret = bdrv_prwv_co(child, offset, qiov, false, 0);
931 if (ret < 0) {
932 return ret;
933 }
934
935 return qiov->size;
936}
937
938int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
939{
940 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
941
942 if (bytes < 0) {
943 return -EINVAL;
944 }
945
946 return bdrv_preadv(child, offset, &qiov);
947}
948
949int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
950{
951 int ret;
952
953 ret = bdrv_prwv_co(child, offset, qiov, true, 0);
954 if (ret < 0) {
955 return ret;
956 }
957
958 return qiov->size;
959}
960
961int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
962{
963 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
964
965 if (bytes < 0) {
966 return -EINVAL;
967 }
968
969 return bdrv_pwritev(child, offset, &qiov);
970}
971
/*
 * Writes to the file and ensures that no writes are reordered across this
 * point in time after the write.
 *
 * Returns 0 on success, -errno in error cases.
 */
978int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
979 const void *buf, int count)
980{
981 int ret;
982
983 ret = bdrv_pwrite(child, offset, buf, count);
984 if (ret < 0) {
985 return ret;
986 }
987
988 ret = bdrv_flush(child->bs);
989 if (ret < 0) {
990 return ret;
991 }
992
993 return 0;
994}
995
996typedef struct CoroutineIOCompletion {
997 Coroutine *coroutine;
998 int ret;
999} CoroutineIOCompletion;
1000
1001static void bdrv_co_io_em_complete(void *opaque, int ret)
1002{
1003 CoroutineIOCompletion *co = opaque;
1004
1005 co->ret = ret;
1006 aio_co_wake(co->coroutine);
1007}
1008
1009static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1010 uint64_t offset, uint64_t bytes,
1011 QEMUIOVector *qiov, int flags)
1012{
1013 BlockDriver *drv = bs->drv;
1014 int64_t sector_num;
1015 unsigned int nb_sectors;
1016
1017 assert(!(flags & ~BDRV_REQ_MASK));
1018 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1019
1020 if (!drv) {
1021 return -ENOMEDIUM;
1022 }
1023
1024 if (drv->bdrv_co_preadv) {
1025 return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1026 }
1027
1028 if (drv->bdrv_aio_preadv) {
1029 BlockAIOCB *acb;
1030 CoroutineIOCompletion co = {
1031 .coroutine = qemu_coroutine_self(),
1032 };
1033
1034 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1035 bdrv_co_io_em_complete, &co);
1036 if (acb == NULL) {
1037 return -EIO;
1038 } else {
1039 qemu_coroutine_yield();
1040 return co.ret;
1041 }
1042 }
1043
1044 sector_num = offset >> BDRV_SECTOR_BITS;
1045 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1046
1047 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1048 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1049 assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
1050 assert(drv->bdrv_co_readv);
1051
1052 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1053}
1054
1055static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1056 uint64_t offset, uint64_t bytes,
1057 QEMUIOVector *qiov, int flags)
1058{
1059 BlockDriver *drv = bs->drv;
1060 int64_t sector_num;
1061 unsigned int nb_sectors;
1062 int ret;
1063
1064 assert(!(flags & ~BDRV_REQ_MASK));
1065 assert(!(flags & BDRV_REQ_NO_FALLBACK));
1066
1067 if (!drv) {
1068 return -ENOMEDIUM;
1069 }
1070
1071 if (drv->bdrv_co_pwritev) {
1072 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1073 flags & bs->supported_write_flags);
1074 flags &= ~bs->supported_write_flags;
1075 goto emulate_flags;
1076 }
1077
1078 if (drv->bdrv_aio_pwritev) {
1079 BlockAIOCB *acb;
1080 CoroutineIOCompletion co = {
1081 .coroutine = qemu_coroutine_self(),
1082 };
1083
1084 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1085 flags & bs->supported_write_flags,
1086 bdrv_co_io_em_complete, &co);
1087 flags &= ~bs->supported_write_flags;
1088 if (acb == NULL) {
1089 ret = -EIO;
1090 } else {
1091 qemu_coroutine_yield();
1092 ret = co.ret;
1093 }
1094 goto emulate_flags;
1095 }
1096
1097 sector_num = offset >> BDRV_SECTOR_BITS;
1098 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1099
1100 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1101 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1102 assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
1103
1104 assert(drv->bdrv_co_writev);
1105 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1106 flags & bs->supported_write_flags);
1107 flags &= ~bs->supported_write_flags;
1108
1109emulate_flags:
1110 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1111 ret = bdrv_co_flush(bs);
1112 }
1113
1114 return ret;
1115}
1116
1117static int coroutine_fn
1118bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
1119 uint64_t bytes, QEMUIOVector *qiov)
1120{
1121 BlockDriver *drv = bs->drv;
1122
1123 if (!drv) {
1124 return -ENOMEDIUM;
1125 }
1126
1127 if (!drv->bdrv_co_pwritev_compressed) {
1128 return -ENOTSUP;
1129 }
1130
1131 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1132}
1133
1134static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1135 int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
1136{
1137 BlockDriverState *bs = child->bs;
1138
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
1144 void *bounce_buffer;
1145
1146 BlockDriver *drv = bs->drv;
1147 QEMUIOVector local_qiov;
1148 int64_t cluster_offset;
1149 int64_t cluster_bytes;
1150 size_t skip_bytes;
1151 int ret;
1152 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1153 BDRV_REQUEST_MAX_BYTES);
1154 unsigned int progress = 0;
1155
1156 if (!drv) {
1157 return -ENOMEDIUM;
1158 }
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file. */
1174 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1175 skip_bytes = offset - cluster_offset;
1176
1177 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1178 cluster_offset, cluster_bytes);
1179
1180 bounce_buffer = qemu_try_blockalign(bs,
1181 MIN(MIN(max_transfer, cluster_bytes),
1182 MAX_BOUNCE_BUFFER));
1183 if (bounce_buffer == NULL) {
1184 ret = -ENOMEM;
1185 goto err;
1186 }
1187
1188 while (cluster_bytes) {
1189 int64_t pnum;
1190
1191 ret = bdrv_is_allocated(bs, cluster_offset,
1192 MIN(cluster_bytes, max_transfer), &pnum);
1193 if (ret < 0) {
            /* Safe to treat errors in querying allocation as if
             * unallocated; we'll probably fail again soon on the
             * read, but at least that will set a decent errno.
             */
1198 pnum = MIN(cluster_bytes, max_transfer);
1199 }
1200
        /* Stop at EOF if the image ends in the middle of the cluster */
1202 if (ret == 0 && pnum == 0) {
1203 assert(progress >= bytes);
1204 break;
1205 }
1206
1207 assert(skip_bytes < pnum);
1208
1209 if (ret <= 0) {
            /* Must copy-on-read; use the bounce buffer */
1211 pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1212 qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
1213
1214 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1215 &local_qiov, 0);
1216 if (ret < 0) {
1217 goto err;
1218 }
1219
1220 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1221 if (drv->bdrv_co_pwrite_zeroes &&
1222 buffer_is_zero(bounce_buffer, pnum)) {
                /* The whole bounce buffer reads back as zero, so write it
                 * out as zeroes rather than copying the data. */
1226 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1227 BDRV_REQ_WRITE_UNCHANGED);
1228 } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
1232 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1233 &local_qiov,
1234 BDRV_REQ_WRITE_UNCHANGED);
1235 }
1236
1237 if (ret < 0) {
                /* Do not ignore the write error: this may be a deliberate
                 * copy-on-read request, so report the failure to the caller
                 * in all cases. */
1243 goto err;
1244 }
1245
1246 qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
1247 pnum - skip_bytes);
1248 } else {
            /* Read directly into the destination */
1250 qemu_iovec_init(&local_qiov, qiov->niov);
1251 qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
1252 ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
1253 &local_qiov, 0);
1254 qemu_iovec_destroy(&local_qiov);
1255 if (ret < 0) {
1256 goto err;
1257 }
1258 }
1259
1260 cluster_offset += pnum;
1261 cluster_bytes -= pnum;
1262 progress += pnum - skip_bytes;
1263 skip_bytes = 0;
1264 }
1265 ret = 0;
1266
1267err:
1268 qemu_vfree(bounce_buffer);
1269 return ret;
1270}
1271
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
1277static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1278 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1279 int64_t align, QEMUIOVector *qiov, int flags)
1280{
1281 BlockDriverState *bs = child->bs;
1282 int64_t total_bytes, max_bytes;
1283 int ret = 0;
1284 uint64_t bytes_remaining = bytes;
1285 int max_transfer;
1286
1287 assert(is_power_of_2(align));
1288 assert((offset & (align - 1)) == 0);
1289 assert((bytes & (align - 1)) == 0);
1290 assert(!qiov || bytes == qiov->size);
1291 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1292 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1293 align);
1294
1295
1296
1297
1298
1299 assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
1300
    /* Handle Copy on Read and associated serialisation */
1302 if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
1308 mark_request_serialising(req, bdrv_get_cluster_size(bs));
1309 }
1310
    /* BDRV_REQ_SERIALISING is only for write operation */
1312 assert(!(flags & BDRV_REQ_SERIALISING));
1313
1314 if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1315 wait_serialising_requests(req);
1316 }
1317
1318 if (flags & BDRV_REQ_COPY_ON_READ) {
1319 int64_t pnum;
1320
1321 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1322 if (ret < 0) {
1323 goto out;
1324 }
1325
1326 if (!ret || pnum != bytes) {
1327 ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
1328 goto out;
1329 }
1330 }
1331
    /* Forward the request to the BlockDriver, possibly fragmenting it */
1333 total_bytes = bdrv_getlength(bs);
1334 if (total_bytes < 0) {
1335 ret = total_bytes;
1336 goto out;
1337 }
1338
1339 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1340 if (bytes <= max_bytes && bytes <= max_transfer) {
1341 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
1342 goto out;
1343 }
1344
1345 while (bytes_remaining) {
1346 int num;
1347
1348 if (max_bytes) {
1349 QEMUIOVector local_qiov;
1350
1351 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1352 assert(num);
1353 qemu_iovec_init(&local_qiov, qiov->niov);
1354 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1355
1356 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1357 num, &local_qiov, 0);
1358 max_bytes -= num;
1359 qemu_iovec_destroy(&local_qiov);
1360 } else {
1361 num = bytes_remaining;
1362 ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
1363 bytes_remaining);
1364 }
1365 if (ret < 0) {
1366 goto out;
1367 }
1368 bytes_remaining -= num;
1369 }
1370
1371out:
1372 return ret < 0 ? ret : 0;
1373}
1374
/*
 * Handle a read request in coroutine context
 */
1378int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1379 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1380 BdrvRequestFlags flags)
1381{
1382 BlockDriverState *bs = child->bs;
1383 BlockDriver *drv = bs->drv;
1384 BdrvTrackedRequest req;
1385
1386 uint64_t align = bs->bl.request_alignment;
1387 uint8_t *head_buf = NULL;
1388 uint8_t *tail_buf = NULL;
1389 QEMUIOVector local_qiov;
1390 bool use_local_qiov = false;
1391 int ret;
1392
1393 trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
1394
1395 if (!drv) {
1396 return -ENOMEDIUM;
1397 }
1398
1399 ret = bdrv_check_byte_request(bs, offset, bytes);
1400 if (ret < 0) {
1401 return ret;
1402 }
1403
1404 bdrv_inc_in_flight(bs);
1405
    /* Don't do copy-on-read if we read data before write operation */
1407 if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
1408 flags |= BDRV_REQ_COPY_ON_READ;
1409 }
1410
    /* Align read if necessary by padding qiov */
1412 if (offset & (align - 1)) {
1413 head_buf = qemu_blockalign(bs, align);
1414 qemu_iovec_init(&local_qiov, qiov->niov + 2);
1415 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
1416 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1417 use_local_qiov = true;
1418
1419 bytes += offset & (align - 1);
1420 offset = offset & ~(align - 1);
1421 }
1422
1423 if ((offset + bytes) & (align - 1)) {
1424 if (!use_local_qiov) {
1425 qemu_iovec_init(&local_qiov, qiov->niov + 1);
1426 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1427 use_local_qiov = true;
1428 }
1429 tail_buf = qemu_blockalign(bs, align);
1430 qemu_iovec_add(&local_qiov, tail_buf,
1431 align - ((offset + bytes) & (align - 1)));
1432
1433 bytes = ROUND_UP(bytes, align);
1434 }
1435
1436 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1437 ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
1438 use_local_qiov ? &local_qiov : qiov,
1439 flags);
1440 tracked_request_end(&req);
1441 bdrv_dec_in_flight(bs);
1442
1443 if (use_local_qiov) {
1444 qemu_iovec_destroy(&local_qiov);
1445 qemu_vfree(head_buf);
1446 qemu_vfree(tail_buf);
1447 }
1448
1449 return ret;
1450}
1451
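/*
 * Write zeroes to [offset, offset + bytes), preferring the driver's
 * efficient .bdrv_co_pwrite_zeroes callback and falling back to writing an
 * explicit zeroed bounce buffer unless BDRV_REQ_NO_FALLBACK forbids it.
 */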
1452static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1453 int64_t offset, int bytes, BdrvRequestFlags flags)
1454{
1455 BlockDriver *drv = bs->drv;
1456 QEMUIOVector qiov;
1457 void *buf = NULL;
1458 int ret = 0;
1459 bool need_flush = false;
1460 int head = 0;
1461 int tail = 0;
1462
1463 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
1464 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1465 bs->bl.request_alignment);
1466 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1467
1468 if (!drv) {
1469 return -ENOMEDIUM;
1470 }
1471
1472 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
1473 return -ENOTSUP;
1474 }
1475
1476 assert(alignment % bs->bl.request_alignment == 0);
1477 head = offset % alignment;
1478 tail = (offset + bytes) % alignment;
1479 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1480 assert(max_write_zeroes >= bs->bl.request_alignment);
1481
1482 while (bytes > 0 && !ret) {
1483 int num = bytes;
1484
        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
1489 if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
1493 num = MIN(MIN(bytes, max_transfer), alignment - head);
1494 head = (head + num) % alignment;
1495 assert(num < max_write_zeroes);
1496 } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
1498 num -= tail;
1499 }
1500
        /* limit request size */
1502 if (num > max_write_zeroes) {
1503 num = max_write_zeroes;
1504 }
1505
1506 ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
1508 if (drv->bdrv_co_pwrite_zeroes) {
1509 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1510 flags & bs->supported_zero_flags);
1511 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1512 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1513 need_flush = true;
1514 }
1515 } else {
1516 assert(!bs->supported_zero_flags);
1517 }
1518
1519 if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
1521 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1522
1523 if ((flags & BDRV_REQ_FUA) &&
1524 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush on each chunk; use just one at the end */
1527 write_flags &= ~BDRV_REQ_FUA;
1528 need_flush = true;
1529 }
1530 num = MIN(num, max_transfer);
1531 if (buf == NULL) {
1532 buf = qemu_try_blockalign0(bs, num);
1533 if (buf == NULL) {
1534 ret = -ENOMEM;
1535 goto fail;
1536 }
1537 }
1538 qemu_iovec_init_buf(&qiov, buf, num);
1539
1540 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
1541
            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
1545 if (num < max_transfer) {
1546 qemu_vfree(buf);
1547 buf = NULL;
1548 }
1549 }
1550
1551 offset += num;
1552 bytes -= num;
1553 }
1554
1555fail:
1556 if (ret == 0 && need_flush) {
1557 ret = bdrv_co_flush(bs);
1558 }
1559 qemu_vfree(buf);
1560 return ret;
1561}
1562
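/*
 * Common preparation for tracked write-like requests (writes, discards,
 * truncates): permission and flag checks, optional serialisation, and, for
 * writes and discards, running the before-write notifiers.
 */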
1563static inline int coroutine_fn
1564bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
1565 BdrvTrackedRequest *req, int flags)
1566{
1567 BlockDriverState *bs = child->bs;
1568 bool waited;
1569 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1570
1571 if (bs->read_only) {
1572 return -EPERM;
1573 }
1574
    /* BDRV_REQ_NO_SERIALISING is only for read operation */
1576 assert(!(flags & BDRV_REQ_NO_SERIALISING));
1577 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1578 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1579 assert(!(flags & ~BDRV_REQ_MASK));
1580
1581 if (flags & BDRV_REQ_SERIALISING) {
1582 mark_request_serialising(req, bdrv_get_cluster_size(bs));
1583 }
1584
1585 waited = wait_serialising_requests(req);
1586
1587 assert(!waited || !req->serialising ||
1588 is_request_serialising_and_aligned(req));
1589 assert(req->overlap_offset <= offset);
1590 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1591 assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
1592
1593 switch (req->type) {
1594 case BDRV_TRACKED_WRITE:
1595 case BDRV_TRACKED_DISCARD:
1596 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1597 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1598 } else {
1599 assert(child->perm & BLK_PERM_WRITE);
1600 }
1601 return notifier_with_return_list_notify(&bs->before_write_notifiers,
1602 req);
1603 case BDRV_TRACKED_TRUNCATE:
1604 assert(child->perm & BLK_PERM_RESIZE);
1605 return 0;
1606 default:
1607 abort();
1608 }
1609}
1610
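/*
 * Common completion path for tracked write-like requests: bump the write
 * generation, update bs->total_sectors after a truncate or a write past
 * the old end of the image, and update dirty bitmaps and write statistics.
 */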
1611static inline void coroutine_fn
1612bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
1613 BdrvTrackedRequest *req, int ret)
1614{
1615 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1616 BlockDriverState *bs = child->bs;
1617
1618 atomic_inc(&bs->write_gen);
    /*
     * On success, update bs->total_sectors and notify parents of the resize
     * when a truncate completed or a write extended the image.  Discards are
     * excluded: a discarded range may reach past the current end of the
     * image, but a discard never grows it.
     */
1627 if (ret == 0 &&
1628 (req->type == BDRV_TRACKED_TRUNCATE ||
1629 end_sector > bs->total_sectors) &&
1630 req->type != BDRV_TRACKED_DISCARD) {
1631 bs->total_sectors = end_sector;
1632 bdrv_parent_cb_resize(bs);
1633 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
1634 }
1635 if (req->bytes) {
1636 switch (req->type) {
1637 case BDRV_TRACKED_WRITE:
1638 stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
1640 case BDRV_TRACKED_DISCARD:
1641 bdrv_set_dirty(bs, offset, bytes);
1642 break;
1643 default:
1644 break;
1645 }
1646 }
1647}
1648
/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
1653static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
1654 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1655 int64_t align, QEMUIOVector *qiov, int flags)
1656{
1657 BlockDriverState *bs = child->bs;
1658 BlockDriver *drv = bs->drv;
1659 int ret;
1660
1661 uint64_t bytes_remaining = bytes;
1662 int max_transfer;
1663
1664 if (!drv) {
1665 return -ENOMEDIUM;
1666 }
1667
1668 if (bdrv_has_readonly_bitmaps(bs)) {
1669 return -EPERM;
1670 }
1671
1672 assert(is_power_of_2(align));
1673 assert((offset & (align - 1)) == 0);
1674 assert((bytes & (align - 1)) == 0);
1675 assert(!qiov || bytes == qiov->size);
1676 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1677 align);
1678
1679 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
1680
1681 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
1682 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
1683 qemu_iovec_is_zero(qiov)) {
1684 flags |= BDRV_REQ_ZERO_WRITE;
1685 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
1686 flags |= BDRV_REQ_MAY_UNMAP;
1687 }
1688 }
1689
1690 if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
1692 } else if (flags & BDRV_REQ_ZERO_WRITE) {
1693 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
1694 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
1695 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
1696 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
1697 } else if (bytes <= max_transfer) {
1698 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1699 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
1700 } else {
1701 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1702 while (bytes_remaining) {
1703 int num = MIN(bytes_remaining, max_transfer);
1704 QEMUIOVector local_qiov;
1705 int local_flags = flags;
1706
1707 assert(num);
1708 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
1709 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
1712 local_flags &= ~BDRV_REQ_FUA;
1713 }
1714 qemu_iovec_init(&local_qiov, qiov->niov);
1715 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1716
1717 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
1718 num, &local_qiov, local_flags);
1719 qemu_iovec_destroy(&local_qiov);
1720 if (ret < 0) {
1721 break;
1722 }
1723 bytes_remaining -= num;
1724 }
1725 }
1726 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
1727
1728 if (ret >= 0) {
1729 ret = 0;
1730 }
1731 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
1732
1733 return ret;
1734}
1735
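/*
 * Zero-write path for possibly unaligned requests: the unaligned head and
 * tail are handled with serialised read-modify-write cycles, while the
 * aligned middle part is written as zeroes directly.
 */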
1736static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
1737 int64_t offset,
1738 unsigned int bytes,
1739 BdrvRequestFlags flags,
1740 BdrvTrackedRequest *req)
1741{
1742 BlockDriverState *bs = child->bs;
1743 uint8_t *buf = NULL;
1744 QEMUIOVector local_qiov;
1745 uint64_t align = bs->bl.request_alignment;
1746 unsigned int head_padding_bytes, tail_padding_bytes;
1747 int ret = 0;
1748
1749 head_padding_bytes = offset & (align - 1);
1750 tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
1751
1752
1753 assert(flags & BDRV_REQ_ZERO_WRITE);
1754 if (head_padding_bytes || tail_padding_bytes) {
1755 buf = qemu_blockalign(bs, align);
1756 qemu_iovec_init_buf(&local_qiov, buf, align);
1757 }
1758 if (head_padding_bytes) {
1759 uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
1760
        /* RMW the unaligned part before head. */
1762 mark_request_serialising(req, align);
1763 wait_serialising_requests(req);
1764 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1765 ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
1766 align, &local_qiov, 0);
1767 if (ret < 0) {
1768 goto fail;
1769 }
1770 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1771
1772 memset(buf + head_padding_bytes, 0, zero_bytes);
1773 ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
1774 align, &local_qiov,
1775 flags & ~BDRV_REQ_ZERO_WRITE);
1776 if (ret < 0) {
1777 goto fail;
1778 }
1779 offset += zero_bytes;
1780 bytes -= zero_bytes;
1781 }
1782
1783 assert(!bytes || (offset & (align - 1)) == 0);
1784 if (bytes >= align) {
        /* Write the aligned part in the middle. */
1786 uint64_t aligned_bytes = bytes & ~(align - 1);
1787 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
1788 NULL, flags);
1789 if (ret < 0) {
1790 goto fail;
1791 }
1792 bytes -= aligned_bytes;
1793 offset += aligned_bytes;
1794 }
1795
1796 assert(!bytes || (offset & (align - 1)) == 0);
1797 if (bytes) {
1798 assert(align == tail_padding_bytes + bytes);
        /* RMW the unaligned part after tail. */
1800 mark_request_serialising(req, align);
1801 wait_serialising_requests(req);
1802 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1803 ret = bdrv_aligned_preadv(child, req, offset, align,
1804 align, &local_qiov, 0);
1805 if (ret < 0) {
1806 goto fail;
1807 }
1808 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1809
1810 memset(buf, 0, bytes);
1811 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
1812 &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
1813 }
1814fail:
1815 qemu_vfree(buf);
1816 return ret;
1817
1818}
1819
/*
 * Handle a write request in coroutine context
 */
1823int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
1824 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1825 BdrvRequestFlags flags)
1826{
1827 BlockDriverState *bs = child->bs;
1828 BdrvTrackedRequest req;
1829 uint64_t align = bs->bl.request_alignment;
1830 uint8_t *head_buf = NULL;
1831 uint8_t *tail_buf = NULL;
1832 QEMUIOVector local_qiov;
1833 bool use_local_qiov = false;
1834 int ret;
1835
1836 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
1837
1838 if (!bs->drv) {
1839 return -ENOMEDIUM;
1840 }
1841
1842 ret = bdrv_check_byte_request(bs, offset, bytes);
1843 if (ret < 0) {
1844 return ret;
1845 }
1846
1847 bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
1853 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
1854
1855 if (flags & BDRV_REQ_ZERO_WRITE) {
1856 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
1857 goto out;
1858 }
1859
1860 if (offset & (align - 1)) {
1861 QEMUIOVector head_qiov;
1862
1863 mark_request_serialising(&req, align);
1864 wait_serialising_requests(&req);
1865
1866 head_buf = qemu_blockalign(bs, align);
1867 qemu_iovec_init_buf(&head_qiov, head_buf, align);
1868
1869 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1870 ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
1871 align, &head_qiov, 0);
1872 if (ret < 0) {
1873 goto fail;
1874 }
1875 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1876
1877 qemu_iovec_init(&local_qiov, qiov->niov + 2);
1878 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
1879 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1880 use_local_qiov = true;
1881
1882 bytes += offset & (align - 1);
1883 offset = offset & ~(align - 1);
1884
        /* We have read the tail already if the request is smaller
         * than one aligned block.
         */
1888 if (bytes < align) {
1889 qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
1890 bytes = align;
1891 }
1892 }
1893
1894 if ((offset + bytes) & (align - 1)) {
1895 QEMUIOVector tail_qiov;
1896 size_t tail_bytes;
1897 bool waited;
1898
1899 mark_request_serialising(&req, align);
1900 waited = wait_serialising_requests(&req);
1901 assert(!waited || !use_local_qiov);
1902
1903 tail_buf = qemu_blockalign(bs, align);
1904 qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
1905
1906 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1907 ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
1908 align, align, &tail_qiov, 0);
1909 if (ret < 0) {
1910 goto fail;
1911 }
1912 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1913
1914 if (!use_local_qiov) {
1915 qemu_iovec_init(&local_qiov, qiov->niov + 1);
1916 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1917 use_local_qiov = true;
1918 }
1919
1920 tail_bytes = (offset + bytes) & (align - 1);
1921 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
1922
1923 bytes = ROUND_UP(bytes, align);
1924 }
1925
1926 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
1927 use_local_qiov ? &local_qiov : qiov,
1928 flags);
1929
1930fail:
1931
1932 if (use_local_qiov) {
1933 qemu_iovec_destroy(&local_qiov);
1934 }
1935 qemu_vfree(head_buf);
1936 qemu_vfree(tail_buf);
1937out:
1938 tracked_request_end(&req);
1939 bdrv_dec_in_flight(bs);
1940 return ret;
1941}
1942
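/*
 * Write zeroes in coroutine context; BDRV_REQ_MAY_UNMAP is dropped when
 * the node was not opened with BDRV_O_UNMAP.
 */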
1943int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
1944 int bytes, BdrvRequestFlags flags)
1945{
1946 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
1947
1948 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
1949 flags &= ~BDRV_REQ_MAY_UNMAP;
1950 }
1951
1952 return bdrv_co_pwritev(child, offset, bytes, NULL,
1953 BDRV_REQ_ZERO_WRITE | flags);
1954}
1955
/*
 * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
 */
1959int bdrv_flush_all(void)
1960{
1961 BdrvNextIterator it;
1962 BlockDriverState *bs = NULL;
1963 int result = 0;
1964
1965 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
1966 AioContext *aio_context = bdrv_get_aio_context(bs);
1967 int ret;
1968
1969 aio_context_acquire(aio_context);
1970 ret = bdrv_flush(bs);
1971 if (ret < 0 && !result) {
1972 result = ret;
1973 }
1974 aio_context_release(aio_context);
1975 }
1976
1977 return result;
1978}
1979
1980
1981typedef struct BdrvCoBlockStatusData {
1982 BlockDriverState *bs;
1983 BlockDriverState *base;
1984 bool want_zero;
1985 int64_t offset;
1986 int64_t bytes;
1987 int64_t *pnum;
1988 int64_t *map;
1989 BlockDriverState **file;
1990 int ret;
1991 bool done;
1992} BdrvCoBlockStatusData;
1993
1994int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
1995 bool want_zero,
1996 int64_t offset,
1997 int64_t bytes,
1998 int64_t *pnum,
1999 int64_t *map,
2000 BlockDriverState **file)
2001{
2002 assert(bs->file && bs->file->bs);
2003 *pnum = bytes;
2004 *map = offset;
2005 *file = bs->file->bs;
2006 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2007}
2008
2009int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
2010 bool want_zero,
2011 int64_t offset,
2012 int64_t bytes,
2013 int64_t *pnum,
2014 int64_t *map,
2015 BlockDriverState **file)
2016{
2017 assert(bs->backing && bs->backing->bs);
2018 *pnum = bytes;
2019 *map = offset;
2020 *file = bs->backing->bs;
2021 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2022}
2023
/*
 * Returns the allocation status of the specified offset/bytes range.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their bytes are reported as allocated.
 *
 * If 'want_zero' is true, the caller is querying for mapping purposes,
 * with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA and _ZERO; otherwise,
 * the result favors larger 'pnum', with a focus on accurate
 * BDRV_BLOCK_ALLOCATED.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the maximum value 'pnum' may be set to.  If the range goes
 * beyond the end of the disk image it will be clamped; if 'pnum' reaches
 * the end of the image, the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately following
 * the specified offset) that are known to be in the same
 * allocated/unallocated state; on success it is only 0 when 'offset' is at
 * or past end of file.
 *
 * Returns negative errno on failure.  Otherwise, if the
 * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
 * set to the host mapping and BDS corresponding to the guest offset.
 */
2051static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2052 bool want_zero,
2053 int64_t offset, int64_t bytes,
2054 int64_t *pnum, int64_t *map,
2055 BlockDriverState **file)
2056{
2057 int64_t total_size;
2058 int64_t n;
2059 int ret;
2060 int64_t local_map = 0;
2061 BlockDriverState *local_file = NULL;
2062 int64_t aligned_offset, aligned_bytes;
2063 uint32_t align;
2064
2065 assert(pnum);
2066 *pnum = 0;
2067 total_size = bdrv_getlength(bs);
2068 if (total_size < 0) {
2069 ret = total_size;
2070 goto early_out;
2071 }
2072
2073 if (offset >= total_size) {
2074 ret = BDRV_BLOCK_EOF;
2075 goto early_out;
2076 }
2077 if (!bytes) {
2078 ret = 0;
2079 goto early_out;
2080 }
2081
2082 n = total_size - offset;
2083 if (n < bytes) {
2084 bytes = n;
2085 }
2086
    /* Must be non-NULL or bdrv_getlength() would have failed */
2088 assert(bs->drv);
2089 if (!bs->drv->bdrv_co_block_status) {
2090 *pnum = bytes;
2091 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2092 if (offset + bytes == total_size) {
2093 ret |= BDRV_BLOCK_EOF;
2094 }
2095 if (bs->drv->protocol_name) {
2096 ret |= BDRV_BLOCK_OFFSET_VALID;
2097 local_map = offset;
2098 local_file = bs;
2099 }
2100 goto early_out;
2101 }
2102
2103 bdrv_inc_in_flight(bs);
2104
    /* Round out to request_alignment boundaries */
2106 align = bs->bl.request_alignment;
2107 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2108 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2109
2110 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2111 aligned_bytes, pnum, &local_map,
2112 &local_file);
2113 if (ret < 0) {
2114 *pnum = 0;
2115 goto out;
2116 }
2117
    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
2122 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2123 align > offset - aligned_offset);
2124 *pnum -= offset - aligned_offset;
2125 if (*pnum > bytes) {
2126 *pnum = bytes;
2127 }
2128 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2129 local_map += offset - aligned_offset;
2130 }
2131
2132 if (ret & BDRV_BLOCK_RAW) {
2133 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2134 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2135 *pnum, pnum, &local_map, &local_file);
2136 goto out;
2137 }
2138
2139 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2140 ret |= BDRV_BLOCK_ALLOCATED;
2141 } else if (want_zero) {
2142 if (bdrv_unallocated_blocks_are_zero(bs)) {
2143 ret |= BDRV_BLOCK_ZERO;
2144 } else if (bs->backing) {
2145 BlockDriverState *bs2 = bs->backing->bs;
2146 int64_t size2 = bdrv_getlength(bs2);
2147
2148 if (size2 >= 0 && offset >= size2) {
2149 ret |= BDRV_BLOCK_ZERO;
2150 }
2151 }
2152 }
2153
2154 if (want_zero && local_file && local_file != bs &&
2155 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2156 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2157 int64_t file_pnum;
2158 int ret2;
2159
2160 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2161 *pnum, &file_pnum, NULL, NULL);
2162 if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
2166 if (ret2 & BDRV_BLOCK_EOF &&
2167 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
2173 ret |= BDRV_BLOCK_ZERO;
2174 } else {
                /* Limit request to the range reported by the protocol driver */
2176 *pnum = file_pnum;
2177 ret |= (ret2 & BDRV_BLOCK_ZERO);
2178 }
2179 }
2180 }
2181
2182out:
2183 bdrv_dec_in_flight(bs);
2184 if (ret >= 0 && offset + *pnum == total_size) {
2185 ret |= BDRV_BLOCK_EOF;
2186 }
2187early_out:
2188 if (file) {
2189 *file = local_file;
2190 }
2191 if (map) {
2192 *map = local_map;
2193 }
2194 return ret;
2195}
2196
2197static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
2198 BlockDriverState *base,
2199 bool want_zero,
2200 int64_t offset,
2201 int64_t bytes,
2202 int64_t *pnum,
2203 int64_t *map,
2204 BlockDriverState **file)
2205{
2206 BlockDriverState *p;
2207 int ret = 0;
2208 bool first = true;
2209
2210 assert(bs != base);
2211 for (p = bs; p != base; p = backing_bs(p)) {
2212 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2213 file);
2214 if (ret < 0) {
2215 break;
2216 }
2217 if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read
             * zeroes, but we can only widen the result to the
             * unallocated length we learned from an earlier
             * iteration.
             */
2224 *pnum = bytes;
2225 }
2226 if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
2227 break;
2228 }
        /* [offset, pnum] unallocated on this layer, which could be only
         * the first part of [offset, bytes].  */
2231 bytes = MIN(bytes, *pnum);
2232 first = false;
2233 }
2234 return ret;
2235}
2236
/* Coroutine wrapper for bdrv_block_status_above() */
2238static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
2239{
2240 BdrvCoBlockStatusData *data = opaque;
2241
2242 data->ret = bdrv_co_block_status_above(data->bs, data->base,
2243 data->want_zero,
2244 data->offset, data->bytes,
2245 data->pnum, data->map, data->file);
2246 data->done = true;
2247 aio_wait_kick();
2248}
2249
/*
 * Synchronous wrapper around bdrv_co_block_status_above().
 *
 * See bdrv_co_block_status_above() for details.
 */
2255static int bdrv_common_block_status_above(BlockDriverState *bs,
2256 BlockDriverState *base,
2257 bool want_zero, int64_t offset,
2258 int64_t bytes, int64_t *pnum,
2259 int64_t *map,
2260 BlockDriverState **file)
2261{
2262 Coroutine *co;
2263 BdrvCoBlockStatusData data = {
2264 .bs = bs,
2265 .base = base,
2266 .want_zero = want_zero,
2267 .offset = offset,
2268 .bytes = bytes,
2269 .pnum = pnum,
2270 .map = map,
2271 .file = file,
2272 .done = false,
2273 };
2274
2275 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
2277 bdrv_block_status_above_co_entry(&data);
2278 } else {
2279 co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
2280 bdrv_coroutine_enter(bs, co);
2281 BDRV_POLL_WHILE(bs, !data.done);
2282 }
2283 return data.ret;
2284}
2285
2286int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2287 int64_t offset, int64_t bytes, int64_t *pnum,
2288 int64_t *map, BlockDriverState **file)
2289{
2290 return bdrv_common_block_status_above(bs, base, true, offset, bytes,
2291 pnum, map, file);
2292}
2293
2294int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2295 int64_t *pnum, int64_t *map, BlockDriverState **file)
2296{
2297 return bdrv_block_status_above(bs, backing_bs(bs),
2298 offset, bytes, pnum, map, file);
2299}
2300
2301int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2302 int64_t bytes, int64_t *pnum)
2303{
2304 int ret;
2305 int64_t dummy;
2306
2307 ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
2308 bytes, pnum ? pnum : &dummy, NULL,
2309 NULL);
2310 if (ret < 0) {
2311 return ret;
2312 }
2313 return !!(ret & BDRV_BLOCK_ALLOCATED);
2314}
2315
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if (a prefix of) the given range is allocated in any image
 * between BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * offset is allocated in any image of the chain.  Return false otherwise,
 * or negative errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately following
 * the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting at
 * 'offset + *pnum' may return the same allocation status again; the result
 * is not necessarily the maximum possible range.
 */
2332int bdrv_is_allocated_above(BlockDriverState *top,
2333 BlockDriverState *base,
2334 int64_t offset, int64_t bytes, int64_t *pnum)
2335{
2336 BlockDriverState *intermediate;
2337 int ret;
2338 int64_t n = bytes;
2339
2340 intermediate = top;
2341 while (intermediate && intermediate != base) {
2342 int64_t pnum_inter;
2343 int64_t size_inter;
2344
2345 ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
2346 if (ret < 0) {
2347 return ret;
2348 }
2349 if (ret) {
2350 *pnum = pnum_inter;
2351 return 1;
2352 }
2353
2354 size_inter = bdrv_getlength(intermediate);
2355 if (size_inter < 0) {
2356 return size_inter;
2357 }
2358 if (n > pnum_inter &&
2359 (intermediate == top || offset + pnum_inter < size_inter)) {
2360 n = pnum_inter;
2361 }
2362
2363 intermediate = backing_bs(intermediate);
2364 }
2365
2366 *pnum = n;
2367 return 0;
2368}
2369
2370typedef struct BdrvVmstateCo {
2371 BlockDriverState *bs;
2372 QEMUIOVector *qiov;
2373 int64_t pos;
2374 bool is_read;
2375 int ret;
2376} BdrvVmstateCo;
2377
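/*
 * Read or write VM state at @pos, using the driver's
 * .bdrv_load_vmstate/.bdrv_save_vmstate callbacks if it has them and
 * otherwise forwarding the request to bs->file.
 */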
2378static int coroutine_fn
2379bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2380 bool is_read)
2381{
2382 BlockDriver *drv = bs->drv;
2383 int ret = -ENOTSUP;
2384
2385 bdrv_inc_in_flight(bs);
2386
2387 if (!drv) {
2388 ret = -ENOMEDIUM;
2389 } else if (drv->bdrv_load_vmstate) {
2390 if (is_read) {
2391 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2392 } else {
2393 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2394 }
2395 } else if (bs->file) {
2396 ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
2397 }
2398
2399 bdrv_dec_in_flight(bs);
2400 return ret;
2401}
2402
2403static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
2404{
2405 BdrvVmstateCo *co = opaque;
2406 co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
2407 aio_wait_kick();
2408}
2409
2410static inline int
2411bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2412 bool is_read)
2413{
2414 if (qemu_in_coroutine()) {
2415 return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
2416 } else {
2417 BdrvVmstateCo data = {
2418 .bs = bs,
2419 .qiov = qiov,
2420 .pos = pos,
2421 .is_read = is_read,
2422 .ret = -EINPROGRESS,
2423 };
2424 Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
2425
2426 bdrv_coroutine_enter(bs, co);
2427 BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
2428 return data.ret;
2429 }
2430}
2431
2432int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2433 int64_t pos, int size)
2434{
2435 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2436 int ret;
2437
2438 ret = bdrv_writev_vmstate(bs, &qiov, pos);
2439 if (ret < 0) {
2440 return ret;
2441 }
2442
2443 return size;
2444}
2445
2446int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2447{
2448 return bdrv_rw_vmstate(bs, qiov, pos, false);
2449}
2450
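/*
 * Read @size bytes of vmstate data into @buf from offset @pos.
 * Returns @size on success or a negative errno on failure.
 */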
2451int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2452 int64_t pos, int size)
2453{
2454 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
2455 int ret;
2456
2457 ret = bdrv_readv_vmstate(bs, &qiov, pos);
2458 if (ret < 0) {
2459 return ret;
2460 }
2461
2462 return size;
2463}
2464
2465int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2466{
2467 return bdrv_rw_vmstate(bs, qiov, pos, true);
2468}
2469
/*
 * Cancel @acb synchronously: request asynchronous cancellation and then poll
 * the request's AioContext until only our own reference to @acb remains.
 */
2473void bdrv_aio_cancel(BlockAIOCB *acb)
2474{
2475 qemu_aio_ref(acb);
2476 bdrv_aio_cancel_async(acb);
2477 while (acb->refcnt > 1) {
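        /* Poll until our reference is the only one left, i.e. the request has
         * completed and dropped its own references. */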
2478 if (acb->aiocb_info->get_aio_context) {
2479 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2480 } else if (acb->bs) {
            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
2485 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2486 aio_poll(bdrv_get_aio_context(acb->bs), true);
2487 } else {
2488 abort();
2489 }
2490 }
2491 qemu_aio_unref(acb);
2492}
2493
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 */
2497void bdrv_aio_cancel_async(BlockAIOCB *acb)
2498{
2499 if (acb->aiocb_info->cancel_async) {
2500 acb->aiocb_info->cancel_async(acb);
2501 }
2502}
2503
/**************************************************************/
/* Coroutine block device emulation */

2507typedef struct FlushCo {
2508 BlockDriverState *bs;
2509 int ret;
2510} FlushCo;
2511
2512
2513static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2514{
2515 FlushCo *rwco = opaque;
2516
2517 rwco->ret = bdrv_co_flush(rwco->bs);
2518 aio_wait_kick();
2519}
2520
2521int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2522{
2523 int current_gen;
2524 int ret = 0;
2525
2526 bdrv_inc_in_flight(bs);
2527
2528 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2529 bdrv_is_sg(bs)) {
2530 goto early_exit;
2531 }
2532
2533 qemu_co_mutex_lock(&bs->reqs_lock);
2534 current_gen = atomic_read(&bs->write_gen);
2535
    /* Wait until any previous flushes are completed */
2537 while (bs->active_flush_req) {
2538 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
2539 }
2540
    /* Flushes reach this point in nondecreasing current_gen order.  */
2542 bs->active_flush_req = true;
2543 qemu_co_mutex_unlock(&bs->reqs_lock);
2544
    /* Write back all layers by calling one driver function */
2546 if (bs->drv->bdrv_co_flush) {
2547 ret = bs->drv->bdrv_co_flush(bs);
2548 goto out;
2549 }
2550
    /* Write back cached data to the OS even with cache=unsafe */
2552 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
2553 if (bs->drv->bdrv_co_flush_to_os) {
2554 ret = bs->drv->bdrv_co_flush_to_os(bs);
2555 if (ret < 0) {
2556 goto out;
2557 }
2558 }
2559
    /* But don't actually force it to the disk with cache=unsafe */
2561 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2562 goto flush_parent;
2563 }
2564
    /* Check if we really need to flush anything */
2566 if (bs->flushed_gen == current_gen) {
2567 goto flush_parent;
2568 }
2569
2570 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
2571 if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BS
         * (even in case of apparent success) */
2574 ret = -ENOMEDIUM;
2575 goto out;
2576 }
2577 if (bs->drv->bdrv_co_flush_to_disk) {
2578 ret = bs->drv->bdrv_co_flush_to_disk(bs);
2579 } else if (bs->drv->bdrv_aio_flush) {
2580 BlockAIOCB *acb;
2581 CoroutineIOCompletion co = {
2582 .coroutine = qemu_coroutine_self(),
2583 };
2584
2585 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2586 if (acb == NULL) {
2587 ret = -EIO;
2588 } else {
2589 qemu_coroutine_yield();
2590 ret = co.ret;
2591 }
2592 } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what they are doing.
         */
2604 ret = 0;
2605 }
2606
2607 if (ret < 0) {
2608 goto out;
2609 }
2610
    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * set in the case of cache=unsafe, so there are no useless flushes.
     */
2614flush_parent:
2615 ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
2616out:
    /* Notify any pending flushes that we have completed */
2618 if (ret == 0) {
2619 bs->flushed_gen = current_gen;
2620 }
2621
2622 qemu_co_mutex_lock(&bs->reqs_lock);
2623 bs->active_flush_req = false;
    /* Return value is ignored - it's ok if the wait queue is empty */
2625 qemu_co_queue_next(&bs->flush_queue);
2626 qemu_co_mutex_unlock(&bs->reqs_lock);
2627
2628early_exit:
2629 bdrv_dec_in_flight(bs);
2630 return ret;
2631}
2632
2633int bdrv_flush(BlockDriverState *bs)
2634{
2635 Coroutine *co;
2636 FlushCo flush_co = {
2637 .bs = bs,
2638 .ret = NOT_DONE,
2639 };
2640
2641 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
2643 bdrv_flush_co_entry(&flush_co);
2644 } else {
2645 co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
2646 bdrv_coroutine_enter(bs, co);
2647 BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
2648 }
2649
2650 return flush_co.ret;
2651}
2652
2653typedef struct DiscardCo {
2654 BdrvChild *child;
2655 int64_t offset;
2656 int bytes;
2657 int ret;
2658} DiscardCo;
2659static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
2660{
2661 DiscardCo *rwco = opaque;
2662
2663 rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
2664 aio_wait_kick();
2665}
2666
2667int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
2668{
2669 BdrvTrackedRequest req;
2670 int max_pdiscard, ret;
2671 int head, tail, align;
2672 BlockDriverState *bs = child->bs;
2673
2674 if (!bs || !bs->drv) {
2675 return -ENOMEDIUM;
2676 }
2677
2678 if (bdrv_has_readonly_bitmaps(bs)) {
2679 return -EPERM;
2680 }
2681
2682 ret = bdrv_check_byte_request(bs, offset, bytes);
2683 if (ret < 0) {
2684 return ret;
2685 }
2686
    /* Do nothing if disabled.  */
2688 if (!(bs->open_flags & BDRV_O_UNMAP)) {
2689 return 0;
2690 }
2691
2692 if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
2693 return 0;
2694 }
2695
    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
2701 align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
2702 assert(align % bs->bl.request_alignment == 0);
2703 head = offset % align;
2704 tail = (offset + bytes) % align;
2705
2706 bdrv_inc_in_flight(bs);
2707 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
2708
2709 ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
2710 if (ret < 0) {
2711 goto out;
2712 }
2713
2714 max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
2715 align);
2716 assert(max_pdiscard >= bs->bl.request_alignment);
2717
2718 while (bytes > 0) {
2719 int num = bytes;
2720
2721 if (head) {
            /* Make small requests to get to alignment boundaries. */
2723 num = MIN(bytes, align - head);
2724 if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
2725 num %= bs->bl.request_alignment;
2726 }
2727 head = (head + num) % align;
2728 assert(num < max_pdiscard);
2729 } else if (tail) {
2730 if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
2732 num -= tail;
2733 } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
2734 tail > bs->bl.request_alignment) {
2735 tail %= bs->bl.request_alignment;
2736 num -= tail;
2737 }
2738 }
2739
2740 if (num > max_pdiscard) {
2741 num = max_pdiscard;
2742 }
2743
2744 if (!bs->drv) {
2745 ret = -ENOMEDIUM;
2746 goto out;
2747 }
2748 if (bs->drv->bdrv_co_pdiscard) {
2749 ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
2750 } else {
2751 BlockAIOCB *acb;
2752 CoroutineIOCompletion co = {
2753 .coroutine = qemu_coroutine_self(),
2754 };
2755
2756 acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
2757 bdrv_co_io_em_complete, &co);
2758 if (acb == NULL) {
2759 ret = -EIO;
2760 goto out;
2761 } else {
2762 qemu_coroutine_yield();
2763 ret = co.ret;
2764 }
2765 }
2766 if (ret && ret != -ENOTSUP) {
2767 goto out;
2768 }
2769
2770 offset += num;
2771 bytes -= num;
2772 }
2773 ret = 0;
2774out:
2775 bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
2776 tracked_request_end(&req);
2777 bdrv_dec_in_flight(bs);
2778 return ret;
2779}
2780
2781int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes)
2782{
2783 Coroutine *co;
2784 DiscardCo rwco = {
2785 .child = child,
2786 .offset = offset,
2787 .bytes = bytes,
2788 .ret = NOT_DONE,
2789 };
2790
2791 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
2793 bdrv_pdiscard_co_entry(&rwco);
2794 } else {
2795 co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
2796 bdrv_coroutine_enter(child->bs, co);
2797 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
2798 }
2799
2800 return rwco.ret;
2801}
2802
2803int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
2804{
2805 BlockDriver *drv = bs->drv;
2806 CoroutineIOCompletion co = {
2807 .coroutine = qemu_coroutine_self(),
2808 };
2809 BlockAIOCB *acb;
2810
2811 bdrv_inc_in_flight(bs);
2812 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
2813 co.ret = -ENOTSUP;
2814 goto out;
2815 }
2816
2817 if (drv->bdrv_co_ioctl) {
2818 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
2819 } else {
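        /* Fall back to the driver's AIO ioctl interface and yield until
         * bdrv_co_io_em_complete() reenters this coroutine. */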
2820 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
2821 if (!acb) {
2822 co.ret = -ENOTSUP;
2823 goto out;
2824 }
2825 qemu_coroutine_yield();
2826 }
2827out:
2828 bdrv_dec_in_flight(bs);
2829 return co.ret;
2830}
2831
2832void *qemu_blockalign(BlockDriverState *bs, size_t size)
2833{
2834 return qemu_memalign(bdrv_opt_mem_align(bs), size);
2835}
2836
2837void *qemu_blockalign0(BlockDriverState *bs, size_t size)
2838{
2839 return memset(qemu_blockalign(bs, size), 0, size);
2840}
2841
2842void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
2843{
2844 size_t align = bdrv_opt_mem_align(bs);
2845
    /* Ensure that NULL is never returned on success */
2847 assert(align > 0);
2848 if (size == 0) {
2849 size = align;
2850 }
2851
2852 return qemu_try_memalign(align, size);
2853}
2854
2855void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
2856{
2857 void *mem = qemu_try_blockalign(bs, size);
2858
2859 if (mem) {
2860 memset(mem, 0, size);
2861 }
2862
2863 return mem;
2864}
2865
/*
 * Check if all memory in this vector is aligned to bdrv_min_mem_align(bs).
 */
2869bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
2870{
2871 int i;
2872 size_t alignment = bdrv_min_mem_align(bs);
2873
2874 for (i = 0; i < qiov->niov; i++) {
2875 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
2876 return false;
2877 }
2878 if (qiov->iov[i].iov_len % alignment) {
2879 return false;
2880 }
2881 }
2882
2883 return true;
2884}
2885
2886void bdrv_add_before_write_notifier(BlockDriverState *bs,
2887 NotifierWithReturn *notifier)
2888{
2889 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
2890}
2891
2892void bdrv_io_plug(BlockDriverState *bs)
2893{
2894 BdrvChild *child;
2895
2896 QLIST_FOREACH(child, &bs->children, next) {
2897 bdrv_io_plug(child->bs);
2898 }
2899
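    /* Notify the driver only on the first (outermost) plug */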
2900 if (atomic_fetch_inc(&bs->io_plugged) == 0) {
2901 BlockDriver *drv = bs->drv;
2902 if (drv && drv->bdrv_io_plug) {
2903 drv->bdrv_io_plug(bs);
2904 }
2905 }
2906}
2907
2908void bdrv_io_unplug(BlockDriverState *bs)
2909{
2910 BdrvChild *child;
2911
2912 assert(bs->io_plugged);
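    /* Notify the driver only when the last plug is removed */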
2913 if (atomic_fetch_dec(&bs->io_plugged) == 1) {
2914 BlockDriver *drv = bs->drv;
2915 if (drv && drv->bdrv_io_unplug) {
2916 drv->bdrv_io_unplug(bs);
2917 }
2918 }
2919
2920 QLIST_FOREACH(child, &bs->children, next) {
2921 bdrv_io_unplug(child->bs);
2922 }
2923}
2924
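/*
 * Propagate a host buffer registration to the driver (if it implements
 * bdrv_register_buf) and recursively to all children.
 */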
2925void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
2926{
2927 BdrvChild *child;
2928
2929 if (bs->drv && bs->drv->bdrv_register_buf) {
2930 bs->drv->bdrv_register_buf(bs, host, size);
2931 }
2932 QLIST_FOREACH(child, &bs->children, next) {
2933 bdrv_register_buf(child->bs, host, size);
2934 }
2935}
2936
2937void bdrv_unregister_buf(BlockDriverState *bs, void *host)
2938{
2939 BdrvChild *child;
2940
2941 if (bs->drv && bs->drv->bdrv_unregister_buf) {
2942 bs->drv->bdrv_unregister_buf(bs, host);
2943 }
2944 QLIST_FOREACH(child, &bs->children, next) {
2945 bdrv_unregister_buf(child->bs, host);
2946 }
2947}
2948
2949static int coroutine_fn bdrv_co_copy_range_internal(
2950 BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
2951 uint64_t dst_offset, uint64_t bytes,
2952 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
2953 bool recurse_src)
2954{
2955 BdrvTrackedRequest req;
2956 int ret;
2957
    /* BDRV_REQ_NO_FALLBACK is not supported by the copy offloading path */
2959 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
2960 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
2961
2962 if (!dst || !dst->bs) {
2963 return -ENOMEDIUM;
2964 }
2965 ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
2966 if (ret) {
2967 return ret;
2968 }
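    /* A zero write does not need to read from @src at all, so route it to the
     * regular pwrite_zeroes path. */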
2969 if (write_flags & BDRV_REQ_ZERO_WRITE) {
2970 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
2971 }
2972
2973 if (!src || !src->bs) {
2974 return -ENOMEDIUM;
2975 }
2976 ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
2977 if (ret) {
2978 return ret;
2979 }
2980
2981 if (!src->bs->drv->bdrv_co_copy_range_from
2982 || !dst->bs->drv->bdrv_co_copy_range_to
2983 || src->bs->encrypted || dst->bs->encrypted) {
2984 return -ENOTSUP;
2985 }
2986
2987 if (recurse_src) {
2988 bdrv_inc_in_flight(src->bs);
2989 tracked_request_begin(&req, src->bs, src_offset, bytes,
2990 BDRV_TRACKED_READ);
2991
        /* BDRV_REQ_SERIALISING is only for write operations */
2993 assert(!(read_flags & BDRV_REQ_SERIALISING));
2994 if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
2995 wait_serialising_requests(&req);
2996 }
2997
2998 ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
2999 src, src_offset,
3000 dst, dst_offset,
3001 bytes,
3002 read_flags, write_flags);
3003
3004 tracked_request_end(&req);
3005 bdrv_dec_in_flight(src->bs);
3006 } else {
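        /* Copy on the destination side: account for it as a write request on
         * @dst and let the destination driver pull from @src. */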
3007 bdrv_inc_in_flight(dst->bs);
3008 tracked_request_begin(&req, dst->bs, dst_offset, bytes,
3009 BDRV_TRACKED_WRITE);
3010 ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
3011 write_flags);
3012 if (!ret) {
3013 ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
3014 src, src_offset,
3015 dst, dst_offset,
3016 bytes,
3017 read_flags, write_flags);
3018 }
3019 bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
3020 tracked_request_end(&req);
3021 bdrv_dec_in_flight(dst->bs);
3022 }
3023
3024 return ret;
3025}
3026
/*
 * Copy a range of bytes from @src to @dst, using the source driver's
 * bdrv_co_copy_range_from implementation.  Returns 0 on success or a negative
 * errno on failure.
 */
3031int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
3032 BdrvChild *dst, uint64_t dst_offset,
3033 uint64_t bytes,
3034 BdrvRequestFlags read_flags,
3035 BdrvRequestFlags write_flags)
3036{
3037 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3038 read_flags, write_flags);
3039 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3040 bytes, read_flags, write_flags, true);
3041}
3042
/*
 * Copy a range of bytes from @src to @dst, using the destination driver's
 * bdrv_co_copy_range_to implementation.  Returns 0 on success or a negative
 * errno on failure.
 */
3047int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
3048 BdrvChild *dst, uint64_t dst_offset,
3049 uint64_t bytes,
3050 BdrvRequestFlags read_flags,
3051 BdrvRequestFlags write_flags)
3052{
3053 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3054 read_flags, write_flags);
3055 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3056 bytes, read_flags, write_flags, false);
3057}
3058
3059int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
3060 BdrvChild *dst, uint64_t dst_offset,
3061 uint64_t bytes, BdrvRequestFlags read_flags,
3062 BdrvRequestFlags write_flags)
3063{
3064 return bdrv_co_copy_range_from(src, src_offset,
3065 dst, dst_offset,
3066 bytes, read_flags, write_flags);
3067}
3068
3069static void bdrv_parent_cb_resize(BlockDriverState *bs)
3070{
3071 BdrvChild *c;
3072 QLIST_FOREACH(c, &bs->parents, next_parent) {
3073 if (c->role->resize) {
3074 c->role->resize(c);
3075 }
3076 }
3077}
3078
/*
 * Truncate the image backing @child to @offset bytes, using preallocation
 * mode @prealloc for any newly added area.
 */
3082int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
3083 PreallocMode prealloc, Error **errp)
3084{
3085 BlockDriverState *bs = child->bs;
3086 BlockDriver *drv = bs->drv;
3087 BdrvTrackedRequest req;
3088 int64_t old_size, new_bytes;
3089 int ret;
3090
3093 if (!drv) {
3094 error_setg(errp, "No medium inserted");
3095 return -ENOMEDIUM;
3096 }
3097 if (offset < 0) {
3098 error_setg(errp, "Image size cannot be negative");
3099 return -EINVAL;
3100 }
3101
3102 old_size = bdrv_getlength(bs);
3103 if (old_size < 0) {
3104 error_setg_errno(errp, -old_size, "Failed to get old image size");
3105 return old_size;
3106 }
3107
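    /* new_bytes is the size of any area added by growing the image
     * (zero when shrinking or keeping the size) */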
3108 if (offset > old_size) {
3109 new_bytes = offset - old_size;
3110 } else {
3111 new_bytes = 0;
3112 }
3113
3114 bdrv_inc_in_flight(bs);
3115 tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
3116 BDRV_TRACKED_TRUNCATE);
3117
    /* If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
3121 if (new_bytes) {
3122 mark_request_serialising(&req, 1);
3123 }
3124 if (bs->read_only) {
3125 error_setg(errp, "Image is read-only");
3126 ret = -EACCES;
3127 goto out;
3128 }
3129 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
3130 0);
3131 if (ret < 0) {
3132 error_setg_errno(errp, -ret,
3133 "Failed to prepare request for truncation");
3134 goto out;
3135 }
3136
3137 if (!drv->bdrv_co_truncate) {
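        /* A filter driver without its own truncate implementation just
         * forwards the request to its file child. */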
3138 if (bs->file && drv->is_filter) {
3139 ret = bdrv_co_truncate(bs->file, offset, prealloc, errp);
3140 goto out;
3141 }
3142 error_setg(errp, "Image format driver does not support resize");
3143 ret = -ENOTSUP;
3144 goto out;
3145 }
3146
3147 ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp);
3148 if (ret < 0) {
3149 goto out;
3150 }
3151 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3152 if (ret < 0) {
3153 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3154 } else {
3155 offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3156 }
    /* It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. */
3160 bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
3161
3162out:
3163 tracked_request_end(&req);
3164 bdrv_dec_in_flight(bs);
3165
3166 return ret;
3167}
3168
3169typedef struct TruncateCo {
3170 BdrvChild *child;
3171 int64_t offset;
3172 PreallocMode prealloc;
3173 Error **errp;
3174 int ret;
3175} TruncateCo;
3176
3177static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
3178{
3179 TruncateCo *tco = opaque;
3180 tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc,
3181 tco->errp);
3182 aio_wait_kick();
3183}
3184
3185int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
3186 Error **errp)
3187{
3188 Coroutine *co;
3189 TruncateCo tco = {
3190 .child = child,
3191 .offset = offset,
3192 .prealloc = prealloc,
3193 .errp = errp,
3194 .ret = NOT_DONE,
3195 };
3196
3197 if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
3199 bdrv_truncate_co_entry(&tco);
3200 } else {
3201 co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
3202 bdrv_coroutine_enter(child->bs, co);
3203 BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
3204 }
3205
3206 return tco.ret;
3207}
3208