1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "trace.h"
27#include "sysemu/block-backend.h"
28#include "block/aio-wait.h"
29#include "block/blockjob.h"
30#include "block/blockjob_int.h"
31#include "block/block_int.h"
32#include "qemu/cutils.h"
33#include "qapi/error.h"
34#include "qemu/error-report.h"
35
36#define NOT_DONE 0x7fffffff
37
38
39#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
40
41static AioWait drain_all_aio_wait;
42
43static void bdrv_parent_cb_resize(BlockDriverState *bs);
44static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
45 int64_t offset, int bytes, BdrvRequestFlags flags);
46
47void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
48 bool ignore_bds_parents)
49{
50 BdrvChild *c, *next;
51
52 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
53 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
54 continue;
55 }
56 bdrv_parent_drained_begin_single(c, false);
57 }
58}
59
60void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
61 bool ignore_bds_parents)
62{
63 BdrvChild *c, *next;
64
65 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
66 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
67 continue;
68 }
69 if (c->role->drained_end) {
70 c->role->drained_end(c);
71 }
72 }
73}
74
75static bool bdrv_parent_drained_poll_single(BdrvChild *c)
76{
77 if (c->role->drained_poll) {
78 return c->role->drained_poll(c);
79 }
80 return false;
81}
82
83static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
84 bool ignore_bds_parents)
85{
86 BdrvChild *c, *next;
87 bool busy = false;
88
89 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
90 if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
91 continue;
92 }
93 busy |= bdrv_parent_drained_poll_single(c);
94 }
95
96 return busy;
97}
98
99void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
100{
101 if (c->role->drained_begin) {
102 c->role->drained_begin(c);
103 }
104 if (poll) {
105 BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
106 }
107}
108
109static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
110{
111 dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
112 dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
113 dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
114 src->opt_mem_alignment);
115 dst->min_mem_alignment = MAX(dst->min_mem_alignment,
116 src->min_mem_alignment);
117 dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
118}
119
120void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
121{
122 BlockDriver *drv = bs->drv;
123 Error *local_err = NULL;
124
125 memset(&bs->bl, 0, sizeof(bs->bl));
126
127 if (!drv) {
128 return;
129 }
130
131
132 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
133 drv->bdrv_aio_preadv) ? 1 : 512;
134
135
136 if (bs->file) {
137 bdrv_refresh_limits(bs->file->bs, &local_err);
138 if (local_err) {
139 error_propagate(errp, local_err);
140 return;
141 }
142 bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
143 } else {
144 bs->bl.min_mem_alignment = 512;
145 bs->bl.opt_mem_alignment = getpagesize();
146
147
148 bs->bl.max_iov = IOV_MAX;
149 }
150
151 if (bs->backing) {
152 bdrv_refresh_limits(bs->backing->bs, &local_err);
153 if (local_err) {
154 error_propagate(errp, local_err);
155 return;
156 }
157 bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
158 }
159
160
161 if (drv->bdrv_refresh_limits) {
162 drv->bdrv_refresh_limits(bs, errp);
163 }
164}
165
166
167
168
169
170
171void bdrv_enable_copy_on_read(BlockDriverState *bs)
172{
173 atomic_inc(&bs->copy_on_read);
174}
175
176void bdrv_disable_copy_on_read(BlockDriverState *bs)
177{
178 int old = atomic_fetch_dec(&bs->copy_on_read);
179 assert(old >= 1);
180}
181
182typedef struct {
183 Coroutine *co;
184 BlockDriverState *bs;
185 bool done;
186 bool begin;
187 bool recursive;
188 bool poll;
189 BdrvChild *parent;
190 bool ignore_bds_parents;
191} BdrvCoDrainData;
192
193static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
194{
195 BdrvCoDrainData *data = opaque;
196 BlockDriverState *bs = data->bs;
197
198 if (data->begin) {
199 bs->drv->bdrv_co_drain_begin(bs);
200 } else {
201 bs->drv->bdrv_co_drain_end(bs);
202 }
203
204
205 atomic_mb_set(&data->done, true);
206 bdrv_dec_in_flight(bs);
207
208 if (data->begin) {
209 g_free(data);
210 }
211}
212
213
214static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
215{
216 BdrvCoDrainData *data;
217
218 if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
219 (!begin && !bs->drv->bdrv_co_drain_end)) {
220 return;
221 }
222
223 data = g_new(BdrvCoDrainData, 1);
224 *data = (BdrvCoDrainData) {
225 .bs = bs,
226 .done = false,
227 .begin = begin
228 };
229
230
231
232 bdrv_inc_in_flight(bs);
233 data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
234 aio_co_schedule(bdrv_get_aio_context(bs), data->co);
235
236 if (!begin) {
237 BDRV_POLL_WHILE(bs, !data->done);
238 g_free(data);
239 }
240}
241
242
243bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
244 BdrvChild *ignore_parent, bool ignore_bds_parents)
245{
246 BdrvChild *child, *next;
247
248 if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
249 return true;
250 }
251
252 if (atomic_read(&bs->in_flight)) {
253 return true;
254 }
255
256 if (recursive) {
257 assert(!ignore_bds_parents);
258 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
259 if (bdrv_drain_poll(child->bs, recursive, child, false)) {
260 return true;
261 }
262 }
263 }
264
265 return false;
266}
267
268static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
269 BdrvChild *ignore_parent)
270{
271
272
273 while (aio_poll(bs->aio_context, false));
274
275 return bdrv_drain_poll(bs, recursive, ignore_parent, false);
276}
277
278static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
279 BdrvChild *parent, bool ignore_bds_parents,
280 bool poll);
281static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
282 BdrvChild *parent, bool ignore_bds_parents);
283
284static void bdrv_co_drain_bh_cb(void *opaque)
285{
286 BdrvCoDrainData *data = opaque;
287 Coroutine *co = data->co;
288 BlockDriverState *bs = data->bs;
289
290 if (bs) {
291 bdrv_dec_in_flight(bs);
292 if (data->begin) {
293 bdrv_do_drained_begin(bs, data->recursive, data->parent,
294 data->ignore_bds_parents, data->poll);
295 } else {
296 bdrv_do_drained_end(bs, data->recursive, data->parent,
297 data->ignore_bds_parents);
298 }
299 } else {
300 assert(data->begin);
301 bdrv_drain_all_begin();
302 }
303
304 data->done = true;
305 aio_co_wake(co);
306}
307
308static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
309 bool begin, bool recursive,
310 BdrvChild *parent,
311 bool ignore_bds_parents,
312 bool poll)
313{
314 BdrvCoDrainData data;
315
316
317
318
319 assert(qemu_in_coroutine());
320 data = (BdrvCoDrainData) {
321 .co = qemu_coroutine_self(),
322 .bs = bs,
323 .done = false,
324 .begin = begin,
325 .recursive = recursive,
326 .parent = parent,
327 .ignore_bds_parents = ignore_bds_parents,
328 .poll = poll,
329 };
330 if (bs) {
331 bdrv_inc_in_flight(bs);
332 }
333 aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
334 bdrv_co_drain_bh_cb, &data);
335
336 qemu_coroutine_yield();
337
338
339 assert(data.done);
340}
341
342void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
343 BdrvChild *parent, bool ignore_bds_parents)
344{
345 assert(!qemu_in_coroutine());
346
347
348 if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
349 aio_disable_external(bdrv_get_aio_context(bs));
350 }
351
352 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
353 bdrv_drain_invoke(bs, true);
354}
355
356static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
357 BdrvChild *parent, bool ignore_bds_parents,
358 bool poll)
359{
360 BdrvChild *child, *next;
361
362 if (qemu_in_coroutine()) {
363 bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
364 poll);
365 return;
366 }
367
368 bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
369
370 if (recursive) {
371 assert(!ignore_bds_parents);
372 bs->recursive_quiesce_counter++;
373 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
374 bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
375 false);
376 }
377 }
378
379
380
381
382
383
384
385
386
387
388 if (poll) {
389 assert(!ignore_bds_parents);
390 BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
391 }
392}
393
394void bdrv_drained_begin(BlockDriverState *bs)
395{
396 bdrv_do_drained_begin(bs, false, NULL, false, true);
397}
398
399void bdrv_subtree_drained_begin(BlockDriverState *bs)
400{
401 bdrv_do_drained_begin(bs, true, NULL, false, true);
402}
403
404static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
405 BdrvChild *parent, bool ignore_bds_parents)
406{
407 BdrvChild *child, *next;
408 int old_quiesce_counter;
409
410 if (qemu_in_coroutine()) {
411 bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
412 false);
413 return;
414 }
415 assert(bs->quiesce_counter > 0);
416 old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
417
418
419 bdrv_drain_invoke(bs, false);
420 bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
421 if (old_quiesce_counter == 1) {
422 aio_enable_external(bdrv_get_aio_context(bs));
423 }
424
425 if (recursive) {
426 assert(!ignore_bds_parents);
427 bs->recursive_quiesce_counter--;
428 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
429 bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
430 }
431 }
432}
433
434void bdrv_drained_end(BlockDriverState *bs)
435{
436 bdrv_do_drained_end(bs, false, NULL, false);
437}
438
439void bdrv_subtree_drained_end(BlockDriverState *bs)
440{
441 bdrv_do_drained_end(bs, true, NULL, false);
442}
443
444void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
445{
446 int i;
447
448 for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
449 bdrv_do_drained_begin(child->bs, true, child, false, true);
450 }
451}
452
453void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
454{
455 int i;
456
457 for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
458 bdrv_do_drained_end(child->bs, true, child, false);
459 }
460}
461
462
463
464
465
466
467
468
469void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
470{
471 assert(qemu_in_coroutine());
472 bdrv_drained_begin(bs);
473 bdrv_drained_end(bs);
474}
475
476void bdrv_drain(BlockDriverState *bs)
477{
478 bdrv_drained_begin(bs);
479 bdrv_drained_end(bs);
480}
481
482static void bdrv_drain_assert_idle(BlockDriverState *bs)
483{
484 BdrvChild *child, *next;
485
486 assert(atomic_read(&bs->in_flight) == 0);
487 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
488 bdrv_drain_assert_idle(child->bs);
489 }
490}
491
/*
 * Number of bdrv_drain_all_begin() calls that have not yet been matched by
 * bdrv_drain_all_end().
 */
unsigned int bdrv_drain_all_count = 0;
493
494static bool bdrv_drain_all_poll(void)
495{
496 BlockDriverState *bs = NULL;
497 bool result = false;
498
499
500
501 while (aio_poll(qemu_get_aio_context(), false));
502
503
504
505 while ((bs = bdrv_next_all_states(bs))) {
506 AioContext *aio_context = bdrv_get_aio_context(bs);
507 aio_context_acquire(aio_context);
508 result |= bdrv_drain_poll(bs, false, NULL, true);
509 aio_context_release(aio_context);
510 }
511
512 return result;
513}
514
515
516
517
518
519
520
521
522
523
524
525
526
527void bdrv_drain_all_begin(void)
528{
529 BlockDriverState *bs = NULL;
530
531 if (qemu_in_coroutine()) {
532 bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
533 return;
534 }
535
536
537
538 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
539 assert(bdrv_drain_all_count < INT_MAX);
540 bdrv_drain_all_count++;
541
542
543
544 while ((bs = bdrv_next_all_states(bs))) {
545 AioContext *aio_context = bdrv_get_aio_context(bs);
546
547 aio_context_acquire(aio_context);
548 bdrv_do_drained_begin(bs, false, NULL, true, false);
549 aio_context_release(aio_context);
550 }
551
552
553 AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
554
555 while ((bs = bdrv_next_all_states(bs))) {
556 bdrv_drain_assert_idle(bs);
557 }
558}
559
560void bdrv_drain_all_end(void)
561{
562 BlockDriverState *bs = NULL;
563
564 while ((bs = bdrv_next_all_states(bs))) {
565 AioContext *aio_context = bdrv_get_aio_context(bs);
566
567 aio_context_acquire(aio_context);
568 bdrv_do_drained_end(bs, false, NULL, true);
569 aio_context_release(aio_context);
570 }
571
572 assert(bdrv_drain_all_count > 0);
573 bdrv_drain_all_count--;
574}
575
/*
 * Drain every block node once: open a global drained section and close it
 * again immediately.
 */
void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}
581
582
583
584
585
586
587static void tracked_request_end(BdrvTrackedRequest *req)
588{
589 if (req->serialising) {
590 atomic_dec(&req->bs->serialising_in_flight);
591 }
592
593 qemu_co_mutex_lock(&req->bs->reqs_lock);
594 QLIST_REMOVE(req, list);
595 qemu_co_queue_restart_all(&req->wait_queue);
596 qemu_co_mutex_unlock(&req->bs->reqs_lock);
597}
598
599
600
601
602static void tracked_request_begin(BdrvTrackedRequest *req,
603 BlockDriverState *bs,
604 int64_t offset,
605 uint64_t bytes,
606 enum BdrvTrackedRequestType type)
607{
608 assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
609
610 *req = (BdrvTrackedRequest){
611 .bs = bs,
612 .offset = offset,
613 .bytes = bytes,
614 .type = type,
615 .co = qemu_coroutine_self(),
616 .serialising = false,
617 .overlap_offset = offset,
618 .overlap_bytes = bytes,
619 };
620
621 qemu_co_queue_init(&req->wait_queue);
622
623 qemu_co_mutex_lock(&bs->reqs_lock);
624 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
625 qemu_co_mutex_unlock(&bs->reqs_lock);
626}
627
628static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
629{
630 int64_t overlap_offset = req->offset & ~(align - 1);
631 uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
632 - overlap_offset;
633
634 if (!req->serialising) {
635 atomic_inc(&req->bs->serialising_in_flight);
636 req->serialising = true;
637 }
638
639 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
640 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
641}
642
643static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
644{
645
646
647
648
649
650
651 return req->serialising && (req->offset == req->overlap_offset) &&
652 (req->bytes == req->overlap_bytes);
653}
654
655
656
657
658void bdrv_round_to_clusters(BlockDriverState *bs,
659 int64_t offset, int64_t bytes,
660 int64_t *cluster_offset,
661 int64_t *cluster_bytes)
662{
663 BlockDriverInfo bdi;
664
665 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
666 *cluster_offset = offset;
667 *cluster_bytes = bytes;
668 } else {
669 int64_t c = bdi.cluster_size;
670 *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
671 *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
672 }
673}
674
675static int bdrv_get_cluster_size(BlockDriverState *bs)
676{
677 BlockDriverInfo bdi;
678 int ret;
679
680 ret = bdrv_get_info(bs, &bdi);
681 if (ret < 0 || bdi.cluster_size == 0) {
682 return bs->bl.request_alignment;
683 } else {
684 return bdi.cluster_size;
685 }
686}
687
688static bool tracked_request_overlaps(BdrvTrackedRequest *req,
689 int64_t offset, uint64_t bytes)
690{
691
692 if (offset >= req->overlap_offset + req->overlap_bytes) {
693 return false;
694 }
695
696 if (req->overlap_offset >= offset + bytes) {
697 return false;
698 }
699 return true;
700}
701
702void bdrv_inc_in_flight(BlockDriverState *bs)
703{
704 atomic_inc(&bs->in_flight);
705}
706
707void bdrv_wakeup(BlockDriverState *bs)
708{
709 aio_wait_kick(bdrv_get_aio_wait(bs));
710 aio_wait_kick(&drain_all_aio_wait);
711}
712
713void bdrv_dec_in_flight(BlockDriverState *bs)
714{
715 atomic_dec(&bs->in_flight);
716 bdrv_wakeup(bs);
717}
718
719static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
720{
721 BlockDriverState *bs = self->bs;
722 BdrvTrackedRequest *req;
723 bool retry;
724 bool waited = false;
725
726 if (!atomic_read(&bs->serialising_in_flight)) {
727 return false;
728 }
729
730 do {
731 retry = false;
732 qemu_co_mutex_lock(&bs->reqs_lock);
733 QLIST_FOREACH(req, &bs->tracked_requests, list) {
734 if (req == self || (!req->serialising && !self->serialising)) {
735 continue;
736 }
737 if (tracked_request_overlaps(req, self->overlap_offset,
738 self->overlap_bytes))
739 {
740
741
742
743
744 assert(qemu_coroutine_self() != req->co);
745
746
747
748
749 if (!req->waiting_for) {
750 self->waiting_for = req;
751 qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
752 self->waiting_for = NULL;
753 retry = true;
754 waited = true;
755 break;
756 }
757 }
758 }
759 qemu_co_mutex_unlock(&bs->reqs_lock);
760 } while (retry);
761
762 return waited;
763}
764
765static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
766 size_t size)
767{
768 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
769 return -EIO;
770 }
771
772 if (!bdrv_is_inserted(bs)) {
773 return -ENOMEDIUM;
774 }
775
776 if (offset < 0) {
777 return -EIO;
778 }
779
780 return 0;
781}
782
783typedef struct RwCo {
784 BdrvChild *child;
785 int64_t offset;
786 QEMUIOVector *qiov;
787 bool is_write;
788 int ret;
789 BdrvRequestFlags flags;
790} RwCo;
791
792static void coroutine_fn bdrv_rw_co_entry(void *opaque)
793{
794 RwCo *rwco = opaque;
795
796 if (!rwco->is_write) {
797 rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
798 rwco->qiov->size, rwco->qiov,
799 rwco->flags);
800 } else {
801 rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
802 rwco->qiov->size, rwco->qiov,
803 rwco->flags);
804 }
805}
806
807
808
809
810static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
811 QEMUIOVector *qiov, bool is_write,
812 BdrvRequestFlags flags)
813{
814 Coroutine *co;
815 RwCo rwco = {
816 .child = child,
817 .offset = offset,
818 .qiov = qiov,
819 .is_write = is_write,
820 .ret = NOT_DONE,
821 .flags = flags,
822 };
823
824 if (qemu_in_coroutine()) {
825
826 bdrv_rw_co_entry(&rwco);
827 } else {
828 co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
829 bdrv_coroutine_enter(child->bs, co);
830 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
831 }
832 return rwco.ret;
833}
834
835
836
837
838static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
839 int nb_sectors, bool is_write, BdrvRequestFlags flags)
840{
841 QEMUIOVector qiov;
842 struct iovec iov = {
843 .iov_base = (void *)buf,
844 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
845 };
846
847 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
848 return -EINVAL;
849 }
850
851 qemu_iovec_init_external(&qiov, &iov, 1);
852 return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
853 &qiov, is_write, flags);
854}
855
856
857int bdrv_read(BdrvChild *child, int64_t sector_num,
858 uint8_t *buf, int nb_sectors)
859{
860 return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
861}
862
863
864
865
866
867
868
869int bdrv_write(BdrvChild *child, int64_t sector_num,
870 const uint8_t *buf, int nb_sectors)
871{
872 return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
873}
874
875int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
876 int bytes, BdrvRequestFlags flags)
877{
878 QEMUIOVector qiov;
879 struct iovec iov = {
880 .iov_base = NULL,
881 .iov_len = bytes,
882 };
883
884 qemu_iovec_init_external(&qiov, &iov, 1);
885 return bdrv_prwv_co(child, offset, &qiov, true,
886 BDRV_REQ_ZERO_WRITE | flags);
887}
888
889
890
891
892
893
894
895
896
897
898int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
899{
900 int ret;
901 int64_t target_size, bytes, offset = 0;
902 BlockDriverState *bs = child->bs;
903
904 target_size = bdrv_getlength(bs);
905 if (target_size < 0) {
906 return target_size;
907 }
908
909 for (;;) {
910 bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
911 if (bytes <= 0) {
912 return 0;
913 }
914 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
915 if (ret < 0) {
916 error_report("error getting block status at offset %" PRId64 ": %s",
917 offset, strerror(-ret));
918 return ret;
919 }
920 if (ret & BDRV_BLOCK_ZERO) {
921 offset += bytes;
922 continue;
923 }
924 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
925 if (ret < 0) {
926 error_report("error writing zeroes at offset %" PRId64 ": %s",
927 offset, strerror(-ret));
928 return ret;
929 }
930 offset += bytes;
931 }
932}
933
934int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
935{
936 int ret;
937
938 ret = bdrv_prwv_co(child, offset, qiov, false, 0);
939 if (ret < 0) {
940 return ret;
941 }
942
943 return qiov->size;
944}
945
946int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
947{
948 QEMUIOVector qiov;
949 struct iovec iov = {
950 .iov_base = (void *)buf,
951 .iov_len = bytes,
952 };
953
954 if (bytes < 0) {
955 return -EINVAL;
956 }
957
958 qemu_iovec_init_external(&qiov, &iov, 1);
959 return bdrv_preadv(child, offset, &qiov);
960}
961
962int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
963{
964 int ret;
965
966 ret = bdrv_prwv_co(child, offset, qiov, true, 0);
967 if (ret < 0) {
968 return ret;
969 }
970
971 return qiov->size;
972}
973
974int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
975{
976 QEMUIOVector qiov;
977 struct iovec iov = {
978 .iov_base = (void *) buf,
979 .iov_len = bytes,
980 };
981
982 if (bytes < 0) {
983 return -EINVAL;
984 }
985
986 qemu_iovec_init_external(&qiov, &iov, 1);
987 return bdrv_pwritev(child, offset, &qiov);
988}
989
990
991
992
993
994
995
996int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
997 const void *buf, int count)
998{
999 int ret;
1000
1001 ret = bdrv_pwrite(child, offset, buf, count);
1002 if (ret < 0) {
1003 return ret;
1004 }
1005
1006 ret = bdrv_flush(child->bs);
1007 if (ret < 0) {
1008 return ret;
1009 }
1010
1011 return 0;
1012}
1013
1014typedef struct CoroutineIOCompletion {
1015 Coroutine *coroutine;
1016 int ret;
1017} CoroutineIOCompletion;
1018
1019static void bdrv_co_io_em_complete(void *opaque, int ret)
1020{
1021 CoroutineIOCompletion *co = opaque;
1022
1023 co->ret = ret;
1024 aio_co_wake(co->coroutine);
1025}
1026
1027static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
1028 uint64_t offset, uint64_t bytes,
1029 QEMUIOVector *qiov, int flags)
1030{
1031 BlockDriver *drv = bs->drv;
1032 int64_t sector_num;
1033 unsigned int nb_sectors;
1034
1035 assert(!(flags & ~BDRV_REQ_MASK));
1036
1037 if (!drv) {
1038 return -ENOMEDIUM;
1039 }
1040
1041 if (drv->bdrv_co_preadv) {
1042 return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
1043 }
1044
1045 if (drv->bdrv_aio_preadv) {
1046 BlockAIOCB *acb;
1047 CoroutineIOCompletion co = {
1048 .coroutine = qemu_coroutine_self(),
1049 };
1050
1051 acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
1052 bdrv_co_io_em_complete, &co);
1053 if (acb == NULL) {
1054 return -EIO;
1055 } else {
1056 qemu_coroutine_yield();
1057 return co.ret;
1058 }
1059 }
1060
1061 sector_num = offset >> BDRV_SECTOR_BITS;
1062 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1063
1064 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1065 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1066 assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
1067 assert(drv->bdrv_co_readv);
1068
1069 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1070}
1071
1072static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
1073 uint64_t offset, uint64_t bytes,
1074 QEMUIOVector *qiov, int flags)
1075{
1076 BlockDriver *drv = bs->drv;
1077 int64_t sector_num;
1078 unsigned int nb_sectors;
1079 int ret;
1080
1081 assert(!(flags & ~BDRV_REQ_MASK));
1082
1083 if (!drv) {
1084 return -ENOMEDIUM;
1085 }
1086
1087 if (drv->bdrv_co_pwritev) {
1088 ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
1089 flags & bs->supported_write_flags);
1090 flags &= ~bs->supported_write_flags;
1091 goto emulate_flags;
1092 }
1093
1094 if (drv->bdrv_aio_pwritev) {
1095 BlockAIOCB *acb;
1096 CoroutineIOCompletion co = {
1097 .coroutine = qemu_coroutine_self(),
1098 };
1099
1100 acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
1101 flags & bs->supported_write_flags,
1102 bdrv_co_io_em_complete, &co);
1103 flags &= ~bs->supported_write_flags;
1104 if (acb == NULL) {
1105 ret = -EIO;
1106 } else {
1107 qemu_coroutine_yield();
1108 ret = co.ret;
1109 }
1110 goto emulate_flags;
1111 }
1112
1113 sector_num = offset >> BDRV_SECTOR_BITS;
1114 nb_sectors = bytes >> BDRV_SECTOR_BITS;
1115
1116 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1117 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1118 assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
1119
1120 assert(drv->bdrv_co_writev);
1121 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
1122 flags & bs->supported_write_flags);
1123 flags &= ~bs->supported_write_flags;
1124
1125emulate_flags:
1126 if (ret == 0 && (flags & BDRV_REQ_FUA)) {
1127 ret = bdrv_co_flush(bs);
1128 }
1129
1130 return ret;
1131}
1132
1133static int coroutine_fn
1134bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
1135 uint64_t bytes, QEMUIOVector *qiov)
1136{
1137 BlockDriver *drv = bs->drv;
1138
1139 if (!drv) {
1140 return -ENOMEDIUM;
1141 }
1142
1143 if (!drv->bdrv_co_pwritev_compressed) {
1144 return -ENOTSUP;
1145 }
1146
1147 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
1148}
1149
1150static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
1151 int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
1152{
1153 BlockDriverState *bs = child->bs;
1154
1155
1156
1157
1158
1159
1160 void *bounce_buffer;
1161
1162 BlockDriver *drv = bs->drv;
1163 struct iovec iov;
1164 QEMUIOVector local_qiov;
1165 int64_t cluster_offset;
1166 int64_t cluster_bytes;
1167 size_t skip_bytes;
1168 int ret;
1169 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
1170 BDRV_REQUEST_MAX_BYTES);
1171 unsigned int progress = 0;
1172
1173 if (!drv) {
1174 return -ENOMEDIUM;
1175 }
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191 bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
1192 skip_bytes = offset - cluster_offset;
1193
1194 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
1195 cluster_offset, cluster_bytes);
1196
1197 bounce_buffer = qemu_try_blockalign(bs,
1198 MIN(MIN(max_transfer, cluster_bytes),
1199 MAX_BOUNCE_BUFFER));
1200 if (bounce_buffer == NULL) {
1201 ret = -ENOMEM;
1202 goto err;
1203 }
1204
1205 while (cluster_bytes) {
1206 int64_t pnum;
1207
1208 ret = bdrv_is_allocated(bs, cluster_offset,
1209 MIN(cluster_bytes, max_transfer), &pnum);
1210 if (ret < 0) {
1211
1212
1213
1214
1215 pnum = MIN(cluster_bytes, max_transfer);
1216 }
1217
1218
1219 if (ret == 0 && pnum == 0) {
1220 assert(progress >= bytes);
1221 break;
1222 }
1223
1224 assert(skip_bytes < pnum);
1225
1226 if (ret <= 0) {
1227
1228 iov.iov_base = bounce_buffer;
1229 iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
1230 qemu_iovec_init_external(&local_qiov, &iov, 1);
1231
1232 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
1233 &local_qiov, 0);
1234 if (ret < 0) {
1235 goto err;
1236 }
1237
1238 bdrv_debug_event(bs, BLKDBG_COR_WRITE);
1239 if (drv->bdrv_co_pwrite_zeroes &&
1240 buffer_is_zero(bounce_buffer, pnum)) {
1241
1242
1243
1244 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
1245 BDRV_REQ_WRITE_UNCHANGED);
1246 } else {
1247
1248
1249
1250 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
1251 &local_qiov,
1252 BDRV_REQ_WRITE_UNCHANGED);
1253 }
1254
1255 if (ret < 0) {
1256
1257
1258
1259
1260
1261 goto err;
1262 }
1263
1264 qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
1265 pnum - skip_bytes);
1266 } else {
1267
1268 qemu_iovec_init(&local_qiov, qiov->niov);
1269 qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
1270 ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
1271 &local_qiov, 0);
1272 qemu_iovec_destroy(&local_qiov);
1273 if (ret < 0) {
1274 goto err;
1275 }
1276 }
1277
1278 cluster_offset += pnum;
1279 cluster_bytes -= pnum;
1280 progress += pnum - skip_bytes;
1281 skip_bytes = 0;
1282 }
1283 ret = 0;
1284
1285err:
1286 qemu_vfree(bounce_buffer);
1287 return ret;
1288}
1289
1290
1291
1292
1293
1294
1295static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
1296 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1297 int64_t align, QEMUIOVector *qiov, int flags)
1298{
1299 BlockDriverState *bs = child->bs;
1300 int64_t total_bytes, max_bytes;
1301 int ret = 0;
1302 uint64_t bytes_remaining = bytes;
1303 int max_transfer;
1304
1305 assert(is_power_of_2(align));
1306 assert((offset & (align - 1)) == 0);
1307 assert((bytes & (align - 1)) == 0);
1308 assert(!qiov || bytes == qiov->size);
1309 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1310 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1311 align);
1312
1313
1314
1315
1316
1317 assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
1318
1319
1320 if (flags & BDRV_REQ_COPY_ON_READ) {
1321
1322
1323
1324
1325
1326 mark_request_serialising(req, bdrv_get_cluster_size(bs));
1327 }
1328
1329
1330 assert(!(flags & BDRV_REQ_SERIALISING));
1331
1332 if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1333 wait_serialising_requests(req);
1334 }
1335
1336 if (flags & BDRV_REQ_COPY_ON_READ) {
1337 int64_t pnum;
1338
1339 ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
1340 if (ret < 0) {
1341 goto out;
1342 }
1343
1344 if (!ret || pnum != bytes) {
1345 ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
1346 goto out;
1347 }
1348 }
1349
1350
1351 total_bytes = bdrv_getlength(bs);
1352 if (total_bytes < 0) {
1353 ret = total_bytes;
1354 goto out;
1355 }
1356
1357 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
1358 if (bytes <= max_bytes && bytes <= max_transfer) {
1359 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
1360 goto out;
1361 }
1362
1363 while (bytes_remaining) {
1364 int num;
1365
1366 if (max_bytes) {
1367 QEMUIOVector local_qiov;
1368
1369 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
1370 assert(num);
1371 qemu_iovec_init(&local_qiov, qiov->niov);
1372 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1373
1374 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
1375 num, &local_qiov, 0);
1376 max_bytes -= num;
1377 qemu_iovec_destroy(&local_qiov);
1378 } else {
1379 num = bytes_remaining;
1380 ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
1381 bytes_remaining);
1382 }
1383 if (ret < 0) {
1384 goto out;
1385 }
1386 bytes_remaining -= num;
1387 }
1388
1389out:
1390 return ret < 0 ? ret : 0;
1391}
1392
1393
1394
1395
1396int coroutine_fn bdrv_co_preadv(BdrvChild *child,
1397 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1398 BdrvRequestFlags flags)
1399{
1400 BlockDriverState *bs = child->bs;
1401 BlockDriver *drv = bs->drv;
1402 BdrvTrackedRequest req;
1403
1404 uint64_t align = bs->bl.request_alignment;
1405 uint8_t *head_buf = NULL;
1406 uint8_t *tail_buf = NULL;
1407 QEMUIOVector local_qiov;
1408 bool use_local_qiov = false;
1409 int ret;
1410
1411 trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
1412
1413 if (!drv) {
1414 return -ENOMEDIUM;
1415 }
1416
1417 ret = bdrv_check_byte_request(bs, offset, bytes);
1418 if (ret < 0) {
1419 return ret;
1420 }
1421
1422 bdrv_inc_in_flight(bs);
1423
1424
1425 if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
1426 flags |= BDRV_REQ_COPY_ON_READ;
1427 }
1428
1429
1430 if (offset & (align - 1)) {
1431 head_buf = qemu_blockalign(bs, align);
1432 qemu_iovec_init(&local_qiov, qiov->niov + 2);
1433 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
1434 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1435 use_local_qiov = true;
1436
1437 bytes += offset & (align - 1);
1438 offset = offset & ~(align - 1);
1439 }
1440
1441 if ((offset + bytes) & (align - 1)) {
1442 if (!use_local_qiov) {
1443 qemu_iovec_init(&local_qiov, qiov->niov + 1);
1444 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1445 use_local_qiov = true;
1446 }
1447 tail_buf = qemu_blockalign(bs, align);
1448 qemu_iovec_add(&local_qiov, tail_buf,
1449 align - ((offset + bytes) & (align - 1)));
1450
1451 bytes = ROUND_UP(bytes, align);
1452 }
1453
1454 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
1455 ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
1456 use_local_qiov ? &local_qiov : qiov,
1457 flags);
1458 tracked_request_end(&req);
1459 bdrv_dec_in_flight(bs);
1460
1461 if (use_local_qiov) {
1462 qemu_iovec_destroy(&local_qiov);
1463 qemu_vfree(head_buf);
1464 qemu_vfree(tail_buf);
1465 }
1466
1467 return ret;
1468}
1469
1470static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1471 int64_t offset, int bytes, BdrvRequestFlags flags)
1472{
1473 BlockDriver *drv = bs->drv;
1474 QEMUIOVector qiov;
1475 struct iovec iov = {0};
1476 int ret = 0;
1477 bool need_flush = false;
1478 int head = 0;
1479 int tail = 0;
1480
1481 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
1482 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
1483 bs->bl.request_alignment);
1484 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
1485
1486 if (!drv) {
1487 return -ENOMEDIUM;
1488 }
1489
1490 assert(alignment % bs->bl.request_alignment == 0);
1491 head = offset % alignment;
1492 tail = (offset + bytes) % alignment;
1493 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
1494 assert(max_write_zeroes >= bs->bl.request_alignment);
1495
1496 while (bytes > 0 && !ret) {
1497 int num = bytes;
1498
1499
1500
1501
1502
1503 if (head) {
1504
1505
1506
1507 num = MIN(MIN(bytes, max_transfer), alignment - head);
1508 head = (head + num) % alignment;
1509 assert(num < max_write_zeroes);
1510 } else if (tail && num > alignment) {
1511
1512 num -= tail;
1513 }
1514
1515
1516 if (num > max_write_zeroes) {
1517 num = max_write_zeroes;
1518 }
1519
1520 ret = -ENOTSUP;
1521
1522 if (drv->bdrv_co_pwrite_zeroes) {
1523 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
1524 flags & bs->supported_zero_flags);
1525 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
1526 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
1527 need_flush = true;
1528 }
1529 } else {
1530 assert(!bs->supported_zero_flags);
1531 }
1532
1533 if (ret == -ENOTSUP) {
1534
1535 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
1536
1537 if ((flags & BDRV_REQ_FUA) &&
1538 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1539
1540
1541 write_flags &= ~BDRV_REQ_FUA;
1542 need_flush = true;
1543 }
1544 num = MIN(num, max_transfer);
1545 iov.iov_len = num;
1546 if (iov.iov_base == NULL) {
1547 iov.iov_base = qemu_try_blockalign(bs, num);
1548 if (iov.iov_base == NULL) {
1549 ret = -ENOMEM;
1550 goto fail;
1551 }
1552 memset(iov.iov_base, 0, num);
1553 }
1554 qemu_iovec_init_external(&qiov, &iov, 1);
1555
1556 ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
1557
1558
1559
1560
1561 if (num < max_transfer) {
1562 qemu_vfree(iov.iov_base);
1563 iov.iov_base = NULL;
1564 }
1565 }
1566
1567 offset += num;
1568 bytes -= num;
1569 }
1570
1571fail:
1572 if (ret == 0 && need_flush) {
1573 ret = bdrv_co_flush(bs);
1574 }
1575 qemu_vfree(iov.iov_base);
1576 return ret;
1577}
1578
1579static inline int coroutine_fn
1580bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
1581 BdrvTrackedRequest *req, int flags)
1582{
1583 BlockDriverState *bs = child->bs;
1584 bool waited;
1585 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1586
1587 if (bs->read_only) {
1588 return -EPERM;
1589 }
1590
1591
1592 assert(!(flags & BDRV_REQ_NO_SERIALISING));
1593 assert(!(bs->open_flags & BDRV_O_INACTIVE));
1594 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
1595 assert(!(flags & ~BDRV_REQ_MASK));
1596
1597 if (flags & BDRV_REQ_SERIALISING) {
1598 mark_request_serialising(req, bdrv_get_cluster_size(bs));
1599 }
1600
1601 waited = wait_serialising_requests(req);
1602
1603 assert(!waited || !req->serialising ||
1604 is_request_serialising_and_aligned(req));
1605 assert(req->overlap_offset <= offset);
1606 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
1607 assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
1608
1609 switch (req->type) {
1610 case BDRV_TRACKED_WRITE:
1611 case BDRV_TRACKED_DISCARD:
1612 if (flags & BDRV_REQ_WRITE_UNCHANGED) {
1613 assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
1614 } else {
1615 assert(child->perm & BLK_PERM_WRITE);
1616 }
1617 return notifier_with_return_list_notify(&bs->before_write_notifiers,
1618 req);
1619 case BDRV_TRACKED_TRUNCATE:
1620 assert(child->perm & BLK_PERM_RESIZE);
1621 return 0;
1622 default:
1623 abort();
1624 }
1625}
1626
1627static inline void coroutine_fn
1628bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
1629 BdrvTrackedRequest *req, int ret)
1630{
1631 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
1632 BlockDriverState *bs = child->bs;
1633
1634 atomic_inc(&bs->write_gen);
1635
1636
1637
1638
1639
1640
1641
1642
1643 if (ret == 0 &&
1644 (req->type == BDRV_TRACKED_TRUNCATE ||
1645 end_sector > bs->total_sectors) &&
1646 req->type != BDRV_TRACKED_DISCARD) {
1647 bs->total_sectors = end_sector;
1648 bdrv_parent_cb_resize(bs);
1649 bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
1650 }
1651 if (req->bytes) {
1652 switch (req->type) {
1653 case BDRV_TRACKED_WRITE:
1654 stat64_max(&bs->wr_highest_offset, offset + bytes);
1655
1656 case BDRV_TRACKED_DISCARD:
1657 bdrv_set_dirty(bs, offset, bytes);
1658 break;
1659 default:
1660 break;
1661 }
1662 }
1663}
1664
1665
1666
1667
1668
1669static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
1670 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
1671 int64_t align, QEMUIOVector *qiov, int flags)
1672{
1673 BlockDriverState *bs = child->bs;
1674 BlockDriver *drv = bs->drv;
1675 int ret;
1676
1677 uint64_t bytes_remaining = bytes;
1678 int max_transfer;
1679
1680 if (!drv) {
1681 return -ENOMEDIUM;
1682 }
1683
1684 if (bdrv_has_readonly_bitmaps(bs)) {
1685 return -EPERM;
1686 }
1687
1688 assert(is_power_of_2(align));
1689 assert((offset & (align - 1)) == 0);
1690 assert((bytes & (align - 1)) == 0);
1691 assert(!qiov || bytes == qiov->size);
1692 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1693 align);
1694
1695 ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);
1696
1697 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
1698 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
1699 qemu_iovec_is_zero(qiov)) {
1700 flags |= BDRV_REQ_ZERO_WRITE;
1701 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
1702 flags |= BDRV_REQ_MAY_UNMAP;
1703 }
1704 }
1705
1706 if (ret < 0) {
1707
1708 } else if (flags & BDRV_REQ_ZERO_WRITE) {
1709 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
1710 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
1711 } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
1712 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
1713 } else if (bytes <= max_transfer) {
1714 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1715 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
1716 } else {
1717 bdrv_debug_event(bs, BLKDBG_PWRITEV);
1718 while (bytes_remaining) {
1719 int num = MIN(bytes_remaining, max_transfer);
1720 QEMUIOVector local_qiov;
1721 int local_flags = flags;
1722
1723 assert(num);
1724 if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
1725 !(bs->supported_write_flags & BDRV_REQ_FUA)) {
1726
1727
1728 local_flags &= ~BDRV_REQ_FUA;
1729 }
1730 qemu_iovec_init(&local_qiov, qiov->niov);
1731 qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
1732
1733 ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
1734 num, &local_qiov, local_flags);
1735 qemu_iovec_destroy(&local_qiov);
1736 if (ret < 0) {
1737 break;
1738 }
1739 bytes_remaining -= num;
1740 }
1741 }
1742 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
1743
1744 if (ret >= 0) {
1745 ret = 0;
1746 }
1747 bdrv_co_write_req_finish(child, offset, bytes, req, ret);
1748
1749 return ret;
1750}
1751
1752static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
1753 int64_t offset,
1754 unsigned int bytes,
1755 BdrvRequestFlags flags,
1756 BdrvTrackedRequest *req)
1757{
1758 BlockDriverState *bs = child->bs;
1759 uint8_t *buf = NULL;
1760 QEMUIOVector local_qiov;
1761 struct iovec iov;
1762 uint64_t align = bs->bl.request_alignment;
1763 unsigned int head_padding_bytes, tail_padding_bytes;
1764 int ret = 0;
1765
1766 head_padding_bytes = offset & (align - 1);
1767 tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
1768
1769
1770 assert(flags & BDRV_REQ_ZERO_WRITE);
1771 if (head_padding_bytes || tail_padding_bytes) {
1772 buf = qemu_blockalign(bs, align);
1773 iov = (struct iovec) {
1774 .iov_base = buf,
1775 .iov_len = align,
1776 };
1777 qemu_iovec_init_external(&local_qiov, &iov, 1);
1778 }
1779 if (head_padding_bytes) {
1780 uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
1781
1782
1783 mark_request_serialising(req, align);
1784 wait_serialising_requests(req);
1785 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1786 ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
1787 align, &local_qiov, 0);
1788 if (ret < 0) {
1789 goto fail;
1790 }
1791 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1792
1793 memset(buf + head_padding_bytes, 0, zero_bytes);
1794 ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
1795 align, &local_qiov,
1796 flags & ~BDRV_REQ_ZERO_WRITE);
1797 if (ret < 0) {
1798 goto fail;
1799 }
1800 offset += zero_bytes;
1801 bytes -= zero_bytes;
1802 }
1803
1804 assert(!bytes || (offset & (align - 1)) == 0);
1805 if (bytes >= align) {
1806
1807 uint64_t aligned_bytes = bytes & ~(align - 1);
1808 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
1809 NULL, flags);
1810 if (ret < 0) {
1811 goto fail;
1812 }
1813 bytes -= aligned_bytes;
1814 offset += aligned_bytes;
1815 }
1816
1817 assert(!bytes || (offset & (align - 1)) == 0);
1818 if (bytes) {
1819 assert(align == tail_padding_bytes + bytes);
1820
1821 mark_request_serialising(req, align);
1822 wait_serialising_requests(req);
1823 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1824 ret = bdrv_aligned_preadv(child, req, offset, align,
1825 align, &local_qiov, 0);
1826 if (ret < 0) {
1827 goto fail;
1828 }
1829 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1830
1831 memset(buf, 0, bytes);
1832 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
1833 &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
1834 }
1835fail:
1836 qemu_vfree(buf);
1837 return ret;
1838
1839}
1840
1841
1842
1843
1844int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
1845 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
1846 BdrvRequestFlags flags)
1847{
1848 BlockDriverState *bs = child->bs;
1849 BdrvTrackedRequest req;
1850 uint64_t align = bs->bl.request_alignment;
1851 uint8_t *head_buf = NULL;
1852 uint8_t *tail_buf = NULL;
1853 QEMUIOVector local_qiov;
1854 bool use_local_qiov = false;
1855 int ret;
1856
1857 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
1858
1859 if (!bs->drv) {
1860 return -ENOMEDIUM;
1861 }
1862
1863 ret = bdrv_check_byte_request(bs, offset, bytes);
1864 if (ret < 0) {
1865 return ret;
1866 }
1867
1868 bdrv_inc_in_flight(bs);
1869
1870
1871
1872
1873
1874 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
1875
1876 if (flags & BDRV_REQ_ZERO_WRITE) {
1877 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
1878 goto out;
1879 }
1880
1881 if (offset & (align - 1)) {
1882 QEMUIOVector head_qiov;
1883 struct iovec head_iov;
1884
1885 mark_request_serialising(&req, align);
1886 wait_serialising_requests(&req);
1887
1888 head_buf = qemu_blockalign(bs, align);
1889 head_iov = (struct iovec) {
1890 .iov_base = head_buf,
1891 .iov_len = align,
1892 };
1893 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
1894
1895 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1896 ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
1897 align, &head_qiov, 0);
1898 if (ret < 0) {
1899 goto fail;
1900 }
1901 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1902
1903 qemu_iovec_init(&local_qiov, qiov->niov + 2);
1904 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
1905 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1906 use_local_qiov = true;
1907
1908 bytes += offset & (align - 1);
1909 offset = offset & ~(align - 1);
1910
1911
1912
1913
1914 if (bytes < align) {
1915 qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
1916 bytes = align;
1917 }
1918 }
1919
1920 if ((offset + bytes) & (align - 1)) {
1921 QEMUIOVector tail_qiov;
1922 struct iovec tail_iov;
1923 size_t tail_bytes;
1924 bool waited;
1925
1926 mark_request_serialising(&req, align);
1927 waited = wait_serialising_requests(&req);
1928 assert(!waited || !use_local_qiov);
1929
1930 tail_buf = qemu_blockalign(bs, align);
1931 tail_iov = (struct iovec) {
1932 .iov_base = tail_buf,
1933 .iov_len = align,
1934 };
1935 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
1936
1937 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1938 ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
1939 align, align, &tail_qiov, 0);
1940 if (ret < 0) {
1941 goto fail;
1942 }
1943 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1944
1945 if (!use_local_qiov) {
1946 qemu_iovec_init(&local_qiov, qiov->niov + 1);
1947 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
1948 use_local_qiov = true;
1949 }
1950
1951 tail_bytes = (offset + bytes) & (align - 1);
1952 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
1953
1954 bytes = ROUND_UP(bytes, align);
1955 }
1956
1957 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
1958 use_local_qiov ? &local_qiov : qiov,
1959 flags);
1960
1961fail:
1962
1963 if (use_local_qiov) {
1964 qemu_iovec_destroy(&local_qiov);
1965 }
1966 qemu_vfree(head_buf);
1967 qemu_vfree(tail_buf);
1968out:
1969 tracked_request_end(&req);
1970 bdrv_dec_in_flight(bs);
1971 return ret;
1972}
1973
1974int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
1975 int bytes, BdrvRequestFlags flags)
1976{
1977 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
1978
1979 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
1980 flags &= ~BDRV_REQ_MAY_UNMAP;
1981 }
1982
1983 return bdrv_co_pwritev(child, offset, bytes, NULL,
1984 BDRV_REQ_ZERO_WRITE | flags);
1985}
1986
1987
1988
1989
1990int bdrv_flush_all(void)
1991{
1992 BdrvNextIterator it;
1993 BlockDriverState *bs = NULL;
1994 int result = 0;
1995
1996 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
1997 AioContext *aio_context = bdrv_get_aio_context(bs);
1998 int ret;
1999
2000 aio_context_acquire(aio_context);
2001 ret = bdrv_flush(bs);
2002 if (ret < 0 && !result) {
2003 result = ret;
2004 }
2005 aio_context_release(aio_context);
2006 }
2007
2008 return result;
2009}
2010
2011
2012typedef struct BdrvCoBlockStatusData {
2013 BlockDriverState *bs;
2014 BlockDriverState *base;
2015 bool want_zero;
2016 int64_t offset;
2017 int64_t bytes;
2018 int64_t *pnum;
2019 int64_t *map;
2020 BlockDriverState **file;
2021 int ret;
2022 bool done;
2023} BdrvCoBlockStatusData;
2024
2025int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
2026 bool want_zero,
2027 int64_t offset,
2028 int64_t bytes,
2029 int64_t *pnum,
2030 int64_t *map,
2031 BlockDriverState **file)
2032{
2033 assert(bs->file && bs->file->bs);
2034 *pnum = bytes;
2035 *map = offset;
2036 *file = bs->file->bs;
2037 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2038}
2039
2040int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
2041 bool want_zero,
2042 int64_t offset,
2043 int64_t bytes,
2044 int64_t *pnum,
2045 int64_t *map,
2046 BlockDriverState **file)
2047{
2048 assert(bs->backing && bs->backing->bs);
2049 *pnum = bytes;
2050 *map = offset;
2051 *file = bs->backing->bs;
2052 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
2053}
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2083 bool want_zero,
2084 int64_t offset, int64_t bytes,
2085 int64_t *pnum, int64_t *map,
2086 BlockDriverState **file)
2087{
2088 int64_t total_size;
2089 int64_t n;
2090 int ret;
2091 int64_t local_map = 0;
2092 BlockDriverState *local_file = NULL;
2093 int64_t aligned_offset, aligned_bytes;
2094 uint32_t align;
2095
2096 assert(pnum);
2097 *pnum = 0;
2098 total_size = bdrv_getlength(bs);
2099 if (total_size < 0) {
2100 ret = total_size;
2101 goto early_out;
2102 }
2103
2104 if (offset >= total_size) {
2105 ret = BDRV_BLOCK_EOF;
2106 goto early_out;
2107 }
2108 if (!bytes) {
2109 ret = 0;
2110 goto early_out;
2111 }
2112
2113 n = total_size - offset;
2114 if (n < bytes) {
2115 bytes = n;
2116 }
2117
2118
2119 assert(bs->drv);
2120 if (!bs->drv->bdrv_co_block_status) {
2121 *pnum = bytes;
2122 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
2123 if (offset + bytes == total_size) {
2124 ret |= BDRV_BLOCK_EOF;
2125 }
2126 if (bs->drv->protocol_name) {
2127 ret |= BDRV_BLOCK_OFFSET_VALID;
2128 local_map = offset;
2129 local_file = bs;
2130 }
2131 goto early_out;
2132 }
2133
2134 bdrv_inc_in_flight(bs);
2135
2136
2137 align = bs->bl.request_alignment;
2138 aligned_offset = QEMU_ALIGN_DOWN(offset, align);
2139 aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
2140
2141 ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
2142 aligned_bytes, pnum, &local_map,
2143 &local_file);
2144 if (ret < 0) {
2145 *pnum = 0;
2146 goto out;
2147 }
2148
2149
2150
2151
2152
2153 assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
2154 align > offset - aligned_offset);
2155 *pnum -= offset - aligned_offset;
2156 if (*pnum > bytes) {
2157 *pnum = bytes;
2158 }
2159 if (ret & BDRV_BLOCK_OFFSET_VALID) {
2160 local_map += offset - aligned_offset;
2161 }
2162
2163 if (ret & BDRV_BLOCK_RAW) {
2164 assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
2165 ret = bdrv_co_block_status(local_file, want_zero, local_map,
2166 *pnum, pnum, &local_map, &local_file);
2167 goto out;
2168 }
2169
2170 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
2171 ret |= BDRV_BLOCK_ALLOCATED;
2172 } else if (want_zero) {
2173 if (bdrv_unallocated_blocks_are_zero(bs)) {
2174 ret |= BDRV_BLOCK_ZERO;
2175 } else if (bs->backing) {
2176 BlockDriverState *bs2 = bs->backing->bs;
2177 int64_t size2 = bdrv_getlength(bs2);
2178
2179 if (size2 >= 0 && offset >= size2) {
2180 ret |= BDRV_BLOCK_ZERO;
2181 }
2182 }
2183 }
2184
2185 if (want_zero && local_file && local_file != bs &&
2186 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
2187 (ret & BDRV_BLOCK_OFFSET_VALID)) {
2188 int64_t file_pnum;
2189 int ret2;
2190
2191 ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
2192 *pnum, &file_pnum, NULL, NULL);
2193 if (ret2 >= 0) {
2194
2195
2196
2197 if (ret2 & BDRV_BLOCK_EOF &&
2198 (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
2199
2200
2201
2202
2203
2204 ret |= BDRV_BLOCK_ZERO;
2205 } else {
2206
2207 *pnum = file_pnum;
2208 ret |= (ret2 & BDRV_BLOCK_ZERO);
2209 }
2210 }
2211 }
2212
2213out:
2214 bdrv_dec_in_flight(bs);
2215 if (ret >= 0 && offset + *pnum == total_size) {
2216 ret |= BDRV_BLOCK_EOF;
2217 }
2218early_out:
2219 if (file) {
2220 *file = local_file;
2221 }
2222 if (map) {
2223 *map = local_map;
2224 }
2225 return ret;
2226}
2227
2228static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
2229 BlockDriverState *base,
2230 bool want_zero,
2231 int64_t offset,
2232 int64_t bytes,
2233 int64_t *pnum,
2234 int64_t *map,
2235 BlockDriverState **file)
2236{
2237 BlockDriverState *p;
2238 int ret = 0;
2239 bool first = true;
2240
2241 assert(bs != base);
2242 for (p = bs; p != base; p = backing_bs(p)) {
2243 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
2244 file);
2245 if (ret < 0) {
2246 break;
2247 }
2248 if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
2249
2250
2251
2252
2253
2254
2255 *pnum = bytes;
2256 }
2257 if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
2258 break;
2259 }
2260
2261
2262 bytes = MIN(bytes, *pnum);
2263 first = false;
2264 }
2265 return ret;
2266}
2267
2268
2269static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
2270{
2271 BdrvCoBlockStatusData *data = opaque;
2272
2273 data->ret = bdrv_co_block_status_above(data->bs, data->base,
2274 data->want_zero,
2275 data->offset, data->bytes,
2276 data->pnum, data->map, data->file);
2277 data->done = true;
2278}
2279
2280
2281
2282
2283
2284
2285static int bdrv_common_block_status_above(BlockDriverState *bs,
2286 BlockDriverState *base,
2287 bool want_zero, int64_t offset,
2288 int64_t bytes, int64_t *pnum,
2289 int64_t *map,
2290 BlockDriverState **file)
2291{
2292 Coroutine *co;
2293 BdrvCoBlockStatusData data = {
2294 .bs = bs,
2295 .base = base,
2296 .want_zero = want_zero,
2297 .offset = offset,
2298 .bytes = bytes,
2299 .pnum = pnum,
2300 .map = map,
2301 .file = file,
2302 .done = false,
2303 };
2304
2305 if (qemu_in_coroutine()) {
2306
2307 bdrv_block_status_above_co_entry(&data);
2308 } else {
2309 co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
2310 bdrv_coroutine_enter(bs, co);
2311 BDRV_POLL_WHILE(bs, !data.done);
2312 }
2313 return data.ret;
2314}
2315
2316int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
2317 int64_t offset, int64_t bytes, int64_t *pnum,
2318 int64_t *map, BlockDriverState **file)
2319{
2320 return bdrv_common_block_status_above(bs, base, true, offset, bytes,
2321 pnum, map, file);
2322}
2323
2324int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
2325 int64_t *pnum, int64_t *map, BlockDriverState **file)
2326{
2327 return bdrv_block_status_above(bs, backing_bs(bs),
2328 offset, bytes, pnum, map, file);
2329}
2330
2331int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
2332 int64_t bytes, int64_t *pnum)
2333{
2334 int ret;
2335 int64_t dummy;
2336
2337 ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
2338 bytes, pnum ? pnum : &dummy, NULL,
2339 NULL);
2340 if (ret < 0) {
2341 return ret;
2342 }
2343 return !!(ret & BDRV_BLOCK_ALLOCATED);
2344}
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362int bdrv_is_allocated_above(BlockDriverState *top,
2363 BlockDriverState *base,
2364 int64_t offset, int64_t bytes, int64_t *pnum)
2365{
2366 BlockDriverState *intermediate;
2367 int ret;
2368 int64_t n = bytes;
2369
2370 intermediate = top;
2371 while (intermediate && intermediate != base) {
2372 int64_t pnum_inter;
2373 int64_t size_inter;
2374
2375 ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
2376 if (ret < 0) {
2377 return ret;
2378 }
2379 if (ret) {
2380 *pnum = pnum_inter;
2381 return 1;
2382 }
2383
2384 size_inter = bdrv_getlength(intermediate);
2385 if (size_inter < 0) {
2386 return size_inter;
2387 }
2388 if (n > pnum_inter &&
2389 (intermediate == top || offset + pnum_inter < size_inter)) {
2390 n = pnum_inter;
2391 }
2392
2393 intermediate = backing_bs(intermediate);
2394 }
2395
2396 *pnum = n;
2397 return 0;
2398}
2399
2400typedef struct BdrvVmstateCo {
2401 BlockDriverState *bs;
2402 QEMUIOVector *qiov;
2403 int64_t pos;
2404 bool is_read;
2405 int ret;
2406} BdrvVmstateCo;
2407
2408static int coroutine_fn
2409bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2410 bool is_read)
2411{
2412 BlockDriver *drv = bs->drv;
2413 int ret = -ENOTSUP;
2414
2415 bdrv_inc_in_flight(bs);
2416
2417 if (!drv) {
2418 ret = -ENOMEDIUM;
2419 } else if (drv->bdrv_load_vmstate) {
2420 if (is_read) {
2421 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
2422 } else {
2423 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
2424 }
2425 } else if (bs->file) {
2426 ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
2427 }
2428
2429 bdrv_dec_in_flight(bs);
2430 return ret;
2431}
2432
2433static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
2434{
2435 BdrvVmstateCo *co = opaque;
2436 co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
2437}
2438
2439static inline int
2440bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
2441 bool is_read)
2442{
2443 if (qemu_in_coroutine()) {
2444 return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
2445 } else {
2446 BdrvVmstateCo data = {
2447 .bs = bs,
2448 .qiov = qiov,
2449 .pos = pos,
2450 .is_read = is_read,
2451 .ret = -EINPROGRESS,
2452 };
2453 Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
2454
2455 bdrv_coroutine_enter(bs, co);
2456 BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
2457 return data.ret;
2458 }
2459}
2460
2461int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2462 int64_t pos, int size)
2463{
2464 QEMUIOVector qiov;
2465 struct iovec iov = {
2466 .iov_base = (void *) buf,
2467 .iov_len = size,
2468 };
2469 int ret;
2470
2471 qemu_iovec_init_external(&qiov, &iov, 1);
2472
2473 ret = bdrv_writev_vmstate(bs, &qiov, pos);
2474 if (ret < 0) {
2475 return ret;
2476 }
2477
2478 return size;
2479}
2480
2481int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2482{
2483 return bdrv_rw_vmstate(bs, qiov, pos, false);
2484}
2485
2486int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2487 int64_t pos, int size)
2488{
2489 QEMUIOVector qiov;
2490 struct iovec iov = {
2491 .iov_base = buf,
2492 .iov_len = size,
2493 };
2494 int ret;
2495
2496 qemu_iovec_init_external(&qiov, &iov, 1);
2497 ret = bdrv_readv_vmstate(bs, &qiov, pos);
2498 if (ret < 0) {
2499 return ret;
2500 }
2501
2502 return size;
2503}
2504
2505int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
2506{
2507 return bdrv_rw_vmstate(bs, qiov, pos, true);
2508}
2509
2510
2511
2512
2513void bdrv_aio_cancel(BlockAIOCB *acb)
2514{
2515 qemu_aio_ref(acb);
2516 bdrv_aio_cancel_async(acb);
2517 while (acb->refcnt > 1) {
2518 if (acb->aiocb_info->get_aio_context) {
2519 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
2520 } else if (acb->bs) {
2521
2522
2523
2524
2525 assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
2526 aio_poll(bdrv_get_aio_context(acb->bs), true);
2527 } else {
2528 abort();
2529 }
2530 }
2531 qemu_aio_unref(acb);
2532}
2533
2534
2535
2536
2537void bdrv_aio_cancel_async(BlockAIOCB *acb)
2538{
2539 if (acb->aiocb_info->cancel_async) {
2540 acb->aiocb_info->cancel_async(acb);
2541 }
2542}
2543
2544
2545
2546
2547typedef struct FlushCo {
2548 BlockDriverState *bs;
2549 int ret;
2550} FlushCo;
2551
2552
2553static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2554{
2555 FlushCo *rwco = opaque;
2556
2557 rwco->ret = bdrv_co_flush(rwco->bs);
2558}
2559
2560int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2561{
2562 int current_gen;
2563 int ret = 0;
2564
2565 bdrv_inc_in_flight(bs);
2566
2567 if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
2568 bdrv_is_sg(bs)) {
2569 goto early_exit;
2570 }
2571
2572 qemu_co_mutex_lock(&bs->reqs_lock);
2573 current_gen = atomic_read(&bs->write_gen);
2574
2575
2576 while (bs->active_flush_req) {
2577 qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
2578 }
2579
2580
2581 bs->active_flush_req = true;
2582 qemu_co_mutex_unlock(&bs->reqs_lock);
2583
2584
2585 if (bs->drv->bdrv_co_flush) {
2586 ret = bs->drv->bdrv_co_flush(bs);
2587 goto out;
2588 }
2589
2590
2591 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
2592 if (bs->drv->bdrv_co_flush_to_os) {
2593 ret = bs->drv->bdrv_co_flush_to_os(bs);
2594 if (ret < 0) {
2595 goto out;
2596 }
2597 }
2598
2599
2600 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2601 goto flush_parent;
2602 }
2603
2604
2605 if (bs->flushed_gen == current_gen) {
2606 goto flush_parent;
2607 }
2608
2609 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
2610 if (!bs->drv) {
2611
2612
2613 ret = -ENOMEDIUM;
2614 goto out;
2615 }
2616 if (bs->drv->bdrv_co_flush_to_disk) {
2617 ret = bs->drv->bdrv_co_flush_to_disk(bs);
2618 } else if (bs->drv->bdrv_aio_flush) {
2619 BlockAIOCB *acb;
2620 CoroutineIOCompletion co = {
2621 .coroutine = qemu_coroutine_self(),
2622 };
2623
2624 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2625 if (acb == NULL) {
2626 ret = -EIO;
2627 } else {
2628 qemu_coroutine_yield();
2629 ret = co.ret;
2630 }
2631 } else {
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643 ret = 0;
2644 }
2645
2646 if (ret < 0) {
2647 goto out;
2648 }
2649
2650
2651
2652
2653flush_parent:
2654 ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
2655out:
2656
2657 if (ret == 0) {
2658 bs->flushed_gen = current_gen;
2659 }
2660
2661 qemu_co_mutex_lock(&bs->reqs_lock);
2662 bs->active_flush_req = false;
2663
2664 qemu_co_queue_next(&bs->flush_queue);
2665 qemu_co_mutex_unlock(&bs->reqs_lock);
2666
2667early_exit:
2668 bdrv_dec_in_flight(bs);
2669 return ret;
2670}
2671
2672int bdrv_flush(BlockDriverState *bs)
2673{
2674 Coroutine *co;
2675 FlushCo flush_co = {
2676 .bs = bs,
2677 .ret = NOT_DONE,
2678 };
2679
2680 if (qemu_in_coroutine()) {
2681
2682 bdrv_flush_co_entry(&flush_co);
2683 } else {
2684 co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
2685 bdrv_coroutine_enter(bs, co);
2686 BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
2687 }
2688
2689 return flush_co.ret;
2690}
2691
2692typedef struct DiscardCo {
2693 BdrvChild *child;
2694 int64_t offset;
2695 int bytes;
2696 int ret;
2697} DiscardCo;
2698static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
2699{
2700 DiscardCo *rwco = opaque;
2701
2702 rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
2703}
2704
/*
 * Discard @bytes bytes starting at @offset on the node behind @child.
 *
 * After validation the range is split into fragments that end on multiples
 * of the discard alignment and do not exceed the driver's maximum discard
 * size; each fragment is passed to the driver's bdrv_co_pdiscard or
 * bdrv_aio_pdiscard callback.
 *
 * Returns 0 on success, which includes the cases where BDRV_O_UNMAP is not
 * set or the driver has no discard callback at all, and a negative errno
 * otherwise.  A fragment for which the driver returns -ENOTSUP is skipped
 * without failing the whole request.
 */
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;
    BlockDriverState *bs = child->bs;

    if (!bs || !bs->drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing (and report success) if BDRV_O_UNMAP is not set. */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    /* Likewise when the driver implements neither discard callback. */
    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /*
     * The request is not rounded; everything is passed down to the driver.
     * It is fragmented so that an unaligned head and an unaligned tail are
     * issued separately from the aligned middle.  head and tail are the
     * distances of the start and the end of the range from the preceding
     * multiple of align.
     */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    /* Largest fragment size: the driver limit (INT_MAX if 0), aligned down */
    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int num = bytes;

        if (head) {
            /*
             * Make a small request to get to the next alignment boundary.
             * If that piece is not a multiple of request_alignment, issue
             * only its remainder modulo request_alignment first.
             */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to end at the last aligned boundary. */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                /*
                 * Split the remainder modulo request_alignment off the
                 * tail; it is issued by a later iteration.
                 */
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }
        /* Limit the fragment size. */
        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        /*
         * bs->drv is checked again on every iteration.
         * NOTE(review): presumably it can become NULL while an earlier
         * fragment was in flight - confirm.
         */
        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            /* AIO-only driver: submit, then yield until completion. */
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        /* -ENOTSUP for a single fragment does not abort the request. */
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    /*
     * offset and bytes were modified by the loop, so the request is
     * finished with the values recorded in req.
     */
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}
2818
2819int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes)
2820{
2821 Coroutine *co;
2822 DiscardCo rwco = {
2823 .child = child,
2824 .offset = offset,
2825 .bytes = bytes,
2826 .ret = NOT_DONE,
2827 };
2828
2829 if (qemu_in_coroutine()) {
2830
2831 bdrv_pdiscard_co_entry(&rwco);
2832 } else {
2833 co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
2834 bdrv_coroutine_enter(child->bs, co);
2835 BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
2836 }
2837
2838 return rwco.ret;
2839}
2840
2841int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
2842{
2843 BlockDriver *drv = bs->drv;
2844 CoroutineIOCompletion co = {
2845 .coroutine = qemu_coroutine_self(),
2846 };
2847 BlockAIOCB *acb;
2848
2849 bdrv_inc_in_flight(bs);
2850 if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
2851 co.ret = -ENOTSUP;
2852 goto out;
2853 }
2854
2855 if (drv->bdrv_co_ioctl) {
2856 co.ret = drv->bdrv_co_ioctl(bs, req, buf);
2857 } else {
2858 acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
2859 if (!acb) {
2860 co.ret = -ENOTSUP;
2861 goto out;
2862 }
2863 qemu_coroutine_yield();
2864 }
2865out:
2866 bdrv_dec_in_flight(bs);
2867 return co.ret;
2868}
2869
2870void *qemu_blockalign(BlockDriverState *bs, size_t size)
2871{
2872 return qemu_memalign(bdrv_opt_mem_align(bs), size);
2873}
2874
2875void *qemu_blockalign0(BlockDriverState *bs, size_t size)
2876{
2877 return memset(qemu_blockalign(bs, size), 0, size);
2878}
2879
2880void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
2881{
2882 size_t align = bdrv_opt_mem_align(bs);
2883
2884
2885 assert(align > 0);
2886 if (size == 0) {
2887 size = align;
2888 }
2889
2890 return qemu_try_memalign(align, size);
2891}
2892
2893void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
2894{
2895 void *mem = qemu_try_blockalign(bs, size);
2896
2897 if (mem) {
2898 memset(mem, 0, size);
2899 }
2900
2901 return mem;
2902}
2903
2904
2905
2906
2907bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
2908{
2909 int i;
2910 size_t alignment = bdrv_min_mem_align(bs);
2911
2912 for (i = 0; i < qiov->niov; i++) {
2913 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
2914 return false;
2915 }
2916 if (qiov->iov[i].iov_len % alignment) {
2917 return false;
2918 }
2919 }
2920
2921 return true;
2922}
2923
/*
 * Register @notifier on the before_write_notifiers list of @bs by passing
 * both to notifier_with_return_list_add().
 */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
2929
2930void bdrv_io_plug(BlockDriverState *bs)
2931{
2932 BdrvChild *child;
2933
2934 QLIST_FOREACH(child, &bs->children, next) {
2935 bdrv_io_plug(child->bs);
2936 }
2937
2938 if (atomic_fetch_inc(&bs->io_plugged) == 0) {
2939 BlockDriver *drv = bs->drv;
2940 if (drv && drv->bdrv_io_plug) {
2941 drv->bdrv_io_plug(bs);
2942 }
2943 }
2944}
2945
2946void bdrv_io_unplug(BlockDriverState *bs)
2947{
2948 BdrvChild *child;
2949
2950 assert(bs->io_plugged);
2951 if (atomic_fetch_dec(&bs->io_plugged) == 1) {
2952 BlockDriver *drv = bs->drv;
2953 if (drv && drv->bdrv_io_unplug) {
2954 drv->bdrv_io_unplug(bs);
2955 }
2956 }
2957
2958 QLIST_FOREACH(child, &bs->children, next) {
2959 bdrv_io_unplug(child->bs);
2960 }
2961}
2962
2963void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
2964{
2965 BdrvChild *child;
2966
2967 if (bs->drv && bs->drv->bdrv_register_buf) {
2968 bs->drv->bdrv_register_buf(bs, host, size);
2969 }
2970 QLIST_FOREACH(child, &bs->children, next) {
2971 bdrv_register_buf(child->bs, host, size);
2972 }
2973}
2974
2975void bdrv_unregister_buf(BlockDriverState *bs, void *host)
2976{
2977 BdrvChild *child;
2978
2979 if (bs->drv && bs->drv->bdrv_unregister_buf) {
2980 bs->drv->bdrv_unregister_buf(bs, host);
2981 }
2982 QLIST_FOREACH(child, &bs->children, next) {
2983 bdrv_unregister_buf(child->bs, host);
2984 }
2985}
2986
/*
 * Common implementation behind bdrv_co_copy_range_from() and
 * bdrv_co_copy_range_to().
 *
 * Both ends of the copy are validated, then the request is handed to the
 * source driver's bdrv_co_copy_range_from callback (@recurse_src true) or
 * to the destination driver's bdrv_co_copy_range_to callback (@recurse_src
 * false), wrapped in a tracked request on the respective node.
 *
 * If @write_flags contains BDRV_REQ_ZERO_WRITE, the request is turned into
 * bdrv_co_pwrite_zeroes() on @dst and @src is not examined at all.
 *
 * Returns the callback's result, or:
 *   -ENOMEDIUM  when @dst/@src or its bs pointer is NULL,
 *   -ENOTSUP    when either driver lacks its copy-range callback or either
 *               node is encrypted,
 *   the non-zero result of bdrv_check_byte_request() for either range.
 */
static int coroutine_fn bdrv_co_copy_range_internal(
        BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
        uint64_t dst_offset, uint64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;

    if (!dst || !dst->bs) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
    if (ret) {
        return ret;
    }
    /* Zero writes need no source; handle them before looking at src. */
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
    if (ret) {
        return ret;
    }

    /* Both callbacks are required, whichever side is dispatched below. */
    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

    if (recurse_src) {
        /* Source side: tracked as a read request on src->bs. */
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING must not be set in the read flags. */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
            wait_serialising_requests(&req);
        }

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        /* Destination side: tracked as a write request on dst->bs. */
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        /* Finish is called even if prepare failed; ret carries the error. */
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}
3060
3061
3062
3063
3064
3065int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
3066 BdrvChild *dst, uint64_t dst_offset,
3067 uint64_t bytes,
3068 BdrvRequestFlags read_flags,
3069 BdrvRequestFlags write_flags)
3070{
3071 trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
3072 read_flags, write_flags);
3073 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3074 bytes, read_flags, write_flags, true);
3075}
3076
3077
3078
3079
3080
3081int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
3082 BdrvChild *dst, uint64_t dst_offset,
3083 uint64_t bytes,
3084 BdrvRequestFlags read_flags,
3085 BdrvRequestFlags write_flags)
3086{
3087 trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
3088 read_flags, write_flags);
3089 return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
3090 bytes, read_flags, write_flags, false);
3091}
3092
3093int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
3094 BdrvChild *dst, uint64_t dst_offset,
3095 uint64_t bytes, BdrvRequestFlags read_flags,
3096 BdrvRequestFlags write_flags)
3097{
3098 return bdrv_co_copy_range_from(src, src_offset,
3099 dst, dst_offset,
3100 bytes, read_flags, write_flags);
3101}
3102
3103static void bdrv_parent_cb_resize(BlockDriverState *bs)
3104{
3105 BdrvChild *c;
3106 QLIST_FOREACH(c, &bs->parents, next_parent) {
3107 if (c->role->resize) {
3108 c->role->resize(c);
3109 }
3110 }
3111}
3112
3113
3114
3115
/*
 * Resize the node behind @child to @offset bytes, using preallocation mode
 * @prealloc.
 *
 * The operation runs as a tracked BDRV_TRACKED_TRUNCATE request.  If the
 * driver has no bdrv_co_truncate callback but is a filter with a file
 * child, the truncate is forwarded to bs->file.
 *
 * Returns 0 on success and a negative errno on failure; in the failure
 * paths that call error_setg()/error_setg_errno() here, @errp is set.
 * Errors set by the driver callback or by the recursive call are passed
 * through @errp unchanged.
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
                                  PreallocMode prealloc, Error **errp)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;

    /* Without a driver there is nothing that could be resized. */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    /* new_bytes: amount the image grows by; 0 when shrinking or unchanged */
    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    /*
     * The tracked request covers the newly added area [old_size, offset)
     * when growing, and an empty range at @offset otherwise.
     */
    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /*
     * A growing truncate is marked serialising.
     * NOTE(review): presumably so that concurrent writes into the new area
     * cannot race with the driver (e.g. preallocation) - confirm.
     */
    if (new_bytes) {
        mark_request_serialising(&req, 1);
    }
    if (bs->read_only) {
        error_setg(errp, "Image is read-only");
        ret = -EACCES;
        goto out;
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    if (!drv->bdrv_co_truncate) {
        /* Filters without their own callback pass the request to bs->file */
        if (bs->file && drv->is_filter) {
            ret = bdrv_co_truncate(bs->file, offset, prealloc, errp);
            goto out;
        }
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }

    ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp);
    if (ret < 0) {
        goto out;
    }
    /* On success, take the resulting size from the refreshed sector count */
    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /*
     * The driver's truncate succeeded at this point, so the request is
     * finished with status 0 even if refresh_total_sectors() failed; that
     * failure is still reported to the caller through ret.  Paths that
     * jumped to out skip this call.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}
3202
/*
 * Argument/result bundle handed to bdrv_truncate_co_entry() through its
 * opaque pointer.
 */
typedef struct TruncateCo {
    BdrvChild *child;       /* child whose node is truncated */
    int64_t offset;         /* requested new size in bytes */
    PreallocMode prealloc;  /* preallocation mode for bdrv_co_truncate() */
    Error **errp;           /* error destination for bdrv_co_truncate() */
    int ret;                /* result; bdrv_truncate() seeds it with NOT_DONE */
} TruncateCo;
3210
3211static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
3212{
3213 TruncateCo *tco = opaque;
3214 tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc,
3215 tco->errp);
3216}
3217
3218int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
3219 Error **errp)
3220{
3221 Coroutine *co;
3222 TruncateCo tco = {
3223 .child = child,
3224 .offset = offset,
3225 .prealloc = prealloc,
3226 .errp = errp,
3227 .ret = NOT_DONE,
3228 };
3229
3230 if (qemu_in_coroutine()) {
3231
3232 bdrv_truncate_co_entry(&tco);
3233 } else {
3234 co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
3235 qemu_coroutine_enter(co);
3236 BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
3237 }
3238
3239 return tco.ret;
3240}
3241