/*
 * Block layer I/O functions
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);

void bdrv_parent_drained_begin(BlockDriverState *bs)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c->role->drained_begin) {
            c->role->drained_begin(c);
        }
    }
}

void bdrv_parent_drained_end(BlockDriverState *bs)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c->role->drained_end) {
            c->role->drained_end(c);
        }
    }
}

static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;

    /* Take some limits from the children as a default for the bs itself */
    if (bs->file) {
        bdrv_refresh_limits(bs->file->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
    } else {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = getpagesize();

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    if (bs->backing) {
        bdrv_refresh_limits(bs->backing->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

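/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */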
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    atomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = atomic_fetch_dec(&bs->copy_on_read);
    assert(old >= 1);
}

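/* Check if any requests are in-flight (including throttled requests) */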
bool bdrv_requests_pending(BlockDriverState *bs)
{
    BdrvChild *child;

    if (atomic_read(&bs->in_flight)) {
        return true;
    }

    QLIST_FOREACH(child, &bs->children, next) {
        if (bdrv_requests_pending(child->bs)) {
            return true;
        }
    }

    return false;
}

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done before reading bs->wakeup.  */
    atomic_mb_set(&data->done, true);
    bdrv_wakeup(bs);
}

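/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */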
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
{
    BdrvChild *child, *tmp;
    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
    bdrv_coroutine_enter(bs, data.co);
    BDRV_POLL_WHILE(bs, !data.done);

    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
        bdrv_drain_invoke(child->bs, begin);
    }
}

static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
{
    BdrvChild *child, *tmp;
    bool waited;

    /* Wait for drained requests to finish */
    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
        BlockDriverState *bs = child->bs;
        bool in_main_loop =
            qemu_get_current_aio_context() == qemu_get_aio_context();
        assert(bs->refcnt > 0);
        if (in_main_loop) {
            /* In case the recursive bdrv_drain_recurse processes a
             * block_job_defer_to_main_loop BH and modifies the graph,
             * let's hold a reference to bs until we are done.
             *
             * IOThread doesn't have such a BH, and it is not safe to call
             * bdrv_unref without BQL, so skip doing it there.
             */
            bdrv_ref(bs);
        }
        waited |= bdrv_drain_recurse(bs, begin);
        if (in_main_loop) {
            bdrv_unref(bs);
        }
    }

    return waited;
}

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    bdrv_dec_in_flight(bs);
    if (data->begin) {
        bdrv_drained_begin(bs);
    } else {
        bdrv_drained_end(bs);
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin)
{
    BdrvCoDrainData data;

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued from
     * qemu_co_queue_run_restart(). */

    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = qemu_coroutine_self(),
        .bs = bs,
        .done = false,
        .begin = begin,
    };
    bdrv_inc_in_flight(bs);
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                            bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true);
        return;
    }

    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
        bdrv_parent_drained_begin(bs);
    }

    bdrv_drain_invoke(bs, true);
    bdrv_drain_recurse(bs, true);
}

void bdrv_drained_end(BlockDriverState *bs)
{
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false);
        return;
    }
    assert(bs->quiesce_counter > 0);
    if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
        return;
    }

    bdrv_parent_drained_end(bs);
    bdrv_drain_invoke(bs, false);
    bdrv_drain_recurse(bs, false);
    aio_enable_external(bdrv_get_aio_context(bs));
}

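/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
 * AioContext.
 *
 * Only this BlockDriverState's AioContext is run, so in-flight requests must
 * not depend on events in other AioContexts.  In that case, use
 * bdrv_drain_all() instead.
 */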
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
    assert(qemu_in_coroutine());
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

void bdrv_drain(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

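/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */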
void bdrv_drain_all_begin(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool waited = true;
    BlockDriverState *bs;
    BdrvNextIterator it;
    GSList *aio_ctxs = NULL, *ctx;

    block_job_pause_all();

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_parent_drained_begin(bs);
        aio_disable_external(aio_context);
        bdrv_drain_invoke(bs, true);
        aio_context_release(aio_context);

        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
        }
    }

    /* Note that completion of an asynchronous I/O operation can trigger any
     * number of other I/O operations on other devices---for example a
     * coroutine can submit an I/O request to another device in response to
     * request completion.  Therefore we must keep looping until there was no
     * more activity rather than simply draining each device independently.
     */
    while (waited) {
        waited = false;

        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
            AioContext *aio_context = ctx->data;

            aio_context_acquire(aio_context);
            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                if (aio_context == bdrv_get_aio_context(bs)) {
                    waited |= bdrv_drain_recurse(bs, true);
                }
            }
            aio_context_release(aio_context);
        }
    }

    g_slist_free(aio_ctxs);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        aio_enable_external(aio_context);
        bdrv_parent_drained_end(bs);
        bdrv_drain_invoke(bs, false);
        bdrv_drain_recurse(bs, false);
        aio_context_release(aio_context);
    }

    block_job_resume_all();
}

void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

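/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */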
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        atomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

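/**
 * Add an active request to the tracked requests list
 */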
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes,
                                  enum BdrvTrackedRequestType type)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                               - overlap_offset;

    if (!req->serialising) {
        atomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

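/**
 * Round a region to cluster boundaries
 */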
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}

static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, unsigned int bytes)
{
    /*        aaaa   bbbb */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}

void bdrv_inc_in_flight(BlockDriverState *bs)
{
    atomic_inc(&bs->in_flight);
}

static void dummy_bh_cb(void *opaque)
{
}

void bdrv_wakeup(BlockDriverState *bs)
{
    /* The barrier (or an atomic op) is in the caller.  */
    if (atomic_read(&bs->wakeup)) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
    }
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    atomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    if (!atomic_read(&bs->serialising_in_flight)) {
        return false;
    }

    do {
        retry = false;
        qemu_co_mutex_lock(&bs->reqs_lock);
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
        qemu_co_mutex_unlock(&bs->reqs_lock);
    } while (retry);

    return waited;
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    return 0;
}

typedef struct RwCo {
    BdrvChild *child;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
    BdrvRequestFlags flags;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
                                   rwco->qiov->size, rwco->qiov,
                                   rwco->flags);
    } else {
        rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
                                    rwco->qiov->size, rwco->qiov,
                                    rwco->flags);
    }
}

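/*
 * Process a vectored synchronous request using coroutines
 */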
static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .child = child,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
}

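/*
 * Process a synchronous request using coroutines
 */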
static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
                        &qiov, is_write, flags);
}

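/* return < 0 if error. See bdrv_write() for the return codes */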
int bdrv_read(BdrvChild *child, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
}

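/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/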
int bdrv_write(BdrvChild *child, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}

int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
                       int bytes, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = NULL,
        .iov_len = bytes,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(child, offset, &qiov, true,
                        BDRV_REQ_ZERO_WRITE | flags);
}

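/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */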
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            error_report("error getting block status at offset %" PRId64 ": %s",
                         offset, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            error_report("error writing zeroes at offset %" PRId64 ": %s",
                         offset, strerror(-ret));
            return ret;
        }
        offset += bytes;
    }
}

int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_preadv(child, offset, &qiov);
}

int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *) buf,
        .iov_len = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_pwritev(child, offset, &qiov);
}

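/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */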
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
                     const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(child, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           uint64_t offset, uint64_t bytes,
                                           QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;

    assert(!(flags & ~BDRV_REQ_MASK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv) {
        return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);

    if (drv->bdrv_co_readv) {
        return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    }
}

static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            uint64_t offset, uint64_t bytes,
                                            QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    int ret;

    assert(!(flags & ~BDRV_REQ_MASK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                   flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);

    if (drv->bdrv_co_writev_flags) {
        ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
                                        flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
    } else if (drv->bdrv_co_writev) {
        assert(!bs->supported_write_flags);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    } else {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    }

emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
        ret = bdrv_co_flush(bs);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                               uint64_t bytes, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_pwritev_compressed) {
        return -ENOTSUP;
    }

    return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}

static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
    BlockDriverState *bs = child->bs;

    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector local_qiov;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    size_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    unsigned int progress = 0;

    if (!drv) {
        return -ENOMEDIUM;
    }

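    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
     * is one reason we loop rather than doing it all at once.
     */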
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    bounce_buffer = qemu_try_blockalign(bs,
                                        MIN(MIN(max_transfer, cluster_bytes),
                                            MAX_BOUNCE_BUFFER));
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    while (cluster_bytes) {
        int64_t pnum;

        ret = bdrv_is_allocated(bs, cluster_offset,
                                MIN(cluster_bytes, max_transfer), &pnum);
        if (ret < 0) {
            /* If the allocation query fails, be conservative and treat the
             * range as unallocated so that it is read through the bounce
             * buffer below. */
            pnum = MIN(cluster_bytes, max_transfer);
        }

        assert(skip_bytes < pnum);

        if (ret <= 0) {
            /* Must copy-on-read; use the bounce buffer */
            iov.iov_base = bounce_buffer;
            iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            qemu_iovec_init_external(&local_qiov, &iov, 1);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * of that data?
                 */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
                                pnum - skip_bytes);
        } else {
            /* Read directly into the destination */
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
            ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
                                     &local_qiov, 0);
            qemu_iovec_destroy(&local_qiov);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

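/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */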
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    uint64_t bytes_remaining = bytes;
    int max_transfer;

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /* TODO: We would need a per-BDS .supported_read_flags and
     * potential fallback support, if we ever implement any read flags
     * to pass through to drivers.  For now, there aren't any
     * passthrough flags.  */
    assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
        wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
        goto out;
    }

    while (bytes_remaining) {
        int num;

        if (max_bytes) {
            QEMUIOVector local_qiov;

            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, &local_qiov, 0);
            max_bytes -= num;
            qemu_iovec_destroy(&local_qiov);
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
                                    bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}

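/*
 * Handle a read request in coroutine context
 */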
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    trace_bdrv_co_preadv(child->bs, offset, bytes, flags);

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}

static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS,
                          nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
                               int nb_sectors, QEMUIOVector *qiov)
{
    return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}

static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    if (!drv) {
        return -ENOMEDIUM;
    }

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwrite() to do a fallback
                 * flush on each chunk; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            iov.iov_len = num;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(iov.iov_base);
    return ret;
}

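/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */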
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    uint64_t bytes_remaining = bytes;
    int max_transfer;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(child->perm & BLK_PERM_WRITE);
    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            QEMUIOVector local_qiov;
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, &local_qiov, local_flags);
            qemu_iovec_destroy(&local_qiov);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    atomic_inc(&bs->write_gen);
    bdrv_set_dirty(bs, offset, bytes);

    stat64_max(&bs->wr_highest_offset, offset + bytes);

    if (ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, end_sector);
        ret = 0;
    }

    return ret;
}

static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                unsigned int bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    struct iovec iov;
    uint64_t align = bs->bl.request_alignment;
    unsigned int head_padding_bytes, tail_padding_bytes;
    int ret = 0;

    head_padding_bytes = offset & (align - 1);
    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);

    assert(flags & BDRV_REQ_ZERO_WRITE);
    if (head_padding_bytes || tail_padding_bytes) {
        buf = qemu_blockalign(bs, align);
        iov = (struct iovec) {
            .iov_base = buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&local_qiov, &iov, 1);
    }
    if (head_padding_bytes) {
        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);

        /* RMW the unaligned part before head. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
                                   align, &local_qiov,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
        if (ret < 0) {
            goto fail;
        }
        offset += zero_bytes;
        bytes -= zero_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        uint64_t aligned_bytes = bytes & ~(align - 1);
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, flags);
        if (ret < 0) {
            goto fail;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == tail_padding_bytes + bytes);
        /* RMW the unaligned part after tail. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        memset(buf, 0, bytes);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
    }
fail:
    qemu_vfree(buf);
    return ret;
}

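/*
 * Handle a write request in coroutine context
 */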
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EPERM;
    }
    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW
     * cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (!qiov) {
        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base = head_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);

        /* We have read the tail already if the request is smaller
         * than one aligned block.
         */
        if (bytes < align) {
            qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
            bytes = align;
        }
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base = tail_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
                                  align, align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);
out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS,
                           nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
                                int nb_sectors, QEMUIOVector *qiov)
{
    return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                       int bytes, BdrvRequestFlags flags)
{
    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
}

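/*
 * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
 */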
int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;
    int result = 0;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}

typedef struct BdrvCoBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    bool want_zero;
    int64_t offset;
    int64_t bytes;
    int64_t *pnum;
    int64_t *map;
    BlockDriverState **file;
    int ret;
    bool done;
} BdrvCoBlockStatusData;

int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
                                                        int64_t sector_num,
                                                        int nb_sectors,
                                                        int *pnum,
                                                        BlockDriverState **file)
{
    assert(bs->file && bs->file->bs);
    *pnum = nb_sectors;
    *file = bs->file->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
}

int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
                                                           int64_t sector_num,
                                                           int nb_sectors,
                                                           int *pnum,
                                                           BlockDriverState **file)
{
    assert(bs->backing && bs->backing->bs);
    *pnum = nb_sectors;
    *file = bs->backing->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
}

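/*
 * Returns the allocation status of the specified bytes.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their bytes are reported as allocated.
 *
 * If 'want_zero' is true, the caller is querying for mapping
 * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
 * _ZERO where possible; otherwise, the result favors larger 'pnum',
 * with a focus on accurate BDRV_BLOCK_ALLOCATED.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the max value 'pnum' should be set to.  If bytes goes
 * beyond the end of the disk image it will be clamped; if 'pnum' is set to
 * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are easily known to be in the
 * same allocated/unallocated state.  Note that a second call starting
 * at the original offset plus returned pnum may have the same status.
 *
 * Returns negative errno on failure.  Otherwise, if the
 * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
 * set to the host mapping and BDS corresponding to the guest offset.
 */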
static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
                                             bool want_zero,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum, int64_t *map,
                                             BlockDriverState **file)
{
    int64_t total_size;
    int64_t n; /* bytes */
    int ret;
    int64_t local_map = 0;
    BlockDriverState *local_file = NULL;
    int64_t aligned_offset, aligned_bytes;
    uint32_t align;

    assert(pnum);
    *pnum = 0;
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        goto early_out;
    }

    if (offset >= total_size) {
        ret = BDRV_BLOCK_EOF;
        goto early_out;
    }
    if (!bytes) {
        ret = 0;
        goto early_out;
    }

    n = total_size - offset;
    if (n < bytes) {
        bytes = n;
    }

    /* Must be non-NULL or bdrv_getlength() would have failed */
    assert(bs->drv);
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
    /* TODO: until we have a byte-based driver callback, we also have to
     * round out to sectors, even if that is bigger than request_alignment */
    align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    {
        int count; /* sectors */
        int64_t longret;

        assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
                               BDRV_SECTOR_SIZE));
        /* The driver callback is still sector-based: convert the
         * byte-based request into sectors here, and the sector-based
         * answer back into bytes below. */
        longret = bs->drv->bdrv_co_get_block_status(
            bs, aligned_offset >> BDRV_SECTOR_BITS,
            MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count,
            &local_file);
        if (longret < 0) {
            assert(INT_MIN <= longret);
            ret = longret;
            goto out;
        }
        if (longret & BDRV_BLOCK_OFFSET_VALID) {
            local_map = longret & BDRV_BLOCK_OFFSET_MASK;
        }
        ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
        *pnum = count * BDRV_SECTOR_SIZE;
    }

    /*
     * The driver's result must be a multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
    assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset);
    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (want_zero) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing) {
            BlockDriverState *bs2 = bs->backing->bs;
            int64_t size2 = bdrv_getlength(bs2);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
                                                   BlockDriverState *base,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file)
{
    BlockDriverState *p;
    int ret = 0;
    bool first = true;

    assert(bs != base);
    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        if (ret < 0) {
            break;
        }
        if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read
             * zeroes, but we can only widen the result to the
             * unallocated length we learned from an earlier
             * iteration.
             */
            *pnum = bytes;
        }
        if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }
        /* [offset, pnum] unallocated on this layer, which could be only
         * the first part of [offset, bytes].  */
        bytes = MIN(bytes, *pnum);
        first = false;
    }
    return ret;
}

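/* Coroutine wrapper for bdrv_block_status_above() */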
static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
{
    BdrvCoBlockStatusData *data = opaque;

    data->ret = bdrv_co_block_status_above(data->bs, data->base,
                                           data->want_zero,
                                           data->offset, data->bytes,
                                           data->pnum, data->map, data->file);
    data->done = true;
}

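/*
 * Synchronous wrapper around bdrv_co_block_status_above().
 *
 * See bdrv_co_block_status_above() for details.
 */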
static int bdrv_common_block_status_above(BlockDriverState *bs,
                                          BlockDriverState *base,
                                          bool want_zero, int64_t offset,
                                          int64_t bytes, int64_t *pnum,
                                          int64_t *map,
                                          BlockDriverState **file)
{
    Coroutine *co;
    BdrvCoBlockStatusData data = {
        .bs = bs,
        .base = base,
        .want_zero = want_zero,
        .offset = offset,
        .bytes = bytes,
        .pnum = pnum,
        .map = map,
        .file = file,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_block_status_above_co_entry(&data);
    } else {
        co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, !data.done);
    }
    return data.ret;
}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    return bdrv_common_block_status_above(bs, base, true, offset, bytes,
                                          pnum, map, file);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    return bdrv_block_status_above(bs, backing_bs(bs),
                                   offset, bytes, pnum, map, file);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int ret;
    int64_t dummy;

    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}

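/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if (a prefix of) the given range is allocated in any image
 * between BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * offset is allocated in any image of the chain.  Return false otherwise,
 * or negative errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */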
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum)
{
    BlockDriverState *intermediate;
    int ret;
    int64_t n = bytes;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int64_t pnum_inter;
        int64_t size_inter;

        ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
        if (ret < 0) {
            return ret;
        }
        if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        size_inter = bdrv_getlength(intermediate);
        if (size_inter < 0) {
            return size_inter;
        }
        if (n > pnum_inter &&
            (intermediate == top || offset + pnum_inter < size_inter)) {
            n = pnum_inter;
        }

        intermediate = backing_bs(intermediate);
    }

    *pnum = n;
    return 0;
}

typedef struct BdrvVmstateCo {
    BlockDriverState *bs;
    QEMUIOVector *qiov;
    int64_t pos;
    bool is_read;
    int ret;
} BdrvVmstateCo;

static int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                   bool is_read)
{
    BlockDriver *drv = bs->drv;
    int ret = -ENOTSUP;

    bdrv_inc_in_flight(bs);

    if (!drv) {
        ret = -ENOMEDIUM;
    } else if (drv->bdrv_load_vmstate) {
        if (is_read) {
            ret = drv->bdrv_load_vmstate(bs, qiov, pos);
        } else {
            ret = drv->bdrv_save_vmstate(bs, qiov, pos);
        }
    } else if (bs->file) {
        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
    }

    bdrv_dec_in_flight(bs);
    return ret;
}

static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
    BdrvVmstateCo *co = opaque;
    co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}

static inline int
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                bool is_read)
{
    if (qemu_in_coroutine()) {
        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
    } else {
        BdrvVmstateCo data = {
            .bs = bs,
            .qiov = qiov,
            .pos = pos,
            .is_read = is_read,
            .ret = -EINPROGRESS,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
        return data.ret;
    }
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *) buf,
        .iov_len = size,
    };
    int ret;

    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = bdrv_writev_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, false);
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = buf,
        .iov_len = size,
    };
    int ret;

    qemu_iovec_init_external(&qiov, &iov, 1);
    ret = bdrv_readv_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, true);
}

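/**************************************************************/
/* async I/Os */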
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

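/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */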
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

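/**************************************************************/
/* Coroutine block device emulation */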
typedef struct FlushCo {
    BlockDriverState *bs;
    int ret;
} FlushCo;

static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    FlushCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int current_gen;
    int ret = 0;

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = atomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    FlushCo flush_co = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&flush_co);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

    return flush_co.ret;
}

typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t offset;
    int bytes;
    int ret;
} DiscardCo;

static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes);
}

int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
                                  int bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    } else if (bs->read_only) {
        return -EPERM;
    }
    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned sector.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    atomic_inc(&bs->write_gen);
    bdrv_set_dirty(bs, req.offset, req.bytes);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
    }

    return rwco.ret;
}

int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}

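/*
 * Check if all memory in this vector is sector aligned.
 */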
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_min_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}

void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}

void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (atomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;

    assert(bs->io_plugged);
    if (atomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}