/*
 * Block layer I/O functions
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

/* Sentinel for a synchronous request that has not completed yet */
#define NOT_DONE 0x7fffffff

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);

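/* Propagate a drained section to all parents of @bs: each parent's
 * BdrvChildRole drained_begin/drained_end callback (if implemented) is
 * invoked so that the parent stops or resumes submitting requests. */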
void bdrv_parent_drained_begin(BlockDriverState *bs)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c->role->drained_begin) {
            c->role->drained_begin(c);
        }
    }
}

void bdrv_parent_drained_end(BlockDriverState *bs)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c->role->drained_end) {
            c->role->drained_end(c);
        }
    }
}

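/* Merge a child's limits into @dst: maximum transfer sizes and iov counts
 * take the stricter (smaller non-zero) value, while optimal sizes and
 * alignment requirements take the larger one. */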
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}

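/* Recompute bs->bl from scratch: start from the defaults, merge in the
 * limits of bs->file and bs->backing (refreshing them first), and finally
 * let the driver override the result via its own callback. */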
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
    } else {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = getpagesize();

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    if (bs->backing) {
        bdrv_refresh_limits(bs->backing->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/* The copy-on-read flag is actually a reference count, so multiple users may
 * enable copy on read independently; it stays enabled until the last user
 * disables it again. */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    atomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = atomic_fetch_dec(&bs->copy_on_read);
    assert(old >= 1);
}

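/* Return true if @bs or any of its children has requests in flight. */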
bool bdrv_requests_pending(BlockDriverState *bs)
{
    BdrvChild *child;

    if (atomic_read(&bs->in_flight)) {
        return true;
    }

    QLIST_FOREACH(child, &bs->children, next) {
        if (bdrv_requests_pending(child->bs)) {
            return true;
        }
    }

    return false;
}

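/* Context passed between a drain caller and the coroutine or bottom half
 * that performs the actual work on its behalf. */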
typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done before reading bs->wakeup. */
    atomic_mb_set(&data->done, true);
    bdrv_wakeup(bs);
}

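/* Run the driver's callback for beginning/ending a drained section, if the
 * driver implements one, and wait for it to complete. */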
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
{
    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
    bdrv_coroutine_enter(bs, data.co);
    BDRV_POLL_WHILE(bs, !data.done);
}

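/* Drain @bs and all of its children. Returns true if any progress was made,
 * i.e. if this function had to wait for requests to complete. */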
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
{
    BdrvChild *child, *tmp;
    bool waited;

    /* Ensure any pending metadata writes are submitted to bs->file. */
    bdrv_drain_invoke(bs, begin);

    /* Wait for drained requests to finish */
    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
        BlockDriverState *bs = child->bs;
        bool in_main_loop =
            qemu_get_current_aio_context() == qemu_get_aio_context();
        assert(bs->refcnt > 0);
        if (in_main_loop) {
            /* In case the recursive bdrv_drain_recurse processes a
             * block_job_defer_to_main_loop BH and modifies the graph,
             * let's hold a reference to bs until we are done.
             *
             * IOThread doesn't have such a BH, and it is not safe to call
             * bdrv_unref without BQL, so skip doing it there.
             */
            bdrv_ref(bs);
        }
        waited |= bdrv_drain_recurse(bs, begin);
        if (in_main_loop) {
            bdrv_unref(bs);
        }
    }

    return waited;
}

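/* Bottom half that performs the drained_begin/end on behalf of a coroutine
 * caller, then wakes that coroutine up again. */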
static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    bdrv_dec_in_flight(bs);
    if (data->begin) {
        bdrv_drained_begin(bs);
    } else {
        bdrv_drained_end(bs);
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin)
{
    BdrvCoDrainData data;

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields
     * first, so that other queued coroutines get a chance to run. */
    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = qemu_coroutine_self(),
        .bs = bs,
        .done = false,
        .begin = begin,
    };
    bdrv_inc_in_flight(bs);
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                            bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or
     * a timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);
}

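/* Begin a quiesced section: disable external events, notify all parents and
 * drain requests on @bs. Coroutine callers are bounced to a bottom half
 * because the polling done while draining must not happen from coroutine
 * context. Ended by a matching bdrv_drained_end(). */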
void bdrv_drained_begin(BlockDriverState *bs)
{
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true);
        return;
    }

    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
        bdrv_parent_drained_begin(bs);
    }

    bdrv_drain_recurse(bs, true);
}

void bdrv_drained_end(BlockDriverState *bs)
{
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false);
        return;
    }
    assert(bs->quiesce_counter > 0);
    if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
        return;
    }

    bdrv_parent_drained_end(bs);
    bdrv_drain_recurse(bs, false);
    aio_enable_external(bdrv_get_aio_context(bs));
}

/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block drivers' internal I/O until the next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the
 * BlockDriverState's AioContext.
 *
 * Only this BlockDriverState's AioContext is run, so in-flight requests must
 * not depend on events in other AioContexts.  In that case, use
 * bdrv_drain_all() instead.
 */
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
    assert(qemu_in_coroutine());
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

void bdrv_drain(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

/*
 * Wait for pending requests to complete across all BlockDriverStates.
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
void bdrv_drain_all_begin(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool waited = true;
    BlockDriverState *bs;
    BdrvNextIterator it;
    GSList *aio_ctxs = NULL, *ctx;

    block_job_pause_all();

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_parent_drained_begin(bs);
        aio_disable_external(aio_context);
        aio_context_release(aio_context);

        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
        }
    }

    /* Note that completion of an asynchronous I/O operation can trigger any
     * number of other I/O operations on other devices---for example a
     * coroutine can submit an I/O request to another device in response to
     * request completion.  Therefore we must keep looping until there was no
     * more activity.
     */
    while (waited) {
        waited = false;

        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
            AioContext *aio_context = ctx->data;

            aio_context_acquire(aio_context);
            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                if (aio_context == bdrv_get_aio_context(bs)) {
                    waited |= bdrv_drain_recurse(bs, true);
                }
            }
            aio_context_release(aio_context);
        }
    }

    g_slist_free(aio_ctxs);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        aio_enable_external(aio_context);
        bdrv_parent_drained_end(bs);
        bdrv_drain_recurse(bs, false);
        aio_context_release(aio_context);
    }

    block_job_resume_all();
}

void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        atomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes,
                                  enum BdrvTrackedRequestType type)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset         = offset,
        .bytes          = bytes,
        .type           = type,
        .co             = qemu_coroutine_self(),
        .serialising    = false,
        .overlap_offset = offset,
        .overlap_bytes  = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

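/* Mark @req as serialising and widen its overlap window to @align
 * boundaries, so that overlapping requests wait for it to finish. */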
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                               - overlap_offset;

    if (!req->serialising) {
        atomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}

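/* Return the cluster size of @bs, falling back to the request alignment if
 * the driver does not report one. */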
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, unsigned int bytes)
{
    /*        aaaa   bbbb */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}

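/* Every request increments bs->in_flight for its duration; draining code
 * polls with BDRV_POLL_WHILE() until the counter drops back to zero. */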
void bdrv_inc_in_flight(BlockDriverState *bs)
{
    atomic_inc(&bs->in_flight);
}

static void dummy_bh_cb(void *opaque)
{
}

void bdrv_wakeup(BlockDriverState *bs)
{
    /* If someone is polling with BDRV_POLL_WHILE() (which sets bs->wakeup),
     * schedule a dummy BH so the main loop re-evaluates its condition. */
    if (atomic_read(&bs->wakeup)) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
    }
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    atomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

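/* Block until no serialising request overlaps with @self any more. Returns
 * true if this coroutine actually had to wait. */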
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    if (!atomic_read(&bs->serialising_in_flight)) {
        return false;
    }

    do {
        retry = false;
        qemu_co_mutex_lock(&bs->reqs_lock);
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
        qemu_co_mutex_unlock(&bs->reqs_lock);
    } while (retry);

    return waited;
}

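/* Basic sanity checks shared by all byte-based requests: reject oversized
 * and negative-offset requests, and fail if no medium is inserted. */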
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    return 0;
}

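/* Argument bundle for running a read/write through a coroutine on behalf of
 * synchronous callers. */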
typedef struct RwCo {
    BdrvChild *child;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
    BdrvRequestFlags flags;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
                                   rwco->qiov->size, rwco->qiov,
                                   rwco->flags);
    } else {
        rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
                                    rwco->qiov->size, rwco->qiov,
                                    rwco->flags);
    }
}

/*
 * Process a vectored synchronous request using coroutines
 */
static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .child = child,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
                        &qiov, is_write, flags);
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BdrvChild *child, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
}

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BdrvChild *child, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}

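/* Write zeroes to a byte range by issuing a write with a NULL buffer and
 * BDRV_REQ_ZERO_WRITE set; @flags may add e.g. BDRV_REQ_MAY_UNMAP. */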
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
                       int bytes, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = NULL,
        .iov_len = bytes,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(child, offset, &qiov, true,
                        BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            error_report("error getting block status at offset %" PRId64 ": %s",
                         offset, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            error_report("error writing zeroes at offset %" PRId64 ": %s",
                         offset, strerror(-ret));
            return ret;
        }
        offset += bytes;
    }
}

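/* Synchronous vectored read; returns the number of bytes read (qiov->size)
 * on success, or a negative errno on failure. The write-side helpers below
 * follow the same convention. */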
int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_preadv(child, offset, &qiov);
}

int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(child, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *) buf,
        .iov_len = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_pwritev(child, offset, &qiov);
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * write (i.e. writes before this one have completed before this write).
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
                     const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(child, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

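/* Glue for emulating coroutine-style I/O on top of callback-based AIO:
 * the completion callback stores the result and wakes the coroutine. */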
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

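/* Dispatch an aligned read to the driver, preferring the byte-based
 * .bdrv_co_preadv interface and falling back to sector-based .bdrv_co_readv
 * or the legacy AIO interface. */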
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           uint64_t offset, uint64_t bytes,
                                           QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;

    assert(!(flags & ~BDRV_REQ_MASK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv) {
        return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);

    if (drv->bdrv_co_readv) {
        return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    }
}

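/* Dispatch an aligned write to the driver. Flags the driver does not
 * support are stripped before the call and emulated afterwards (currently
 * only BDRV_REQ_FUA, which is emulated with a full flush). */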
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            uint64_t offset, uint64_t bytes,
                                            QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    int ret;

    assert(!(flags & ~BDRV_REQ_MASK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                   flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);

    if (drv->bdrv_co_writev_flags) {
        ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
                                        flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
    } else if (drv->bdrv_co_writev) {
        assert(!bs->supported_write_flags);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    } else {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    }

emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
        ret = bdrv_co_flush(bs);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                               uint64_t bytes, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_pwritev_compressed) {
        return -ENOTSUP;
    }

    return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}

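/* Copy-on-read: serve the read and, for any unallocated parts, write the
 * data back through this layer so that future reads no longer hit the
 * backing file. The whole containing cluster range is processed. */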
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
    BlockDriverState *bs = child->bs;

    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector local_qiov;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    size_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    unsigned int progress = 0;

    if (!drv) {
        return -ENOMEDIUM;
    }

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
     * is one reason we loop rather than doing it all at once.
     */
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    bounce_buffer = qemu_try_blockalign(bs,
                                        MIN(MIN(max_transfer, cluster_bytes),
                                            MAX_BOUNCE_BUFFER));
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    while (cluster_bytes) {
        int64_t pnum;

        ret = bdrv_is_allocated(bs, cluster_offset,
                                MIN(cluster_bytes, max_transfer), &pnum);
        if (ret < 0) {
            /* Safe to treat errors in querying allocation as if
             * unallocated; we'll probably fail again soon if we
             * really are unallocated, but we can at least try to
             * read */
            pnum = MIN(cluster_bytes, max_transfer);
        }

        assert(skip_bytes < pnum);

        if (ret <= 0) {
            /* Must copy-on-read; use the bounce buffer */
            iov.iov_base = bounce_buffer;
            iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            qemu_iovec_init_external(&local_qiov, &iov, 1);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * of image? */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
            } else {
                /* This does not change the data on the medium, and it is
                 * not necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
                                pnum - skip_bytes);
        } else {
            /* Read directly into the caller's buffer */
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
            ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
                                     &local_qiov, 0);
            qemu_iovec_destroy(&local_qiov);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any serialisation against overlapping requests also happens here.
 */
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    uint64_t bytes_remaining = bytes;
    int max_transfer;

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /* TODO: We would need a per-BDS .supported_read_flags and potential
     * fallback support, if we ever implement any read flags to pass through
     * to drivers.  For now, there aren't any passthrough flags.  */
    assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
        wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
        goto out;
    }

    while (bytes_remaining) {
        int num;

        if (max_bytes) {
            QEMUIOVector local_qiov;

            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, &local_qiov, 0);
            max_bytes -= num;
            qemu_iovec_destroy(&local_qiov);
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
                                    bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}

/*
 * Handle a read request in coroutine context
 */
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    trace_bdrv_co_preadv(child->bs, offset, bytes, flags);

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}

static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS,
                          nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
                               int nb_sectors, QEMUIOVector *qiov)
{
    return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}

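/* Write zeroes honoring the device's alignment and transfer limits: the
 * unaligned head and tail are written separately, the efficient
 * .bdrv_co_pwrite_zeroes callback is tried first, and a zeroed bounce
 * buffer is used as a fallback (with FUA emulated by a final flush). */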
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    if (!drv) {
        return -ENOMEDIUM;
    }

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush on each chunk; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            iov.iov_len = num;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(iov.iov_base);
    return ret;
}

/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    uint64_t bytes_remaining = bytes;
    int max_transfer;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(child->perm & BLK_PERM_WRITE);
    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            QEMUIOVector local_qiov;
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, &local_qiov, local_flags);
            qemu_iovec_destroy(&local_qiov);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    atomic_inc(&bs->write_gen);
    bdrv_set_dirty(bs, offset, bytes);

    stat64_max(&bs->wr_highest_offset, offset + bytes);

    if (ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, end_sector);
        ret = 0;
    }

    return ret;
}

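/* Zero writes that are not aligned to the request alignment: pad the head
 * and tail with read-modify-write cycles and write zeroes for the aligned
 * middle part. */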
static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                unsigned int bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    struct iovec iov;
    uint64_t align = bs->bl.request_alignment;
    unsigned int head_padding_bytes, tail_padding_bytes;
    int ret = 0;

    head_padding_bytes = offset & (align - 1);
    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);

    assert(flags & BDRV_REQ_ZERO_WRITE);
    if (head_padding_bytes || tail_padding_bytes) {
        buf = qemu_blockalign(bs, align);
        iov = (struct iovec) {
            .iov_base   = buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&local_qiov, &iov, 1);
    }
    if (head_padding_bytes) {
        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);

        /* RMW the unaligned part before head. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
                                   align, &local_qiov,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
        if (ret < 0) {
            goto fail;
        }
        offset += zero_bytes;
        bytes -= zero_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        uint64_t aligned_bytes = bytes & ~(align - 1);
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, flags);
        if (ret < 0) {
            goto fail;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == tail_padding_bytes + bytes);
        /* RMW the unaligned part after tail. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        memset(buf, 0, bytes);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
    }
fail:
    qemu_vfree(buf);
    return ret;
}

/*
 * Handle a write request in coroutine context
 */
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EPERM;
    }
    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (!qiov) {
        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);

        /* We have read the tail already if the request is smaller
         * than one aligned block.
         */
        if (bytes < align) {
            qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
            bytes = align;
        }
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
                                  align, align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);
out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS,
                           nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
                                int nb_sectors, QEMUIOVector *qiov)
{
    return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
}

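/* Zero a byte range in coroutine context. BDRV_REQ_MAY_UNMAP is dropped if
 * the image was not opened with BDRV_O_UNMAP. */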
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                       int bytes, BdrvRequestFlags flags)
{
    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
 */
int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;
    int result = 0;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}

typedef struct BdrvCoBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    bool want_zero;
    int64_t offset;
    int64_t bytes;
    int64_t *pnum;
    int64_t *map;
    BlockDriverState **file;
    int ret;
    bool done;
} BdrvCoBlockStatusData;

int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
                                                        int64_t sector_num,
                                                        int nb_sectors,
                                                        int *pnum,
                                                        BlockDriverState **file)
{
    assert(bs->file && bs->file->bs);
    *pnum = nb_sectors;
    *file = bs->file->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
}

int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
                                                           int64_t sector_num,
                                                           int nb_sectors,
                                                           int *pnum,
                                                           BlockDriverState **file)
{
    assert(bs->backing && bs->backing->bs);
    *pnum = nb_sectors;
    *file = bs->backing->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
}

/*
 * Returns the allocation status of the specified bytes.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their bytes are reported as allocated.
 *
 * If 'want_zero' is true, the caller is querying for mapping purposes,
 * with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA and _ZERO where
 * possible; otherwise, the result favors larger 'pnum', with a focus on
 * accurate BDRV_BLOCK_ALLOCATED.
 *
 * If 'offset' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'bytes' is the max value 'pnum' should be set to.  If bytes goes beyond
 * the end of the disk image it will be clamped; if 'pnum' is set to the
 * end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are easily known to be in the same
 * allocated/unallocated state.  Note that a second call starting at the
 * original offset plus returned pnum may have the same status.
 *
 * Returns negative errno on failure.  Otherwise, if the
 * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
 * set to the host mapping and BDS corresponding to the guest offset.
 */
static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
                                             bool want_zero,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum, int64_t *map,
                                             BlockDriverState **file)
{
    int64_t total_size;
    int64_t n;
    int ret;
    int64_t local_map = 0;
    BlockDriverState *local_file = NULL;
    int64_t aligned_offset, aligned_bytes;
    uint32_t align;

    assert(pnum);
    *pnum = 0;
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        goto early_out;
    }

    if (offset >= total_size) {
        ret = BDRV_BLOCK_EOF;
        goto early_out;
    }
    if (!bytes) {
        ret = 0;
        goto early_out;
    }

    n = total_size - offset;
    if (n < bytes) {
        bytes = n;
    }

    /* Must be non-NULL or bdrv_getlength() would have failed */
    assert(bs->drv);
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
    align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    {
        int count; /* sectors */
        int64_t longret;

        assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
                               BDRV_SECTOR_SIZE));
        /* The legacy callback is sector-based and reports at most INT_MAX
         * bytes at a time; clamp the request accordingly. */
        longret = bs->drv->bdrv_co_get_block_status(
            bs, aligned_offset >> BDRV_SECTOR_BITS,
            MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count,
            &local_file);
        if (longret < 0) {
            assert(INT_MIN <= longret);
            ret = longret;
            goto out;
        }
        if (longret & BDRV_BLOCK_OFFSET_VALID) {
            local_map = longret & BDRV_BLOCK_OFFSET_MASK;
        }
        ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
        *pnum = count * BDRV_SECTOR_SIZE;
    }

    /*
     * The driver's result must be a multiple of request_alignment.
     * Clamp pnum and adjust map to the original request.
     */
    assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset);
    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (want_zero) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing) {
            BlockDriverState *bs2 = bs->backing->bs;
            int64_t size2 = bdrv_getlength(bs2);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
                                                   BlockDriverState *base,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file)
{
    BlockDriverState *p;
    int ret = 0;
    bool first = true;

    assert(bs != base);
    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        if (ret < 0) {
            break;
        }
        if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read
             * zeroes, but we can only widen the result to the
             * unallocated length we learned from an earlier
             * iteration.
             */
            *pnum = bytes;
        }
        if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }
        /* [offset, pnum] unallocated on this layer, which could be only
         * the first part of [offset, bytes].  */
        bytes = MIN(bytes, *pnum);
        first = false;
    }
    return ret;
}

/* Coroutine wrapper for bdrv_block_status_above() */
static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
{
    BdrvCoBlockStatusData *data = opaque;

    data->ret = bdrv_co_block_status_above(data->bs, data->base,
                                           data->want_zero,
                                           data->offset, data->bytes,
                                           data->pnum, data->map, data->file);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_block_status_above().
 *
 * See bdrv_co_block_status_above() for details.
 */
static int bdrv_common_block_status_above(BlockDriverState *bs,
                                          BlockDriverState *base,
                                          bool want_zero, int64_t offset,
                                          int64_t bytes, int64_t *pnum,
                                          int64_t *map,
                                          BlockDriverState **file)
{
    Coroutine *co;
    BdrvCoBlockStatusData data = {
        .bs = bs,
        .base = base,
        .want_zero = want_zero,
        .offset = offset,
        .bytes = bytes,
        .pnum = pnum,
        .map = map,
        .file = file,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_block_status_above_co_entry(&data);
    } else {
        co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, !data.done);
    }
    return data.ret;
}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    return bdrv_common_block_status_above(bs, base, true, offset, bytes,
                                          pnum, map, file);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    return bdrv_block_status_above(bs, backing_bs(bs),
                                   offset, bytes, pnum, map, file);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int ret;
    int64_t dummy;

    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if (a prefix of) the given range is allocated in any image
 * between BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * offset is allocated in any image of the chain.  Return false otherwise,
 * or negative errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting at
 * 'offset + *pnum' may return the same allocation status (in other words,
 * the result is not necessarily the maximum possible range); but 'pnum'
 * will only be 0 when end of file is reached.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum)
{
    BlockDriverState *intermediate;
    int ret;
    int64_t n = bytes;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int64_t pnum_inter;
        int64_t size_inter;

        ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
        if (ret < 0) {
            return ret;
        }
        if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        size_inter = bdrv_getlength(intermediate);
        if (size_inter < 0) {
            return size_inter;
        }
        if (n > pnum_inter &&
            (intermediate == top || offset + pnum_inter < size_inter)) {
            n = pnum_inter;
        }

        intermediate = backing_bs(intermediate);
    }

    *pnum = n;
    return 0;
}

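/* VM state is read and written through the driver's load/save_vmstate
 * callbacks, or forwarded to bs->file; the helpers below wrap the coroutine
 * implementation for synchronous callers. */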
typedef struct BdrvVmstateCo {
    BlockDriverState *bs;
    QEMUIOVector *qiov;
    int64_t pos;
    bool is_read;
    int ret;
} BdrvVmstateCo;

static int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                   bool is_read)
{
    BlockDriver *drv = bs->drv;
    int ret = -ENOTSUP;

    bdrv_inc_in_flight(bs);

    if (!drv) {
        ret = -ENOMEDIUM;
    } else if (drv->bdrv_load_vmstate) {
        if (is_read) {
            ret = drv->bdrv_load_vmstate(bs, qiov, pos);
        } else {
            ret = drv->bdrv_save_vmstate(bs, qiov, pos);
        }
    } else if (bs->file) {
        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
    }

    bdrv_dec_in_flight(bs);
    return ret;
}

static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
    BdrvVmstateCo *co = opaque;
    co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}

static inline int
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                bool is_read)
{
    if (qemu_in_coroutine()) {
        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
    } else {
        BdrvVmstateCo data = {
            .bs      = bs,
            .qiov    = qiov,
            .pos     = pos,
            .is_read = is_read,
            .ret     = -EINPROGRESS,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
        return data.ret;
    }
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = (void *) buf,
        .iov_len    = size,
    };
    int ret;

    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = bdrv_writev_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, false);
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = buf,
        .iov_len    = size,
    };
    int ret;

    qemu_iovec_init_external(&qiov, &iov, 1);
    ret = bdrv_readv_vmstate(bs, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return size;
}

int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    return bdrv_rw_vmstate(bs, qiov, pos, true);
}

/**************************************************************/
/* async I/Os */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

/* Async version of aio cancel. The caller is not blocked if the acb
 * implements cancel_async, otherwise we do nothing and let the request
 * normally complete. In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

/**************************************************************/
/* Coroutine block device emulation */

typedef struct FlushCo {
    BlockDriverState *bs;
    int ret;
} FlushCo;

static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    FlushCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

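/* Flush @bs in coroutine context. Flush requests are serialised via
 * bs->flush_queue and skipped entirely when nothing was written since the
 * last successful flush (tracked by write_gen/flushed_gen). */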
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int current_gen;
    int ret = 0;

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = atomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /* bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success) */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work in
         * that case.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /* Now flush the underlying protocol */
flush_parent:
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    FlushCo flush_co = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&flush_co);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

    return flush_co.ret;
}

typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t offset;
    int bytes;
    int ret;
} DiscardCo;

static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes);
}

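/* Discard a byte range in coroutine context. Requests are fragmented so
 * that the bulk is aligned to the driver's pdiscard alignment; drivers may
 * ignore parts of the request, since discard is only advisory. */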
int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
                                  int bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    } else if (bs->read_only) {
        return -EPERM;
    }
    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    atomic_inc(&bs->write_gen);
    bdrv_set_dirty(bs, req.offset, req.bytes);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
    }

    return rwco.ret;
}

int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

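/* Allocation helpers that honor the memory alignment required for I/O on
 * @bs; the *0 variants additionally zero the buffer, and the try_ variants
 * return NULL instead of aborting on allocation failure. */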
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}

/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_min_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}

void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}

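/* I/O plugging batches request submission: bs->io_plugged counts nested
 * plug sections, and the driver callback is invoked only on the outermost
 * plug and unplug. */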
void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (atomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;

    assert(bs->io_plugged);
    if (atomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}