/*
 * Block layer I/O functions
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/replay.h"

/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags);

static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
                                      bool ignore_bds_parents)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_begin_single(c, false);
    }
}

static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
                                                   int *drained_end_counter)
{
    assert(c->parent_quiesce_counter > 0);
    c->parent_quiesce_counter--;
    if (c->klass->drained_end) {
        c->klass->drained_end(c, drained_end_counter);
    }
}

void bdrv_parent_drained_end_single(BdrvChild *c)
{
    int drained_end_counter = 0;
    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
    BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
}

static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
                                    bool ignore_bds_parents,
                                    int *drained_end_counter)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
    }
}

static bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
    if (c->klass->drained_poll) {
        return c->klass->drained_poll(c);
    }
    return false;
}

static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
                                     bool ignore_bds_parents)
{
    BdrvChild *c, *next;
    bool busy = false;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
            continue;
        }
        busy |= bdrv_parent_drained_poll_single(c);
    }

    return busy;
}

void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
    c->parent_quiesce_counter++;
    if (c->klass->drained_begin) {
        c->klass->drained_begin(c);
    }
    if (poll) {
        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
    }
}

static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
    dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
                                  src->pdiscard_alignment);
    dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
    dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
    dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
                                        src->max_hw_transfer);
    dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                 src->opt_mem_alignment);
    dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                 src->min_mem_alignment);
    dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
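
/*
 * Illustrative example (added note, hypothetical values): merging keeps the
 * stricter limit on each axis.  Alignments combine via MAX, so 512 merged
 * with 4096 gives 4096; transfer caps combine via MIN_NON_ZERO, so an
 * unlimited (0) parent merged with a 64 KiB child gives 64 KiB.
 */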

typedef struct BdrvRefreshLimitsState {
    BlockDriverState *bs;
    BlockLimits old_bl;
} BdrvRefreshLimitsState;

static void bdrv_refresh_limits_abort(void *opaque)
{
    BdrvRefreshLimitsState *s = opaque;

    s->bs->bl = s->old_bl;
}

static TransactionActionDrv bdrv_refresh_limits_drv = {
    .abort = bdrv_refresh_limits_abort,
    .clean = g_free,
};

/* @tran is allowed to be NULL, in this case no rollback is possible */
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
    ERRP_GUARD();
    BlockDriver *drv = bs->drv;
    BdrvChild *c;
    bool have_limits;

    if (tran) {
        BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
        *s = (BdrvRefreshLimitsState) {
            .bs = bs,
            .old_bl = bs->bl,
        };
        tran_add(tran, &bdrv_refresh_limits_drv, s);
    }

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Default alignment based on whether driver has byte interface */
    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
                                drv->bdrv_aio_preadv ||
                                drv->bdrv_co_preadv_part) ? 1 : 512;

    /* Take some limits from the children as a default */
    have_limits = false;
    QLIST_FOREACH(c, &bs->children, next) {
        if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
        {
            bdrv_refresh_limits(c->bs, tran, errp);
            if (*errp) {
                return;
            }
            bdrv_merge_limits(&bs->bl, &c->bs->bl);
            have_limits = true;
        }
    }

    if (!have_limits) {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = qemu_real_host_page_size;

        /* Safe default since most protocols use readv()/writev()/etc */
        bs->bl.max_iov = IOV_MAX;
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
        if (*errp) {
            return;
        }
    }

    if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
        error_setg(errp, "Driver requires too large request alignment");
    }
}

/*
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clashes with its users. Copy-on-read
 * stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    qatomic_inc(&bs->copy_on_read);
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    int old = qatomic_fetch_dec(&bs->copy_on_read);
    assert(old >= 1);
}
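
/*
 * Usage sketch (added note): the block-stream job enables copy-on-read for
 * the duration of the job and disables it again on completion; because the
 * flag is a counter, concurrent users compose without interfering.
 */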

typedef struct {
    Coroutine *co;
    BlockDriverState *bs;
    bool done;
    bool begin;
    bool recursive;
    bool poll;
    BdrvChild *parent;
    bool ignore_bds_parents;
    int *drained_end_counter;
} BdrvCoDrainData;

static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    BlockDriverState *bs = data->bs;

    if (data->begin) {
        bs->drv->bdrv_co_drain_begin(bs);
    } else {
        bs->drv->bdrv_co_drain_end(bs);
    }

    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
    qatomic_mb_set(&data->done, true);
    if (!data->begin) {
        qatomic_dec(data->drained_end_counter);
    }
    bdrv_dec_in_flight(bs);

    g_free(data);
}

/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
                              int *drained_end_counter)
{
    BdrvCoDrainData *data;

    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
            (!begin && !bs->drv->bdrv_co_drain_end)) {
        return;
    }

    data = g_new(BdrvCoDrainData, 1);
    *data = (BdrvCoDrainData) {
        .bs = bs,
        .done = false,
        .begin = begin,
        .drained_end_counter = drained_end_counter,
    };

    if (!begin) {
        qatomic_inc(drained_end_counter);
    }

    /* Make sure the driver callback completes during the polling phase for
     * drain_begin */
    bdrv_inc_in_flight(bs);
    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}

/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
                     BdrvChild *ignore_parent, bool ignore_bds_parents)
{
    BdrvChild *child, *next;

    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
        return true;
    }

    if (qatomic_read(&bs->in_flight)) {
        return true;
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                return true;
            }
        }
    }

    return false;
}

static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter);

static void bdrv_co_drain_bh_cb(void *opaque)
{
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        aio_context_acquire(ctx);
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            assert(!data->drained_end_counter);
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
                                  data->ignore_bds_parents, data->poll);
        } else {
            assert(!data->poll);
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents,
                                data->drained_end_counter);
        }
        aio_context_release(ctx);
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
    }

    data->done = true;
    aio_co_wake(co);
}

static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                bool begin, bool recursive,
                                                BdrvChild *parent,
                                                bool ignore_bds_parents,
                                                bool poll,
                                                int *drained_end_counter)
{
    BdrvCoDrainData data;
    Coroutine *self = qemu_coroutine_self();
    AioContext *ctx = bdrv_get_aio_context(bs);
    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);

    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
     * other coroutines run if they were queued by aio_co_enter(). */

    assert(qemu_in_coroutine());
    data = (BdrvCoDrainData) {
        .co = self,
        .bs = bs,
        .done = false,
        .begin = begin,
        .recursive = recursive,
        .parent = parent,
        .ignore_bds_parents = ignore_bds_parents,
        .poll = poll,
        .drained_end_counter = drained_end_counter,
    };

    if (bs) {
        bdrv_inc_in_flight(bs);
    }

    /*
     * Temporarily drop the lock across yield or we would get deadlocks.
     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
     *
     * When we yield below, the lock for the current context will be
     * released, so if this is actually the lock that protects bs, don't
     * drop it a second time.
     */
    if (ctx != co_ctx) {
        aio_context_release(ctx);
    }
    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);

    qemu_coroutine_yield();
    /* If we are resumed from some other event (such as an aio completion or a
     * timer callback), it is a bug in the caller that should be fixed. */
    assert(data.done);

    /* Reacquire the AioContext of bs if we dropped it */
    if (ctx != co_ctx) {
        aio_context_acquire(ctx);
    }
}

void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
                                   BdrvChild *parent, bool ignore_bds_parents)
{
    assert(!qemu_in_coroutine());

    /* Stop things in parent-to-child order */
    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
        aio_disable_external(bdrv_get_aio_context(bs));
    }

    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
    bdrv_drain_invoke(bs, true, NULL);
}

static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                  BdrvChild *parent, bool ignore_bds_parents,
                                  bool poll)
{
    BdrvChild *child, *next;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
                               poll, NULL);
        return;
    }

    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter++;
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
                                  false);
        }
    }

    /*
     * Wait for drained requests to finish.
     *
     * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The
     * call is needed so things in this AioContext can make progress even
     * though we don't return to the main AioContext loop - this automatically
     * includes other nodes in the same AioContext and therefore all child
     * nodes.
     */
    if (poll) {
        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
}

void bdrv_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, false, NULL, false, true);
}

void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
    bdrv_do_drained_begin(bs, true, NULL, false, true);
}
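
/*
 * Usage sketch (added note): callers bracket operations that must not race
 * with in-flight I/O in a drained section:
 *
 *     bdrv_drained_begin(bs);
 *     ... reconfigure the graph, hand the node to another user, ...
 *     bdrv_drained_end(bs);
 *
 * Sections may nest; bs->quiesce_counter tracks the depth.
 */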

/*
 * This function does not poll, nor must any of its recursively called
 * functions.  The *drained_end_counter pointee will be incremented
 * once for every background operation scheduled, and decremented once
 * the operation settles.  Therefore, the pointer must remain valid
 * until the pointee reaches 0.  That implies that whoever sets up the
 * pointee has to poll until it is 0.
 *
 * We use atomic operations to access *drained_end_counter, because
 * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
 *     @bs may contain nodes in different AioContexts,
 * (2) bdrv_drain_all_end() uses the same counter for all nodes,
 *     therefore we do not have to handle multiple counters.
 */
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
                                BdrvChild *parent, bool ignore_bds_parents,
                                int *drained_end_counter)
{
    BdrvChild *child;
    int old_quiesce_counter;

    assert(drained_end_counter != NULL);

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
                               false, drained_end_counter);
        return;
    }
    assert(bs->quiesce_counter > 0);

    /* Re-enable things in child-to-parent order */
    bdrv_drain_invoke(bs, false, drained_end_counter);
    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
                            drained_end_counter);

    old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
    if (old_quiesce_counter == 1) {
        aio_enable_external(bdrv_get_aio_context(bs));
    }

    if (recursive) {
        assert(!ignore_bds_parents);
        bs->recursive_quiesce_counter--;
        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
                                drained_end_counter);
        }
    }
}

void bdrv_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}

void bdrv_subtree_drained_end(BlockDriverState *bs)
{
    int drained_end_counter = 0;
    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
    int i;

    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_begin(child->bs, true, child, false, true);
    }
}

void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
    int drained_end_counter = 0;
    int i;

    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
        bdrv_do_drained_end(child->bs, true, child, false,
                            &drained_end_counter);
    }

    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}

/*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
 * AioContext.
 */
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
    assert(qemu_in_coroutine());
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

void bdrv_drain(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);
    bdrv_drained_end(bs);
}

static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
    BdrvChild *child, *next;

    assert(qatomic_read(&bs->in_flight) == 0);
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_drain_assert_idle(child->bs);
    }
}

unsigned int bdrv_drain_all_count = 0;

static bool bdrv_drain_all_poll(void)
{
    BlockDriverState *bs = NULL;
    bool result = false;

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
        result |= bdrv_drain_poll(bs, false, NULL, true);
        aio_context_release(aio_context);
    }

    return result;
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs = NULL;

    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
        return;
    }

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
     * loop AioContext, so make sure we're in the main context. */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_drain_all_count < INT_MAX);
    bdrv_drain_all_count++;

    /* Quiesce all nodes, without polling in-flight requests yet. The graph
     * cannot change during this loop. */
    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
    }
}

void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
    int drained_end_counter = 0;

    g_assert(bs->quiesce_counter > 0);
    g_assert(!bs->refcnt);

    while (bs->quiesce_counter) {
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
    }
    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}

void bdrv_drain_all_end(void)
{
    BlockDriverState *bs = NULL;
    int drained_end_counter = 0;

    /*
     * bdrv queue is managed by record/replay,
     * waiting for finishing the I/O requests may
     * be infinite
     */
    if (replay_events_enabled()) {
        return;
    }

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
        aio_context_release(aio_context);
    }

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);

    assert(bdrv_drain_all_count > 0);
    bdrv_drain_all_count--;
}

void bdrv_drain_all(void)
{
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
}

/*
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        qatomic_dec(&req->bs->serialising_in_flight);
    }

    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
    qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

/*
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  int64_t bytes,
                                  enum BdrvTrackedRequestType type)
{
    bdrv_check_request(offset, bytes, &error_abort);

    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .type = type,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, int64_t bytes)
{
    bdrv_check_request(offset, bytes, &error_abort);

    /* The new request starts at or after the tracked one ends */
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    /* The tracked request starts at or after the new one ends */
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}
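
/*
 * Worked example (added note, illustrative numbers): for a tracked request
 * with overlap range [6144, 12288) and a candidate range [4096, 8192),
 * neither early-return test fires (4096 < 12288 and 6144 < 8192), so the
 * ranges overlap and a serialising requester would have to wait.
 */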

/* Called with self->bs->reqs_lock held */
static BdrvTrackedRequest *
bdrv_find_conflicting_request(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;

    QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
        if (req == self || (!req->serialising && !self->serialising)) {
            continue;
        }
        if (tracked_request_overlaps(req, self->overlap_offset,
                                     self->overlap_bytes))
        {
            /*
             * Hitting this means there was a reentrant request, for
             * example, a block driver issuing nested requests.  This must
             * never happen since it means deadlock.
             */
            assert(qemu_coroutine_self() != req->co);

            /*
             * If the request is already (indirectly) waiting for us, or
             * will wait for us as soon as it wakes up, then just go on
             * (instead of producing a deadlock in the former case).
             */
            if (!req->waiting_for) {
                return req;
            }
        }
    }

    return NULL;
}

/* Called with self->bs->reqs_lock held */
static bool coroutine_fn
bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
{
    BdrvTrackedRequest *req;
    bool waited = false;

    while ((req = bdrv_find_conflicting_request(self))) {
        self->waiting_for = req;
        qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
        self->waiting_for = NULL;
        waited = true;
    }

    return waited;
}

/* Called with req->bs->reqs_lock held */
static void tracked_request_set_serialising(BdrvTrackedRequest *req,
                                            uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    int64_t overlap_bytes =
        ROUND_UP(req->offset + req->bytes, align) - overlap_offset;

    bdrv_check_request(req->offset, req->bytes, &error_abort);

    if (!req->serialising) {
        qatomic_inc(&req->bs->serialising_in_flight);
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}

/*
 * Return the tracked request on @bs for the current coroutine, or
 * NULL if there is none.
 */
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
    BdrvTrackedRequest *req;
    Coroutine *self = qemu_coroutine_self();

    QLIST_FOREACH(req, &bs->tracked_requests, list) {
        if (req->co == self) {
            return req;
        }
    }

    return NULL;
}

/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t offset, int64_t bytes,
                            int64_t *cluster_offset,
                            int64_t *cluster_bytes)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_offset = offset;
        *cluster_bytes = bytes;
    } else {
        int64_t c = bdi.cluster_size;
        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
    }
}
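
/*
 * Worked example (added note, illustrative numbers): with a 64 KiB cluster
 * size, the request [offset = 66048, bytes = 1024) is rounded to
 * *cluster_offset = 65536 and *cluster_bytes = 65536, i.e. the single
 * cluster that fully contains it.
 */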

static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->bl.request_alignment;
    } else {
        return bdi.cluster_size;
    }
}

void bdrv_inc_in_flight(BlockDriverState *bs)
{
    qatomic_inc(&bs->in_flight);
}

void bdrv_wakeup(BlockDriverState *bs)
{
    aio_wait_kick();
}

void bdrv_dec_in_flight(BlockDriverState *bs)
{
    qatomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
}

static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    bool waited = false;

    if (!qatomic_read(&bs->serialising_in_flight)) {
        return false;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    waited = bdrv_wait_serialising_requests_locked(self);
    qemu_co_mutex_unlock(&bs->reqs_lock);

    return waited;
}

bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
                                                uint64_t align)
{
    bool waited;

    qemu_co_mutex_lock(&req->bs->reqs_lock);

    tracked_request_set_serialising(req, align);
    waited = bdrv_wait_serialising_requests_locked(req);

    qemu_co_mutex_unlock(&req->bs->reqs_lock);

    return waited;
}

static int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
                                   QEMUIOVector *qiov, size_t qiov_offset,
                                   Error **errp)
{
    /*
     * Check generic offset/bytes correctness
     */

    if (offset < 0) {
        error_setg(errp, "offset is negative: %" PRIi64, offset);
        return -EIO;
    }

    if (bytes < 0) {
        error_setg(errp, "bytes is negative: %" PRIi64, bytes);
        return -EIO;
    }

    if (bytes > BDRV_MAX_LENGTH) {
        error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   bytes, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH) {
        error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
                   offset, BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (offset > BDRV_MAX_LENGTH - bytes) {
        error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
                   "exceeds maximum(%" PRIi64 ")", offset, bytes,
                   BDRV_MAX_LENGTH);
        return -EIO;
    }

    if (!qiov) {
        return 0;
    }

    /*
     * Check qiov and qiov_offset
     */

    if (qiov_offset > qiov->size) {
        error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
                   qiov_offset, qiov->size);
        return -EIO;
    }

    if (bytes > qiov->size - qiov_offset) {
        error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
                   "vector size(%zu)", bytes, qiov_offset, qiov->size);
        return -EIO;
    }

    return 0;
}
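
/*
 * Examples of rejected requests (added note): a negative offset fails the
 * first check; offset == BDRV_MAX_LENGTH with bytes == 1 passes the
 * individual bounds checks but fails the offset + bytes sum check.  All
 * failures return -EIO with a descriptive error.
 */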

int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
{
    return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
}

static int bdrv_check_request32(int64_t offset, int64_t bytes,
                                QEMUIOVector *qiov, size_t qiov_offset)
{
    int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
    if (ret < 0) {
        return ret;
    }

    if (bytes > BDRV_REQUEST_MAX_BYTES) {
        return -EIO;
    }

    return 0;
}

int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
                       int64_t bytes, BdrvRequestFlags flags)
{
    return bdrv_pwritev(child, offset, bytes, NULL,
                        BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
 * BDRV_REQ_FUA).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_pwrite().
 */
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
    int ret;
    int64_t target_size, bytes, offset = 0;
    BlockDriverState *bs = child->bs;

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }

    for (;;) {
        bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
        if (bytes <= 0) {
            return 0;
        }
        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
        if (ret < 0) {
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            offset += bytes;
            continue;
        }
        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
        if (ret < 0) {
            return ret;
        }
        offset += bytes;
    }
}

/* See bdrv_pwrite() for the return codes */
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
{
    int ret;
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);

    if (bytes < 0) {
        return -EINVAL;
    }

    ret = bdrv_preadv(child, offset, bytes, &qiov, 0);

    return ret < 0 ? ret : bytes;
}
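
/*
 * Usage sketch (added note, hypothetical buffer): reading a 512-byte header
 * synchronously:
 *
 *     uint8_t header[512];
 *     int ret = bdrv_pread(child, 0, header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;   (e.g. -EIO or -ENOMEDIUM)
 *     }
 *
 * On success the byte count (here 512) is returned.
 */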

/* Return no. of bytes on success or < 0 on error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid offset or number of bytes
  -EACCES      Trying to write a read-only device
*/
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
                int64_t bytes)
{
    int ret;
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);

    if (bytes < 0) {
        return -EINVAL;
    }

    ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);

    return ret < 0 ? ret : bytes;
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * write (i.e. writes before this one have completed before this write).
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
                     const void *buf, int64_t count)
{
    int ret;

    ret = bdrv_pwrite(child, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_flush(child->bs);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    aio_co_wake(co->coroutine);
}

static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
                                           int64_t offset, int64_t bytes,
                                           QEMUIOVector *qiov,
                                           size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_preadv_part) {
        return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags);
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_preadv) {
        ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
        goto out;
    }

    if (drv->bdrv_aio_preadv) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
                                   bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
            goto out;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
            goto out;
        }
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);
    assert(drv->bdrv_co_readv);

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}
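
/*
 * Dispatch note (added): bdrv_driver_preadv() prefers the most capable
 * driver interface and falls back in order: .bdrv_co_preadv_part (vectored,
 * with an offset into the iovec), .bdrv_co_preadv, the callback-based
 * .bdrv_aio_preadv, and finally the legacy sector-based .bdrv_co_readv,
 * which requires sector-aligned offset and bytes.
 */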

static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            int64_t offset, int64_t bytes,
                                            QEMUIOVector *qiov,
                                            size_t qiov_offset, int flags)
{
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
    unsigned int nb_sectors;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!(flags & BDRV_REQ_NO_FALLBACK));

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_co_pwritev_part) {
        ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
                                        flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (qiov_offset > 0 || bytes != qiov->size) {
        qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
        qiov = &local_qiov;
    }

    if (drv->bdrv_co_pwritev) {
        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                   flags & bs->supported_write_flags);
        flags &= ~bs->supported_write_flags;
        goto emulate_flags;
    }

    if (drv->bdrv_aio_pwritev) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
                                    flags & bs->supported_write_flags,
                                    bdrv_co_io_em_complete, &co);
        flags &= ~bs->supported_write_flags;
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
        goto emulate_flags;
    }

    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    assert(bytes <= BDRV_REQUEST_MAX_BYTES);

    assert(drv->bdrv_co_writev);
    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
                              flags & bs->supported_write_flags);
    flags &= ~bs->supported_write_flags;

emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
        ret = bdrv_co_flush(bs);
    }

    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }

    return ret;
}

static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
                               int64_t bytes, QEMUIOVector *qiov,
                               size_t qiov_offset)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector local_qiov;
    int ret;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!block_driver_can_compress(drv)) {
        return -ENOTSUP;
    }

    if (drv->bdrv_co_pwritev_compressed_part) {
        return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
                                                    qiov, qiov_offset);
    }

    if (qiov_offset == 0) {
        return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
    }

    qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
    ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
    qemu_iovec_destroy(&local_qiov);

    return ret;
}

static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
        size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;

    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer = NULL;

    BlockDriver *drv = bs->drv;
    int64_t cluster_offset;
    int64_t cluster_bytes;
    int64_t skip_bytes;
    int ret;
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    BDRV_REQUEST_MAX_BYTES);
    int64_t progress = 0;
    bool skip_write;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    /*
     * Do not write anything when the BDS is inactive.  That is not
     * permitted, and it would not help.
     */
    skip_write = (bs->open_flags & BDRV_O_INACTIVE);

    /* FIXME We cannot require callers to have write permissions when all they
     * are doing is a read request. If we did things right, write permissions
     * would be obtained anyway, but internally by the copy-on-read code. As
     * long as it is implemented here rather than in a separate filter driver,
     * the copy-on-read code doesn't have its own BdrvChild, however, for which
     * it might need write permissions sometimes */

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.  Note that this value may exceed
     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
     * is one reason we loop rather than doing it all at once.
     */
    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
    skip_bytes = offset - cluster_offset;

    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                   cluster_offset, cluster_bytes);

    while (cluster_bytes) {
        int64_t pnum;

        if (skip_write) {
            ret = 1; /* "already allocated", so nothing will be copied */
            pnum = MIN(cluster_bytes, max_transfer);
        } else {
            ret = bdrv_is_allocated(bs, cluster_offset,
                                    MIN(cluster_bytes, max_transfer), &pnum);
            if (ret < 0) {
                /*
                 * Safe to treat errors in querying allocation as if
                 * unallocated; we'll probably fail again soon on the
                 * read, but at least that will set a decent errno.
                 */
                pnum = MIN(cluster_bytes, max_transfer);
            }

            /* Stop at EOF if the image ends in the middle of the cluster */
            if (ret == 0 && pnum == 0) {
                assert(progress >= bytes);
                break;
            }

            assert(skip_bytes < pnum);
        }

        if (ret <= 0) {
            QEMUIOVector local_qiov;

            /* Must copy-on-read; use the bounce buffer */
            pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
            if (!bounce_buffer) {
                int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
                int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
                int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);

                bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
                if (!bounce_buffer) {
                    ret = -ENOMEM;
                    goto err;
                }
            }
            qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);

            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
                                     &local_qiov, 0, 0);
            if (ret < 0) {
                goto err;
            }

            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
            if (drv->bdrv_co_pwrite_zeroes &&
                buffer_is_zero(bounce_buffer, pnum)) {
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * of that data? */
                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
                                               BDRV_REQ_WRITE_UNCHANGED);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
                                          &local_qiov, 0,
                                          BDRV_REQ_WRITE_UNCHANGED);
            }

            if (ret < 0) {
                /* It might be okay to ignore write errors for guest
                 * requests.  If this is a deliberate copy-on-read
                 * then we don't want to ignore the error.  Simply
                 * report it in all cases.
                 */
                goto err;
            }

            if (!(flags & BDRV_REQ_PREFETCH)) {
                qemu_iovec_from_buf(qiov, qiov_offset + progress,
                                    bounce_buffer + skip_bytes,
                                    MIN(pnum - skip_bytes, bytes - progress));
            }
        } else if (!(flags & BDRV_REQ_PREFETCH)) {
            /* Read directly into the destination */
            ret = bdrv_driver_preadv(bs, offset + progress,
                                     MIN(pnum - skip_bytes, bytes - progress),
                                     qiov, qiov_offset + progress, 0);
            if (ret < 0) {
                goto err;
            }
        }

        cluster_offset += pnum;
        cluster_bytes -= pnum;
        progress += pnum - skip_bytes;
        skip_bytes = 0;
    }
    ret = 0;

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    /* Only BDRV_REQ_COPY_ON_READ and BDRV_REQ_PREFETCH may reach this
     * function; both are consumed here rather than passed to the driver. */
    assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
    } else {
        bdrv_wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int64_t pnum;

        /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
        flags &= ~BDRV_REQ_COPY_ON_READ;

        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != bytes) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
                                           qiov, qiov_offset, flags);
            goto out;
        } else if (flags & BDRV_REQ_PREFETCH) {
            goto out;
        }
    }

    /* Forward the request to the BlockDriver, possibly fragmenting it */
    total_bytes = bdrv_getlength(bs);
    if (total_bytes < 0) {
        ret = total_bytes;
        goto out;
    }

    assert(!(flags & ~bs->supported_read_flags));

    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
    if (bytes <= max_bytes && bytes <= max_transfer) {
        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
        goto out;
    }

    while (bytes_remaining) {
        int64_t num;

        if (max_bytes) {
            num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
            assert(num);

            ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                     num, qiov,
                                     qiov_offset + bytes - bytes_remaining,
                                     flags);
            max_bytes -= num;
        } else {
            num = bytes_remaining;
            ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining,
                                    0, bytes_remaining);
        }
        if (ret < 0) {
            goto out;
        }
        bytes_remaining -= num;
    }

out:
    return ret < 0 ? ret : 0;
}
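
/*
 * Behavioral note (added): reads beyond end-of-image never reach the
 * driver.  max_bytes caps driver reads at EOF (rounded up to the request
 * alignment), and whatever remains of the destination is zero-filled with
 * qemu_iovec_memset() in the loop above.
 */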

/*
 * Request padding
 *
 *  |<---- align ----->|                     |<----- align ---->|
 *  |<- head ->|<------------- bytes ------------->|<-- tail -->|
 *  |          |                                   |            |
 * -*----------$-------*-------- ... --------*-----$------------*---
 *  |          |                                   |            |
 *  |          offset  |                           |            end
 *  ALIGN_DOWN(offset) ALIGN_UP(offset)      ALIGN_DOWN(end)    ALIGN_UP(end)
 *  [buf ... )                                     [tail_buf )
 *
 * @buf is an aligned allocation needed to store @head and @tail paddings.
 *
 * @merge_reads is true for small requests: when the whole padded request
 * fits into one aligned chunk (@buf_len == align), a single aligned read
 * serves both head and tail.
 */
typedef struct BdrvRequestPadding {
    uint8_t *buf;
    size_t buf_len;
    uint8_t *tail_buf;
    size_t head;
    size_t tail;
    bool merge_reads;
    QEMUIOVector local_qiov;
} BdrvRequestPadding;

static bool bdrv_init_padding(BlockDriverState *bs,
                              int64_t offset, int64_t bytes,
                              BdrvRequestPadding *pad)
{
    int64_t align = bs->bl.request_alignment;
    int64_t sum;

    bdrv_check_request(offset, bytes, &error_abort);
    assert(align <= INT_MAX); /* documented in block/block_int.h */
    assert(align <= SIZE_MAX / 2); /* so we can allocate the buffer */

    memset(pad, 0, sizeof(*pad));

    pad->head = offset & (align - 1);
    pad->tail = ((offset + bytes) & (align - 1));
    if (pad->tail) {
        pad->tail = align - pad->tail;
    }

    if (!pad->head && !pad->tail) {
        return false;
    }

    assert(bytes); /* Nothing good in aligning zero-length requests */

    sum = pad->head + bytes + pad->tail;
    pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
    pad->buf = qemu_blockalign(bs, pad->buf_len);
    pad->merge_reads = sum == pad->buf_len;
    if (pad->tail) {
        pad->tail_buf = pad->buf + pad->buf_len - align;
    }

    return true;
}
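
/*
 * Worked example (added note, illustrative numbers): align = 512,
 * offset = 100, bytes = 200.  Then head = 100, tail = 212, and the padded
 * request is exactly one 512-byte block: buf_len = 512 and merge_reads is
 * true, so a single aligned read serves both head and tail.
 */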

static int bdrv_padding_rmw_read(BdrvChild *child,
                                 BdrvTrackedRequest *req,
                                 BdrvRequestPadding *pad,
                                 bool zero_middle)
{
    QEMUIOVector local_qiov;
    BlockDriverState *bs = child->bs;
    uint64_t align = bs->bl.request_alignment;
    int ret;

    assert(req->serialising && pad->buf);

    if (pad->head || pad->merge_reads) {
        int64_t bytes = pad->merge_reads ? pad->buf_len : align;

        qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        }
        ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                  align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        }

        if (pad->merge_reads) {
            goto zero_mem;
        }
    }

    if (pad->tail) {
        qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(
                child, req,
                req->overlap_offset + req->overlap_bytes - align,
                align, align, &local_qiov, 0, 0);
        if (ret < 0) {
            return ret;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
    }

zero_mem:
    if (zero_middle) {
        memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
    }

    return 0;
}

static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
    if (pad->buf) {
        qemu_vfree(pad->buf);
        qemu_iovec_destroy(&pad->local_qiov);
    }
    memset(pad, 0, sizeof(*pad));
}

/*
 * bdrv_pad_request
 *
 * Exchange request parameters with padded request if needed. Don't include RMW
 * read of padding, bdrv_padding_rmw_read() should be called separately if
 * needed.
 *
 * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
 *  - on function start they represent original request
 *  - on failure or when padding is not needed they are unchanged
 *  - on success when padding is needed they represent padded request
 */
static int bdrv_pad_request(BlockDriverState *bs,
                            QEMUIOVector **qiov, size_t *qiov_offset,
                            int64_t *offset, int64_t *bytes,
                            BdrvRequestPadding *pad, bool *padded)
{
    int ret;

    bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);

    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
        if (padded) {
            *padded = false;
        }
        return 0;
    }

    ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
                                   *qiov, *qiov_offset, *bytes,
                                   pad->buf + pad->buf_len - pad->tail,
                                   pad->tail);
    if (ret < 0) {
        bdrv_padding_destroy(pad);
        return ret;
    }
    *bytes += pad->head + pad->tail;
    *offset -= pad->head;
    *qiov = &pad->local_qiov;
    *qiov_offset = 0;
    if (padded) {
        *padded = true;
    }

    return 0;
}

int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}

int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
    int64_t offset, int64_t bytes,
    QEMUIOVector *qiov, size_t qiov_offset,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    BdrvRequestPadding pad;
    int ret;

    trace_bdrv_co_preadv_part(bs, offset, bytes, flags);

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    if (ret < 0) {
        return ret;
    }

    if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
        /*
         * Aligning zero request is nonsense. Even if driver has special meaning
         * of zero-length (like qcow2_co_pwritev_compressed_part), we can't pass
         * it to driver due to request_alignment.
         *
         * Still, no reason to return an error if someone does an unaligned
         * zero-length read occasionally.
         */
        return 0;
    }

    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (qatomic_read(&bs->copy_on_read)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
                           NULL);
    if (ret < 0) {
        goto fail;
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                              bs->bl.request_alignment,
                              qiov, qiov_offset, flags);
    tracked_request_end(&req);
    bdrv_padding_destroy(&pad);

fail:
    bdrv_dec_in_flight(bs);

    return ret;
}

static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    void *buf = NULL;
    int ret = 0;
    bool need_flush = false;
    int head = 0;
    int tail = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);

    bdrv_check_request(offset, bytes, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
        return -ENOTSUP;
    }

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

    while (bytes > 0 && !ret) {
        int64_t num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
         * boundaries.
         */
        if (head) {
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector */
            num -= tail;
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_pwrite_zeroes) {
            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
                                             flags & bs->supported_zero_flags);
            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
                need_flush = true;
            }
        } else {
            assert(!bs->supported_zero_flags);
        }

        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* No need for bdrv_driver_pwritev() to do a fallback
                 * flush on each chunk; use just one at the end */
                write_flags &= ~BDRV_REQ_FUA;
                need_flush = true;
            }
            num = MIN(num, max_transfer);
            if (buf == NULL) {
                buf = qemu_try_blockalign0(bs, num);
                if (buf == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }
            qemu_iovec_init_buf(&qiov, buf, num);

            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_transfer) {
                qemu_vfree(buf);
                buf = NULL;
            }
        }

        offset += num;
        bytes -= num;
    }

fail:
    if (ret == 0 && need_flush) {
        ret = bdrv_co_flush(bs);
    }
    qemu_vfree(buf);
    return ret;
}
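
/*
 * Worked example (added note, illustrative numbers): with alignment = 4096,
 * offset = 1024 and bytes = 12288, the loop issues a 3072-byte request up
 * to the first aligned boundary, one aligned 8192-byte bulk request, and a
 * final 1024-byte tail request, so the driver sees an aligned bulk and
 * small unaligned edges that never cross alignment boundaries.
 */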

static inline int coroutine_fn
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
                          BdrvTrackedRequest *req, int flags)
{
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    if (bdrv_is_read_only(bs)) {
        return -EPERM;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));
    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
    assert(!(flags & ~BDRV_REQ_MASK));
    assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));

    if (flags & BDRV_REQ_SERIALISING) {
        QEMU_LOCK_GUARD(&bs->reqs_lock);

        tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));

        if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
            return -EBUSY;
        }

        bdrv_wait_serialising_requests_locked(req);
    } else {
        bdrv_wait_serialising_requests(req);
    }

    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
           child->perm & BLK_PERM_RESIZE);

    switch (req->type) {
    case BDRV_TRACKED_WRITE:
    case BDRV_TRACKED_DISCARD:
        if (flags & BDRV_REQ_WRITE_UNCHANGED) {
            assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
        } else {
            assert(child->perm & BLK_PERM_WRITE);
        }
        bdrv_write_threshold_check_write(bs, offset, bytes);
        return 0;
    case BDRV_TRACKED_TRUNCATE:
        assert(child->perm & BLK_PERM_RESIZE);
        return 0;
    default:
        abort();
    }
}

static inline void coroutine_fn
bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
                         BdrvTrackedRequest *req, int ret)
{
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    BlockDriverState *bs = child->bs;

    bdrv_check_request(offset, bytes, &error_abort);

    qatomic_inc(&bs->write_gen);

    /*
     * Discard cannot extend the image, but in error handling cases, such as
     * when reverting a qcow2 cluster allocation, the discarded range can pass
     * the end of image file, so we cannot assert about BDRV_TRACKED_DISCARD
     * here. Instead, just exclude discards from the image-size update below.
     */
    if (ret == 0 &&
        (req->type == BDRV_TRACKED_TRUNCATE ||
         end_sector > bs->total_sectors) &&
        req->type != BDRV_TRACKED_DISCARD) {
        bs->total_sectors = end_sector;
        bdrv_parent_cb_resize(bs);
        bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS);
    }
    if (req->bytes) {
        switch (req->type) {
        case BDRV_TRACKED_WRITE:
            stat64_max(&bs->wr_highest_offset, offset + bytes);
            /* fall through, to set dirty bits */
        case BDRV_TRACKED_DISCARD:
            bdrv_set_dirty(bs, offset, bytes);
            break;
        default:
            break;
        }
    }
}

/*
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t bytes_remaining = bytes;
    int max_transfer;

    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
    max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                   align);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
        qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
    } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
        ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                             qiov, qiov_offset);
    } else if (bytes <= max_transfer) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
    } else {
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        while (bytes_remaining) {
            int num = MIN(bytes_remaining, max_transfer);
            int local_flags = flags;

            assert(num);
            if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
                !(bs->supported_write_flags & BDRV_REQ_FUA)) {
                /* If FUA is going to be emulated by flush, we only
                 * need to flush on the last iteration */
                local_flags &= ~BDRV_REQ_FUA;
            }

            ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
                                      num, qiov,
                                      qiov_offset + bytes - bytes_remaining,
                                      local_flags);
            if (ret < 0) {
                break;
            }
            bytes_remaining -= num;
        }
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    if (ret >= 0) {
        ret = 0;
    }
    bdrv_co_write_req_finish(child, offset, bytes, req, ret);

    return ret;
}
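
/*
 * Note (added): with detect-zeroes enabled, an all-zero payload is
 * converted to an efficient zero write before reaching the driver, and
 * detect-zeroes=unmap additionally sets BDRV_REQ_MAY_UNMAP so the driver
 * may punch a hole instead of allocating zeroes.
 */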
2153
2154static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
2155 int64_t offset,
2156 int64_t bytes,
2157 BdrvRequestFlags flags,
2158 BdrvTrackedRequest *req)
2159{
2160 BlockDriverState *bs = child->bs;
2161 QEMUIOVector local_qiov;
2162 uint64_t align = bs->bl.request_alignment;
2163 int ret = 0;
2164 bool padding;
2165 BdrvRequestPadding pad;
2166
2167 padding = bdrv_init_padding(bs, offset, bytes, &pad);
2168 if (padding) {
2169 bdrv_make_request_serialising(req, align);
2170
2171 bdrv_padding_rmw_read(child, req, &pad, true);
2172
2173 if (pad.head || pad.merge_reads) {
2174 int64_t aligned_offset = offset & ~(align - 1);
2175 int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
2176
2177 qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
2178 ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
2179 align, &local_qiov, 0,
2180 flags & ~BDRV_REQ_ZERO_WRITE);
2181 if (ret < 0 || pad.merge_reads) {
2182
2183 goto out;
2184 }
2185 offset += write_bytes - pad.head;
2186 bytes -= write_bytes - pad.head;
2187 }
2188 }
2189
2190 assert(!bytes || (offset & (align - 1)) == 0);
2191 if (bytes >= align) {
2192
2193 int64_t aligned_bytes = bytes & ~(align - 1);
2194 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
2195 NULL, 0, flags);
2196 if (ret < 0) {
2197 goto out;
2198 }
2199 bytes -= aligned_bytes;
2200 offset += aligned_bytes;
2201 }
2202
2203 assert(!bytes || (offset & (align - 1)) == 0);
2204 if (bytes) {
2205 assert(align == pad.tail + bytes);
2206
2207 qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
2208 ret = bdrv_aligned_pwritev(child, req, offset, align, align,
2209 &local_qiov, 0,
2210 flags & ~BDRV_REQ_ZERO_WRITE);
2211 }
2212
2213out:
2214 bdrv_padding_destroy(&pad);
2215
2216 return ret;
2217}
2218
2219
2220
2221
2222int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
2223 int64_t offset, int64_t bytes, QEMUIOVector *qiov,
2224 BdrvRequestFlags flags)
2225{
2226 return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
2227}
2228
2229int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
2230 int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
2231 BdrvRequestFlags flags)
2232{
2233 BlockDriverState *bs = child->bs;
2234 BdrvTrackedRequest req;
2235 uint64_t align = bs->bl.request_alignment;
2236 BdrvRequestPadding pad;
2237 int ret;
2238 bool padded = false;
2239
2240 trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
2241
2242 if (!bdrv_is_inserted(bs)) {
2243 return -ENOMEDIUM;
2244 }
2245
2246 ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
2247 if (ret < 0) {
2248 return ret;
2249 }
2250
2251
2252 if ((flags & BDRV_REQ_NO_FALLBACK) &&
2253 !QEMU_IS_ALIGNED(offset | bytes, align))
2254 {
2255 return -ENOTSUP;
2256 }
2257
2258 if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) {
2259
2260
2261
2262
2263
2264
2265
2266
2267 return 0;
2268 }
2269
2270 if (!(flags & BDRV_REQ_ZERO_WRITE)) {
2271
2272
2273
2274
2275
2276 ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
2277 &padded);
2278 if (ret < 0) {
2279 return ret;
2280 }
2281 }
2282
2283 bdrv_inc_in_flight(bs);
2284 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
2285
2286 if (flags & BDRV_REQ_ZERO_WRITE) {
2287 assert(!padded);
2288 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
2289 goto out;
2290 }
2291
2292 if (padded) {
2293
2294
2295
2296
2297
2298
2299 bdrv_make_request_serialising(&req, align);
2300 bdrv_padding_rmw_read(child, &req, &pad, false);
2301 }
2302
2303 ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
2304 qiov, qiov_offset, flags);
2305
2306 bdrv_padding_destroy(&pad);
2307
2308out:
2309 tracked_request_end(&req);
2310 bdrv_dec_in_flight(bs);
2311
2312 return ret;
2313}
2314
2315int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
2316 int64_t bytes, BdrvRequestFlags flags)
2317{
2318 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
2319
2320 if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
2321 flags &= ~BDRV_REQ_MAY_UNMAP;
2322 }
2323
2324 return bdrv_co_pwritev(child, offset, bytes, NULL,
2325 BDRV_REQ_ZERO_WRITE | flags);
2326}
2327
2328
2329
2330
2331int bdrv_flush_all(void)
2332{
2333 BdrvNextIterator it;
2334 BlockDriverState *bs = NULL;
2335 int result = 0;
2336
2337
2338
2339
2340
2341
2342 if (replay_events_enabled()) {
2343 return result;
2344 }
2345
2346 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
2347 AioContext *aio_context = bdrv_get_aio_context(bs);
2348 int ret;
2349
2350 aio_context_acquire(aio_context);
2351 ret = bdrv_flush(bs);
2352 if (ret < 0 && !result) {
2353 result = ret;
2354 }
2355 aio_context_release(aio_context);
2356 }
2357
2358 return result;
2359}
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2389 bool want_zero,
2390 int64_t offset, int64_t bytes,
2391 int64_t *pnum, int64_t *map,
2392 BlockDriverState **file)
2393{
2394 int64_t total_size;
2395 int64_t n;
2396 int ret;
2397 int64_t local_map = 0;
2398 BlockDriverState *local_file = NULL;
2399 int64_t aligned_offset, aligned_bytes;
2400 uint32_t align;
2401 bool has_filtered_child;
2402
2403 assert(pnum);
2404 *pnum = 0;
2405 total_size = bdrv_getlength(bs);
2406 if (total_size < 0) {
2407 ret = total_size;
2408 goto early_out;
2409 }
2410
2411 if (offset >= total_size) {
2412 ret = BDRV_BLOCK_EOF;
2413 goto early_out;
2414 }
2415 if (!bytes) {
2416 ret = 0;
2417 goto early_out;
2418 }
2419
2420 n = total_size - offset;
2421 if (n < bytes) {
2422 bytes = n;
2423 }
2424
    /* Must be non-NULL or bdrv_getlength() would have failed */
    assert(bs->drv);
    has_filtered_child = bdrv_filter_child(bs);
    if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
        *pnum = bytes;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (offset + bytes == total_size) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID;
            local_map = offset;
            local_file = bs;
        }
        goto early_out;
    }

    bdrv_inc_in_flight(bs);

    /* Round out to request_alignment boundaries */
    align = bs->bl.request_alignment;
    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    if (bs->drv->bdrv_co_block_status) {
        ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
                                            aligned_bytes, pnum, &local_map,
                                            &local_file);
    } else {
        /* Default code for filters */

        local_file = bdrv_filter_bs(bs);
        assert(local_file);

        *pnum = aligned_bytes;
        local_map = aligned_offset;
        ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
    }
    if (ret < 0) {
        *pnum = 0;
        goto out;
    }

    /*
     * The driver's result must be a non-zero multiple of request_alignment.
     * Clamp pnum and adjust map to original request.
     */
    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
           align > offset - aligned_offset);
    if (ret & BDRV_BLOCK_RECURSE) {
        assert(ret & BDRV_BLOCK_DATA);
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        assert(!(ret & BDRV_BLOCK_ZERO));
    }

    *pnum -= offset - aligned_offset;
    if (*pnum > bytes) {
        *pnum = bytes;
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        local_map += offset - aligned_offset;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
        ret = bdrv_co_block_status(local_file, want_zero, local_map,
                                   *pnum, pnum, &local_map, &local_file);
        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    } else if (bs->drv->supports_backing) {
        BlockDriverState *cow_bs = bdrv_cow_bs(bs);

        if (!cow_bs) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (want_zero) {
            int64_t size2 = bdrv_getlength(cow_bs);

            if (size2 >= 0 && offset >= size2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
        local_file && local_file != bs &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int64_t file_pnum;
        int ret2;

        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
                                    *pnum, &file_pnum, NULL, NULL);
        if (ret2 >= 0) {
            /*
             * Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && offset + *pnum == total_size) {
        ret |= BDRV_BLOCK_EOF;
    }
early_out:
    if (file) {
        *file = local_file;
    }
    if (map) {
        *map = local_map;
    }
    return ret;
}

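/*
 * Returns the allocation status of the specified range, walking the
 * backing chain from @bs down to @base (if @include_base is true, @base
 * itself is also queried).  If @depth is non-NULL, it is set to the
 * number of nodes that were queried before the result was found.
 */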
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
                                  BlockDriverState *base,
                                  bool include_base,
                                  bool want_zero,
                                  int64_t offset,
                                  int64_t bytes,
                                  int64_t *pnum,
                                  int64_t *map,
                                  BlockDriverState **file,
                                  int *depth)
{
    int ret;
    BlockDriverState *p;
    int64_t eof = 0;
    int dummy;

    assert(!include_base || base);

    if (!depth) {
        depth = &dummy;
    }
    *depth = 0;

    if (!include_base && bs == base) {
        *pnum = bytes;
        return 0;
    }

    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
    ++*depth;
    if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
        return ret;
    }

    if (ret & BDRV_BLOCK_EOF) {
        eof = offset + *pnum;
    }

    assert(*pnum <= bytes);
    bytes = *pnum;

    for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
         p = bdrv_filter_or_cow_bs(p))
    {
        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
                                   file);
        ++*depth;
        if (ret < 0) {
            return ret;
        }
        if (*pnum == 0) {
            /*
             * The top layer deferred to this layer, and because this layer is
             * short, any zeroes that we synthesize beyond EOF behave as if
             * they were allocated at this layer.
             *
             * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be
             * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
             * below.
             */
            assert(ret & BDRV_BLOCK_EOF);
            *pnum = bytes;
            if (file) {
                *file = p;
            }
            ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
            break;
        }
        if (ret & BDRV_BLOCK_ALLOCATED) {
            /*
             * We've found the node and the status, we must break.
             *
             * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be
             * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
             * below.
             */
            ret &= ~BDRV_BLOCK_EOF;
            break;
        }

        if (p == base) {
            assert(include_base);
            break;
        }

        /*
         * OK, [offset, offset + *pnum) region is unallocated on this layer,
         * let's continue the diving.
         */
        assert(*pnum <= bytes);
        bytes = *pnum;
    }

    if (offset + *pnum == eof) {
        ret |= BDRV_BLOCK_EOF;
    }

    return ret;
}

int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                            int64_t offset, int64_t bytes, int64_t *pnum,
                            int64_t *map, BlockDriverState **file)
{
    return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
                                          pnum, map, file, NULL);
}

int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
                                   offset, bytes, pnum, map, file);
}

/*
 * Check @bs (and its backing chain) to see if the range defined
 * by @offset and @bytes is known to read as zeroes.
 * Return 1 if that is the case, 0 otherwise and -errno on error.
 * This test is meant to be fast rather than accurate so returning 0
 * does not necessarily mean non-zeroes.
 */
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes)
{
    int ret;
    int64_t pnum = bytes;

    if (!bytes) {
        return 1;
    }

    ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
                                         bytes, &pnum, NULL, NULL, NULL);

    if (ret < 0) {
        return ret;
    }

    return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int ret;
    int64_t dummy;

    ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
                                         bytes, pnum ? pnum : &dummy, NULL,
                                         NULL, NULL);
    if (ret < 0) {
        return ret;
    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
}

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return a positive depth if (a prefix of) the given range is allocated
 * in any image between BASE and TOP (BASE is only included if
 * include_base is set).  Depth 1 is TOP, 2 is the first backing layer,
 * and so forth.  BASE can be NULL to check if the given offset is
 * allocated in any image of the chain.  Return 0 otherwise, or negative
 * errno on failure.
 *
 * 'pnum' is set to the number of bytes (including and immediately
 * following the specified offset) that are known to be in the same
 * allocated/unallocated state.  Note that a subsequent call starting
 * at 'offset + *pnum' may return the same allocation status (in other
 * words, the result is not necessarily the maximum possible range);
 * but 'pnum' will only be 0 when end of file is reached.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            bool include_base, int64_t offset,
                            int64_t bytes, int64_t *pnum)
{
    int depth;
    int ret = bdrv_common_block_status_above(top, base, include_base, false,
                                             offset, bytes, pnum, NULL, NULL,
                                             &depth);
    if (ret < 0) {
        return ret;
    }

    if (ret & BDRV_BLOCK_ALLOCATED) {
        return depth;
    }
    return 0;
}

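/*
 * Read part of the VM state from @pos.  If the driver does not implement
 * bdrv_load_vmstate, the request is forwarded to the primary child.
 */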
int coroutine_fn
bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret = -ENOTSUP;

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

    if (drv->bdrv_load_vmstate) {
        ret = drv->bdrv_load_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

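/*
 * Write part of the VM state at @pos; the mirror image of
 * bdrv_co_readv_vmstate(), forwarding to the primary child when the
 * driver lacks bdrv_save_vmstate.
 */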
int coroutine_fn
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
    int ret = -ENOTSUP;

    if (!drv) {
        return -ENOMEDIUM;
    }

    bdrv_inc_in_flight(bs);

    if (drv->bdrv_save_vmstate) {
        ret = drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
    }

    bdrv_dec_in_flight(bs);

    return ret;
}

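/*
 * Buffer-based convenience wrappers around the vectored vmstate helpers
 * above; they return @size on success, or a negative errno.
 */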
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_writev_vmstate(bs, &qiov, pos);

    return ret < 0 ? ret : size;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
    int ret = bdrv_readv_vmstate(bs, &qiov, pos);

    return ret < 0 ? ret : size;
}

/**************************************************************/
/* async I/Os */

void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            /*
             * qemu_aio_ref and qemu_aio_unref are not thread-safe, so
             * assert that we're not using an I/O thread.  Thread-safe
             * code should use bdrv_aio_cancel_async exclusively.
             */
            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
        }
    }
    qemu_aio_unref(acb);
}

/*
 * Async version of aio cancel.  The caller is not blocked if the acb
 * implements cancel_async; otherwise we do nothing and let the request
 * normally complete.  In either case the completion callback must be called.
 */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}

/**************************************************************/
/* Coroutine block device emulation */

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);
    BdrvChild *child;
    int current_gen;
    int ret = 0;

    bdrv_inc_in_flight(bs);

    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }

    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
        ret = bs->drv->bdrv_co_flush(bs);
        goto out;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            goto out;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_children;
    }

    /* Check if we really need to flush anything */
    if (bs->flushed_gen == current_gen) {
        goto flush_children;
    }

    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
    if (!bs->drv) {
        /*
         * bs->drv->bdrv_co_flush() might have ejected the BDS
         * (even in case of apparent success)
         */
        ret = -ENOMEDIUM;
        goto out;
    }
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work in
         * that case either.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }

    if (ret < 0) {
        goto out;
    }

    /*
     * Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_children:
    ret = 0;
    QLIST_FOREACH(child, &bs->children, next) {
        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
            int this_child_ret = bdrv_co_flush(child->bs);
            if (!ret) {
                ret = this_child_ret;
            }
        }
    }

out:
    /* Notify any pending flushes that we have completed */
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }

    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
    qemu_co_mutex_unlock(&bs->reqs_lock);

early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
}

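/*
 * Discard @bytes at @offset.  The operation is advisory: it is silently
 * skipped when the node was not opened with BDRV_O_UNMAP or when the
 * driver implements no discard callback, and -ENOTSUP from the driver
 * is not treated as an error.
 */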
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
{
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
    int head, tail, align;
    BlockDriverState *bs = child->bs;

    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bdrv_has_readonly_bitmaps(bs)) {
        return -EPERM;
    }

    ret = bdrv_check_request(offset, bytes, NULL);
    if (ret < 0) {
        return ret;
    }

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
        return 0;
    }

    /*
     * Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.
     */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
    if (ret < 0) {
        goto out;
    }

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
        int64_t num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }

        if (num > max_pdiscard) {
            num = max_pdiscard;
        }

        if (!bs->drv) {
            ret = -ENOMEDIUM;
            goto out;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
        } else {
            BlockAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
                                             bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            goto out;
        }

        offset += num;
        bytes -= num;
    }
    ret = 0;
out:
    bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
}

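/*
 * Pass an ioctl through to the driver, preferring the coroutine callback
 * over the AIO one.  Returns -ENOTSUP if the driver supports neither.
 */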
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    bdrv_inc_in_flight(bs);
    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

    if (drv->bdrv_co_ioctl) {
        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
        if (!acb) {
            co.ret = -ENOTSUP;
            goto out;
        }
        qemu_coroutine_yield();
    }
out:
    bdrv_dec_in_flight(bs);
    return co.ret;
}

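/*
 * Allocation helpers: buffers are aligned to the optimal memory alignment
 * for this BDS; the *0 variants additionally zero the buffer, and the
 * try_* variants return NULL instead of aborting on failure.
 */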
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    return memset(qemu_blockalign(bs, size), 0, size);
}

void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        size = align;
    }

    return qemu_try_memalign(align, size);
}

void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
    void *mem = qemu_try_blockalign(bs, size);

    if (mem) {
        memset(mem, 0, size);
    }

    return mem;
}

/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_min_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}

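/*
 * Start batching I/O submissions: plug all children recursively first,
 * then this node, so that drivers may coalesce the queued requests until
 * the matching bdrv_io_unplug() call.
 */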
void bdrv_io_plug(BlockDriverState *bs)
{
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_plug(child->bs);
    }

    if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
        }
    }
}

void bdrv_io_unplug(BlockDriverState *bs)
{
    BdrvChild *child;

    assert(bs->io_plugged);
    if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_io_unplug(child->bs);
    }
}

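/*
 * Register/unregister a host buffer with the driver and all children,
 * e.g. so that drivers backed by userspace I/O frameworks can pre-map
 * the memory.  bdrv_register_buf() is paired with bdrv_unregister_buf().
 */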
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
    BdrvChild *child;

    if (bs->drv && bs->drv->bdrv_register_buf) {
        bs->drv->bdrv_register_buf(bs, host, size);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_register_buf(child->bs, host, size);
    }
}

void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
    BdrvChild *child;

    if (bs->drv && bs->drv->bdrv_unregister_buf) {
        bs->drv->bdrv_unregister_buf(bs, host);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_unregister_buf(child->bs, host);
    }
}

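/*
 * Common backend for bdrv_co_copy_range_from() and _to(): performs all
 * checks and the tracked-request bookkeeping on either the source or the
 * destination node, then calls the corresponding driver callback.
 */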
static int coroutine_fn bdrv_co_copy_range_internal(
        BdrvChild *src, int64_t src_offset, BdrvChild *dst,
        int64_t dst_offset, int64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
        bool recurse_src)
{
    BdrvTrackedRequest req;
    int ret;

    /* TODO We can support BDRV_REQ_NO_FALLBACK here */
    assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
    assert(!(write_flags & BDRV_REQ_NO_FALLBACK));

    if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }
    if (write_flags & BDRV_REQ_ZERO_WRITE) {
        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
    }

    if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
    if (ret) {
        return ret;
    }

    if (!src->bs->drv->bdrv_co_copy_range_from
        || !dst->bs->drv->bdrv_co_copy_range_to
        || src->bs->encrypted || dst->bs->encrypted) {
        return -ENOTSUP;
    }

    if (recurse_src) {
        bdrv_inc_in_flight(src->bs);
        tracked_request_begin(&req, src->bs, src_offset, bytes,
                              BDRV_TRACKED_READ);

        /* BDRV_REQ_SERIALISING is only for write operation */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        bdrv_wait_serialising_requests(&req);

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                    src, src_offset,
                                                    dst, dst_offset,
                                                    bytes,
                                                    read_flags, write_flags);

        tracked_request_end(&req);
        bdrv_dec_in_flight(src->bs);
    } else {
        bdrv_inc_in_flight(dst->bs);
        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
                              BDRV_TRACKED_WRITE);
        ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                        write_flags);
        if (!ret) {
            ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
                                                      src, src_offset,
                                                      dst, dst_offset,
                                                      bytes,
                                                      read_flags, write_flags);
        }
        bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret);
        tracked_request_end(&req);
        bdrv_dec_in_flight(dst->bs);
    }

    return ret;
}

/*
 * Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics.
 */
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
                                         BdrvChild *dst, int64_t dst_offset,
                                         int64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags)
{
    trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, true);
}

/*
 * Copy range from @src to @dst.
 *
 * See the comment of bdrv_co_copy_range for the parameter and return value
 * semantics.
 */
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
                                       BdrvChild *dst, int64_t dst_offset,
                                       int64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags)
{
    trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                read_flags, write_flags);
    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                       bytes, read_flags, write_flags, false);
}

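/*
 * Copy @bytes from @src at @src_offset to @dst at @dst_offset.  This is
 * the public entry point; it simply performs the copy from the source
 * side, which in turn prepares and issues the write on @dst.
 */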
int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                    BdrvChild *dst, int64_t dst_offset,
                                    int64_t bytes, BdrvRequestFlags read_flags,
                                    BdrvRequestFlags write_flags)
{
    return bdrv_co_copy_range_from(src, src_offset,
                                   dst, dst_offset,
                                   bytes, read_flags, write_flags);
}

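/* Notify all parents that the size of this node has changed */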
static void bdrv_parent_cb_resize(BlockDriverState *bs)
{
    BdrvChild *c;
    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->klass->resize) {
            c->klass->resize(c);
        }
    }
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * If 'exact' is true, the file must be resized to exactly the given
 * 'offset'.  Otherwise, it is sufficient for the node to be at least
 * as large as 'offset'.
 */
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
                                  PreallocMode prealloc, BdrvRequestFlags flags,
                                  Error **errp)
{
    BlockDriverState *bs = child->bs;
    BdrvChild *filtered, *backing;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int64_t old_size, new_bytes;
    int ret;

    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    if (offset < 0) {
        error_setg(errp, "Image size cannot be negative");
        return -EINVAL;
    }

    ret = bdrv_check_request(offset, 0, errp);
    if (ret < 0) {
        return ret;
    }

    old_size = bdrv_getlength(bs);
    if (old_size < 0) {
        error_setg_errno(errp, -old_size, "Failed to get old image size");
        return old_size;
    }

    if (bdrv_is_read_only(bs)) {
        error_setg(errp, "Image is read-only");
        return -EACCES;
    }

    if (offset > old_size) {
        new_bytes = offset - old_size;
    } else {
        new_bytes = 0;
    }

    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                          BDRV_TRACKED_TRUNCATE);

    /*
     * If we are growing the image and potentially using preallocation for the
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation.
     */
    if (new_bytes) {
        bdrv_make_request_serialising(&req, 1);
    }
    ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                    0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to prepare request for truncation");
        goto out;
    }

    filtered = bdrv_filter_child(bs);
    backing = bdrv_cow_child(bs);

    /*
     * If the image has a backing file that is large enough that it would
     * provide data for the new area, we cannot leave it unallocated because
     * then the backing file content would become visible. Instead, zero-fill
     * the new area.
     *
     * Note that if the image has a backing file, but was opened without the
     * backing file, taking care of keeping things consistent with that backing
     * file is the user's responsibility.
     */
    if (new_bytes && backing) {
        int64_t backing_len;

        backing_len = bdrv_getlength(backing->bs);
        if (backing_len < 0) {
            ret = backing_len;
            error_setg_errno(errp, -ret, "Could not get backing file size");
            goto out;
        }

        if (backing_len > old_size) {
            flags |= BDRV_REQ_ZERO_WRITE;
        }
    }

    if (drv->bdrv_co_truncate) {
        if (flags & ~bs->supported_truncate_flags) {
            error_setg(errp, "Block driver does not support requested flags");
            ret = -ENOTSUP;
            goto out;
        }
        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
    } else if (filtered) {
        ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
    } else {
        error_setg(errp, "Image format driver does not support resize");
        ret = -ENOTSUP;
        goto out;
    }
    if (ret < 0) {
        goto out;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
    } else {
        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
    }

    /*
     * It's possible that truncation succeeded but refresh_total_sectors
     * failed, but the latter doesn't affect how we should finish the request.
     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled.
     */
    bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);

out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}

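/*
 * Best-effort cancellation of in-flight requests: forwarded to the driver
 * if it implements bdrv_cancel_in_flight (e.g. to abort a reconnecting
 * NBD client), and a no-op otherwise.
 */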
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        return;
    }

    if (bs->drv->bdrv_cancel_in_flight) {
        bs->drv->bdrv_cancel_in_flight(bs);
    }
}
