1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#include "qemu/osdep.h"
28
29#include "qapi/error.h"
30#include "qemu/module.h"
31#include "qemu/option.h"
32#include "qemu/units.h"
33#include "block/block-io.h"
34#include "block/block_int.h"
35
36
/*
 * User-tunable parameters of the preallocate filter.
 *
 * Both values are byte counts filled in by preallocate_absorb_opts().
 */
typedef struct PreallocateOpts {
    /* How far past the end of a write the file is preallocated. */
    int64_t prealloc_size;

    /* The preallocated file end is rounded up to a multiple of this. */
    int64_t prealloc_align;
} PreallocateOpts;
41
42typedef struct BDRVPreallocateState {
43 PreallocateOpts opts;
44
45
46
47
48
49
50
51
52
53 int64_t data_end;
54
55
56
57
58
59
60
61
62
63
64 int64_t zero_start;
65
66
67
68
69
70
71 int64_t file_end;
72
73
74
75
76
77
78} BDRVPreallocateState;
79
80#define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
81#define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
82static QemuOptsList runtime_opts = {
83 .name = "preallocate",
84 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
85 .desc = {
86 {
87 .name = PREALLOCATE_OPT_PREALLOC_ALIGN,
88 .type = QEMU_OPT_SIZE,
89 .help = "on preallocation, align file length to this number, "
90 "default 1M",
91 },
92 {
93 .name = PREALLOCATE_OPT_PREALLOC_SIZE,
94 .type = QEMU_OPT_SIZE,
95 .help = "how much to preallocate, default 128M",
96 },
97 { }
98 },
99};
100
101static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
102 BlockDriverState *child_bs, Error **errp)
103{
104 QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
105
106 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
107 return false;
108 }
109
110 dest->prealloc_align =
111 qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_ALIGN, 1 * MiB);
112 dest->prealloc_size =
113 qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
114
115 qemu_opts_del(opts);
116
117 if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
118 error_setg(errp, "prealloc-align parameter of preallocate filter "
119 "is not aligned to %llu", BDRV_SECTOR_SIZE);
120 return false;
121 }
122
123 if (!QEMU_IS_ALIGNED(dest->prealloc_align,
124 child_bs->bl.request_alignment)) {
125 error_setg(errp, "prealloc-align parameter of preallocate filter "
126 "is not aligned to underlying node request alignment "
127 "(%" PRIi32 ")", child_bs->bl.request_alignment);
128 return false;
129 }
130
131 return true;
132}
133
134static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
135 Error **errp)
136{
137 BDRVPreallocateState *s = bs->opaque;
138 int ret;
139
140
141
142
143
144 s->file_end = s->zero_start = s->data_end = -EINVAL;
145
146 ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
147 if (ret < 0) {
148 return ret;
149 }
150
151 if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) {
152 return -EINVAL;
153 }
154
155 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
156 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
157
158 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
159 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
160 bs->file->bs->supported_zero_flags);
161
162 return 0;
163}
164
165static void preallocate_close(BlockDriverState *bs)
166{
167 int ret;
168 BDRVPreallocateState *s = bs->opaque;
169
170 if (s->data_end < 0) {
171 return;
172 }
173
174 if (s->file_end < 0) {
175 s->file_end = bdrv_getlength(bs->file->bs);
176 if (s->file_end < 0) {
177 return;
178 }
179 }
180
181 if (s->data_end < s->file_end) {
182 ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
183 NULL);
184 s->file_end = ret < 0 ? ret : s->data_end;
185 }
186}
187
188
189
190
191
192
193
194
195
196
197static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
198 BlockReopenQueue *queue, Error **errp)
199{
200 PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
201
202 if (!preallocate_absorb_opts(opts, reopen_state->options,
203 reopen_state->bs->file->bs, errp)) {
204 g_free(opts);
205 return -EINVAL;
206 }
207
208 reopen_state->opaque = opts;
209
210 return 0;
211}
212
213static void preallocate_reopen_commit(BDRVReopenState *state)
214{
215 BDRVPreallocateState *s = state->bs->opaque;
216
217 s->opts = *(PreallocateOpts *)state->opaque;
218
219 g_free(state->opaque);
220 state->opaque = NULL;
221}
222
223static void preallocate_reopen_abort(BDRVReopenState *state)
224{
225 g_free(state->opaque);
226 state->opaque = NULL;
227}
228
229static int coroutine_fn GRAPH_RDLOCK
230preallocate_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
231 QEMUIOVector *qiov, size_t qiov_offset,
232 BdrvRequestFlags flags)
233{
234 return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
235 flags);
236}
237
238static int coroutine_fn GRAPH_RDLOCK
239preallocate_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
240{
241 return bdrv_co_pdiscard(bs->file, offset, bytes);
242}
243
244static bool can_write_resize(uint64_t perm)
245{
246 return (perm & BLK_PERM_WRITE) && (perm & BLK_PERM_RESIZE);
247}
248
249static bool has_prealloc_perms(BlockDriverState *bs)
250{
251 BDRVPreallocateState *s = bs->opaque;
252
253 if (can_write_resize(bs->file->perm)) {
254 assert(!(bs->file->shared_perm & BLK_PERM_WRITE));
255 assert(!(bs->file->shared_perm & BLK_PERM_RESIZE));
256 return true;
257 }
258
259 assert(s->data_end < 0);
260 assert(s->zero_start < 0);
261 assert(s->file_end < 0);
262 return false;
263}
264
265
266
267
268
269
270
271
272
273static bool coroutine_fn GRAPH_RDLOCK
274handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
275 bool want_merge_zero)
276{
277 BDRVPreallocateState *s = bs->opaque;
278 int64_t end = offset + bytes;
279 int64_t prealloc_start, prealloc_end;
280 int ret;
281 uint32_t file_align = bs->file->bs->bl.request_alignment;
282 uint32_t prealloc_align = MAX(s->opts.prealloc_align, file_align);
283
284 assert(QEMU_IS_ALIGNED(prealloc_align, file_align));
285
286 if (!has_prealloc_perms(bs)) {
287
288 return false;
289 }
290
291 if (s->data_end < 0) {
292 s->data_end = bdrv_co_getlength(bs->file->bs);
293 if (s->data_end < 0) {
294 return false;
295 }
296
297 if (s->file_end < 0) {
298 s->file_end = s->data_end;
299 }
300 }
301
302 if (end <= s->data_end) {
303 return false;
304 }
305
306
307
308 s->data_end = end;
309 if (s->zero_start < 0 || !want_merge_zero) {
310 s->zero_start = end;
311 }
312
313 if (s->file_end < 0) {
314 s->file_end = bdrv_co_getlength(bs->file->bs);
315 if (s->file_end < 0) {
316 return false;
317 }
318 }
319
320
321
322 if (end <= s->file_end) {
323
324 return want_merge_zero && offset >= s->zero_start;
325 }
326
327
328
329 prealloc_start = QEMU_ALIGN_UP(
330 want_merge_zero ? MIN(offset, s->file_end) : s->file_end,
331 file_align);
332 prealloc_end = QEMU_ALIGN_UP(
333 MAX(prealloc_start, end) + s->opts.prealloc_size,
334 prealloc_align);
335
336 want_merge_zero = want_merge_zero && (prealloc_start <= offset);
337
338 ret = bdrv_co_pwrite_zeroes(
339 bs->file, prealloc_start, prealloc_end - prealloc_start,
340 BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
341 if (ret < 0) {
342 s->file_end = ret;
343 return false;
344 }
345
346 s->file_end = prealloc_end;
347 return want_merge_zero;
348}
349
350static int coroutine_fn GRAPH_RDLOCK
351preallocate_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
352 int64_t bytes, BdrvRequestFlags flags)
353{
354 bool want_merge_zero =
355 !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
356 if (handle_write(bs, offset, bytes, want_merge_zero)) {
357 return 0;
358 }
359
360 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
361}
362
363static int coroutine_fn GRAPH_RDLOCK
364preallocate_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
365 QEMUIOVector *qiov, size_t qiov_offset,
366 BdrvRequestFlags flags)
367{
368 handle_write(bs, offset, bytes, false);
369
370 return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
371 flags);
372}
373
374static int coroutine_fn GRAPH_RDLOCK
375preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
376 bool exact, PreallocMode prealloc,
377 BdrvRequestFlags flags, Error **errp)
378{
379 ERRP_GUARD();
380 BDRVPreallocateState *s = bs->opaque;
381 int ret;
382
383 if (s->data_end >= 0 && offset > s->data_end) {
384 if (s->file_end < 0) {
385 s->file_end = bdrv_co_getlength(bs->file->bs);
386 if (s->file_end < 0) {
387 error_setg(errp, "failed to get file length");
388 return s->file_end;
389 }
390 }
391
392 if (prealloc == PREALLOC_MODE_FALLOC) {
393
394
395
396
397
398
399 if (offset <= s->file_end) {
400 s->data_end = offset;
401 return 0;
402 }
403 } else {
404
405
406
407
408
409
410
411
412
413 if (s->file_end > s->data_end) {
414 ret = bdrv_co_truncate(bs->file, s->data_end, true,
415 PREALLOC_MODE_OFF, 0, errp);
416 if (ret < 0) {
417 s->file_end = ret;
418 error_prepend(errp, "preallocate-filter: failed to drop "
419 "write-zero preallocation: ");
420 return ret;
421 }
422 s->file_end = s->data_end;
423 }
424 }
425
426 s->data_end = offset;
427 }
428
429 ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
430 if (ret < 0) {
431 s->file_end = s->zero_start = s->data_end = ret;
432 return ret;
433 }
434
435 if (has_prealloc_perms(bs)) {
436 s->file_end = s->zero_start = s->data_end = offset;
437 }
438 return 0;
439}
440
441static int coroutine_fn GRAPH_RDLOCK preallocate_co_flush(BlockDriverState *bs)
442{
443 return bdrv_co_flush(bs->file->bs);
444}
445
446static int64_t coroutine_fn GRAPH_RDLOCK
447preallocate_co_getlength(BlockDriverState *bs)
448{
449 int64_t ret;
450 BDRVPreallocateState *s = bs->opaque;
451
452 if (s->data_end >= 0) {
453 return s->data_end;
454 }
455
456 ret = bdrv_co_getlength(bs->file->bs);
457
458 if (has_prealloc_perms(bs)) {
459 s->file_end = s->zero_start = s->data_end = ret;
460 }
461
462 return ret;
463}
464
465static int preallocate_check_perm(BlockDriverState *bs,
466 uint64_t perm, uint64_t shared, Error **errp)
467{
468 BDRVPreallocateState *s = bs->opaque;
469
470 if (s->data_end >= 0 && !can_write_resize(perm)) {
471
472
473
474
475
476 if (s->file_end < 0) {
477 s->file_end = bdrv_getlength(bs->file->bs);
478 if (s->file_end < 0) {
479 error_setg(errp, "Failed to get file length");
480 return s->file_end;
481 }
482 }
483
484 if (s->data_end < s->file_end) {
485 int ret = bdrv_truncate(bs->file, s->data_end, true,
486 PREALLOC_MODE_OFF, 0, NULL);
487 if (ret < 0) {
488 error_setg(errp, "Failed to drop preallocation");
489 s->file_end = ret;
490 return ret;
491 }
492 s->file_end = s->data_end;
493 }
494 }
495
496 return 0;
497}
498
499static void preallocate_set_perm(BlockDriverState *bs,
500 uint64_t perm, uint64_t shared)
501{
502 BDRVPreallocateState *s = bs->opaque;
503
504 if (can_write_resize(perm)) {
505 if (s->data_end < 0) {
506 s->data_end = s->file_end = s->zero_start =
507 bdrv_getlength(bs->file->bs);
508 }
509 } else {
510
511
512
513
514
515
516 s->data_end = s->file_end = s->zero_start = -EINVAL;
517 }
518}
519
520static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
521 BdrvChildRole role, BlockReopenQueue *reopen_queue,
522 uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
523{
524 bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
525
526 if (can_write_resize(perm)) {
527
528 *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
529
530
531
532
533
534 *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
535 }
536}
537
538BlockDriver bdrv_preallocate_filter = {
539 .format_name = "preallocate",
540 .instance_size = sizeof(BDRVPreallocateState),
541
542 .bdrv_co_getlength = preallocate_co_getlength,
543 .bdrv_open = preallocate_open,
544 .bdrv_close = preallocate_close,
545
546 .bdrv_reopen_prepare = preallocate_reopen_prepare,
547 .bdrv_reopen_commit = preallocate_reopen_commit,
548 .bdrv_reopen_abort = preallocate_reopen_abort,
549
550 .bdrv_co_preadv_part = preallocate_co_preadv_part,
551 .bdrv_co_pwritev_part = preallocate_co_pwritev_part,
552 .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes,
553 .bdrv_co_pdiscard = preallocate_co_pdiscard,
554 .bdrv_co_flush = preallocate_co_flush,
555 .bdrv_co_truncate = preallocate_co_truncate,
556
557 .bdrv_check_perm = preallocate_check_perm,
558 .bdrv_set_perm = preallocate_set_perm,
559 .bdrv_child_perm = preallocate_child_perm,
560
561 .is_filter = true,
562};
563
564static void bdrv_preallocate_init(void)
565{
566 bdrv_register(&bdrv_preallocate_filter);
567}
568
569block_init(bdrv_preallocate_init);
570