1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#include "qemu/osdep.h"
28
29#include "qapi/error.h"
30#include "qemu/module.h"
31#include "qemu/option.h"
32#include "qemu/units.h"
33#include "block/block_int.h"
34
35
/* Tunables of the preallocate filter, filled in from the runtime options. */
typedef struct PreallocateOpts {
    /* "prealloc-size": extra bytes added past the write end when extending. */
    int64_t prealloc_size;
    /* "prealloc-align": the preallocated end is rounded up to this value. */
    int64_t prealloc_align;
} PreallocateOpts;
40
41typedef struct BDRVPreallocateState {
42 PreallocateOpts opts;
43
44
45
46
47
48
49
50
51
52 int64_t data_end;
53
54
55
56
57
58
59
60
61
62
63 int64_t zero_start;
64
65
66
67
68
69
70 int64_t file_end;
71
72
73
74
75
76
77} BDRVPreallocateState;
78
79#define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
80#define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
81static QemuOptsList runtime_opts = {
82 .name = "preallocate",
83 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
84 .desc = {
85 {
86 .name = PREALLOCATE_OPT_PREALLOC_ALIGN,
87 .type = QEMU_OPT_SIZE,
88 .help = "on preallocation, align file length to this number, "
89 "default 1M",
90 },
91 {
92 .name = PREALLOCATE_OPT_PREALLOC_SIZE,
93 .type = QEMU_OPT_SIZE,
94 .help = "how much to preallocate, default 128M",
95 },
96 { }
97 },
98};
99
100static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
101 BlockDriverState *child_bs, Error **errp)
102{
103 QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
104
105 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
106 return false;
107 }
108
109 dest->prealloc_align =
110 qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_ALIGN, 1 * MiB);
111 dest->prealloc_size =
112 qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
113
114 qemu_opts_del(opts);
115
116 if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
117 error_setg(errp, "prealloc-align parameter of preallocate filter "
118 "is not aligned to %llu", BDRV_SECTOR_SIZE);
119 return false;
120 }
121
122 if (!QEMU_IS_ALIGNED(dest->prealloc_align,
123 child_bs->bl.request_alignment)) {
124 error_setg(errp, "prealloc-align parameter of preallocate filter "
125 "is not aligned to underlying node request alignment "
126 "(%" PRIi32 ")", child_bs->bl.request_alignment);
127 return false;
128 }
129
130 return true;
131}
132
133static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
134 Error **errp)
135{
136 BDRVPreallocateState *s = bs->opaque;
137
138
139
140
141
142 s->file_end = s->zero_start = s->data_end = -EINVAL;
143
144 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
145 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
146 false, errp);
147 if (!bs->file) {
148 return -EINVAL;
149 }
150
151 if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) {
152 return -EINVAL;
153 }
154
155 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
156 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
157
158 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
159 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
160 bs->file->bs->supported_zero_flags);
161
162 return 0;
163}
164
165static void preallocate_close(BlockDriverState *bs)
166{
167 int ret;
168 BDRVPreallocateState *s = bs->opaque;
169
170 if (s->data_end < 0) {
171 return;
172 }
173
174 if (s->file_end < 0) {
175 s->file_end = bdrv_getlength(bs->file->bs);
176 if (s->file_end < 0) {
177 return;
178 }
179 }
180
181 if (s->data_end < s->file_end) {
182 ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
183 NULL);
184 s->file_end = ret < 0 ? ret : s->data_end;
185 }
186}
187
188
189
190
191
192
193
194
195
196
197static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
198 BlockReopenQueue *queue, Error **errp)
199{
200 PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
201
202 if (!preallocate_absorb_opts(opts, reopen_state->options,
203 reopen_state->bs->file->bs, errp)) {
204 g_free(opts);
205 return -EINVAL;
206 }
207
208 reopen_state->opaque = opts;
209
210 return 0;
211}
212
213static void preallocate_reopen_commit(BDRVReopenState *state)
214{
215 BDRVPreallocateState *s = state->bs->opaque;
216
217 s->opts = *(PreallocateOpts *)state->opaque;
218
219 g_free(state->opaque);
220 state->opaque = NULL;
221}
222
223static void preallocate_reopen_abort(BDRVReopenState *state)
224{
225 g_free(state->opaque);
226 state->opaque = NULL;
227}
228
229static coroutine_fn int preallocate_co_preadv_part(
230 BlockDriverState *bs, int64_t offset, int64_t bytes,
231 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
232{
233 return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
234 flags);
235}
236
237static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
238 int64_t offset, int64_t bytes)
239{
240 return bdrv_co_pdiscard(bs->file, offset, bytes);
241}
242
243static bool can_write_resize(uint64_t perm)
244{
245 return (perm & BLK_PERM_WRITE) && (perm & BLK_PERM_RESIZE);
246}
247
248static bool has_prealloc_perms(BlockDriverState *bs)
249{
250 BDRVPreallocateState *s = bs->opaque;
251
252 if (can_write_resize(bs->file->perm)) {
253 assert(!(bs->file->shared_perm & BLK_PERM_WRITE));
254 assert(!(bs->file->shared_perm & BLK_PERM_RESIZE));
255 return true;
256 }
257
258 assert(s->data_end < 0);
259 assert(s->zero_start < 0);
260 assert(s->file_end < 0);
261 return false;
262}
263
264
265
266
267
268
269
270
271
272static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
273 int64_t bytes, bool want_merge_zero)
274{
275 BDRVPreallocateState *s = bs->opaque;
276 int64_t end = offset + bytes;
277 int64_t prealloc_start, prealloc_end;
278 int ret;
279 uint32_t file_align = bs->file->bs->bl.request_alignment;
280 uint32_t prealloc_align = MAX(s->opts.prealloc_align, file_align);
281
282 assert(QEMU_IS_ALIGNED(prealloc_align, file_align));
283
284 if (!has_prealloc_perms(bs)) {
285
286 return false;
287 }
288
289 if (s->data_end < 0) {
290 s->data_end = bdrv_getlength(bs->file->bs);
291 if (s->data_end < 0) {
292 return false;
293 }
294
295 if (s->file_end < 0) {
296 s->file_end = s->data_end;
297 }
298 }
299
300 if (end <= s->data_end) {
301 return false;
302 }
303
304
305
306 s->data_end = end;
307 if (s->zero_start < 0 || !want_merge_zero) {
308 s->zero_start = end;
309 }
310
311 if (s->file_end < 0) {
312 s->file_end = bdrv_getlength(bs->file->bs);
313 if (s->file_end < 0) {
314 return false;
315 }
316 }
317
318
319
320 if (end <= s->file_end) {
321
322 return want_merge_zero && offset >= s->zero_start;
323 }
324
325
326
327 prealloc_start = QEMU_ALIGN_UP(
328 want_merge_zero ? MIN(offset, s->file_end) : s->file_end,
329 file_align);
330 prealloc_end = QEMU_ALIGN_UP(
331 MAX(prealloc_start, end) + s->opts.prealloc_size,
332 prealloc_align);
333
334 want_merge_zero = want_merge_zero && (prealloc_start <= offset);
335
336 ret = bdrv_co_pwrite_zeroes(
337 bs->file, prealloc_start, prealloc_end - prealloc_start,
338 BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
339 if (ret < 0) {
340 s->file_end = ret;
341 return false;
342 }
343
344 s->file_end = prealloc_end;
345 return want_merge_zero;
346}
347
348static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
349 int64_t offset, int64_t bytes, BdrvRequestFlags flags)
350{
351 bool want_merge_zero =
352 !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
353 if (handle_write(bs, offset, bytes, want_merge_zero)) {
354 return 0;
355 }
356
357 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
358}
359
360static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
361 int64_t offset,
362 int64_t bytes,
363 QEMUIOVector *qiov,
364 size_t qiov_offset,
365 BdrvRequestFlags flags)
366{
367 handle_write(bs, offset, bytes, false);
368
369 return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
370 flags);
371}
372
373static int coroutine_fn
374preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
375 bool exact, PreallocMode prealloc,
376 BdrvRequestFlags flags, Error **errp)
377{
378 ERRP_GUARD();
379 BDRVPreallocateState *s = bs->opaque;
380 int ret;
381
382 if (s->data_end >= 0 && offset > s->data_end) {
383 if (s->file_end < 0) {
384 s->file_end = bdrv_getlength(bs->file->bs);
385 if (s->file_end < 0) {
386 error_setg(errp, "failed to get file length");
387 return s->file_end;
388 }
389 }
390
391 if (prealloc == PREALLOC_MODE_FALLOC) {
392
393
394
395
396
397
398 if (offset <= s->file_end) {
399 s->data_end = offset;
400 return 0;
401 }
402 } else {
403
404
405
406
407
408
409
410
411
412 if (s->file_end > s->data_end) {
413 ret = bdrv_co_truncate(bs->file, s->data_end, true,
414 PREALLOC_MODE_OFF, 0, errp);
415 if (ret < 0) {
416 s->file_end = ret;
417 error_prepend(errp, "preallocate-filter: failed to drop "
418 "write-zero preallocation: ");
419 return ret;
420 }
421 s->file_end = s->data_end;
422 }
423 }
424
425 s->data_end = offset;
426 }
427
428 ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
429 if (ret < 0) {
430 s->file_end = s->zero_start = s->data_end = ret;
431 return ret;
432 }
433
434 if (has_prealloc_perms(bs)) {
435 s->file_end = s->zero_start = s->data_end = offset;
436 }
437 return 0;
438}
439
440static int coroutine_fn preallocate_co_flush(BlockDriverState *bs)
441{
442 return bdrv_co_flush(bs->file->bs);
443}
444
445static int64_t preallocate_getlength(BlockDriverState *bs)
446{
447 int64_t ret;
448 BDRVPreallocateState *s = bs->opaque;
449
450 if (s->data_end >= 0) {
451 return s->data_end;
452 }
453
454 ret = bdrv_getlength(bs->file->bs);
455
456 if (has_prealloc_perms(bs)) {
457 s->file_end = s->zero_start = s->data_end = ret;
458 }
459
460 return ret;
461}
462
463static int preallocate_check_perm(BlockDriverState *bs,
464 uint64_t perm, uint64_t shared, Error **errp)
465{
466 BDRVPreallocateState *s = bs->opaque;
467
468 if (s->data_end >= 0 && !can_write_resize(perm)) {
469
470
471
472
473
474 if (s->file_end < 0) {
475 s->file_end = bdrv_getlength(bs->file->bs);
476 if (s->file_end < 0) {
477 error_setg(errp, "Failed to get file length");
478 return s->file_end;
479 }
480 }
481
482 if (s->data_end < s->file_end) {
483 int ret = bdrv_truncate(bs->file, s->data_end, true,
484 PREALLOC_MODE_OFF, 0, NULL);
485 if (ret < 0) {
486 error_setg(errp, "Failed to drop preallocation");
487 s->file_end = ret;
488 return ret;
489 }
490 s->file_end = s->data_end;
491 }
492 }
493
494 return 0;
495}
496
497static void preallocate_set_perm(BlockDriverState *bs,
498 uint64_t perm, uint64_t shared)
499{
500 BDRVPreallocateState *s = bs->opaque;
501
502 if (can_write_resize(perm)) {
503 if (s->data_end < 0) {
504 s->data_end = s->file_end = s->zero_start =
505 bdrv_getlength(bs->file->bs);
506 }
507 } else {
508
509
510
511
512
513
514 s->data_end = s->file_end = s->zero_start = -EINVAL;
515 }
516}
517
518static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
519 BdrvChildRole role, BlockReopenQueue *reopen_queue,
520 uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
521{
522 bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
523
524 if (can_write_resize(perm)) {
525
526 *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
527
528
529
530
531
532 *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
533 }
534}
535
536BlockDriver bdrv_preallocate_filter = {
537 .format_name = "preallocate",
538 .instance_size = sizeof(BDRVPreallocateState),
539
540 .bdrv_getlength = preallocate_getlength,
541 .bdrv_open = preallocate_open,
542 .bdrv_close = preallocate_close,
543
544 .bdrv_reopen_prepare = preallocate_reopen_prepare,
545 .bdrv_reopen_commit = preallocate_reopen_commit,
546 .bdrv_reopen_abort = preallocate_reopen_abort,
547
548 .bdrv_co_preadv_part = preallocate_co_preadv_part,
549 .bdrv_co_pwritev_part = preallocate_co_pwritev_part,
550 .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes,
551 .bdrv_co_pdiscard = preallocate_co_pdiscard,
552 .bdrv_co_flush = preallocate_co_flush,
553 .bdrv_co_truncate = preallocate_co_truncate,
554
555 .bdrv_check_perm = preallocate_check_perm,
556 .bdrv_set_perm = preallocate_set_perm,
557 .bdrv_child_perm = preallocate_child_perm,
558
559 .has_variable_length = true,
560 .is_filter = true,
561};
562
563static void bdrv_preallocate_init(void)
564{
565 bdrv_register(&bdrv_preallocate_filter);
566}
567
568block_init(bdrv_preallocate_init);
569