qemu/block/export/fuse.c
<<
>>
Prefs
   1/*
   2 * Present a block device as a raw image through FUSE
   3 *
   4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; under version 2 or later of the License.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#define FUSE_USE_VERSION 31
  20
  21#include "qemu/osdep.h"
  22#include "qemu/memalign.h"
  23#include "block/aio.h"
  24#include "block/block_int-common.h"
  25#include "block/export.h"
  26#include "block/fuse.h"
  27#include "block/qapi.h"
  28#include "qapi/error.h"
  29#include "qapi/qapi-commands-block.h"
  30#include "qemu/main-loop.h"
  31#include "sysemu/block-backend.h"
  32
  33#include <fuse.h>
  34#include <fuse_lowlevel.h>
  35
  36#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
  37#include <linux/falloc.h>
  38#endif
  39
  40#ifdef __linux__
  41#include <linux/fs.h>
  42#endif
  43
  44/* Prevent overly long bounce buffer allocations */
  45#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
  46
  47
  48typedef struct FuseExport {
  49    BlockExport common;
  50
  51    struct fuse_session *fuse_session;
  52    struct fuse_buf fuse_buf;
  53    unsigned int in_flight; /* atomic */
  54    bool mounted, fd_handler_set_up;
  55
  56    char *mountpoint;
  57    bool writable;
  58    bool growable;
  59    /* Whether allow_other was used as a mount option or not */
  60    bool allow_other;
  61
  62    mode_t st_mode;
  63    uid_t st_uid;
  64    gid_t st_gid;
  65} FuseExport;
  66
  67static GHashTable *exports;
  68static const struct fuse_lowlevel_ops fuse_ops;
  69
  70static void fuse_export_shutdown(BlockExport *exp);
  71static void fuse_export_delete(BlockExport *exp);
  72
  73static void init_exports_table(void);
  74
  75static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
  76                             bool allow_other, Error **errp);
  77static void read_from_fuse_export(void *opaque);
  78
  79static bool is_regular_file(const char *path, Error **errp);
  80
  81
  82static void fuse_export_drained_begin(void *opaque)
  83{
  84    FuseExport *exp = opaque;
  85
  86    aio_set_fd_handler(exp->common.ctx,
  87                       fuse_session_fd(exp->fuse_session),
  88                       NULL, NULL, NULL, NULL, NULL);
  89    exp->fd_handler_set_up = false;
  90}
  91
  92static void fuse_export_drained_end(void *opaque)
  93{
  94    FuseExport *exp = opaque;
  95
  96    /* Refresh AioContext in case it changed */
  97    exp->common.ctx = blk_get_aio_context(exp->common.blk);
  98
  99    aio_set_fd_handler(exp->common.ctx,
 100                       fuse_session_fd(exp->fuse_session),
 101                       read_from_fuse_export, NULL, NULL, NULL, exp);
 102    exp->fd_handler_set_up = true;
 103}
 104
 105static bool fuse_export_drained_poll(void *opaque)
 106{
 107    FuseExport *exp = opaque;
 108
 109    return qatomic_read(&exp->in_flight) > 0;
 110}
 111
 112static const BlockDevOps fuse_export_blk_dev_ops = {
 113    .drained_begin = fuse_export_drained_begin,
 114    .drained_end   = fuse_export_drained_end,
 115    .drained_poll  = fuse_export_drained_poll,
 116};
 117
 118static int fuse_export_create(BlockExport *blk_exp,
 119                              BlockExportOptions *blk_exp_args,
 120                              Error **errp)
 121{
 122    FuseExport *exp = container_of(blk_exp, FuseExport, common);
 123    BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
 124    int ret;
 125
 126    assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
 127
 128    /* For growable and writable exports, take the RESIZE permission */
 129    if (args->growable || blk_exp_args->writable) {
 130        uint64_t blk_perm, blk_shared_perm;
 131
 132        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
 133
 134        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
 135                           blk_shared_perm, errp);
 136        if (ret < 0) {
 137            return ret;
 138        }
 139    }
 140
 141    blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
 142
 143    /*
 144     * We handle draining ourselves using an in-flight counter and by disabling
 145     * the FUSE fd handler. Do not queue BlockBackend requests, they need to
 146     * complete so the in-flight counter reaches zero.
 147     */
 148    blk_set_disable_request_queuing(exp->common.blk, true);
 149
 150    init_exports_table();
 151
 152    /*
 153     * It is important to do this check before calling is_regular_file() --
 154     * that function will do a stat(), which we would have to handle if we
 155     * already exported something on @mountpoint.  But we cannot, because
 156     * we are currently caught up here.
 157     * (Note that ideally we would want to resolve relative paths here,
 158     * but bdrv_make_absolute_filename() might do the wrong thing for
 159     * paths that contain colons, and realpath() would resolve symlinks,
 160     * which we do not want: The mount point is not going to be the
 161     * symlink's destination, but the link itself.)
 162     * So this will not catch all potential clashes, but hopefully at
 163     * least the most common one of specifying exactly the same path
 164     * string twice.
 165     */
 166    if (g_hash_table_contains(exports, args->mountpoint)) {
 167        error_setg(errp, "There already is a FUSE export on '%s'",
 168                   args->mountpoint);
 169        ret = -EEXIST;
 170        goto fail;
 171    }
 172
 173    if (!is_regular_file(args->mountpoint, errp)) {
 174        ret = -EINVAL;
 175        goto fail;
 176    }
 177
 178    exp->mountpoint = g_strdup(args->mountpoint);
 179    exp->writable = blk_exp_args->writable;
 180    exp->growable = args->growable;
 181
 182    /* set default */
 183    if (!args->has_allow_other) {
 184        args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
 185    }
 186
 187    exp->st_mode = S_IFREG | S_IRUSR;
 188    if (exp->writable) {
 189        exp->st_mode |= S_IWUSR;
 190    }
 191    exp->st_uid = getuid();
 192    exp->st_gid = getgid();
 193
 194    if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
 195        /* Ignore errors on our first attempt */
 196        ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
 197        exp->allow_other = ret == 0;
 198        if (ret < 0) {
 199            ret = setup_fuse_export(exp, args->mountpoint, false, errp);
 200        }
 201    } else {
 202        exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
 203        ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
 204    }
 205    if (ret < 0) {
 206        goto fail;
 207    }
 208
 209    return 0;
 210
 211fail:
 212    fuse_export_delete(blk_exp);
 213    return ret;
 214}
 215
 216/**
 217 * Allocates the global @exports hash table.
 218 */
 219static void init_exports_table(void)
 220{
 221    if (exports) {
 222        return;
 223    }
 224
 225    exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
 226}
 227
 228/**
 229 * Create exp->fuse_session and mount it.
 230 */
 231static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
 232                             bool allow_other, Error **errp)
 233{
 234    const char *fuse_argv[4];
 235    char *mount_opts;
 236    struct fuse_args fuse_args;
 237    int ret;
 238
 239    /*
 240     * max_read needs to match what fuse_init() sets.
 241     * max_write need not be supplied.
 242     */
 243    mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
 244                                 FUSE_MAX_BOUNCE_BYTES,
 245                                 allow_other ? ",allow_other" : "");
 246
 247    fuse_argv[0] = ""; /* Dummy program name */
 248    fuse_argv[1] = "-o";
 249    fuse_argv[2] = mount_opts;
 250    fuse_argv[3] = NULL;
 251    fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
 252
 253    exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
 254                                         sizeof(fuse_ops), exp);
 255    g_free(mount_opts);
 256    if (!exp->fuse_session) {
 257        error_setg(errp, "Failed to set up FUSE session");
 258        ret = -EIO;
 259        goto fail;
 260    }
 261
 262    ret = fuse_session_mount(exp->fuse_session, mountpoint);
 263    if (ret < 0) {
 264        error_setg(errp, "Failed to mount FUSE session to export");
 265        ret = -EIO;
 266        goto fail;
 267    }
 268    exp->mounted = true;
 269
 270    g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
 271
 272    aio_set_fd_handler(exp->common.ctx,
 273                       fuse_session_fd(exp->fuse_session),
 274                       read_from_fuse_export, NULL, NULL, NULL, exp);
 275    exp->fd_handler_set_up = true;
 276
 277    return 0;
 278
 279fail:
 280    fuse_export_shutdown(&exp->common);
 281    return ret;
 282}
 283
 284/**
 285 * Callback to be invoked when the FUSE session FD can be read from.
 286 * (This is basically the FUSE event loop.)
 287 */
 288static void read_from_fuse_export(void *opaque)
 289{
 290    FuseExport *exp = opaque;
 291    int ret;
 292
 293    blk_exp_ref(&exp->common);
 294
 295    qatomic_inc(&exp->in_flight);
 296
 297    do {
 298        ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
 299    } while (ret == -EINTR);
 300    if (ret < 0) {
 301        goto out;
 302    }
 303
 304    fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
 305
 306out:
 307    if (qatomic_fetch_dec(&exp->in_flight) == 1) {
 308        aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
 309    }
 310
 311    blk_exp_unref(&exp->common);
 312}
 313
 314static void fuse_export_shutdown(BlockExport *blk_exp)
 315{
 316    FuseExport *exp = container_of(blk_exp, FuseExport, common);
 317
 318    if (exp->fuse_session) {
 319        fuse_session_exit(exp->fuse_session);
 320
 321        if (exp->fd_handler_set_up) {
 322            aio_set_fd_handler(exp->common.ctx,
 323                               fuse_session_fd(exp->fuse_session),
 324                               NULL, NULL, NULL, NULL, NULL);
 325            exp->fd_handler_set_up = false;
 326        }
 327    }
 328
 329    if (exp->mountpoint) {
 330        /*
 331         * Safe to drop now, because we will not handle any requests
 332         * for this export anymore anyway.
 333         */
 334        g_hash_table_remove(exports, exp->mountpoint);
 335    }
 336}
 337
 338static void fuse_export_delete(BlockExport *blk_exp)
 339{
 340    FuseExport *exp = container_of(blk_exp, FuseExport, common);
 341
 342    if (exp->fuse_session) {
 343        if (exp->mounted) {
 344            fuse_session_unmount(exp->fuse_session);
 345        }
 346
 347        fuse_session_destroy(exp->fuse_session);
 348    }
 349
 350    free(exp->fuse_buf.mem);
 351    g_free(exp->mountpoint);
 352}
 353
 354/**
 355 * Check whether @path points to a regular file.  If not, put an
 356 * appropriate message into *errp.
 357 */
 358static bool is_regular_file(const char *path, Error **errp)
 359{
 360    struct stat statbuf;
 361    int ret;
 362
 363    ret = stat(path, &statbuf);
 364    if (ret < 0) {
 365        error_setg_errno(errp, errno, "Failed to stat '%s'", path);
 366        return false;
 367    }
 368
 369    if (!S_ISREG(statbuf.st_mode)) {
 370        error_setg(errp, "'%s' is not a regular file", path);
 371        return false;
 372    }
 373
 374    return true;
 375}
 376
 377/**
 378 * A chance to set change some parameters supplied to FUSE_INIT.
 379 */
 380static void fuse_init(void *userdata, struct fuse_conn_info *conn)
 381{
 382    /*
 383     * MIN_NON_ZERO() would not be wrong here, but what we set here
 384     * must equal what has been passed to fuse_session_new().
 385     * Therefore, as long as max_read must be passed as a mount option
 386     * (which libfuse claims will be changed at some point), we have
 387     * to set max_read to a fixed value here.
 388     */
 389    conn->max_read = FUSE_MAX_BOUNCE_BYTES;
 390
 391    conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
 392}
 393
 394/**
 395 * Let clients look up files.  Always return ENOENT because we only
 396 * care about the mountpoint itself.
 397 */
 398static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
 399{
 400    fuse_reply_err(req, ENOENT);
 401}
 402
 403/**
 404 * Let clients get file attributes (i.e., stat() the file).
 405 */
 406static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
 407                         struct fuse_file_info *fi)
 408{
 409    struct stat statbuf;
 410    int64_t length, allocated_blocks;
 411    time_t now = time(NULL);
 412    FuseExport *exp = fuse_req_userdata(req);
 413
 414    length = blk_getlength(exp->common.blk);
 415    if (length < 0) {
 416        fuse_reply_err(req, -length);
 417        return;
 418    }
 419
 420    allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
 421    if (allocated_blocks <= 0) {
 422        allocated_blocks = DIV_ROUND_UP(length, 512);
 423    } else {
 424        allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
 425    }
 426
 427    statbuf = (struct stat) {
 428        .st_ino     = inode,
 429        .st_mode    = exp->st_mode,
 430        .st_nlink   = 1,
 431        .st_uid     = exp->st_uid,
 432        .st_gid     = exp->st_gid,
 433        .st_size    = length,
 434        .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
 435        .st_blocks  = allocated_blocks,
 436        .st_atime   = now,
 437        .st_mtime   = now,
 438        .st_ctime   = now,
 439    };
 440
 441    fuse_reply_attr(req, &statbuf, 1.);
 442}
 443
 444static int fuse_do_truncate(const FuseExport *exp, int64_t size,
 445                            bool req_zero_write, PreallocMode prealloc)
 446{
 447    uint64_t blk_perm, blk_shared_perm;
 448    BdrvRequestFlags truncate_flags = 0;
 449    bool add_resize_perm;
 450    int ret, ret_check;
 451
 452    /* Growable and writable exports have a permanent RESIZE permission */
 453    add_resize_perm = !exp->growable && !exp->writable;
 454
 455    if (req_zero_write) {
 456        truncate_flags |= BDRV_REQ_ZERO_WRITE;
 457    }
 458
 459    if (add_resize_perm) {
 460
 461        if (!qemu_in_main_thread()) {
 462            /* Changing permissions like below only works in the main thread */
 463            return -EPERM;
 464        }
 465
 466        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
 467
 468        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
 469                           blk_shared_perm, NULL);
 470        if (ret < 0) {
 471            return ret;
 472        }
 473    }
 474
 475    ret = blk_truncate(exp->common.blk, size, true, prealloc,
 476                       truncate_flags, NULL);
 477
 478    if (add_resize_perm) {
 479        /* Must succeed, because we are only giving up the RESIZE permission */
 480        ret_check = blk_set_perm(exp->common.blk, blk_perm,
 481                                 blk_shared_perm, &error_abort);
 482        assert(ret_check == 0);
 483    }
 484
 485    return ret;
 486}
 487
 488/**
 489 * Let clients set file attributes.  Only resizing and changing
 490 * permissions (st_mode, st_uid, st_gid) is allowed.
 491 * Changing permissions is only allowed as far as it will actually
 492 * permit access: Read-only exports cannot be given +w, and exports
 493 * without allow_other cannot be given a different UID or GID, and
 494 * they cannot be given non-owner access.
 495 */
 496static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
 497                         int to_set, struct fuse_file_info *fi)
 498{
 499    FuseExport *exp = fuse_req_userdata(req);
 500    int supported_attrs;
 501    int ret;
 502
 503    supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
 504    if (exp->allow_other) {
 505        supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
 506    }
 507
 508    if (to_set & ~supported_attrs) {
 509        fuse_reply_err(req, ENOTSUP);
 510        return;
 511    }
 512
 513    /* Do some argument checks first before committing to anything */
 514    if (to_set & FUSE_SET_ATTR_MODE) {
 515        /*
 516         * Without allow_other, non-owners can never access the export, so do
 517         * not allow setting permissions for them
 518         */
 519        if (!exp->allow_other &&
 520            (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
 521        {
 522            fuse_reply_err(req, EPERM);
 523            return;
 524        }
 525
 526        /* +w for read-only exports makes no sense, disallow it */
 527        if (!exp->writable &&
 528            (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
 529        {
 530            fuse_reply_err(req, EROFS);
 531            return;
 532        }
 533    }
 534
 535    if (to_set & FUSE_SET_ATTR_SIZE) {
 536        if (!exp->writable) {
 537            fuse_reply_err(req, EACCES);
 538            return;
 539        }
 540
 541        ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
 542        if (ret < 0) {
 543            fuse_reply_err(req, -ret);
 544            return;
 545        }
 546    }
 547
 548    if (to_set & FUSE_SET_ATTR_MODE) {
 549        /* Ignore FUSE-supplied file type, only change the mode */
 550        exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
 551    }
 552
 553    if (to_set & FUSE_SET_ATTR_UID) {
 554        exp->st_uid = statbuf->st_uid;
 555    }
 556
 557    if (to_set & FUSE_SET_ATTR_GID) {
 558        exp->st_gid = statbuf->st_gid;
 559    }
 560
 561    fuse_getattr(req, inode, fi);
 562}
 563
 564/**
 565 * Let clients open a file (i.e., the exported image).
 566 */
 567static void fuse_open(fuse_req_t req, fuse_ino_t inode,
 568                      struct fuse_file_info *fi)
 569{
 570    fuse_reply_open(req, fi);
 571}
 572
 573/**
 574 * Handle client reads from the exported image.
 575 */
 576static void fuse_read(fuse_req_t req, fuse_ino_t inode,
 577                      size_t size, off_t offset, struct fuse_file_info *fi)
 578{
 579    FuseExport *exp = fuse_req_userdata(req);
 580    int64_t length;
 581    void *buf;
 582    int ret;
 583
 584    /* Limited by max_read, should not happen */
 585    if (size > FUSE_MAX_BOUNCE_BYTES) {
 586        fuse_reply_err(req, EINVAL);
 587        return;
 588    }
 589
 590    /**
 591     * Clients will expect short reads at EOF, so we have to limit
 592     * offset+size to the image length.
 593     */
 594    length = blk_getlength(exp->common.blk);
 595    if (length < 0) {
 596        fuse_reply_err(req, -length);
 597        return;
 598    }
 599
 600    if (offset + size > length) {
 601        size = length - offset;
 602    }
 603
 604    buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
 605    if (!buf) {
 606        fuse_reply_err(req, ENOMEM);
 607        return;
 608    }
 609
 610    ret = blk_pread(exp->common.blk, offset, size, buf, 0);
 611    if (ret >= 0) {
 612        fuse_reply_buf(req, buf, size);
 613    } else {
 614        fuse_reply_err(req, -ret);
 615    }
 616
 617    qemu_vfree(buf);
 618}
 619
 620/**
 621 * Handle client writes to the exported image.
 622 */
 623static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
 624                       size_t size, off_t offset, struct fuse_file_info *fi)
 625{
 626    FuseExport *exp = fuse_req_userdata(req);
 627    int64_t length;
 628    int ret;
 629
 630    /* Limited by max_write, should not happen */
 631    if (size > BDRV_REQUEST_MAX_BYTES) {
 632        fuse_reply_err(req, EINVAL);
 633        return;
 634    }
 635
 636    if (!exp->writable) {
 637        fuse_reply_err(req, EACCES);
 638        return;
 639    }
 640
 641    /**
 642     * Clients will expect short writes at EOF, so we have to limit
 643     * offset+size to the image length.
 644     */
 645    length = blk_getlength(exp->common.blk);
 646    if (length < 0) {
 647        fuse_reply_err(req, -length);
 648        return;
 649    }
 650
 651    if (offset + size > length) {
 652        if (exp->growable) {
 653            ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
 654            if (ret < 0) {
 655                fuse_reply_err(req, -ret);
 656                return;
 657            }
 658        } else {
 659            size = length - offset;
 660        }
 661    }
 662
 663    ret = blk_pwrite(exp->common.blk, offset, size, buf, 0);
 664    if (ret >= 0) {
 665        fuse_reply_write(req, size);
 666    } else {
 667        fuse_reply_err(req, -ret);
 668    }
 669}
 670
 671/**
 672 * Let clients perform various fallocate() operations.
 673 */
 674static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
 675                           off_t offset, off_t length,
 676                           struct fuse_file_info *fi)
 677{
 678    FuseExport *exp = fuse_req_userdata(req);
 679    int64_t blk_len;
 680    int ret;
 681
 682    if (!exp->writable) {
 683        fuse_reply_err(req, EACCES);
 684        return;
 685    }
 686
 687    blk_len = blk_getlength(exp->common.blk);
 688    if (blk_len < 0) {
 689        fuse_reply_err(req, -blk_len);
 690        return;
 691    }
 692
 693#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 694    if (mode & FALLOC_FL_KEEP_SIZE) {
 695        length = MIN(length, blk_len - offset);
 696    }
 697#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
 698
 699    if (!mode) {
 700        /* We can only fallocate at the EOF with a truncate */
 701        if (offset < blk_len) {
 702            fuse_reply_err(req, EOPNOTSUPP);
 703            return;
 704        }
 705
 706        if (offset > blk_len) {
 707            /* No preallocation needed here */
 708            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
 709            if (ret < 0) {
 710                fuse_reply_err(req, -ret);
 711                return;
 712            }
 713        }
 714
 715        ret = fuse_do_truncate(exp, offset + length, true,
 716                               PREALLOC_MODE_FALLOC);
 717    }
 718#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 719    else if (mode & FALLOC_FL_PUNCH_HOLE) {
 720        if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 721            fuse_reply_err(req, EINVAL);
 722            return;
 723        }
 724
 725        do {
 726            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 727
 728            ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
 729                                    BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
 730            if (ret == -ENOTSUP) {
 731                /*
 732                 * fallocate() specifies to return EOPNOTSUPP for unsupported
 733                 * operations
 734                 */
 735                ret = -EOPNOTSUPP;
 736            }
 737
 738            offset += size;
 739            length -= size;
 740        } while (ret == 0 && length > 0);
 741    }
 742#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
 743#ifdef CONFIG_FALLOCATE_ZERO_RANGE
 744    else if (mode & FALLOC_FL_ZERO_RANGE) {
 745        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
 746            /* No need for zeroes, we are going to write them ourselves */
 747            ret = fuse_do_truncate(exp, offset + length, false,
 748                                   PREALLOC_MODE_OFF);
 749            if (ret < 0) {
 750                fuse_reply_err(req, -ret);
 751                return;
 752            }
 753        }
 754
 755        do {
 756            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 757
 758            ret = blk_pwrite_zeroes(exp->common.blk,
 759                                    offset, size, 0);
 760            offset += size;
 761            length -= size;
 762        } while (ret == 0 && length > 0);
 763    }
 764#endif /* CONFIG_FALLOCATE_ZERO_RANGE */
 765    else {
 766        ret = -EOPNOTSUPP;
 767    }
 768
 769    fuse_reply_err(req, ret < 0 ? -ret : 0);
 770}
 771
 772/**
 773 * Let clients fsync the exported image.
 774 */
 775static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
 776                       struct fuse_file_info *fi)
 777{
 778    FuseExport *exp = fuse_req_userdata(req);
 779    int ret;
 780
 781    ret = blk_flush(exp->common.blk);
 782    fuse_reply_err(req, ret < 0 ? -ret : 0);
 783}
 784
 785/**
 786 * Called before an FD to the exported image is closed.  (libfuse
 787 * notes this to be a way to return last-minute errors.)
 788 */
 789static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
 790                        struct fuse_file_info *fi)
 791{
 792    fuse_fsync(req, inode, 1, fi);
 793}
 794
 795#ifdef CONFIG_FUSE_LSEEK
 796/**
 797 * Let clients inquire allocation status.
 798 */
 799static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
 800                       int whence, struct fuse_file_info *fi)
 801{
 802    FuseExport *exp = fuse_req_userdata(req);
 803
 804    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
 805        fuse_reply_err(req, EINVAL);
 806        return;
 807    }
 808
 809    while (true) {
 810        int64_t pnum;
 811        int ret;
 812
 813        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
 814                                      offset, INT64_MAX, &pnum, NULL, NULL);
 815        if (ret < 0) {
 816            fuse_reply_err(req, -ret);
 817            return;
 818        }
 819
 820        if (!pnum && (ret & BDRV_BLOCK_EOF)) {
 821            int64_t blk_len;
 822
 823            /*
 824             * If blk_getlength() rounds (e.g. by sectors), then the
 825             * export length will be rounded, too.  However,
 826             * bdrv_block_status_above() may return EOF at unaligned
 827             * offsets.  We must not let this become visible and thus
 828             * always simulate a hole between @offset (the real EOF)
 829             * and @blk_len (the client-visible EOF).
 830             */
 831
 832            blk_len = blk_getlength(exp->common.blk);
 833            if (blk_len < 0) {
 834                fuse_reply_err(req, -blk_len);
 835                return;
 836            }
 837
 838            if (offset > blk_len || whence == SEEK_DATA) {
 839                fuse_reply_err(req, ENXIO);
 840            } else {
 841                fuse_reply_lseek(req, offset);
 842            }
 843            return;
 844        }
 845
 846        if (ret & BDRV_BLOCK_DATA) {
 847            if (whence == SEEK_DATA) {
 848                fuse_reply_lseek(req, offset);
 849                return;
 850            }
 851        } else {
 852            if (whence == SEEK_HOLE) {
 853                fuse_reply_lseek(req, offset);
 854                return;
 855            }
 856        }
 857
 858        /* Safety check against infinite loops */
 859        if (!pnum) {
 860            fuse_reply_err(req, ENXIO);
 861            return;
 862        }
 863
 864        offset += pnum;
 865    }
 866}
 867#endif
 868
 869static const struct fuse_lowlevel_ops fuse_ops = {
 870    .init       = fuse_init,
 871    .lookup     = fuse_lookup,
 872    .getattr    = fuse_getattr,
 873    .setattr    = fuse_setattr,
 874    .open       = fuse_open,
 875    .read       = fuse_read,
 876    .write      = fuse_write,
 877    .fallocate  = fuse_fallocate,
 878    .flush      = fuse_flush,
 879    .fsync      = fuse_fsync,
 880#ifdef CONFIG_FUSE_LSEEK
 881    .lseek      = fuse_lseek,
 882#endif
 883};
 884
 885const BlockExportDriver blk_exp_fuse = {
 886    .type               = BLOCK_EXPORT_TYPE_FUSE,
 887    .instance_size      = sizeof(FuseExport),
 888    .create             = fuse_export_create,
 889    .delete             = fuse_export_delete,
 890    .request_shutdown   = fuse_export_shutdown,
 891};
 892