qemu/block/export/fuse.c
<<
>>
Prefs
   1/*
   2 * Present a block device as a raw image through FUSE
   3 *
   4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; under version 2 or later of the License.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#define FUSE_USE_VERSION 31
  20
  21#include "qemu/osdep.h"
  22#include "block/aio.h"
  23#include "block/block.h"
  24#include "block/export.h"
  25#include "block/fuse.h"
  26#include "block/qapi.h"
  27#include "qapi/error.h"
  28#include "qapi/qapi-commands-block.h"
  29#include "sysemu/block-backend.h"
  30
  31#include <fuse.h>
  32#include <fuse_lowlevel.h>
  33
  34
/*
 * Prevent overly long bounce buffer allocations: this is the largest
 * read request fuse_read() accepts, and the max_read value that is
 * announced to FUSE (both as a mount option and in fuse_init()).
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
  37
  38
  39typedef struct FuseExport {
  40    BlockExport common;
  41
  42    struct fuse_session *fuse_session;
  43    struct fuse_buf fuse_buf;
  44    bool mounted, fd_handler_set_up;
  45
  46    char *mountpoint;
  47    bool writable;
  48    bool growable;
  49} FuseExport;
  50
  51static GHashTable *exports;
  52static const struct fuse_lowlevel_ops fuse_ops;
  53
  54static void fuse_export_shutdown(BlockExport *exp);
  55static void fuse_export_delete(BlockExport *exp);
  56
  57static void init_exports_table(void);
  58
  59static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
  60                             Error **errp);
  61static void read_from_fuse_export(void *opaque);
  62
  63static bool is_regular_file(const char *path, Error **errp);
  64
  65
  66static int fuse_export_create(BlockExport *blk_exp,
  67                              BlockExportOptions *blk_exp_args,
  68                              Error **errp)
  69{
  70    FuseExport *exp = container_of(blk_exp, FuseExport, common);
  71    BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
  72    int ret;
  73
  74    assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
  75
  76    /* For growable exports, take the RESIZE permission */
  77    if (args->growable) {
  78        uint64_t blk_perm, blk_shared_perm;
  79
  80        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
  81
  82        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
  83                           blk_shared_perm, errp);
  84        if (ret < 0) {
  85            return ret;
  86        }
  87    }
  88
  89    init_exports_table();
  90
  91    /*
  92     * It is important to do this check before calling is_regular_file() --
  93     * that function will do a stat(), which we would have to handle if we
  94     * already exported something on @mountpoint.  But we cannot, because
  95     * we are currently caught up here.
  96     * (Note that ideally we would want to resolve relative paths here,
  97     * but bdrv_make_absolute_filename() might do the wrong thing for
  98     * paths that contain colons, and realpath() would resolve symlinks,
  99     * which we do not want: The mount point is not going to be the
 100     * symlink's destination, but the link itself.)
 101     * So this will not catch all potential clashes, but hopefully at
 102     * least the most common one of specifying exactly the same path
 103     * string twice.
 104     */
 105    if (g_hash_table_contains(exports, args->mountpoint)) {
 106        error_setg(errp, "There already is a FUSE export on '%s'",
 107                   args->mountpoint);
 108        ret = -EEXIST;
 109        goto fail;
 110    }
 111
 112    if (!is_regular_file(args->mountpoint, errp)) {
 113        ret = -EINVAL;
 114        goto fail;
 115    }
 116
 117    exp->mountpoint = g_strdup(args->mountpoint);
 118    exp->writable = blk_exp_args->writable;
 119    exp->growable = args->growable;
 120
 121    ret = setup_fuse_export(exp, args->mountpoint, errp);
 122    if (ret < 0) {
 123        goto fail;
 124    }
 125
 126    return 0;
 127
 128fail:
 129    fuse_export_delete(blk_exp);
 130    return ret;
 131}
 132
 133/**
 134 * Allocates the global @exports hash table.
 135 */
 136static void init_exports_table(void)
 137{
 138    if (exports) {
 139        return;
 140    }
 141
 142    exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
 143}
 144
 145/**
 146 * Create exp->fuse_session and mount it.
 147 */
 148static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
 149                             Error **errp)
 150{
 151    const char *fuse_argv[4];
 152    char *mount_opts;
 153    struct fuse_args fuse_args;
 154    int ret;
 155
 156    /* Needs to match what fuse_init() sets.  Only max_read must be supplied. */
 157    mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES);
 158
 159    fuse_argv[0] = ""; /* Dummy program name */
 160    fuse_argv[1] = "-o";
 161    fuse_argv[2] = mount_opts;
 162    fuse_argv[3] = NULL;
 163    fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
 164
 165    exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
 166                                         sizeof(fuse_ops), exp);
 167    g_free(mount_opts);
 168    if (!exp->fuse_session) {
 169        error_setg(errp, "Failed to set up FUSE session");
 170        ret = -EIO;
 171        goto fail;
 172    }
 173
 174    ret = fuse_session_mount(exp->fuse_session, mountpoint);
 175    if (ret < 0) {
 176        error_setg(errp, "Failed to mount FUSE session to export");
 177        ret = -EIO;
 178        goto fail;
 179    }
 180    exp->mounted = true;
 181
 182    g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
 183
 184    aio_set_fd_handler(exp->common.ctx,
 185                       fuse_session_fd(exp->fuse_session), true,
 186                       read_from_fuse_export, NULL, NULL, exp);
 187    exp->fd_handler_set_up = true;
 188
 189    return 0;
 190
 191fail:
 192    fuse_export_shutdown(&exp->common);
 193    return ret;
 194}
 195
 196/**
 197 * Callback to be invoked when the FUSE session FD can be read from.
 198 * (This is basically the FUSE event loop.)
 199 */
 200static void read_from_fuse_export(void *opaque)
 201{
 202    FuseExport *exp = opaque;
 203    int ret;
 204
 205    blk_exp_ref(&exp->common);
 206
 207    do {
 208        ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
 209    } while (ret == -EINTR);
 210    if (ret < 0) {
 211        goto out;
 212    }
 213
 214    fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
 215
 216out:
 217    blk_exp_unref(&exp->common);
 218}
 219
 220static void fuse_export_shutdown(BlockExport *blk_exp)
 221{
 222    FuseExport *exp = container_of(blk_exp, FuseExport, common);
 223
 224    if (exp->fuse_session) {
 225        fuse_session_exit(exp->fuse_session);
 226
 227        if (exp->fd_handler_set_up) {
 228            aio_set_fd_handler(exp->common.ctx,
 229                               fuse_session_fd(exp->fuse_session), true,
 230                               NULL, NULL, NULL, NULL);
 231            exp->fd_handler_set_up = false;
 232        }
 233    }
 234
 235    if (exp->mountpoint) {
 236        /*
 237         * Safe to drop now, because we will not handle any requests
 238         * for this export anymore anyway.
 239         */
 240        g_hash_table_remove(exports, exp->mountpoint);
 241    }
 242}
 243
 244static void fuse_export_delete(BlockExport *blk_exp)
 245{
 246    FuseExport *exp = container_of(blk_exp, FuseExport, common);
 247
 248    if (exp->fuse_session) {
 249        if (exp->mounted) {
 250            fuse_session_unmount(exp->fuse_session);
 251        }
 252
 253        fuse_session_destroy(exp->fuse_session);
 254    }
 255
 256    free(exp->fuse_buf.mem);
 257    g_free(exp->mountpoint);
 258}
 259
 260/**
 261 * Check whether @path points to a regular file.  If not, put an
 262 * appropriate message into *errp.
 263 */
 264static bool is_regular_file(const char *path, Error **errp)
 265{
 266    struct stat statbuf;
 267    int ret;
 268
 269    ret = stat(path, &statbuf);
 270    if (ret < 0) {
 271        error_setg_errno(errp, errno, "Failed to stat '%s'", path);
 272        return false;
 273    }
 274
 275    if (!S_ISREG(statbuf.st_mode)) {
 276        error_setg(errp, "'%s' is not a regular file", path);
 277        return false;
 278    }
 279
 280    return true;
 281}
 282
 283/**
 284 * A chance to set change some parameters supplied to FUSE_INIT.
 285 */
 286static void fuse_init(void *userdata, struct fuse_conn_info *conn)
 287{
 288    /*
 289     * MIN_NON_ZERO() would not be wrong here, but what we set here
 290     * must equal what has been passed to fuse_session_new().
 291     * Therefore, as long as max_read must be passed as a mount option
 292     * (which libfuse claims will be changed at some point), we have
 293     * to set max_read to a fixed value here.
 294     */
 295    conn->max_read = FUSE_MAX_BOUNCE_BYTES;
 296
 297    conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
 298}
 299
 300/**
 301 * Let clients look up files.  Always return ENOENT because we only
 302 * care about the mountpoint itself.
 303 */
 304static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
 305{
 306    fuse_reply_err(req, ENOENT);
 307}
 308
 309/**
 310 * Let clients get file attributes (i.e., stat() the file).
 311 */
 312static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
 313                         struct fuse_file_info *fi)
 314{
 315    struct stat statbuf;
 316    int64_t length, allocated_blocks;
 317    time_t now = time(NULL);
 318    FuseExport *exp = fuse_req_userdata(req);
 319    mode_t mode;
 320
 321    length = blk_getlength(exp->common.blk);
 322    if (length < 0) {
 323        fuse_reply_err(req, -length);
 324        return;
 325    }
 326
 327    allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
 328    if (allocated_blocks <= 0) {
 329        allocated_blocks = DIV_ROUND_UP(length, 512);
 330    } else {
 331        allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
 332    }
 333
 334    mode = S_IFREG | S_IRUSR;
 335    if (exp->writable) {
 336        mode |= S_IWUSR;
 337    }
 338
 339    statbuf = (struct stat) {
 340        .st_ino     = inode,
 341        .st_mode    = mode,
 342        .st_nlink   = 1,
 343        .st_uid     = getuid(),
 344        .st_gid     = getgid(),
 345        .st_size    = length,
 346        .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
 347        .st_blocks  = allocated_blocks,
 348        .st_atime   = now,
 349        .st_mtime   = now,
 350        .st_ctime   = now,
 351    };
 352
 353    fuse_reply_attr(req, &statbuf, 1.);
 354}
 355
 356static int fuse_do_truncate(const FuseExport *exp, int64_t size,
 357                            bool req_zero_write, PreallocMode prealloc)
 358{
 359    uint64_t blk_perm, blk_shared_perm;
 360    BdrvRequestFlags truncate_flags = 0;
 361    int ret;
 362
 363    if (req_zero_write) {
 364        truncate_flags |= BDRV_REQ_ZERO_WRITE;
 365    }
 366
 367    /* Growable exports have a permanent RESIZE permission */
 368    if (!exp->growable) {
 369        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
 370
 371        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
 372                           blk_shared_perm, NULL);
 373        if (ret < 0) {
 374            return ret;
 375        }
 376    }
 377
 378    ret = blk_truncate(exp->common.blk, size, true, prealloc,
 379                       truncate_flags, NULL);
 380
 381    if (!exp->growable) {
 382        /* Must succeed, because we are only giving up the RESIZE permission */
 383        blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
 384    }
 385
 386    return ret;
 387}
 388
 389/**
 390 * Let clients set file attributes.  Only resizing is supported.
 391 */
 392static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
 393                         int to_set, struct fuse_file_info *fi)
 394{
 395    FuseExport *exp = fuse_req_userdata(req);
 396    int ret;
 397
 398    if (!exp->writable) {
 399        fuse_reply_err(req, EACCES);
 400        return;
 401    }
 402
 403    if (to_set & ~FUSE_SET_ATTR_SIZE) {
 404        fuse_reply_err(req, ENOTSUP);
 405        return;
 406    }
 407
 408    ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
 409    if (ret < 0) {
 410        fuse_reply_err(req, -ret);
 411        return;
 412    }
 413
 414    fuse_getattr(req, inode, fi);
 415}
 416
 417/**
 418 * Let clients open a file (i.e., the exported image).
 419 */
 420static void fuse_open(fuse_req_t req, fuse_ino_t inode,
 421                      struct fuse_file_info *fi)
 422{
 423    fuse_reply_open(req, fi);
 424}
 425
 426/**
 427 * Handle client reads from the exported image.
 428 */
 429static void fuse_read(fuse_req_t req, fuse_ino_t inode,
 430                      size_t size, off_t offset, struct fuse_file_info *fi)
 431{
 432    FuseExport *exp = fuse_req_userdata(req);
 433    int64_t length;
 434    void *buf;
 435    int ret;
 436
 437    /* Limited by max_read, should not happen */
 438    if (size > FUSE_MAX_BOUNCE_BYTES) {
 439        fuse_reply_err(req, EINVAL);
 440        return;
 441    }
 442
 443    /**
 444     * Clients will expect short reads at EOF, so we have to limit
 445     * offset+size to the image length.
 446     */
 447    length = blk_getlength(exp->common.blk);
 448    if (length < 0) {
 449        fuse_reply_err(req, -length);
 450        return;
 451    }
 452
 453    if (offset + size > length) {
 454        size = length - offset;
 455    }
 456
 457    buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
 458    if (!buf) {
 459        fuse_reply_err(req, ENOMEM);
 460        return;
 461    }
 462
 463    ret = blk_pread(exp->common.blk, offset, buf, size);
 464    if (ret >= 0) {
 465        fuse_reply_buf(req, buf, size);
 466    } else {
 467        fuse_reply_err(req, -ret);
 468    }
 469
 470    qemu_vfree(buf);
 471}
 472
 473/**
 474 * Handle client writes to the exported image.
 475 */
 476static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
 477                       size_t size, off_t offset, struct fuse_file_info *fi)
 478{
 479    FuseExport *exp = fuse_req_userdata(req);
 480    int64_t length;
 481    int ret;
 482
 483    /* Limited by max_write, should not happen */
 484    if (size > BDRV_REQUEST_MAX_BYTES) {
 485        fuse_reply_err(req, EINVAL);
 486        return;
 487    }
 488
 489    if (!exp->writable) {
 490        fuse_reply_err(req, EACCES);
 491        return;
 492    }
 493
 494    /**
 495     * Clients will expect short writes at EOF, so we have to limit
 496     * offset+size to the image length.
 497     */
 498    length = blk_getlength(exp->common.blk);
 499    if (length < 0) {
 500        fuse_reply_err(req, -length);
 501        return;
 502    }
 503
 504    if (offset + size > length) {
 505        if (exp->growable) {
 506            ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
 507            if (ret < 0) {
 508                fuse_reply_err(req, -ret);
 509                return;
 510            }
 511        } else {
 512            size = length - offset;
 513        }
 514    }
 515
 516    ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
 517    if (ret >= 0) {
 518        fuse_reply_write(req, size);
 519    } else {
 520        fuse_reply_err(req, -ret);
 521    }
 522}
 523
 524/**
 525 * Let clients perform various fallocate() operations.
 526 */
 527static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
 528                           off_t offset, off_t length,
 529                           struct fuse_file_info *fi)
 530{
 531    FuseExport *exp = fuse_req_userdata(req);
 532    int64_t blk_len;
 533    int ret;
 534
 535    if (!exp->writable) {
 536        fuse_reply_err(req, EACCES);
 537        return;
 538    }
 539
 540    blk_len = blk_getlength(exp->common.blk);
 541    if (blk_len < 0) {
 542        fuse_reply_err(req, -blk_len);
 543        return;
 544    }
 545
 546    if (mode & FALLOC_FL_KEEP_SIZE) {
 547        length = MIN(length, blk_len - offset);
 548    }
 549
 550    if (mode & FALLOC_FL_PUNCH_HOLE) {
 551        if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 552            fuse_reply_err(req, EINVAL);
 553            return;
 554        }
 555
 556        do {
 557            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 558
 559            ret = blk_pdiscard(exp->common.blk, offset, size);
 560            offset += size;
 561            length -= size;
 562        } while (ret == 0 && length > 0);
 563    } else if (mode & FALLOC_FL_ZERO_RANGE) {
 564        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
 565            /* No need for zeroes, we are going to write them ourselves */
 566            ret = fuse_do_truncate(exp, offset + length, false,
 567                                   PREALLOC_MODE_OFF);
 568            if (ret < 0) {
 569                fuse_reply_err(req, -ret);
 570                return;
 571            }
 572        }
 573
 574        do {
 575            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 576
 577            ret = blk_pwrite_zeroes(exp->common.blk,
 578                                    offset, size, 0);
 579            offset += size;
 580            length -= size;
 581        } while (ret == 0 && length > 0);
 582    } else if (!mode) {
 583        /* We can only fallocate at the EOF with a truncate */
 584        if (offset < blk_len) {
 585            fuse_reply_err(req, EOPNOTSUPP);
 586            return;
 587        }
 588
 589        if (offset > blk_len) {
 590            /* No preallocation needed here */
 591            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
 592            if (ret < 0) {
 593                fuse_reply_err(req, -ret);
 594                return;
 595            }
 596        }
 597
 598        ret = fuse_do_truncate(exp, offset + length, true,
 599                               PREALLOC_MODE_FALLOC);
 600    } else {
 601        ret = -EOPNOTSUPP;
 602    }
 603
 604    fuse_reply_err(req, ret < 0 ? -ret : 0);
 605}
 606
 607/**
 608 * Let clients fsync the exported image.
 609 */
 610static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
 611                       struct fuse_file_info *fi)
 612{
 613    FuseExport *exp = fuse_req_userdata(req);
 614    int ret;
 615
 616    ret = blk_flush(exp->common.blk);
 617    fuse_reply_err(req, ret < 0 ? -ret : 0);
 618}
 619
 620/**
 621 * Called before an FD to the exported image is closed.  (libfuse
 622 * notes this to be a way to return last-minute errors.)
 623 */
 624static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
 625                        struct fuse_file_info *fi)
 626{
 627    fuse_fsync(req, inode, 1, fi);
 628}
 629
 630#ifdef CONFIG_FUSE_LSEEK
 631/**
 632 * Let clients inquire allocation status.
 633 */
 634static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
 635                       int whence, struct fuse_file_info *fi)
 636{
 637    FuseExport *exp = fuse_req_userdata(req);
 638
 639    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
 640        fuse_reply_err(req, EINVAL);
 641        return;
 642    }
 643
 644    while (true) {
 645        int64_t pnum;
 646        int ret;
 647
 648        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
 649                                      offset, INT64_MAX, &pnum, NULL, NULL);
 650        if (ret < 0) {
 651            fuse_reply_err(req, -ret);
 652            return;
 653        }
 654
 655        if (!pnum && (ret & BDRV_BLOCK_EOF)) {
 656            int64_t blk_len;
 657
 658            /*
 659             * If blk_getlength() rounds (e.g. by sectors), then the
 660             * export length will be rounded, too.  However,
 661             * bdrv_block_status_above() may return EOF at unaligned
 662             * offsets.  We must not let this become visible and thus
 663             * always simulate a hole between @offset (the real EOF)
 664             * and @blk_len (the client-visible EOF).
 665             */
 666
 667            blk_len = blk_getlength(exp->common.blk);
 668            if (blk_len < 0) {
 669                fuse_reply_err(req, -blk_len);
 670                return;
 671            }
 672
 673            if (offset > blk_len || whence == SEEK_DATA) {
 674                fuse_reply_err(req, ENXIO);
 675            } else {
 676                fuse_reply_lseek(req, offset);
 677            }
 678            return;
 679        }
 680
 681        if (ret & BDRV_BLOCK_DATA) {
 682            if (whence == SEEK_DATA) {
 683                fuse_reply_lseek(req, offset);
 684                return;
 685            }
 686        } else {
 687            if (whence == SEEK_HOLE) {
 688                fuse_reply_lseek(req, offset);
 689                return;
 690            }
 691        }
 692
 693        /* Safety check against infinite loops */
 694        if (!pnum) {
 695            fuse_reply_err(req, ENXIO);
 696            return;
 697        }
 698
 699        offset += pnum;
 700    }
 701}
 702#endif
 703
 704static const struct fuse_lowlevel_ops fuse_ops = {
 705    .init       = fuse_init,
 706    .lookup     = fuse_lookup,
 707    .getattr    = fuse_getattr,
 708    .setattr    = fuse_setattr,
 709    .open       = fuse_open,
 710    .read       = fuse_read,
 711    .write      = fuse_write,
 712    .fallocate  = fuse_fallocate,
 713    .flush      = fuse_flush,
 714    .fsync      = fuse_fsync,
 715#ifdef CONFIG_FUSE_LSEEK
 716    .lseek      = fuse_lseek,
 717#endif
 718};
 719
 720const BlockExportDriver blk_exp_fuse = {
 721    .type               = BLOCK_EXPORT_TYPE_FUSE,
 722    .instance_size      = sizeof(FuseExport),
 723    .create             = fuse_export_create,
 724    .delete             = fuse_export_delete,
 725    .request_shutdown   = fuse_export_shutdown,
 726};
 727