qemu/qemu-nbd.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   3 *
   4 *  Network Block Device
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; under version 2 of the License.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include <getopt.h>
  21#include <libgen.h>
  22#include <pthread.h>
  23
  24#include "qemu/help-texts.h"
  25#include "qapi/error.h"
  26#include "qemu/cutils.h"
  27#include "system/block-backend.h"
  28#include "system/runstate.h" /* for qemu_system_killed() prototype */
  29#include "block/block_int.h"
  30#include "block/nbd.h"
  31#include "qemu/main-loop.h"
  32#include "qemu/module.h"
  33#include "qemu/option.h"
  34#include "qemu/error-report.h"
  35#include "qemu/config-file.h"
  36#include "qemu/bswap.h"
  37#include "qemu/log.h"
  38#include "qemu/systemd.h"
  39#include "block/snapshot.h"
  40#include "qobject/qdict.h"
  41#include "qobject/qstring.h"
  42#include "qom/object_interfaces.h"
  43#include "io/channel-socket.h"
  44#include "io/net-listener.h"
  45#include "crypto/init.h"
  46#include "crypto/tlscreds.h"
  47#include "trace/control.h"
  48#include "qemu-version.h"
  49
  50#ifdef CONFIG_SELINUX
  51#include <selinux/selinux.h>
  52#endif
  53
/*
 * HAVE_NBD_DEVICE is 1 when building for Linux and 0 otherwise.  It gates
 * the kernel NBD client code (show_parts(), nbd_client_thread()) and the
 * "Kernel NBD client support" section of the help text.
 */
#ifdef __linux__
#define HAVE_NBD_DEVICE 1
#else
#define HAVE_NBD_DEVICE 0
#endif

/*
 * printf-style template for the default unix socket path; usage() fills
 * the %s with the literal "DEVICE".
 */
#define SOCKET_PATH                  "/var/lock/qemu-nbd-%s"
/*
 * Values returned by getopt_long() for options that only have a long
 * spelling (see the lopt[] table in main()).  They start at 256 so they
 * cannot collide with any single-character short option.
 */
#define QEMU_NBD_OPT_CACHE           256
#define QEMU_NBD_OPT_AIO             257
#define QEMU_NBD_OPT_DISCARD         258
#define QEMU_NBD_OPT_DETECT_ZEROES   259
#define QEMU_NBD_OPT_OBJECT          260
#define QEMU_NBD_OPT_TLSCREDS        261
#define QEMU_NBD_OPT_IMAGE_OPTS      262
#define QEMU_NBD_OPT_FORK            263
#define QEMU_NBD_OPT_TLSAUTHZ        264
#define QEMU_NBD_OPT_PID_FILE        265
#define QEMU_NBD_OPT_SELINUX_LABEL   266
#define QEMU_NBD_OPT_TLSHOSTNAME     267
#define QEMU_NBD_OPT_HANDSHAKE_LIMIT 268

/*
 * NOTE(review): presumably the size in bytes of a master boot record;
 * no use of this macro is visible in this part of the file -- confirm.
 */
#define MBR_SIZE 512
  76
/* Set to 1 by -t/--persistent: do not terminate when the last client leaves. */
static int persistent = 0;
/*
 * Server life-cycle state.  It moves from RUNNING to TERMINATE either from
 * qemu_system_killed() or from nbd_client_closed(); new connections are
 * only accepted while it is RUNNING.  No assignment of TERMINATED is
 * visible in this part of the file.
 */
static enum { RUNNING, TERMINATE, TERMINATED } state;
/* Maximum number of simultaneous clients (-e/--shared); 0 means no limit. */
static int shared = 1;
/* Number of currently connected clients (see nbd_accept/nbd_client_closed). */
static int nb_fds;
/* Listener whose client callback is toggled by nbd_update_server_watch(). */
static QIONetListener *server;
/* TLS credentials resolved from --tls-creds, handed to every new client. */
static QCryptoTLSCreds *tlscreds;
/* Value of --tls-authz, handed to every new client. */
static const char *tlsauthz;
/* Handshake time limit in seconds (--handshake-limit), passed to nbd_client_new(). */
static int handshake_limit = NBD_DEFAULT_HANDSHAKE_MAX_SECS;
  85
  86static void usage(const char *name)
  87{
  88    (printf) (
  89"Usage: %s [OPTIONS] FILE\n"
  90"  or:  %s -L [OPTIONS]\n"
  91"QEMU Disk Network Block Device Utility\n"
  92"\n"
  93"  -h, --help                display this help and exit\n"
  94"  -V, --version             output version information and exit\n"
  95"\n"
  96"Connection properties:\n"
  97"  -p, --port=PORT           port to listen on (default `%d')\n"
  98"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
  99"  -k, --socket=PATH         path to the unix socket\n"
 100"                            (default '"SOCKET_PATH"')\n"
 101"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
 102"  -t, --persistent          don't exit on the last connection\n"
 103"  -v, --verbose             display extra debugging information\n"
 104"  -x, --export-name=NAME    expose export by name (default is empty string)\n"
 105"  -D, --description=TEXT    export a human-readable description\n"
 106"      --handshake-limit=N   limit client's handshake to N seconds (default 10)\n"
 107"\n"
 108"Exposing part of the image:\n"
 109"  -o, --offset=OFFSET       offset into the image\n"
 110"  -A, --allocation-depth    expose the allocation depth\n"
 111"  -B, --bitmap=NAME         expose a persistent dirty bitmap\n"
 112"\n"
 113"General purpose options:\n"
 114"  -L, --list                list exports available from another NBD server\n"
 115"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
 116"                            passwords and/or encryption keys\n"
 117"  --tls-creds=ID            use id of an earlier --object to provide TLS\n"
 118"  --tls-authz=ID            use id of an earlier --object to provide\n"
 119"                            authorization\n"
 120"  --tls-hostname=HOSTNAME   override hostname used to check x509 certificate\n"
 121"  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
 122"                            specify tracing options\n"
 123"  --fork                    fork off the server process and exit the parent\n"
 124"                            once the server is running\n"
 125"  --pid-file=PATH           store the server's process ID in the given file\n"
 126#ifdef CONFIG_SELINUX
 127"  --selinux-label=LABEL     set SELinux process label on listening socket\n"
 128#endif
 129#if HAVE_NBD_DEVICE
 130"\n"
 131"Kernel NBD client support:\n"
 132"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
 133"  -d, --disconnect          disconnect the specified device\n"
 134#endif
 135"\n"
 136"Block device options:\n"
 137"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
 138"  -r, --read-only           export read-only\n"
 139"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
 140"                            file with backing_file=FILE, redirect the write to\n"
 141"                            the temporary one\n"
 142"  -l, --load-snapshot=SNAPSHOT_PARAM\n"
 143"                            load an internal snapshot inside FILE and export it\n"
 144"                            as an read-only device, SNAPSHOT_PARAM format is\n"
 145"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
 146"                            '[ID_OR_NAME]'\n"
 147"  -n, --nocache             disable host cache\n"
 148"      --cache=MODE          set cache mode used to access the disk image, the\n"
 149"                            valid options are: 'none', 'writeback' (default),\n"
 150"                            'writethrough', 'directsync' and 'unsafe'\n"
 151"      --aio=MODE            set AIO mode (native, io_uring or threads)\n"
 152"      --discard=MODE        set discard mode (ignore, unmap)\n"
 153"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
 154"      --image-opts          treat FILE as a full set of image options\n"
 155"\n"
 156QEMU_HELP_BOTTOM "\n"
 157    , name, name, NBD_DEFAULT_PORT, "DEVICE");
 158}
 159
 160static void version(const char *name)
 161{
 162    printf(
 163"%s " QEMU_FULL_VERSION "\n"
 164"Written by Anthony Liguori.\n"
 165"\n"
 166QEMU_COPYRIGHT "\n"
 167"This is free software; see the source for copying conditions.  There is NO\n"
 168"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
 169    , name);
 170}
 171
 172#ifdef CONFIG_POSIX
 173/*
 174 * The client thread uses SIGTERM to interrupt the server.  A signal
 175 * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
 176 */
 177void qemu_system_killed(int signum, pid_t pid)
 178{
 179    qatomic_cmpxchg(&state, RUNNING, TERMINATE);
 180    qemu_notify_event();
 181}
 182#endif /* CONFIG_POSIX */
 183
 184static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
 185                                const char *hostname)
 186{
 187    int ret = EXIT_FAILURE;
 188    int rc;
 189    Error *err = NULL;
 190    QIOChannelSocket *sioc;
 191    NBDExportInfo *list;
 192    int i, j;
 193
 194    sioc = qio_channel_socket_new();
 195    if (qio_channel_socket_connect_sync(sioc, saddr, &err) < 0) {
 196        error_report_err(err);
 197        goto out;
 198    }
 199    rc = nbd_receive_export_list(QIO_CHANNEL(sioc), tls, hostname, &list,
 200                                 &err);
 201    if (rc < 0) {
 202        if (err) {
 203            error_report_err(err);
 204        }
 205        goto out;
 206    }
 207    printf("exports available: %d\n", rc);
 208    for (i = 0; i < rc; i++) {
 209        printf(" export: '%s'\n", list[i].name);
 210        if (list[i].description && *list[i].description) {
 211            printf("  description: %s\n", list[i].description);
 212        }
 213        if (list[i].flags & NBD_FLAG_HAS_FLAGS) {
 214            static const char *const flag_names[] = {
 215                [NBD_FLAG_READ_ONLY_BIT]            = "readonly",
 216                [NBD_FLAG_SEND_FLUSH_BIT]           = "flush",
 217                [NBD_FLAG_SEND_FUA_BIT]             = "fua",
 218                [NBD_FLAG_ROTATIONAL_BIT]           = "rotational",
 219                [NBD_FLAG_SEND_TRIM_BIT]            = "trim",
 220                [NBD_FLAG_SEND_WRITE_ZEROES_BIT]    = "zeroes",
 221                [NBD_FLAG_SEND_DF_BIT]              = "df",
 222                [NBD_FLAG_CAN_MULTI_CONN_BIT]       = "multi",
 223                [NBD_FLAG_SEND_RESIZE_BIT]          = "resize",
 224                [NBD_FLAG_SEND_CACHE_BIT]           = "cache",
 225                [NBD_FLAG_SEND_FAST_ZERO_BIT]       = "fast-zero",
 226                [NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT]   = "block-status-payload",
 227            };
 228
 229            printf("  size:  %" PRIu64 "\n", list[i].size);
 230            printf("  flags: 0x%x (", list[i].flags);
 231            for (size_t bit = 0; bit < ARRAY_SIZE(flag_names); bit++) {
 232                if (flag_names[bit] && (list[i].flags & (1 << bit))) {
 233                    printf(" %s", flag_names[bit]);
 234                }
 235            }
 236            printf(" )\n");
 237        }
 238        if (list[i].min_block) {
 239            printf("  min block: %u\n", list[i].min_block);
 240            printf("  opt block: %u\n", list[i].opt_block);
 241            printf("  max block: %u\n", list[i].max_block);
 242        }
 243        printf("  transaction size: %s\n",
 244               list[i].mode >= NBD_MODE_EXTENDED ?
 245               "64-bit" : "32-bit");
 246        if (list[i].n_contexts) {
 247            printf("  available meta contexts: %d\n", list[i].n_contexts);
 248            for (j = 0; j < list[i].n_contexts; j++) {
 249                printf("   %s\n", list[i].contexts[j]);
 250            }
 251        }
 252    }
 253    nbd_free_export_list(list, rc);
 254
 255    ret = EXIT_SUCCESS;
 256 out:
 257    object_unref(OBJECT(sioc));
 258    return ret;
 259}
 260
 261
/*
 * Settings shared between main() and nbd_client_thread(); a pointer to
 * this structure is the thread's argument.
 */
struct NbdClientOpts {
    char *device;       /* path opened with O_RDWR as the NBD device (-c) */
    char *srcpath;      /* source name shown in the verbose "connected" message */
    SocketAddress *saddr; /* server address the client thread connects to */
    int old_stderr;     /* fd handed to nbd_client_release_pipe() */
    bool fork_process;  /* set by --fork */
    bool verbose;       /* set by -v/--verbose */
};
 270
 271static void nbd_client_release_pipe(int old_stderr)
 272{
 273    /* Close stderr so that the qemu-nbd process exits.  */
 274    if (dup2(old_stderr, STDERR_FILENO) < 0) {
 275        error_report("Could not release pipe to parent: %s",
 276                     strerror(errno));
 277        exit(EXIT_FAILURE);
 278    }
 279    if (old_stderr != STDOUT_FILENO && close(old_stderr) < 0) {
 280        error_report("Could not release qemu-nbd: %s", strerror(errno));
 281        exit(EXIT_FAILURE);
 282    }
 283}
 284
 285#if HAVE_NBD_DEVICE
 286static void *show_parts(void *arg)
 287{
 288    char *device = arg;
 289    int nbd;
 290
 291    /* linux just needs an open() to trigger
 292     * the partition table update
 293     * but remember to load the module with max_part != 0 :
 294     *     modprobe nbd max_part=63
 295     */
 296    nbd = open(device, O_RDWR);
 297    if (nbd >= 0) {
 298        close(nbd);
 299    }
 300    return NULL;
 301}
 302
 303static void *nbd_client_thread(void *arg)
 304{
 305    struct NbdClientOpts *opts = arg;
 306    /* TODO: Revisit this if nbd.ko ever gains support for structured reply */
 307    NBDExportInfo info = { .request_sizes = false, .name = g_strdup(""),
 308                           .mode = NBD_MODE_SIMPLE };
 309    QIOChannelSocket *sioc;
 310    int fd = -1;
 311    int ret = EXIT_FAILURE;
 312    pthread_t show_parts_thread;
 313    Error *local_error = NULL;
 314
 315    sioc = qio_channel_socket_new();
 316    if (qio_channel_socket_connect_sync(sioc,
 317                                        opts->saddr,
 318                                        &local_error) < 0) {
 319        error_report_err(local_error);
 320        goto out;
 321    }
 322
 323    if (nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, NULL, NULL,
 324                              &info, &local_error) < 0) {
 325        if (local_error) {
 326            error_report_err(local_error);
 327        }
 328        goto out;
 329    }
 330
 331    fd = open(opts->device, O_RDWR);
 332    if (fd < 0) {
 333        /* Linux-only, we can use %m in printf.  */
 334        error_report("Failed to open %s: %m", opts->device);
 335        goto out;
 336    }
 337
 338    if (nbd_init(fd, sioc, &info, &local_error) < 0) {
 339        error_report_err(local_error);
 340        goto out;
 341    }
 342
 343    /* update partition table */
 344    pthread_create(&show_parts_thread, NULL, show_parts, opts->device);
 345
 346    if (opts->verbose && !opts->fork_process) {
 347        fprintf(stderr, "NBD device %s is now connected to %s\n",
 348                opts->device, opts->srcpath);
 349    } else {
 350        nbd_client_release_pipe(opts->old_stderr);
 351    }
 352
 353    if (nbd_client(fd) < 0) {
 354        goto out;
 355    }
 356
 357    ret = EXIT_SUCCESS;
 358
 359 out:
 360    if (fd >= 0) {
 361        close(fd);
 362    }
 363    object_unref(OBJECT(sioc));
 364    g_free(info.name);
 365    kill(getpid(), SIGTERM);
 366    return (void *) (intptr_t) ret;
 367}
 368#endif /* HAVE_NBD_DEVICE */
 369
 370static int nbd_can_accept(void)
 371{
 372    return state == RUNNING && (shared == 0 || nb_fds < shared);
 373}
 374
 375static void nbd_update_server_watch(void);
 376
 377static void nbd_client_closed(NBDClient *client, bool negotiated)
 378{
 379    nb_fds--;
 380    if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) {
 381        state = TERMINATE;
 382    }
 383    nbd_update_server_watch();
 384    nbd_client_put(client);
 385}
 386
 387static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
 388                       gpointer opaque)
 389{
 390    if (state >= TERMINATE) {
 391        return;
 392    }
 393
 394    nb_fds++;
 395    nbd_update_server_watch();
 396    nbd_client_new(cioc, handshake_limit,
 397                   tlscreds, tlsauthz, nbd_client_closed, NULL);
 398}
 399
 400static void nbd_update_server_watch(void)
 401{
 402    if (nbd_can_accept()) {
 403        qio_net_listener_set_client_func(server, nbd_accept, NULL, NULL);
 404    } else {
 405        qio_net_listener_set_client_func(server, NULL, NULL, NULL);
 406    }
 407}
 408
 409
 410static SocketAddress *nbd_build_socket_address(const char *sockpath,
 411                                               const char *bindto,
 412                                               const char *port)
 413{
 414    SocketAddress *saddr;
 415
 416    saddr = g_new0(SocketAddress, 1);
 417    if (sockpath) {
 418        saddr->type = SOCKET_ADDRESS_TYPE_UNIX;
 419        saddr->u.q_unix.path = g_strdup(sockpath);
 420    } else {
 421        InetSocketAddress *inet;
 422        saddr->type = SOCKET_ADDRESS_TYPE_INET;
 423        inet = &saddr->u.inet;
 424        inet->host = g_strdup(bindto);
 425        if (port) {
 426            inet->port = g_strdup(port);
 427        } else  {
 428            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
 429        }
 430    }
 431
 432    return saddr;
 433}
 434
 435
/*
 * Option list named "file" whose implied (unnamed) option is also "file".
 * The descriptor table is empty, which means any parameter is accepted.
 */
static QemuOptsList file_opts = {
    .name = "file",
    .implied_opt_name = "file",
    /* The list head is initialised to refer to itself, i.e. an empty queue. */
    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
    .desc = {
        /* no elements => accept any params */
        { /* end of list */ }
    },
};
 445
 446static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, bool list,
 447                                          Error **errp)
 448{
 449    Object *obj;
 450    QCryptoTLSCreds *creds;
 451
 452    obj = object_resolve_path_component(
 453        object_get_objects_root(), id);
 454    if (!obj) {
 455        error_setg(errp, "No TLS credentials with id '%s'",
 456                   id);
 457        return NULL;
 458    }
 459    creds = (QCryptoTLSCreds *)
 460        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
 461    if (!creds) {
 462        error_setg(errp, "Object with id '%s' is not TLS credentials",
 463                   id);
 464        return NULL;
 465    }
 466
 467    if (!qcrypto_tls_creds_check_endpoint(creds,
 468                                          list
 469                                          ? QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT
 470                                          : QCRYPTO_TLS_CREDS_ENDPOINT_SERVER,
 471                                          errp)) {
 472        return NULL;
 473    }
 474    object_ref(obj);
 475    return creds;
 476}
 477
 478static void setup_address_and_port(const char **address, const char **port)
 479{
 480    if (*address == NULL) {
 481        *address = "0.0.0.0";
 482    }
 483
 484    if (*port == NULL) {
 485        *port = stringify(NBD_DEFAULT_PORT);
 486    }
 487}
 488
 489/*
 490 * Check socket parameters compatibility when socket activation is used.
 491 */
 492static const char *socket_activation_validate_opts(const char *device,
 493                                                   const char *sockpath,
 494                                                   const char *address,
 495                                                   const char *port,
 496                                                   const char *selinux,
 497                                                   bool list)
 498{
 499    if (device != NULL) {
 500        return "NBD device can't be set when using socket activation";
 501    }
 502
 503    if (sockpath != NULL) {
 504        return "Unix socket can't be set when using socket activation";
 505    }
 506
 507    if (address != NULL) {
 508        return "The interface can't be set when using socket activation";
 509    }
 510
 511    if (port != NULL) {
 512        return "TCP port number can't be set when using socket activation";
 513    }
 514
 515    if (selinux != NULL) {
 516        return "SELinux label can't be set when using socket activation";
 517    }
 518
 519    if (list) {
 520        return "List mode is incompatible with socket activation";
 521    }
 522
 523    return NULL;
 524}
 525
/*
 * Shutdown helper: calls job_cancel_sync_all(), blk_exp_close_all() and
 * bdrv_close_all(), in that order.
 * NOTE(review): going by the helper names this cancels jobs, then closes
 * the block exports, then closes the block devices -- confirm against
 * their definitions before relying on the ordering rationale.
 */
static void qemu_nbd_shutdown(void)
{
    job_cancel_sync_all();
    blk_exp_close_all();
    bdrv_close_all();
}
 532
 533int main(int argc, char **argv)
 534{
 535    BlockBackend *blk;
 536    BlockDriverState *bs;
 537    uint64_t dev_offset = 0;
 538    bool readonly = false;
 539    bool disconnect = false;
 540    const char *bindto = NULL;
 541    const char *port = NULL;
 542    char *sockpath = NULL;
 543    QemuOpts *sn_opts = NULL;
 544    const char *sn_id_or_name = NULL;
 545    const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L";
 546    struct option lopt[] = {
 547        { "help", no_argument, NULL, 'h' },
 548        { "version", no_argument, NULL, 'V' },
 549        { "bind", required_argument, NULL, 'b' },
 550        { "port", required_argument, NULL, 'p' },
 551        { "socket", required_argument, NULL, 'k' },
 552        { "offset", required_argument, NULL, 'o' },
 553        { "read-only", no_argument, NULL, 'r' },
 554        { "allocation-depth", no_argument, NULL, 'A' },
 555        { "bitmap", required_argument, NULL, 'B' },
 556        { "connect", required_argument, NULL, 'c' },
 557        { "disconnect", no_argument, NULL, 'd' },
 558        { "list", no_argument, NULL, 'L' },
 559        { "snapshot", no_argument, NULL, 's' },
 560        { "load-snapshot", required_argument, NULL, 'l' },
 561        { "nocache", no_argument, NULL, 'n' },
 562        { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
 563        { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
 564        { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
 565        { "detect-zeroes", required_argument, NULL,
 566          QEMU_NBD_OPT_DETECT_ZEROES },
 567        { "shared", required_argument, NULL, 'e' },
 568        { "format", required_argument, NULL, 'f' },
 569        { "persistent", no_argument, NULL, 't' },
 570        { "verbose", no_argument, NULL, 'v' },
 571        { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
 572        { "export-name", required_argument, NULL, 'x' },
 573        { "description", required_argument, NULL, 'D' },
 574        { "handshake-limit", required_argument, NULL,
 575          QEMU_NBD_OPT_HANDSHAKE_LIMIT },
 576        { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
 577        { "tls-hostname", required_argument, NULL, QEMU_NBD_OPT_TLSHOSTNAME },
 578        { "tls-authz", required_argument, NULL, QEMU_NBD_OPT_TLSAUTHZ },
 579        { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
 580        { "trace", required_argument, NULL, 'T' },
 581        { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
 582        { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE },
 583        { "selinux-label", required_argument, NULL,
 584          QEMU_NBD_OPT_SELINUX_LABEL },
 585        { NULL, 0, NULL, 0 }
 586    };
 587    int ch;
 588    int opt_ind = 0;
 589    int flags = BDRV_O_RDWR;
 590    int ret = 0;
 591    bool seen_cache = false;
 592    bool seen_discard = false;
 593    bool seen_aio = false;
 594    pthread_t client_thread;
 595    const char *fmt = NULL;
 596    Error *local_err = NULL;
 597    BlockdevDetectZeroesOptions detect_zeroes =
 598        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
 599    QDict *options = NULL;
 600    const char *export_name = NULL; /* defaults to "" later for server mode */
 601    const char *export_description = NULL;
 602    BlockDirtyBitmapOrStrList *bitmaps = NULL;
 603    bool alloc_depth = false;
 604    const char *tlscredsid = NULL;
 605    const char *tlshostname = NULL;
 606    bool imageOpts = false;
 607    bool writethrough = false; /* Client will flush as needed. */
 608    bool list = false;
 609    unsigned socket_activation;
 610    const char *pid_file_name = NULL;
 611    const char *selinux_label = NULL;
 612    BlockExportOptions *export_opts;
 613    struct NbdClientOpts opts = {
 614        .fork_process = false,
 615        .verbose = false,
 616        .device = NULL,
 617        .srcpath = NULL,
 618        .saddr = NULL,
 619        .old_stderr = STDOUT_FILENO,
 620    };
 621
 622#ifdef CONFIG_POSIX
 623    os_setup_early_signal_handling();
 624    os_setup_signal_handling();
 625#endif
 626
 627    socket_init();
 628    error_init(argv[0]);
 629    module_call_init(MODULE_INIT_TRACE);
 630    qcrypto_init(&error_fatal);
 631
 632    module_call_init(MODULE_INIT_QOM);
 633    qemu_add_opts(&qemu_trace_opts);
 634    qemu_init_exec_dir(argv[0]);
 635
 636    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
 637        switch (ch) {
 638        case 's':
 639            flags |= BDRV_O_SNAPSHOT;
 640            break;
 641        case 'n':
 642            optarg = (char *) "none";
 643            /* fallthrough */
 644        case QEMU_NBD_OPT_CACHE:
 645            if (seen_cache) {
 646                error_report("-n and --cache can only be specified once");
 647                exit(EXIT_FAILURE);
 648            }
 649            seen_cache = true;
 650            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) {
 651                error_report("Invalid cache mode `%s'", optarg);
 652                exit(EXIT_FAILURE);
 653            }
 654            break;
 655        case QEMU_NBD_OPT_AIO:
 656            if (seen_aio) {
 657                error_report("--aio can only be specified once");
 658                exit(EXIT_FAILURE);
 659            }
 660            seen_aio = true;
 661            if (bdrv_parse_aio(optarg, &flags) < 0) {
 662                error_report("Invalid aio mode '%s'", optarg);
 663                exit(EXIT_FAILURE);
 664            }
 665            break;
 666        case QEMU_NBD_OPT_DISCARD:
 667            if (seen_discard) {
 668                error_report("--discard can only be specified once");
 669                exit(EXIT_FAILURE);
 670            }
 671            seen_discard = true;
 672            if (bdrv_parse_discard_flags(optarg, &flags) == -1) {
 673                error_report("Invalid discard mode `%s'", optarg);
 674                exit(EXIT_FAILURE);
 675            }
 676            break;
 677        case QEMU_NBD_OPT_DETECT_ZEROES:
 678            detect_zeroes =
 679                qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
 680                                optarg,
 681                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
 682                                &local_err);
 683            if (local_err) {
 684                error_reportf_err(local_err,
 685                                  "Failed to parse detect_zeroes mode: ");
 686                exit(EXIT_FAILURE);
 687            }
 688            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
 689                !(flags & BDRV_O_UNMAP)) {
 690                error_report("setting detect-zeroes to unmap is not allowed "
 691                             "without setting discard operation to unmap");
 692                exit(EXIT_FAILURE);
 693            }
 694            break;
 695        case 'b':
 696            bindto = optarg;
 697            break;
 698        case 'p':
 699            port = optarg;
 700            break;
 701        case 'o':
 702            if (qemu_strtou64(optarg, NULL, 0, &dev_offset) < 0) {
 703                error_report("Invalid offset '%s'", optarg);
 704                exit(EXIT_FAILURE);
 705            }
 706            break;
 707        case 'l':
 708            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
 709                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
 710                                                  optarg, false);
 711                if (!sn_opts) {
 712                    error_report("Failed in parsing snapshot param `%s'",
 713                                 optarg);
 714                    exit(EXIT_FAILURE);
 715                }
 716            } else {
 717                sn_id_or_name = optarg;
 718            }
 719            /* fall through */
 720        case 'r':
 721            readonly = true;
 722            flags &= ~BDRV_O_RDWR;
 723            break;
 724        case 'A':
 725            alloc_depth = true;
 726            break;
 727        case 'B':
 728            {
 729                BlockDirtyBitmapOrStr *el = g_new(BlockDirtyBitmapOrStr, 1);
 730                *el = (BlockDirtyBitmapOrStr) {
 731                    .type = QTYPE_QSTRING,
 732                    .u.local = g_strdup(optarg),
 733                };
 734                QAPI_LIST_PREPEND(bitmaps, el);
 735            }
 736            break;
 737        case 'k':
 738            sockpath = optarg;
 739            if (sockpath[0] != '/') {
 740                error_report("socket path must be absolute");
 741                exit(EXIT_FAILURE);
 742            }
 743            break;
 744        case 'd':
 745            disconnect = true;
 746            break;
 747        case 'c':
 748            opts.device = optarg;
 749            break;
 750        case 'e':
 751            if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 ||
 752                shared < 0) {
 753                error_report("Invalid shared device number '%s'", optarg);
 754                exit(EXIT_FAILURE);
 755            }
 756            break;
 757        case 'f':
 758            fmt = optarg;
 759            break;
 760        case 't':
 761            persistent = 1;
 762            break;
 763        case 'x':
 764            export_name = optarg;
 765            if (strlen(export_name) > NBD_MAX_STRING_SIZE) {
 766                error_report("export name '%s' too long", export_name);
 767                exit(EXIT_FAILURE);
 768            }
 769            break;
 770        case 'D':
 771            export_description = optarg;
 772            if (strlen(export_description) > NBD_MAX_STRING_SIZE) {
 773                error_report("export description '%s' too long",
 774                             export_description);
 775                exit(EXIT_FAILURE);
 776            }
 777            break;
 778        case 'v':
 779            opts.verbose = true;
 780            break;
 781        case 'V':
 782            version(argv[0]);
 783            exit(0);
 784            break;
 785        case 'h':
 786            usage(argv[0]);
 787            exit(0);
 788            break;
 789        case '?':
 790            error_report("Try `%s --help' for more information.", argv[0]);
 791            exit(EXIT_FAILURE);
 792        case QEMU_NBD_OPT_OBJECT:
 793            user_creatable_process_cmdline(optarg);
 794            break;
 795        case QEMU_NBD_OPT_TLSCREDS:
 796            tlscredsid = optarg;
 797            break;
 798        case QEMU_NBD_OPT_TLSHOSTNAME:
 799            tlshostname = optarg;
 800            break;
 801        case QEMU_NBD_OPT_IMAGE_OPTS:
 802            imageOpts = true;
 803            break;
 804        case 'T':
 805            trace_opt_parse(optarg);
 806            break;
 807        case QEMU_NBD_OPT_TLSAUTHZ:
 808            tlsauthz = optarg;
 809            break;
 810        case QEMU_NBD_OPT_FORK:
 811            opts.fork_process = true;
 812            break;
 813        case 'L':
 814            list = true;
 815            break;
 816        case QEMU_NBD_OPT_PID_FILE:
 817            pid_file_name = optarg;
 818            break;
 819        case QEMU_NBD_OPT_SELINUX_LABEL:
 820            selinux_label = optarg;
 821            break;
 822        case QEMU_NBD_OPT_HANDSHAKE_LIMIT:
 823            if (qemu_strtoi(optarg, NULL, 0, &handshake_limit) < 0 ||
 824                handshake_limit < 0) {
 825                error_report("Invalid handshake limit '%s'", optarg);
 826                exit(EXIT_FAILURE);
 827            }
 828            break;
 829        }
 830    }
 831
 832    if (list) {
 833        if (argc != optind) {
 834            error_report("List mode is incompatible with a file name");
 835            exit(EXIT_FAILURE);
 836        }
 837        if (export_name || export_description || dev_offset ||
 838            opts.device || disconnect || fmt || sn_id_or_name || bitmaps ||
 839            alloc_depth || seen_aio || seen_discard || seen_cache) {
 840            error_report("List mode is incompatible with per-device settings");
 841            exit(EXIT_FAILURE);
 842        }
 843        if (opts.fork_process) {
 844            error_report("List mode is incompatible with forking");
 845            exit(EXIT_FAILURE);
 846        }
 847    } else if ((argc - optind) != 1) {
 848        error_report("Invalid number of arguments");
 849        error_printf("Try `%s --help' for more information.\n", argv[0]);
 850        exit(EXIT_FAILURE);
 851    } else if (!export_name) {
 852        export_name = "";
 853    }
 854
 855    qemu_set_log(LOG_TRACE, &error_fatal);
 856
 857    socket_activation = check_socket_activation();
 858    if (socket_activation == 0) {
 859        if (!sockpath) {
 860            setup_address_and_port(&bindto, &port);
 861        }
 862    } else {
 863        /* Using socket activation - check user didn't use -p etc. */
 864        const char *err_msg = socket_activation_validate_opts(opts.device,
 865                                                              sockpath,
 866                                                              bindto, port,
 867                                                              selinux_label,
 868                                                              list);
 869        if (err_msg != NULL) {
 870            error_report("%s", err_msg);
 871            exit(EXIT_FAILURE);
 872        }
 873
 874        /* qemu-nbd can only listen on a single socket.  */
 875        if (socket_activation > 1) {
 876            error_report("qemu-nbd does not support socket activation with %s > 1",
 877                         "LISTEN_FDS");
 878            exit(EXIT_FAILURE);
 879        }
 880    }
 881
 882    if (tlscredsid) {
 883        if (opts.device) {
 884            error_report("TLS is not supported with a host device");
 885            exit(EXIT_FAILURE);
 886        }
 887        if (tlsauthz && list) {
 888            error_report("TLS authorization is incompatible with export list");
 889            exit(EXIT_FAILURE);
 890        }
 891        if (tlshostname && !list) {
 892            error_report("TLS hostname is only supported with export list");
 893            exit(EXIT_FAILURE);
 894        }
 895        tlscreds = nbd_get_tls_creds(tlscredsid, list, &local_err);
 896        if (local_err) {
 897            error_reportf_err(local_err, "Failed to get TLS creds: ");
 898            exit(EXIT_FAILURE);
 899        }
 900    } else {
 901        if (tlsauthz) {
 902            error_report("--tls-authz is not permitted without --tls-creds");
 903            exit(EXIT_FAILURE);
 904        }
 905        if (tlshostname) {
 906            error_report("--tls-hostname is not permitted without --tls-creds");
 907            exit(EXIT_FAILURE);
 908        }
 909    }
 910
 911    if (selinux_label) {
 912#ifdef CONFIG_SELINUX
 913        if (sockpath == NULL && opts.device == NULL) {
 914            error_report("--selinux-label is not permitted without --socket");
 915            exit(EXIT_FAILURE);
 916        }
 917#else
 918        error_report("SELinux support not enabled in this binary");
 919        exit(EXIT_FAILURE);
 920#endif
 921    }
 922
 923    if (list) {
 924        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
 925        return qemu_nbd_client_list(opts.saddr, tlscreds,
 926                                    tlshostname ? tlshostname : bindto);
 927    }
 928
 929#if !HAVE_NBD_DEVICE
 930    if (disconnect || opts.device) {
 931        error_report("Kernel /dev/nbdN support not available");
 932        exit(EXIT_FAILURE);
 933    }
 934#else /* HAVE_NBD_DEVICE */
 935    if (disconnect) {
 936        int nbdfd = open(argv[optind], O_RDWR);
 937        if (nbdfd < 0) {
 938            error_report("Cannot open %s: %s", argv[optind],
 939                         strerror(errno));
 940            exit(EXIT_FAILURE);
 941        }
 942        nbd_disconnect(nbdfd);
 943
 944        close(nbdfd);
 945
 946        printf("%s disconnected\n", argv[optind]);
 947
 948        return 0;
 949    }
 950#endif
 951
 952    if ((opts.device && !opts.verbose) || opts.fork_process) {
 953#ifndef WIN32
 954        g_autoptr(GError) err = NULL;
 955        int stderr_fd[2];
 956        pid_t pid;
 957
 958        if (!g_unix_open_pipe(stderr_fd, FD_CLOEXEC, &err)) {
 959            error_report("Error setting up communication pipe: %s",
 960                         err->message);
 961            exit(EXIT_FAILURE);
 962        }
 963
 964        /* Now daemonize, but keep a communication channel open to
 965         * print errors and exit with the proper status code.
 966         */
 967        pid = fork();
 968        if (pid < 0) {
 969            error_report("Failed to fork: %s", strerror(errno));
 970            exit(EXIT_FAILURE);
 971        } else if (pid == 0) {
 972            int saved_errno;
 973
 974            close(stderr_fd[0]);
 975
 976            /* Remember parent's stderr if we will be restoring it. */
 977            if (opts.verbose /* fork_process is set */) {
 978                opts.old_stderr = dup(STDERR_FILENO);
 979                if (opts.old_stderr < 0) {
 980                    error_report("Could not dup original stderr: %s",
 981                                 strerror(errno));
 982                    exit(EXIT_FAILURE);
 983                }
 984            }
 985
 986            ret = qemu_daemon(1, 0);
 987            saved_errno = errno;    /* dup2 will overwrite error below */
 988
 989            /* Temporarily redirect stderr to the parent's pipe...  */
 990            if (dup2(stderr_fd[1], STDERR_FILENO) < 0) {
 991                char str[256];
 992                snprintf(str, sizeof(str),
 993                         "%s: Failed to link stderr to the pipe: %s\n",
 994                         g_get_prgname(), strerror(errno));
 995                /*
 996                 * We are unable to use error_report() here as we need to get
 997                 * stderr pointed to the parent's pipe. Write to that pipe
 998                 * manually.
 999                 */
1000                ret = write(stderr_fd[1], str, strlen(str));
1001                exit(EXIT_FAILURE);
1002            }
1003
1004            if (ret < 0) {
1005                error_report("Failed to daemonize: %s", strerror(saved_errno));
1006                exit(EXIT_FAILURE);
1007            }
1008
1009            /* ... close the descriptor we inherited and go on.  */
1010            close(stderr_fd[1]);
1011        } else {
1012            bool errors = false;
1013            char *buf;
1014
1015            /* In the parent.  Print error messages from the child until
1016             * it closes the pipe.
1017             */
1018            close(stderr_fd[1]);
1019            buf = g_malloc(1024);
1020            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
1021                errors = true;
1022                ret = qemu_write_full(STDERR_FILENO, buf, ret);
1023                if (ret < 0) {
1024                    exit(EXIT_FAILURE);
1025                }
1026            }
1027            if (ret < 0) {
1028                error_report("Cannot read from daemon: %s",
1029                             strerror(errno));
1030                exit(EXIT_FAILURE);
1031            }
1032
1033            /* Usually the daemon should not print any message.
1034             * Exit with zero status in that case.
1035             */
1036            exit(errors);
1037        }
1038#else /* WIN32 */
1039        error_report("Unable to fork into background on Windows hosts");
1040        exit(EXIT_FAILURE);
1041#endif /* WIN32 */
1042    }
1043
1044    /*
1045     * trace_init must be done after daemonization.  Why? Because at
1046     * least the simple backend spins up a helper thread as well as an
1047     * atexit() handler that waits on that thread, but the helper
1048     * thread won't survive a fork, leading to deadlock in the child
1049     * if we initialized pre-fork.
1050     */
1051    if (!trace_init_backends()) {
1052        exit(1);
1053    }
1054    trace_init_file();
1055
1056    if (opts.device != NULL && sockpath == NULL) {
1057        sockpath = g_malloc(128);
1058        snprintf(sockpath, 128, SOCKET_PATH, basename(opts.device));
1059    }
1060
1061    server = qio_net_listener_new();
1062    if (socket_activation == 0) {
1063        int backlog;
1064
1065        if (persistent || shared == 0) {
1066            backlog = SOMAXCONN;
1067        } else {
1068            backlog = MIN(shared, SOMAXCONN);
1069        }
1070#ifdef CONFIG_SELINUX
1071        if (selinux_label && setsockcreatecon_raw(selinux_label) == -1) {
1072            error_report("Cannot set SELinux socket create context to %s: %s",
1073                         selinux_label, strerror(errno));
1074            exit(EXIT_FAILURE);
1075        }
1076#endif
1077        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
1078        if (qio_net_listener_open_sync(server, opts.saddr, backlog,
1079                                       &local_err) < 0) {
1080            object_unref(OBJECT(server));
1081            error_report_err(local_err);
1082            exit(EXIT_FAILURE);
1083        }
1084#ifdef CONFIG_SELINUX
1085        if (selinux_label && setsockcreatecon_raw(NULL) == -1) {
1086            error_report("Cannot clear SELinux socket create context: %s",
1087                         strerror(errno));
1088            exit(EXIT_FAILURE);
1089        }
1090#endif
1091    } else {
1092        size_t i;
1093        /* See comment in check_socket_activation above. */
1094        for (i = 0; i < socket_activation; i++) {
1095            QIOChannelSocket *sioc;
1096            sioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD + i,
1097                                             &local_err);
1098            if (sioc == NULL) {
1099                object_unref(OBJECT(server));
1100                error_reportf_err(local_err,
1101                                  "Failed to use socket activation: ");
1102                exit(EXIT_FAILURE);
1103            }
1104            qio_net_listener_add(server, sioc);
1105            object_unref(OBJECT(sioc));
1106        }
1107    }
1108
1109    qemu_init_main_loop(&error_fatal);
1110    bdrv_init();
1111    atexit(qemu_nbd_shutdown);
1112
1113    opts.srcpath = argv[optind];
1114    if (imageOpts) {
1115        QemuOpts *o;
1116        if (fmt) {
1117            error_report("--image-opts and -f are mutually exclusive");
1118            exit(EXIT_FAILURE);
1119        }
1120        o = qemu_opts_parse_noisily(&file_opts, opts.srcpath, true);
1121        if (!o) {
1122            qemu_opts_reset(&file_opts);
1123            exit(EXIT_FAILURE);
1124        }
1125        options = qemu_opts_to_qdict(o, NULL);
1126        qemu_opts_reset(&file_opts);
1127        blk = blk_new_open(NULL, NULL, options, flags, &local_err);
1128    } else {
1129        if (fmt) {
1130            options = qdict_new();
1131            qdict_put_str(options, "driver", fmt);
1132        }
1133        blk = blk_new_open(opts.srcpath, NULL, options, flags, &local_err);
1134    }
1135
1136    if (!blk) {
1137        error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
1138                          argv[optind]);
1139        exit(EXIT_FAILURE);
1140    }
1141    bs = blk_bs(blk);
1142
1143    if (dev_offset) {
1144        QDict *raw_opts = qdict_new();
1145        qdict_put_str(raw_opts, "driver", "raw");
1146        qdict_put_str(raw_opts, "file", bs->node_name);
1147        qdict_put_int(raw_opts, "offset", dev_offset);
1148
1149        bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
1150
1151        blk_remove_bs(blk);
1152        blk_insert_bs(blk, bs, &error_fatal);
1153        bdrv_unref(bs);
1154    }
1155
1156    blk_set_enable_write_cache(blk, !writethrough);
1157
1158    if (sn_opts) {
1159        ret = bdrv_snapshot_load_tmp(bs,
1160                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
1161                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
1162                                     &local_err);
1163    } else if (sn_id_or_name) {
1164        ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
1165                                                   &local_err);
1166    }
1167    if (ret < 0) {
1168        error_reportf_err(local_err, "Failed to load snapshot: ");
1169        exit(EXIT_FAILURE);
1170    }
1171
1172    bs->detect_zeroes = detect_zeroes;
1173
1174    nbd_server_is_qemu_nbd(shared);
1175
1176    export_opts = g_new(BlockExportOptions, 1);
1177    *export_opts = (BlockExportOptions) {
1178        .type               = BLOCK_EXPORT_TYPE_NBD,
1179        .id                 = g_strdup("qemu-nbd-export"),
1180        .node_name          = g_strdup(bdrv_get_node_name(bs)),
1181        .has_writethrough   = true,
1182        .writethrough       = writethrough,
1183        .has_writable       = true,
1184        .writable           = !readonly,
1185        .u.nbd = {
1186            .name                 = g_strdup(export_name),
1187            .description          = g_strdup(export_description),
1188            .has_bitmaps          = !!bitmaps,
1189            .bitmaps              = bitmaps,
1190            .has_allocation_depth = alloc_depth,
1191            .allocation_depth     = alloc_depth,
1192        },
1193    };
1194    blk_exp_add(export_opts, &error_fatal);
1195    qapi_free_BlockExportOptions(export_opts);
1196
1197    if (opts.device) {
1198#if HAVE_NBD_DEVICE
1199        ret = pthread_create(&client_thread, NULL, nbd_client_thread, &opts);
1200        if (ret != 0) {
1201            error_report("Failed to create client thread: %s", strerror(ret));
1202            exit(EXIT_FAILURE);
1203        }
1204#endif
1205    } else {
1206        /* Shut up GCC warnings.  */
1207        memset(&client_thread, 0, sizeof(client_thread));
1208    }
1209
1210    nbd_update_server_watch();
1211
1212    if (pid_file_name) {
1213        qemu_write_pidfile(pid_file_name, &error_fatal);
1214    }
1215
1216    /* now when the initialization is (almost) complete, chdir("/")
1217     * to free any busy filesystems */
1218    if (chdir("/") < 0) {
1219        error_report("Could not chdir to root directory: %s",
1220                     strerror(errno));
1221        exit(EXIT_FAILURE);
1222    }
1223
1224    if (opts.fork_process) {
1225        nbd_client_release_pipe(opts.old_stderr);
1226    }
1227
1228    state = RUNNING;
1229    do {
1230        main_loop_wait(false);
1231        if (state == TERMINATE) {
1232            blk_exp_close_all();
1233            state = TERMINATED;
1234        }
1235    } while (state != TERMINATED);
1236
1237    blk_unref(blk);
1238    if (sockpath) {
1239        unlink(sockpath);
1240    }
1241
1242    qemu_opts_del(sn_opts);
1243
1244    if (opts.device) {
1245        void *result;
1246        pthread_join(client_thread, &result);
1247        ret = (intptr_t)result;
1248        exit(ret);
1249    } else {
1250        exit(EXIT_SUCCESS);
1251    }
1252}
1253