/* qemu/include/block/nbd.h */
   1/*
   2 *  Copyright (C) 2016-2020 Red Hat, Inc.
   3 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   4 *
   5 *  Network Block Device
   6 *
   7 *  This program is free software; you can redistribute it and/or modify
   8 *  it under the terms of the GNU General Public License as published by
   9 *  the Free Software Foundation; under version 2 of the License.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#ifndef NBD_H
  21#define NBD_H
  22
  23#include "block/export.h"
  24#include "io/channel-socket.h"
  25#include "crypto/tlscreds.h"
  26#include "qapi/error.h"
  27
  28extern const BlockExportDriver blk_exp_nbd;
  29
  30/* Handshake phase structs - this struct is passed on the wire */
  31
  32struct NBDOption {
  33    uint64_t magic; /* NBD_OPTS_MAGIC */
  34    uint32_t option; /* NBD_OPT_* */
  35    uint32_t length;
  36} QEMU_PACKED;
  37typedef struct NBDOption NBDOption;
  38
  39struct NBDOptionReply {
  40    uint64_t magic; /* NBD_REP_MAGIC */
  41    uint32_t option; /* NBD_OPT_* */
  42    uint32_t type; /* NBD_REP_* */
  43    uint32_t length;
  44} QEMU_PACKED;
  45typedef struct NBDOptionReply NBDOptionReply;
  46
  47typedef struct NBDOptionReplyMetaContext {
  48    NBDOptionReply h; /* h.type = NBD_REP_META_CONTEXT, h.length > 4 */
  49    uint32_t context_id;
  50    /* metadata context name follows */
  51} QEMU_PACKED NBDOptionReplyMetaContext;
  52
  53/* Transmission phase structs
  54 *
  55 * Note: these are _NOT_ the same as the network representation of an NBD
  56 * request and reply!
  57 */
  58struct NBDRequest {
  59    uint64_t handle;
  60    uint64_t from;
  61    uint32_t len;
  62    uint16_t flags; /* NBD_CMD_FLAG_* */
  63    uint16_t type; /* NBD_CMD_* */
  64};
  65typedef struct NBDRequest NBDRequest;
  66
  67typedef struct NBDSimpleReply {
  68    uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
  69    uint32_t error;
  70    uint64_t handle;
  71} QEMU_PACKED NBDSimpleReply;
  72
  73/* Header of all structured replies */
  74typedef struct NBDStructuredReplyChunk {
  75    uint32_t magic;  /* NBD_STRUCTURED_REPLY_MAGIC */
  76    uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
  77    uint16_t type;   /* NBD_REPLY_TYPE_* */
  78    uint64_t handle; /* request handle */
  79    uint32_t length; /* length of payload */
  80} QEMU_PACKED NBDStructuredReplyChunk;
  81
  82typedef union NBDReply {
  83    NBDSimpleReply simple;
  84    NBDStructuredReplyChunk structured;
  85    struct {
  86        /* @magic and @handle fields have the same offset and size both in
  87         * simple reply and structured reply chunk, so let them be accessible
  88         * without ".simple." or ".structured." specification
  89         */
  90        uint32_t magic;
  91        uint32_t _skip;
  92        uint64_t handle;
  93    } QEMU_PACKED;
  94} NBDReply;
  95
  96/* Header of chunk for NBD_REPLY_TYPE_OFFSET_DATA */
  97typedef struct NBDStructuredReadData {
  98    NBDStructuredReplyChunk h; /* h.length >= 9 */
  99    uint64_t offset;
 100    /* At least one byte of data payload follows, calculated from h.length */
 101} QEMU_PACKED NBDStructuredReadData;
 102
 103/* Complete chunk for NBD_REPLY_TYPE_OFFSET_HOLE */
 104typedef struct NBDStructuredReadHole {
 105    NBDStructuredReplyChunk h; /* h.length == 12 */
 106    uint64_t offset;
 107    uint32_t length;
 108} QEMU_PACKED NBDStructuredReadHole;
 109
 110/* Header of all NBD_REPLY_TYPE_ERROR* errors */
 111typedef struct NBDStructuredError {
 112    NBDStructuredReplyChunk h; /* h.length >= 6 */
 113    uint32_t error;
 114    uint16_t message_length;
 115} QEMU_PACKED NBDStructuredError;
 116
 117/* Header of NBD_REPLY_TYPE_BLOCK_STATUS */
 118typedef struct NBDStructuredMeta {
 119    NBDStructuredReplyChunk h; /* h.length >= 12 (at least one extent) */
 120    uint32_t context_id;
 121    /* extents follows */
 122} QEMU_PACKED NBDStructuredMeta;
 123
 124/* Extent chunk for NBD_REPLY_TYPE_BLOCK_STATUS */
 125typedef struct NBDExtent {
 126    uint32_t length;
 127    uint32_t flags; /* NBD_STATE_* */
 128} QEMU_PACKED NBDExtent;
 129
 130/* Transmission (export) flags: sent from server to client during handshake,
 131   but describe what will happen during transmission */
 132enum {
 133    NBD_FLAG_HAS_FLAGS_BIT          =  0, /* Flags are there */
 134    NBD_FLAG_READ_ONLY_BIT          =  1, /* Device is read-only */
 135    NBD_FLAG_SEND_FLUSH_BIT         =  2, /* Send FLUSH */
 136    NBD_FLAG_SEND_FUA_BIT           =  3, /* Send FUA (Force Unit Access) */
 137    NBD_FLAG_ROTATIONAL_BIT         =  4, /* Use elevator algorithm -
 138                                             rotational media */
 139    NBD_FLAG_SEND_TRIM_BIT          =  5, /* Send TRIM (discard) */
 140    NBD_FLAG_SEND_WRITE_ZEROES_BIT  =  6, /* Send WRITE_ZEROES */
 141    NBD_FLAG_SEND_DF_BIT            =  7, /* Send DF (Do not Fragment) */
 142    NBD_FLAG_CAN_MULTI_CONN_BIT     =  8, /* Multi-client cache consistent */
 143    NBD_FLAG_SEND_RESIZE_BIT        =  9, /* Send resize */
 144    NBD_FLAG_SEND_CACHE_BIT         = 10, /* Send CACHE (prefetch) */
 145    NBD_FLAG_SEND_FAST_ZERO_BIT     = 11, /* FAST_ZERO flag for WRITE_ZEROES */
 146};
 147
 148#define NBD_FLAG_HAS_FLAGS         (1 << NBD_FLAG_HAS_FLAGS_BIT)
 149#define NBD_FLAG_READ_ONLY         (1 << NBD_FLAG_READ_ONLY_BIT)
 150#define NBD_FLAG_SEND_FLUSH        (1 << NBD_FLAG_SEND_FLUSH_BIT)
 151#define NBD_FLAG_SEND_FUA          (1 << NBD_FLAG_SEND_FUA_BIT)
 152#define NBD_FLAG_ROTATIONAL        (1 << NBD_FLAG_ROTATIONAL_BIT)
 153#define NBD_FLAG_SEND_TRIM         (1 << NBD_FLAG_SEND_TRIM_BIT)
 154#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT)
 155#define NBD_FLAG_SEND_DF           (1 << NBD_FLAG_SEND_DF_BIT)
 156#define NBD_FLAG_CAN_MULTI_CONN    (1 << NBD_FLAG_CAN_MULTI_CONN_BIT)
 157#define NBD_FLAG_SEND_RESIZE       (1 << NBD_FLAG_SEND_RESIZE_BIT)
 158#define NBD_FLAG_SEND_CACHE        (1 << NBD_FLAG_SEND_CACHE_BIT)
 159#define NBD_FLAG_SEND_FAST_ZERO    (1 << NBD_FLAG_SEND_FAST_ZERO_BIT)
 160
 161/* New-style handshake (global) flags, sent from server to client, and
 162   control what will happen during handshake phase. */
 163#define NBD_FLAG_FIXED_NEWSTYLE   (1 << 0) /* Fixed newstyle protocol. */
 164#define NBD_FLAG_NO_ZEROES        (1 << 1) /* End handshake without zeroes. */
 165
 166/* New-style client flags, sent from client to server to control what happens
 167   during handshake phase. */
 168#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
 169#define NBD_FLAG_C_NO_ZEROES      (1 << 1) /* End handshake without zeroes. */
 170
 171/* Option requests. */
 172#define NBD_OPT_EXPORT_NAME       (1)
 173#define NBD_OPT_ABORT             (2)
 174#define NBD_OPT_LIST              (3)
 175/* #define NBD_OPT_PEEK_EXPORT    (4) not in use */
 176#define NBD_OPT_STARTTLS          (5)
 177#define NBD_OPT_INFO              (6)
 178#define NBD_OPT_GO                (7)
 179#define NBD_OPT_STRUCTURED_REPLY  (8)
 180#define NBD_OPT_LIST_META_CONTEXT (9)
 181#define NBD_OPT_SET_META_CONTEXT  (10)
 182
 183/* Option reply types. */
 184#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
 185
 186#define NBD_REP_ACK             (1)    /* Data sending finished. */
 187#define NBD_REP_SERVER          (2)    /* Export description. */
 188#define NBD_REP_INFO            (3)    /* NBD_OPT_INFO/GO. */
 189#define NBD_REP_META_CONTEXT    (4)    /* NBD_OPT_{LIST,SET}_META_CONTEXT */
 190
 191#define NBD_REP_ERR_UNSUP           NBD_REP_ERR(1)  /* Unknown option */
 192#define NBD_REP_ERR_POLICY          NBD_REP_ERR(2)  /* Server denied */
 193#define NBD_REP_ERR_INVALID         NBD_REP_ERR(3)  /* Invalid length */
 194#define NBD_REP_ERR_PLATFORM        NBD_REP_ERR(4)  /* Not compiled in */
 195#define NBD_REP_ERR_TLS_REQD        NBD_REP_ERR(5)  /* TLS required */
 196#define NBD_REP_ERR_UNKNOWN         NBD_REP_ERR(6)  /* Export unknown */
 197#define NBD_REP_ERR_SHUTDOWN        NBD_REP_ERR(7)  /* Server shutting down */
 198#define NBD_REP_ERR_BLOCK_SIZE_REQD NBD_REP_ERR(8)  /* Need INFO_BLOCK_SIZE */
 199
 200/* Info types, used during NBD_REP_INFO */
 201#define NBD_INFO_EXPORT         0
 202#define NBD_INFO_NAME           1
 203#define NBD_INFO_DESCRIPTION    2
 204#define NBD_INFO_BLOCK_SIZE     3
 205
 206/* Request flags, sent from client to server during transmission phase */
 207#define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
 208#define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
 209#define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
 210#define NBD_CMD_FLAG_REQ_ONE    (1 << 3) /* only one extent in BLOCK_STATUS
 211                                          * reply chunk */
 212#define NBD_CMD_FLAG_FAST_ZERO  (1 << 4) /* fail if WRITE_ZEROES is not fast */
 213
 214/* Supported request types */
 215enum {
 216    NBD_CMD_READ = 0,
 217    NBD_CMD_WRITE = 1,
 218    NBD_CMD_DISC = 2,
 219    NBD_CMD_FLUSH = 3,
 220    NBD_CMD_TRIM = 4,
 221    NBD_CMD_CACHE = 5,
 222    NBD_CMD_WRITE_ZEROES = 6,
 223    NBD_CMD_BLOCK_STATUS = 7,
 224};
 225
 226#define NBD_DEFAULT_PORT        10809
 227
 228/* Maximum size of a single READ/WRITE data buffer */
 229#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
 230
 231/*
 232 * Maximum size of a protocol string (export name, metadata context name,
 233 * etc.).  Use malloc rather than stack allocation for storage of a
 234 * string.
 235 */
 236#define NBD_MAX_STRING_SIZE 4096
 237
 238/* Two types of reply structures */
 239#define NBD_SIMPLE_REPLY_MAGIC      0x67446698
 240#define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
 241
 242/* Structured reply flags */
 243#define NBD_REPLY_FLAG_DONE          (1 << 0) /* This reply-chunk is last */
 244
 245/* Structured reply types */
 246#define NBD_REPLY_ERR(value)         ((1 << 15) | (value))
 247
 248#define NBD_REPLY_TYPE_NONE          0
 249#define NBD_REPLY_TYPE_OFFSET_DATA   1
 250#define NBD_REPLY_TYPE_OFFSET_HOLE   2
 251#define NBD_REPLY_TYPE_BLOCK_STATUS  5
 252#define NBD_REPLY_TYPE_ERROR         NBD_REPLY_ERR(1)
 253#define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)
 254
 255/* Extent flags for base:allocation in NBD_REPLY_TYPE_BLOCK_STATUS */
 256#define NBD_STATE_HOLE (1 << 0)
 257#define NBD_STATE_ZERO (1 << 1)
 258
 259/* Extent flags for qemu:dirty-bitmap in NBD_REPLY_TYPE_BLOCK_STATUS */
 260#define NBD_STATE_DIRTY (1 << 0)
 261
 262/* No flags needed for qemu:allocation-depth in NBD_REPLY_TYPE_BLOCK_STATUS */
 263
 264static inline bool nbd_reply_type_is_error(int type)
 265{
 266    return type & (1 << 15);
 267}
 268
 269/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
 270 * but only a limited set of errno values is specified in the protocol.
 271 * Everything else is squashed to EINVAL.
 272 */
 273#define NBD_SUCCESS    0
 274#define NBD_EPERM      1
 275#define NBD_EIO        5
 276#define NBD_ENOMEM     12
 277#define NBD_EINVAL     22
 278#define NBD_ENOSPC     28
 279#define NBD_EOVERFLOW  75
 280#define NBD_ENOTSUP    95
 281#define NBD_ESHUTDOWN  108
 282
 283/* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 284struct NBDExportInfo {
 285    /* Set by client before nbd_receive_negotiate() */
 286    bool request_sizes;
 287    char *x_dirty_bitmap;
 288
 289    /* Set by client before nbd_receive_negotiate(), or by server results
 290     * during nbd_receive_export_list() */
 291    char *name; /* must be non-NULL */
 292
 293    /* In-out fields, set by client before nbd_receive_negotiate() and
 294     * updated by server results during nbd_receive_negotiate() */
 295    bool structured_reply;
 296    bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */
 297
 298    /* Set by server results during nbd_receive_negotiate() and
 299     * nbd_receive_export_list() */
 300    uint64_t size;
 301    uint16_t flags;
 302    uint32_t min_block;
 303    uint32_t opt_block;
 304    uint32_t max_block;
 305
 306    uint32_t context_id;
 307
 308    /* Set by server results during nbd_receive_export_list() */
 309    char *description;
 310    int n_contexts;
 311    char **contexts;
 312};
 313typedef struct NBDExportInfo NBDExportInfo;
 314
 315int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
 316                          QCryptoTLSCreds *tlscreds,
 317                          const char *hostname, QIOChannel **outioc,
 318                          NBDExportInfo *info, Error **errp);
 319void nbd_free_export_list(NBDExportInfo *info, int count);
 320int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
 321                            const char *hostname, NBDExportInfo **info,
 322                            Error **errp);
 323int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
 324             Error **errp);
 325int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 326int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
 327                                   NBDReply *reply, Error **errp);
 328int nbd_client(int fd);
 329int nbd_disconnect(int fd);
 330int nbd_errno_to_system_errno(int err);
 331
 332typedef struct NBDExport NBDExport;
 333typedef struct NBDClient NBDClient;
 334
 335void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk);
 336
 337AioContext *nbd_export_aio_context(NBDExport *exp);
 338NBDExport *nbd_export_find(const char *name);
 339
 340void nbd_client_new(QIOChannelSocket *sioc,
 341                    QCryptoTLSCreds *tlscreds,
 342                    const char *tlsauthz,
 343                    void (*close_fn)(NBDClient *, bool));
 344void nbd_client_get(NBDClient *client);
 345void nbd_client_put(NBDClient *client);
 346
 347void nbd_server_is_qemu_nbd(bool value);
 348bool nbd_server_is_running(void);
 349void nbd_server_start(SocketAddress *addr, const char *tls_creds,
 350                      const char *tls_authz, uint32_t max_connections,
 351                      Error **errp);
 352void nbd_server_start_options(NbdServerOptions *arg, Error **errp);
 353
 354/* nbd_read
 355 * Reads @size bytes from @ioc. Returns 0 on success.
 356 */
 357static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
 358                           const char *desc, Error **errp)
 359{
 360    ERRP_GUARD();
 361    int ret = qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
 362
 363    if (ret < 0) {
 364        if (desc) {
 365            error_prepend(errp, "Failed to read %s: ", desc);
 366        }
 367        return ret;
 368    }
 369
 370    return 0;
 371}
 372
 373#define DEF_NBD_READ_N(bits)                                            \
 374static inline int nbd_read##bits(QIOChannel *ioc,                       \
 375                                 uint##bits##_t *val,                   \
 376                                 const char *desc, Error **errp)        \
 377{                                                                       \
 378    int ret = nbd_read(ioc, val, sizeof(*val), desc, errp);             \
 379    if (ret < 0) {                                                      \
 380        return ret;                                                     \
 381    }                                                                   \
 382    *val = be##bits##_to_cpu(*val);                                     \
 383    return 0;                                                           \
 384}
 385
 386DEF_NBD_READ_N(16) /* Defines nbd_read16(). */
 387DEF_NBD_READ_N(32) /* Defines nbd_read32(). */
 388DEF_NBD_READ_N(64) /* Defines nbd_read64(). */
 389
 390#undef DEF_NBD_READ_N
 391
 392static inline bool nbd_reply_is_simple(NBDReply *reply)
 393{
 394    return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
 395}
 396
 397static inline bool nbd_reply_is_structured(NBDReply *reply)
 398{
 399    return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
 400}
 401
 402const char *nbd_reply_type_lookup(uint16_t type);
 403const char *nbd_opt_lookup(uint32_t opt);
 404const char *nbd_rep_lookup(uint32_t rep);
 405const char *nbd_info_lookup(uint16_t info);
 406const char *nbd_cmd_lookup(uint16_t info);
 407const char *nbd_err_lookup(int err);
 408
 409/* nbd/client-connection.c */
 410typedef struct NBDClientConnection NBDClientConnection;
 411
 412void nbd_client_connection_enable_retry(NBDClientConnection *conn);
 413
 414NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
 415                                               bool do_negotiation,
 416                                               const char *export_name,
 417                                               const char *x_dirty_bitmap,
 418                                               QCryptoTLSCreds *tlscreds,
 419                                               const char *tlshostname);
 420void nbd_client_connection_release(NBDClientConnection *conn);
 421
 422QIOChannel *coroutine_fn
 423nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
 424                            bool blocking, Error **errp);
 425
 426void coroutine_fn nbd_co_establish_connection_cancel(NBDClientConnection *conn);
 427
 428#endif
 429