qemu/nbd/client.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2016-2018 Red Hat, Inc.
   3 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   4 *
   5 *  Network Block Device Client Side
   6 *
   7 *  This program is free software; you can redistribute it and/or modify
   8 *  it under the terms of the GNU General Public License as published by
   9 *  the Free Software Foundation; under version 2 of the License.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qapi/error.h"
  22#include "trace.h"
  23#include "nbd-internal.h"
  24#include "qemu/cutils.h"
  25
  26/* Definitions for opaque data types */
  27
  /* File-scope tail-queue head named `exports` for NBDExport entries,
   * set up with QTAILQ_HEAD_INITIALIZER.
   * NOTE(review): nothing in the visible part of this file references
   * `exports` -- confirm whether it is still needed here. */
  28static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
  29
  30/* That's all folks */
  31
  32/* Basic flow for negotiation
  33
  34   Server         Client
  35   Negotiate
  36
  37   or
  38
  39   Server         Client
  40   Negotiate #1
  41                  Option
  42   Negotiate #2
  43
  44   ----
  45
  46   followed by
  47
  48   Server         Client
  49                  Request
  50   Response
  51                  Request
  52   Response
  53                  ...
  54   ...
  55                  Request (type == 2)
  56
  57*/
  58
  59/* Send an option request.
  60 *
  61 * The request is for option @opt, with @data containing @len bytes of
  62 * additional payload for the request (@len may be -1 to treat @data as
  63 * a C string; and @data may be NULL if @len is 0).
  64 * Return 0 if successful, -1 with errp set if it is impossible to
  65 * continue. */
  66static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
  67                                   uint32_t len, const char *data,
  68                                   Error **errp)
  69{
  70    NBDOption req;
  71    QEMU_BUILD_BUG_ON(sizeof(req) != 16);
  72
  73    if (len == -1) {
  74        req.length = len = strlen(data);
  75    }
  76    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
  77
  78    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
  79    stl_be_p(&req.option, opt);
  80    stl_be_p(&req.length, len);
  81
  82    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
  83        error_prepend(errp, "Failed to send option request header: ");
  84        return -1;
  85    }
  86
  87    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
  88        error_prepend(errp, "Failed to send option request data: ");
  89        return -1;
  90    }
  91
  92    return 0;
  93}
  94
  95/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
  96 * not going to attempt further negotiation. */
  97static void nbd_send_opt_abort(QIOChannel *ioc)
  98{
  99    /* Technically, a compliant server is supposed to reply to us; but
 100     * older servers disconnected instead. At any rate, we're allowed
 101     * to disconnect without waiting for the server reply, so we don't
 102     * even care if the request makes it to the server, let alone
 103     * waiting around for whether the server replies. */
 104    nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
 105}
 106
 107
 108/* Receive the header of an option reply, which should match the given
 109 * opt.  Read through the length field, but NOT the length bytes of
 110 * payload. Return 0 if successful, -1 with errp set if it is
 111 * impossible to continue. */
 112static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
 113                                    NBDOptionReply *reply, Error **errp)
 114{
 115    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
 116    if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
 117        nbd_send_opt_abort(ioc);
 118        return -1;
 119    }
 120    reply->magic = be64_to_cpu(reply->magic);
 121    reply->option = be32_to_cpu(reply->option);
 122    reply->type = be32_to_cpu(reply->type);
 123    reply->length = be32_to_cpu(reply->length);
 124
 125    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
 126                                   reply->type, nbd_rep_lookup(reply->type),
 127                                   reply->length);
 128
 129    if (reply->magic != NBD_REP_MAGIC) {
 130        error_setg(errp, "Unexpected option reply magic");
 131        nbd_send_opt_abort(ioc);
 132        return -1;
 133    }
 134    if (reply->option != opt) {
 135        error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
 136                   reply->option, nbd_opt_lookup(reply->option),
 137                   opt, nbd_opt_lookup(opt));
 138        nbd_send_opt_abort(ioc);
 139        return -1;
 140    }
 141    return 0;
 142}
 143
 144/* If reply represents success, return 1 without further action.
 145 * If reply represents an error, consume the optional payload of
 146 * the packet on ioc.  Then return 0 for unsupported (so the client
 147 * can fall back to other approaches), or -1 with errp set for other
 148 * errors.
 149 */
 150static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
 151                                Error **errp)
 152{
 153    char *msg = NULL;
 154    int result = -1;
 155
 156    if (!(reply->type & (1 << 31))) {
 157        return 1;
 158    }
 159
 160    if (reply->length) {
 161        if (reply->length > NBD_MAX_BUFFER_SIZE) {
 162            error_setg(errp, "server error %" PRIu32
 163                       " (%s) message is too long",
 164                       reply->type, nbd_rep_lookup(reply->type));
 165            goto cleanup;
 166        }
 167        msg = g_malloc(reply->length + 1);
 168        if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
 169            error_prepend(errp, "Failed to read option error %" PRIu32
 170                          " (%s) message: ",
 171                          reply->type, nbd_rep_lookup(reply->type));
 172            goto cleanup;
 173        }
 174        msg[reply->length] = '\0';
 175        trace_nbd_server_error_msg(reply->type,
 176                                   nbd_reply_type_lookup(reply->type), msg);
 177    }
 178
 179    switch (reply->type) {
 180    case NBD_REP_ERR_UNSUP:
 181        trace_nbd_reply_err_unsup(reply->option, nbd_opt_lookup(reply->option));
 182        result = 0;
 183        goto cleanup;
 184
 185    case NBD_REP_ERR_POLICY:
 186        error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
 187                   reply->option, nbd_opt_lookup(reply->option));
 188        break;
 189
 190    case NBD_REP_ERR_INVALID:
 191        error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
 192                   reply->option, nbd_opt_lookup(reply->option));
 193        break;
 194
 195    case NBD_REP_ERR_PLATFORM:
 196        error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
 197                   reply->option, nbd_opt_lookup(reply->option));
 198        break;
 199
 200    case NBD_REP_ERR_TLS_REQD:
 201        error_setg(errp, "TLS negotiation required before option %" PRIu32
 202                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 203        break;
 204
 205    case NBD_REP_ERR_UNKNOWN:
 206        error_setg(errp, "Requested export not available");
 207        break;
 208
 209    case NBD_REP_ERR_SHUTDOWN:
 210        error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
 211                   reply->option, nbd_opt_lookup(reply->option));
 212        break;
 213
 214    case NBD_REP_ERR_BLOCK_SIZE_REQD:
 215        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
 216                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 217        break;
 218
 219    default:
 220        error_setg(errp, "Unknown error code when asking for option %" PRIu32
 221                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 222        break;
 223    }
 224
 225    if (msg) {
 226        error_append_hint(errp, "server reported: %s\n", msg);
 227    }
 228
 229 cleanup:
 230    g_free(msg);
 231    if (result < 0) {
 232        nbd_send_opt_abort(ioc);
 233    }
 234    return result;
 235}
 236
 237/* nbd_receive_list:
 238 * Process another portion of the NBD_OPT_LIST reply, populating any
 239 * name received into *@name. If @description is non-NULL, and the
 240 * server provided a description, that is also populated. The caller
 241 * must eventually call g_free() on success.
 242 * Returns 1 if name and description were set and iteration must continue,
 243 *         0 if iteration is complete (including if OPT_LIST unsupported),
 244 *         -1 with @errp set if an unrecoverable error occurred.
 245 */
 246static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
 247                            Error **errp)
 248{
 249    int ret = -1;
 250    NBDOptionReply reply;
 251    uint32_t len;
 252    uint32_t namelen;
 253    char *local_name = NULL;
 254    char *local_desc = NULL;
 255    int error;
 256
 257    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
 258        return -1;
 259    }
 260    error = nbd_handle_reply_err(ioc, &reply, errp);
 261    if (error <= 0) {
 262        return error;
 263    }
 264    len = reply.length;
 265
 266    if (reply.type == NBD_REP_ACK) {
 267        if (len != 0) {
 268            error_setg(errp, "length too long for option end");
 269            nbd_send_opt_abort(ioc);
 270            return -1;
 271        }
 272        return 0;
 273    } else if (reply.type != NBD_REP_SERVER) {
 274        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
 275                   reply.type, nbd_rep_lookup(reply.type),
 276                   NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
 277        nbd_send_opt_abort(ioc);
 278        return -1;
 279    }
 280
 281    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
 282        error_setg(errp, "incorrect option length %" PRIu32, len);
 283        nbd_send_opt_abort(ioc);
 284        return -1;
 285    }
 286    if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
 287        nbd_send_opt_abort(ioc);
 288        return -1;
 289    }
 290    len -= sizeof(namelen);
 291    if (len < namelen) {
 292        error_setg(errp, "incorrect option name length");
 293        nbd_send_opt_abort(ioc);
 294        return -1;
 295    }
 296
 297    local_name = g_malloc(namelen + 1);
 298    if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
 299        nbd_send_opt_abort(ioc);
 300        goto out;
 301    }
 302    local_name[namelen] = '\0';
 303    len -= namelen;
 304    if (len) {
 305        local_desc = g_malloc(len + 1);
 306        if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
 307            nbd_send_opt_abort(ioc);
 308            goto out;
 309        }
 310        local_desc[len] = '\0';
 311    }
 312
 313    trace_nbd_receive_list(local_name, local_desc ?: "");
 314    *name = local_name;
 315    local_name = NULL;
 316    if (description) {
 317        *description = local_desc;
 318        local_desc = NULL;
 319    }
 320    ret = 1;
 321
 322 out:
 323    g_free(local_name);
 324    g_free(local_desc);
 325    return ret;
 326}
 327
 328
 329/*
 330 * nbd_opt_info_or_go:
 331 * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
 332 * Returns -1 if the option proves the export @info->name cannot be
 333 * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
 334 * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
 335 * go (with the rest of @info populated).
 336 */
 337static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
 338                              NBDExportInfo *info, Error **errp)
 339{
 340    NBDOptionReply reply;
 341    uint32_t len = strlen(info->name);
 342    uint16_t type;
 343    int error;
 344    char *buf;
 345
 346    /* The protocol requires that the server send NBD_INFO_EXPORT with
 347     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
 348     * flags still 0 is a witness of a broken server. */
 349    info->flags = 0;
 350
 351    assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
 352    trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
 353    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
 354    stl_be_p(buf, len);
 355    memcpy(buf + 4, info->name, len);
 356    /* At most one request, everything else up to server */
 357    stw_be_p(buf + 4 + len, info->request_sizes);
 358    if (info->request_sizes) {
 359        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
 360    }
 361    error = nbd_send_option_request(ioc, opt,
 362                                    4 + len + 2 + 2 * info->request_sizes,
 363                                    buf, errp);
 364    g_free(buf);
 365    if (error < 0) {
 366        return -1;
 367    }
 368
 369    while (1) {
 370        if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 371            return -1;
 372        }
 373        error = nbd_handle_reply_err(ioc, &reply, errp);
 374        if (error <= 0) {
 375            return error;
 376        }
 377        len = reply.length;
 378
 379        if (reply.type == NBD_REP_ACK) {
 380            /*
 381             * Server is done sending info, and moved into transmission
 382             * phase for NBD_OPT_GO, but make sure it sent flags
 383             */
 384            if (len) {
 385                error_setg(errp, "server sent invalid NBD_REP_ACK");
 386                return -1;
 387            }
 388            if (!info->flags) {
 389                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
 390                return -1;
 391            }
 392            trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
 393            return 1;
 394        }
 395        if (reply.type != NBD_REP_INFO) {
 396            error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
 397                       reply.type, nbd_rep_lookup(reply.type),
 398                       NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
 399            nbd_send_opt_abort(ioc);
 400            return -1;
 401        }
 402        if (len < sizeof(type)) {
 403            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
 404                       len);
 405            nbd_send_opt_abort(ioc);
 406            return -1;
 407        }
 408        if (nbd_read16(ioc, &type, "info type", errp) < 0) {
 409            nbd_send_opt_abort(ioc);
 410            return -1;
 411        }
 412        len -= sizeof(type);
 413        switch (type) {
 414        case NBD_INFO_EXPORT:
 415            if (len != sizeof(info->size) + sizeof(info->flags)) {
 416                error_setg(errp, "remaining export info len %" PRIu32
 417                           " is unexpected size", len);
 418                nbd_send_opt_abort(ioc);
 419                return -1;
 420            }
 421            if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
 422                nbd_send_opt_abort(ioc);
 423                return -1;
 424            }
 425            if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
 426                nbd_send_opt_abort(ioc);
 427                return -1;
 428            }
 429            if (info->min_block &&
 430                !QEMU_IS_ALIGNED(info->size, info->min_block)) {
 431                error_setg(errp, "export size %" PRIu64 " is not multiple of "
 432                           "minimum block size %" PRIu32, info->size,
 433                           info->min_block);
 434                nbd_send_opt_abort(ioc);
 435                return -1;
 436            }
 437            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
 438            break;
 439
 440        case NBD_INFO_BLOCK_SIZE:
 441            if (len != sizeof(info->min_block) * 3) {
 442                error_setg(errp, "remaining export info len %" PRIu32
 443                           " is unexpected size", len);
 444                nbd_send_opt_abort(ioc);
 445                return -1;
 446            }
 447            if (nbd_read32(ioc, &info->min_block, "info minimum block size",
 448                           errp) < 0) {
 449                nbd_send_opt_abort(ioc);
 450                return -1;
 451            }
 452            if (!is_power_of_2(info->min_block)) {
 453                error_setg(errp, "server minimum block size %" PRIu32
 454                           " is not a power of two", info->min_block);
 455                nbd_send_opt_abort(ioc);
 456                return -1;
 457            }
 458            if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
 459                           errp) < 0)
 460            {
 461                nbd_send_opt_abort(ioc);
 462                return -1;
 463            }
 464            if (!is_power_of_2(info->opt_block) ||
 465                info->opt_block < info->min_block) {
 466                error_setg(errp, "server preferred block size %" PRIu32
 467                           " is not valid", info->opt_block);
 468                nbd_send_opt_abort(ioc);
 469                return -1;
 470            }
 471            if (nbd_read32(ioc, &info->max_block, "info maximum block size",
 472                           errp) < 0)
 473            {
 474                nbd_send_opt_abort(ioc);
 475                return -1;
 476            }
 477            if (info->max_block < info->min_block) {
 478                error_setg(errp, "server maximum block size %" PRIu32
 479                           " is not valid", info->max_block);
 480                nbd_send_opt_abort(ioc);
 481                return -1;
 482            }
 483            trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
 484                                          info->max_block);
 485            break;
 486
 487        default:
 488            trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
 489            if (nbd_drop(ioc, len, errp) < 0) {
 490                error_prepend(errp, "Failed to read info payload: ");
 491                nbd_send_opt_abort(ioc);
 492                return -1;
 493            }
 494            break;
 495        }
 496    }
 497}
 498
 499/* Return -1 on failure, 0 if wantname is an available export. */
 500static int nbd_receive_query_exports(QIOChannel *ioc,
 501                                     const char *wantname,
 502                                     Error **errp)
 503{
 504    bool list_empty = true;
 505    bool found_export = false;
 506
 507    trace_nbd_receive_query_exports_start(wantname);
 508    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
 509        return -1;
 510    }
 511
 512    while (1) {
 513        char *name;
 514        int ret = nbd_receive_list(ioc, &name, NULL, errp);
 515
 516        if (ret < 0) {
 517            /* Server gave unexpected reply */
 518            return -1;
 519        } else if (ret == 0) {
 520            /* Done iterating. */
 521            if (list_empty) {
 522                /*
 523                 * We don't have enough context to tell a server that
 524                 * sent an empty list apart from a server that does
 525                 * not support the list command; but as this function
 526                 * is just used to trigger a nicer error message
 527                 * before trying NBD_OPT_EXPORT_NAME, assume the
 528                 * export is available.
 529                 */
 530                return 0;
 531            } else if (!found_export) {
 532                error_setg(errp, "No export with name '%s' available",
 533                           wantname);
 534                nbd_send_opt_abort(ioc);
 535                return -1;
 536            }
 537            trace_nbd_receive_query_exports_success(wantname);
 538            return 0;
 539        }
 540        list_empty = false;
 541        if (!strcmp(name, wantname)) {
 542            found_export = true;
 543        }
 544        g_free(name);
 545    }
 546}
 547
 548/* nbd_request_simple_option: Send an option request, and parse the reply
 549 * return 1 for successful negotiation,
 550 *        0 if operation is unsupported,
 551 *        -1 with errp set for any other error
 552 */
 553static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp)
 554{
 555    NBDOptionReply reply;
 556    int error;
 557
 558    if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
 559        return -1;
 560    }
 561
 562    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 563        return -1;
 564    }
 565    error = nbd_handle_reply_err(ioc, &reply, errp);
 566    if (error <= 0) {
 567        return error;
 568    }
 569
 570    if (reply.type != NBD_REP_ACK) {
 571        error_setg(errp, "Server answered option %d (%s) with unexpected "
 572                   "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
 573                   reply.type, nbd_rep_lookup(reply.type));
 574        nbd_send_opt_abort(ioc);
 575        return -1;
 576    }
 577
 578    if (reply.length != 0) {
 579        error_setg(errp, "Option %d ('%s') response length is %" PRIu32
 580                   " (it should be zero)", opt, nbd_opt_lookup(opt),
 581                   reply.length);
 582        nbd_send_opt_abort(ioc);
 583        return -1;
 584    }
 585
 586    return 1;
 587}
 588
 589static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 590                                        QCryptoTLSCreds *tlscreds,
 591                                        const char *hostname, Error **errp)
 592{
 593    int ret;
 594    QIOChannelTLS *tioc;
 595    struct NBDTLSHandshakeData data = { 0 };
 596
 597    ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp);
 598    if (ret <= 0) {
 599        if (ret == 0) {
 600            error_setg(errp, "Server don't support STARTTLS option");
 601            nbd_send_opt_abort(ioc);
 602        }
 603        return NULL;
 604    }
 605
 606    trace_nbd_receive_starttls_new_client();
 607    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
 608    if (!tioc) {
 609        return NULL;
 610    }
 611    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
 612    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
 613    trace_nbd_receive_starttls_tls_handshake();
 614    qio_channel_tls_handshake(tioc,
 615                              nbd_tls_handshake,
 616                              &data,
 617                              NULL,
 618                              NULL);
 619
 620    if (!data.complete) {
 621        g_main_loop_run(data.loop);
 622    }
 623    g_main_loop_unref(data.loop);
 624    if (data.error) {
 625        error_propagate(errp, data.error);
 626        object_unref(OBJECT(tioc));
 627        return NULL;
 628    }
 629
 630    return QIO_CHANNEL(tioc);
 631}
 632
 633/*
 634 * nbd_send_meta_query:
 635 * Send 0 or 1 set/list meta context queries.
 636 * Return 0 on success, -1 with errp set for any error
 637 */
 638static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
 639                               const char *export, const char *query,
 640                               Error **errp)
 641{
 642    int ret;
 643    uint32_t export_len = strlen(export);
 644    uint32_t queries = !!query;
 645    uint32_t query_len = 0;
 646    uint32_t data_len;
 647    char *data;
 648    char *p;
 649
 650    data_len = sizeof(export_len) + export_len + sizeof(queries);
 651    if (query) {
 652        query_len = strlen(query);
 653        data_len += sizeof(query_len) + query_len;
 654    } else {
 655        assert(opt == NBD_OPT_LIST_META_CONTEXT);
 656    }
 657    p = data = g_malloc(data_len);
 658
 659    trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
 660    stl_be_p(p, export_len);
 661    memcpy(p += sizeof(export_len), export, export_len);
 662    stl_be_p(p += export_len, queries);
 663    if (query) {
 664        stl_be_p(p += sizeof(queries), query_len);
 665        memcpy(p += sizeof(query_len), query, query_len);
 666    }
 667
 668    ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
 669    g_free(data);
 670    return ret;
 671}
 672
 673/*
 674 * nbd_receive_one_meta_context:
 675 * Called in a loop to receive and trace one set/list meta context reply.
 676 * Pass non-NULL @name or @id to collect results back to the caller, which
 677 * must eventually call g_free().
 678 * return 1 if name is set and iteration must continue,
 679 *        0 if iteration is complete (including if option is unsupported),
 680 *        -1 with errp set for any error
 681 */
 682static int nbd_receive_one_meta_context(QIOChannel *ioc,
 683                                        uint32_t opt,
 684                                        char **name,
 685                                        uint32_t *id,
 686                                        Error **errp)
 687{
 688    int ret;
 689    NBDOptionReply reply;
 690    char *local_name = NULL;
 691    uint32_t local_id;
 692
 693    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 694        return -1;
 695    }
 696
 697    ret = nbd_handle_reply_err(ioc, &reply, errp);
 698    if (ret <= 0) {
 699        return ret;
 700    }
 701
 702    if (reply.type == NBD_REP_ACK) {
 703        if (reply.length != 0) {
 704            error_setg(errp, "Unexpected length to ACK response");
 705            nbd_send_opt_abort(ioc);
 706            return -1;
 707        }
 708        return 0;
 709    } else if (reply.type != NBD_REP_META_CONTEXT) {
 710        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
 711                   reply.type, nbd_rep_lookup(reply.type),
 712                   NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
 713        nbd_send_opt_abort(ioc);
 714        return -1;
 715    }
 716
 717    if (reply.length <= sizeof(local_id) ||
 718        reply.length > NBD_MAX_BUFFER_SIZE) {
 719        error_setg(errp, "Failed to negotiate meta context, server "
 720                   "answered with unexpected length %" PRIu32,
 721                   reply.length);
 722        nbd_send_opt_abort(ioc);
 723        return -1;
 724    }
 725
 726    if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
 727        return -1;
 728    }
 729
 730    reply.length -= sizeof(local_id);
 731    local_name = g_malloc(reply.length + 1);
 732    if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
 733        g_free(local_name);
 734        return -1;
 735    }
 736    local_name[reply.length] = '\0';
 737    trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
 738
 739    if (name) {
 740        *name = local_name;
 741    } else {
 742        g_free(local_name);
 743    }
 744    if (id) {
 745        *id = local_id;
 746    }
 747    return 1;
 748}
 749
 750/*
 751 * nbd_negotiate_simple_meta_context:
 752 * Request the server to set the meta context for export @info->name
 753 * using @info->x_dirty_bitmap with a fallback to "base:allocation",
 754 * setting @info->context_id to the resulting id. Fail if the server
 755 * responds with more than one context or with a context different
 756 * than the query.
 757 * return 1 for successful negotiation,
 758 *        0 if operation is unsupported,
 759 *        -1 with errp set for any other error
 760 */
 761static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
 762                                             NBDExportInfo *info,
 763                                             Error **errp)
 764{
 765    /*
 766     * TODO: Removing the x_dirty_bitmap hack will mean refactoring
 767     * this function to request and store ids for multiple contexts
 768     * (both base:allocation and a dirty bitmap), at which point this
 769     * function should lose the term _simple.
 770     */
 771    int ret;
 772    const char *context = info->x_dirty_bitmap ?: "base:allocation";
 773    bool received = false;
 774    char *name = NULL;
 775
 776    if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
 777                            info->name, context, errp) < 0) {
 778        return -1;
 779    }
 780
 781    ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
 782                                       &name, &info->context_id, errp);
 783    if (ret < 0) {
 784        return -1;
 785    }
 786    if (ret == 1) {
 787        if (strcmp(context, name)) {
 788            error_setg(errp, "Failed to negotiate meta context '%s', server "
 789                       "answered with different context '%s'", context,
 790                       name);
 791            g_free(name);
 792            nbd_send_opt_abort(ioc);
 793            return -1;
 794        }
 795        g_free(name);
 796        received = true;
 797
 798        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
 799                                           NULL, NULL, errp);
 800        if (ret < 0) {
 801            return -1;
 802        }
 803    }
 804    if (ret != 0) {
 805        error_setg(errp, "Server answered with more than one context");
 806        nbd_send_opt_abort(ioc);
 807        return -1;
 808    }
 809    return received;
 810}
 811
 812/*
 813 * nbd_list_meta_contexts:
 814 * Request the server to list all meta contexts for export @info->name.
 815 * return 0 if list is complete (even if empty),
 816 *        -1 with errp set for any error
 817 */
 818static int nbd_list_meta_contexts(QIOChannel *ioc,
 819                                  NBDExportInfo *info,
 820                                  Error **errp)
 821{
 822    int ret;
 823    int seen_any = false;
 824    int seen_qemu = false;
 825
 826    if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
 827                            info->name, NULL, errp) < 0) {
 828        return -1;
 829    }
 830
 831    while (1) {
 832        char *context;
 833
 834        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
 835                                           &context, NULL, errp);
 836        if (ret == 0 && seen_any && !seen_qemu) {
 837            /*
 838             * Work around qemu 3.0 bug: the server forgot to send
 839             * "qemu:" replies to 0 queries. If we saw at least one
 840             * reply (probably base:allocation), but none of them were
 841             * qemu:, then run a more specific query to make sure.
 842             */
 843            seen_qemu = true;
 844            if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
 845                                    info->name, "qemu:", errp) < 0) {
 846                return -1;
 847            }
 848            continue;
 849        }
 850        if (ret <= 0) {
 851            return ret;
 852        }
 853        seen_any = true;
 854        seen_qemu |= strstart(context, "qemu:", NULL);
 855        info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
 856        info->contexts[info->n_contexts - 1] = context;
 857    }
 858}
 859
 860/*
 861 * nbd_start_negotiate:
 862 * Start the handshake to the server.  After a positive return, the server
 863 * is ready to accept additional NBD_OPT requests.
 864 * Returns: negative errno: failure talking to server
 865 *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
 866 *          1: server is newstyle, but can only accept EXPORT_NAME
 867 *          2: server is newstyle, but lacks structured replies
 868 *          3: server is newstyle and set up for structured replies
 869 */
 870static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
 871                               const char *hostname, QIOChannel **outioc,
 872                               bool structured_reply, bool *zeroes,
 873                               Error **errp)
 874{
 875    uint64_t magic;
 876
 877    trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>");
 878
 879    if (zeroes) {
 880        *zeroes = true;
 881    }
 882    if (outioc) {
 883        *outioc = NULL;
 884    }
 885    if (tlscreds && !outioc) {
 886        error_setg(errp, "Output I/O channel required for TLS");
 887        return -EINVAL;
 888    }
 889
 890    if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
 891        return -EINVAL;
 892    }
 893    trace_nbd_receive_negotiate_magic(magic);
 894
 895    if (magic != NBD_INIT_MAGIC) {
 896        error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
 897        return -EINVAL;
 898    }
 899
 900    if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
 901        return -EINVAL;
 902    }
 903    trace_nbd_receive_negotiate_magic(magic);
 904
 905    if (magic == NBD_OPTS_MAGIC) {
 906        uint32_t clientflags = 0;
 907        uint16_t globalflags;
 908        bool fixedNewStyle = false;
 909
 910        if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
 911            return -EINVAL;
 912        }
 913        trace_nbd_receive_negotiate_server_flags(globalflags);
 914        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
 915            fixedNewStyle = true;
 916            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
 917        }
 918        if (globalflags & NBD_FLAG_NO_ZEROES) {
 919            if (zeroes) {
 920                *zeroes = false;
 921            }
 922            clientflags |= NBD_FLAG_C_NO_ZEROES;
 923        }
 924        /* client requested flags */
 925        clientflags = cpu_to_be32(clientflags);
 926        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
 927            error_prepend(errp, "Failed to send clientflags field: ");
 928            return -EINVAL;
 929        }
 930        if (tlscreds) {
 931            if (fixedNewStyle) {
 932                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
 933                if (!*outioc) {
 934                    return -EINVAL;
 935                }
 936                ioc = *outioc;
 937            } else {
 938                error_setg(errp, "Server does not support STARTTLS");
 939                return -EINVAL;
 940            }
 941        }
 942        if (fixedNewStyle) {
 943            int result = 0;
 944
 945            if (structured_reply) {
 946                result = nbd_request_simple_option(ioc,
 947                                                   NBD_OPT_STRUCTURED_REPLY,
 948                                                   errp);
 949                if (result < 0) {
 950                    return -EINVAL;
 951                }
 952            }
 953            return 2 + result;
 954        } else {
 955            return 1;
 956        }
 957    } else if (magic == NBD_CLIENT_MAGIC) {
 958        if (tlscreds) {
 959            error_setg(errp, "Server does not support STARTTLS");
 960            return -EINVAL;
 961        }
 962        return 0;
 963    } else {
 964        error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
 965        return -EINVAL;
 966    }
 967}
 968
 969/*
 970 * nbd_negotiate_finish_oldstyle:
 971 * Populate @info with the size and export flags from an oldstyle server,
 972 * but does not consume 124 bytes of reserved zero padding.
 973 * Returns 0 on success, -1 with @errp set on failure
 974 */
 975static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
 976                                         Error **errp)
 977{
 978    uint32_t oldflags;
 979
 980    if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
 981        return -EINVAL;
 982    }
 983
 984    if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
 985        return -EINVAL;
 986    }
 987    if (oldflags & ~0xffff) {
 988        error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
 989        return -EINVAL;
 990    }
 991    info->flags = oldflags;
 992    return 0;
 993}
 994
 995/*
 996 * nbd_receive_negotiate:
 997 * Connect to server, complete negotiation, and move into transmission phase.
 998 * Returns: negative errno: failure talking to server
 999 *          0: server is connected
1000 */
1001int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
1002                          const char *hostname, QIOChannel **outioc,
1003                          NBDExportInfo *info, Error **errp)
1004{
1005    int result;
1006    bool zeroes;
1007    bool base_allocation = info->base_allocation;
1008
1009    assert(info->name);
1010    trace_nbd_receive_negotiate_name(info->name);
1011
1012    result = nbd_start_negotiate(ioc, tlscreds, hostname, outioc,
1013                                 info->structured_reply, &zeroes, errp);
1014
1015    info->structured_reply = false;
1016    info->base_allocation = false;
1017    if (tlscreds && *outioc) {
1018        ioc = *outioc;
1019    }
1020
1021    switch (result) {
1022    case 3: /* newstyle, with structured replies */
1023        info->structured_reply = true;
1024        if (base_allocation) {
1025            result = nbd_negotiate_simple_meta_context(ioc, info, errp);
1026            if (result < 0) {
1027                return -EINVAL;
1028            }
1029            info->base_allocation = result == 1;
1030        }
1031        /* fall through */
1032    case 2: /* newstyle, try OPT_GO */
1033        /* Try NBD_OPT_GO first - if it works, we are done (it
1034         * also gives us a good message if the server requires
1035         * TLS).  If it is not available, fall back to
1036         * NBD_OPT_LIST for nicer error messages about a missing
1037         * export, then use NBD_OPT_EXPORT_NAME.  */
1038        result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
1039        if (result < 0) {
1040            return -EINVAL;
1041        }
1042        if (result > 0) {
1043            return 0;
1044        }
1045        /* Check our desired export is present in the
1046         * server export list. Since NBD_OPT_EXPORT_NAME
1047         * cannot return an error message, running this
1048         * query gives us better error reporting if the
1049         * export name is not available.
1050         */
1051        if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
1052            return -EINVAL;
1053        }
1054        /* fall through */
1055    case 1: /* newstyle, but limited to EXPORT_NAME */
1056        /* write the export name request */
1057        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
1058                                    errp) < 0) {
1059            return -EINVAL;
1060        }
1061
1062        /* Read the response */
1063        if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
1064            return -EINVAL;
1065        }
1066
1067        if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
1068            return -EINVAL;
1069        }
1070        break;
1071    case 0: /* oldstyle, parse length and flags */
1072        if (*info->name) {
1073            error_setg(errp, "Server does not support non-empty export names");
1074            return -EINVAL;
1075        }
1076        if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
1077            return -EINVAL;
1078        }
1079        break;
1080    default:
1081        return result;
1082    }
1083
1084    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
1085    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
1086        error_prepend(errp, "Failed to read reserved block: ");
1087        return -EINVAL;
1088    }
1089    return 0;
1090}
1091
1092/* Clean up result of nbd_receive_export_list */
1093void nbd_free_export_list(NBDExportInfo *info, int count)
1094{
1095    int i, j;
1096
1097    if (!info) {
1098        return;
1099    }
1100
1101    for (i = 0; i < count; i++) {
1102        g_free(info[i].name);
1103        g_free(info[i].description);
1104        for (j = 0; j < info[i].n_contexts; j++) {
1105            g_free(info[i].contexts[j]);
1106        }
1107        g_free(info[i].contexts);
1108    }
1109    g_free(info);
1110}
1111
1112/*
1113 * nbd_receive_export_list:
1114 * Query details about a server's exports, then disconnect without
1115 * going into transmission phase. Return a count of the exports listed
1116 * in @info by the server, or -1 on error. Caller must free @info using
1117 * nbd_free_export_list().
1118 */
1119int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
1120                            const char *hostname, NBDExportInfo **info,
1121                            Error **errp)
1122{
1123    int result;
1124    int count = 0;
1125    int i;
1126    int rc;
1127    int ret = -1;
1128    NBDExportInfo *array = NULL;
1129    QIOChannel *sioc = NULL;
1130
1131    *info = NULL;
1132    result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc, true, NULL,
1133                                 errp);
1134    if (tlscreds && sioc) {
1135        ioc = sioc;
1136    }
1137
1138    switch (result) {
1139    case 2:
1140    case 3:
1141        /* newstyle - use NBD_OPT_LIST to populate array, then try
1142         * NBD_OPT_INFO on each array member. If structured replies
1143         * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
1144        if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
1145            goto out;
1146        }
1147        while (1) {
1148            char *name;
1149            char *desc;
1150
1151            rc = nbd_receive_list(ioc, &name, &desc, errp);
1152            if (rc < 0) {
1153                goto out;
1154            } else if (rc == 0) {
1155                break;
1156            }
1157            array = g_renew(NBDExportInfo, array, ++count);
1158            memset(&array[count - 1], 0, sizeof(*array));
1159            array[count - 1].name = name;
1160            array[count - 1].description = desc;
1161            array[count - 1].structured_reply = result == 3;
1162        }
1163
1164        for (i = 0; i < count; i++) {
1165            array[i].request_sizes = true;
1166            rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
1167            if (rc < 0) {
1168                goto out;
1169            } else if (rc == 0) {
1170                /*
1171                 * Pointless to try rest of loop. If OPT_INFO doesn't work,
1172                 * it's unlikely that meta contexts work either
1173                 */
1174                break;
1175            }
1176
1177            if (result == 3 &&
1178                nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
1179                goto out;
1180            }
1181        }
1182
1183        /* Send NBD_OPT_ABORT as a courtesy before hanging up */
1184        nbd_send_opt_abort(ioc);
1185        break;
1186    case 1: /* newstyle, but limited to EXPORT_NAME */
1187        error_setg(errp, "Server does not support export lists");
1188        /* We can't even send NBD_OPT_ABORT, so merely hang up */
1189        goto out;
1190    case 0: /* oldstyle, parse length and flags */
1191        array = g_new0(NBDExportInfo, 1);
1192        array->name = g_strdup("");
1193        count = 1;
1194
1195        if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
1196            goto out;
1197        }
1198
1199        /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
1200         * errors now that we have the information we wanted. */
1201        if (nbd_drop(ioc, 124, NULL) == 0) {
1202            NBDRequest request = { .type = NBD_CMD_DISC };
1203
1204            nbd_send_request(ioc, &request);
1205        }
1206        break;
1207    default:
1208        goto out;
1209    }
1210
1211    *info = array;
1212    array = NULL;
1213    ret = count;
1214
1215 out:
1216    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1217    qio_channel_close(ioc, NULL);
1218    object_unref(OBJECT(sioc));
1219    nbd_free_export_list(array, count);
1220    return ret;
1221}
1222
1223#ifdef __linux__
1224int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
1225             Error **errp)
1226{
1227    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
1228    unsigned long sectors = info->size / sector_size;
1229
1230    /* FIXME: Once the kernel module is patched to honor block sizes,
1231     * and to advertise that fact to user space, we should update the
1232     * hand-off to the kernel to use any block sizes we learned. */
1233    assert(!info->request_sizes);
1234    if (info->size / sector_size != sectors) {
1235        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
1236                   info->size);
1237        return -E2BIG;
1238    }
1239
1240    trace_nbd_init_set_socket();
1241
1242    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
1243        int serrno = errno;
1244        error_setg(errp, "Failed to set NBD socket");
1245        return -serrno;
1246    }
1247
1248    trace_nbd_init_set_block_size(sector_size);
1249
1250    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
1251        int serrno = errno;
1252        error_setg(errp, "Failed setting NBD block size");
1253        return -serrno;
1254    }
1255
1256    trace_nbd_init_set_size(sectors);
1257    if (info->size % sector_size) {
1258        trace_nbd_init_trailing_bytes(info->size % sector_size);
1259    }
1260
1261    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
1262        int serrno = errno;
1263        error_setg(errp, "Failed setting size (in blocks)");
1264        return -serrno;
1265    }
1266
1267    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
1268        if (errno == ENOTTY) {
1269            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
1270            trace_nbd_init_set_readonly();
1271
1272            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
1273                int serrno = errno;
1274                error_setg(errp, "Failed setting read-only attribute");
1275                return -serrno;
1276            }
1277        } else {
1278            int serrno = errno;
1279            error_setg(errp, "Failed setting flags");
1280            return -serrno;
1281        }
1282    }
1283
1284    trace_nbd_init_finish();
1285
1286    return 0;
1287}
1288
1289int nbd_client(int fd)
1290{
1291    int ret;
1292    int serrno;
1293
1294    trace_nbd_client_loop();
1295
1296    ret = ioctl(fd, NBD_DO_IT);
1297    if (ret < 0 && errno == EPIPE) {
1298        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
1299         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
1300         * that case.
1301         */
1302        ret = 0;
1303    }
1304    serrno = errno;
1305
1306    trace_nbd_client_loop_ret(ret, strerror(serrno));
1307
1308    trace_nbd_client_clear_queue();
1309    ioctl(fd, NBD_CLEAR_QUE);
1310
1311    trace_nbd_client_clear_socket();
1312    ioctl(fd, NBD_CLEAR_SOCK);
1313
1314    errno = serrno;
1315    return ret;
1316}
1317
1318int nbd_disconnect(int fd)
1319{
1320    ioctl(fd, NBD_CLEAR_QUE);
1321    ioctl(fd, NBD_DISCONNECT);
1322    ioctl(fd, NBD_CLEAR_SOCK);
1323    return 0;
1324}
1325
1326#endif /* __linux__ */
1327
1328int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
1329{
1330    uint8_t buf[NBD_REQUEST_SIZE];
1331
1332    trace_nbd_send_request(request->from, request->len, request->handle,
1333                           request->flags, request->type,
1334                           nbd_cmd_lookup(request->type));
1335
1336    stl_be_p(buf, NBD_REQUEST_MAGIC);
1337    stw_be_p(buf + 4, request->flags);
1338    stw_be_p(buf + 6, request->type);
1339    stq_be_p(buf + 8, request->handle);
1340    stq_be_p(buf + 16, request->from);
1341    stl_be_p(buf + 24, request->len);
1342
1343    return nbd_write(ioc, buf, sizeof(buf), NULL);
1344}
1345
1346/* nbd_receive_simple_reply
1347 * Read simple reply except magic field (which should be already read).
1348 * Payload is not read (payload is possible for CMD_READ, but here we even
1349 * don't know whether it take place or not).
1350 */
1351static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
1352                                    Error **errp)
1353{
1354    int ret;
1355
1356    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
1357
1358    ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
1359                   sizeof(*reply) - sizeof(reply->magic), "reply", errp);
1360    if (ret < 0) {
1361        return ret;
1362    }
1363
1364    reply->error = be32_to_cpu(reply->error);
1365    reply->handle = be64_to_cpu(reply->handle);
1366
1367    return 0;
1368}
1369
1370/* nbd_receive_structured_reply_chunk
1371 * Read structured reply chunk except magic field (which should be already
1372 * read).
1373 * Payload is not read.
1374 */
1375static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
1376                                              NBDStructuredReplyChunk *chunk,
1377                                              Error **errp)
1378{
1379    int ret;
1380
1381    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
1382
1383    ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
1384                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
1385                   errp);
1386    if (ret < 0) {
1387        return ret;
1388    }
1389
1390    chunk->flags = be16_to_cpu(chunk->flags);
1391    chunk->type = be16_to_cpu(chunk->type);
1392    chunk->handle = be64_to_cpu(chunk->handle);
1393    chunk->length = be32_to_cpu(chunk->length);
1394
1395    return 0;
1396}
1397
1398/* nbd_read_eof
1399 * Tries to read @size bytes from @ioc.
1400 * Returns 1 on success
1401 *         0 on eof, when no data was read (errp is not set)
1402 *         negative errno on failure (errp is set)
1403 */
1404static inline int coroutine_fn
1405nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
1406             Error **errp)
1407{
1408    bool partial = false;
1409
1410    assert(size);
1411    while (size > 0) {
1412        struct iovec iov = { .iov_base = buffer, .iov_len = size };
1413        ssize_t len;
1414
1415        len = qio_channel_readv(ioc, &iov, 1, errp);
1416        if (len == QIO_CHANNEL_ERR_BLOCK) {
1417            bdrv_dec_in_flight(bs);
1418            qio_channel_yield(ioc, G_IO_IN);
1419            bdrv_inc_in_flight(bs);
1420            continue;
1421        } else if (len < 0) {
1422            return -EIO;
1423        } else if (len == 0) {
1424            if (partial) {
1425                error_setg(errp,
1426                           "Unexpected end-of-file before all bytes were read");
1427                return -EIO;
1428            } else {
1429                return 0;
1430            }
1431        }
1432
1433        partial = true;
1434        size -= len;
1435        buffer = (uint8_t*) buffer + len;
1436    }
1437    return 1;
1438}
1439
1440/* nbd_receive_reply
1441 *
1442 * Decreases bs->in_flight while waiting for a new reply. This yield is where
1443 * we wait indefinitely and the coroutine must be able to be safely reentered
1444 * for nbd_client_attach_aio_context().
1445 *
1446 * Returns 1 on success
1447 *         0 on eof, when no data was read (errp is not set)
1448 *         negative errno on failure (errp is set)
1449 */
1450int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
1451                                   NBDReply *reply, Error **errp)
1452{
1453    int ret;
1454    const char *type;
1455
1456    ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
1457    if (ret <= 0) {
1458        return ret;
1459    }
1460
1461    reply->magic = be32_to_cpu(reply->magic);
1462
1463    switch (reply->magic) {
1464    case NBD_SIMPLE_REPLY_MAGIC:
1465        ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
1466        if (ret < 0) {
1467            break;
1468        }
1469        trace_nbd_receive_simple_reply(reply->simple.error,
1470                                       nbd_err_lookup(reply->simple.error),
1471                                       reply->handle);
1472        break;
1473    case NBD_STRUCTURED_REPLY_MAGIC:
1474        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
1475        if (ret < 0) {
1476            break;
1477        }
1478        type = nbd_reply_type_lookup(reply->structured.type);
1479        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
1480                                                 reply->structured.type, type,
1481                                                 reply->structured.handle,
1482                                                 reply->structured.length);
1483        break;
1484    default:
1485        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
1486        return -EINVAL;
1487    }
1488    if (ret < 0) {
1489        return ret;
1490    }
1491
1492    return 1;
1493}
1494
1495