qemu/nbd/client.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2016-2019 Red Hat, Inc.
   3 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   4 *
   5 *  Network Block Device Client Side
   6 *
   7 *  This program is free software; you can redistribute it and/or modify
   8 *  it under the terms of the GNU General Public License as published by
   9 *  the Free Software Foundation; under version 2 of the License.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qapi/error.h"
  22#include "qemu/queue.h"
  23#include "trace.h"
  24#include "nbd-internal.h"
  25#include "qemu/cutils.h"
  26
  27/* Definitions for opaque data types */
  28
/*
 * File-scope list head for NBDExport entries, initialised empty.
 * Nothing in the visible portion of this file references it.
 * NOTE(review): presumably a leftover from code shared with the server
 * side - confirm against the rest of the file before removing.
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
  30
  31/* That's all folks */
  32
  33/* Basic flow for negotiation
  34
  35   Server         Client
  36   Negotiate
  37
  38   or
  39
  40   Server         Client
  41   Negotiate #1
  42                  Option
  43   Negotiate #2
  44
  45   ----
  46
  47   followed by
  48
  49   Server         Client
  50                  Request
  51   Response
  52                  Request
  53   Response
  54                  ...
  55   ...
  56                  Request (type == 2)
  57
  58*/
  59
  60/* Send an option request.
  61 *
  62 * The request is for option @opt, with @data containing @len bytes of
  63 * additional payload for the request (@len may be -1 to treat @data as
  64 * a C string; and @data may be NULL if @len is 0).
  65 * Return 0 if successful, -1 with errp set if it is impossible to
  66 * continue. */
  67static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
  68                                   uint32_t len, const char *data,
  69                                   Error **errp)
  70{
  71    ERRP_GUARD();
  72    NBDOption req;
  73    QEMU_BUILD_BUG_ON(sizeof(req) != 16);
  74
  75    if (len == -1) {
  76        req.length = len = strlen(data);
  77    }
  78    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
  79
  80    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
  81    stl_be_p(&req.option, opt);
  82    stl_be_p(&req.length, len);
  83
  84    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
  85        error_prepend(errp, "Failed to send option request header: ");
  86        return -1;
  87    }
  88
  89    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
  90        error_prepend(errp, "Failed to send option request data: ");
  91        return -1;
  92    }
  93
  94    return 0;
  95}
  96
  97/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
  98 * not going to attempt further negotiation. */
  99static void nbd_send_opt_abort(QIOChannel *ioc)
 100{
 101    /* Technically, a compliant server is supposed to reply to us; but
 102     * older servers disconnected instead. At any rate, we're allowed
 103     * to disconnect without waiting for the server reply, so we don't
 104     * even care if the request makes it to the server, let alone
 105     * waiting around for whether the server replies. */
 106    nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
 107}
 108
 109
 110/* Receive the header of an option reply, which should match the given
 111 * opt.  Read through the length field, but NOT the length bytes of
 112 * payload. Return 0 if successful, -1 with errp set if it is
 113 * impossible to continue. */
 114static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
 115                                    NBDOptionReply *reply, Error **errp)
 116{
 117    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
 118    if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
 119        nbd_send_opt_abort(ioc);
 120        return -1;
 121    }
 122    reply->magic = be64_to_cpu(reply->magic);
 123    reply->option = be32_to_cpu(reply->option);
 124    reply->type = be32_to_cpu(reply->type);
 125    reply->length = be32_to_cpu(reply->length);
 126
 127    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
 128                                   reply->type, nbd_rep_lookup(reply->type),
 129                                   reply->length);
 130
 131    if (reply->magic != NBD_REP_MAGIC) {
 132        error_setg(errp, "Unexpected option reply magic");
 133        nbd_send_opt_abort(ioc);
 134        return -1;
 135    }
 136    if (reply->option != opt) {
 137        error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
 138                   reply->option, nbd_opt_lookup(reply->option),
 139                   opt, nbd_opt_lookup(opt));
 140        nbd_send_opt_abort(ioc);
 141        return -1;
 142    }
 143    return 0;
 144}
 145
 146/*
 147 * If reply represents success, return 1 without further action.  If
 148 * reply represents an error, consume the optional payload of the
 149 * packet on ioc.  Then return 0 for unsupported (so the client can
 150 * fall back to other approaches), where @strict determines if only
 151 * ERR_UNSUP or all errors fit that category, or -1 with errp set for
 152 * other errors.
 153 */
 154static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
 155                                bool strict, Error **errp)
 156{
 157    ERRP_GUARD();
 158    g_autofree char *msg = NULL;
 159
 160    if (!(reply->type & (1 << 31))) {
 161        return 1;
 162    }
 163
 164    if (reply->length) {
 165        if (reply->length > NBD_MAX_BUFFER_SIZE) {
 166            error_setg(errp, "server error %" PRIu32
 167                       " (%s) message is too long",
 168                       reply->type, nbd_rep_lookup(reply->type));
 169            goto err;
 170        }
 171        msg = g_malloc(reply->length + 1);
 172        if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
 173            error_prepend(errp, "Failed to read option error %" PRIu32
 174                          " (%s) message: ",
 175                          reply->type, nbd_rep_lookup(reply->type));
 176            goto err;
 177        }
 178        msg[reply->length] = '\0';
 179        trace_nbd_server_error_msg(reply->type,
 180                                   nbd_reply_type_lookup(reply->type), msg);
 181    }
 182
 183    if (reply->type == NBD_REP_ERR_UNSUP || !strict) {
 184        trace_nbd_reply_err_ignored(reply->option,
 185                                    nbd_opt_lookup(reply->option),
 186                                    reply->type, nbd_rep_lookup(reply->type));
 187        return 0;
 188    }
 189
 190    switch (reply->type) {
 191    case NBD_REP_ERR_POLICY:
 192        error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
 193                   reply->option, nbd_opt_lookup(reply->option));
 194        break;
 195
 196    case NBD_REP_ERR_INVALID:
 197        error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
 198                   reply->option, nbd_opt_lookup(reply->option));
 199        break;
 200
 201    case NBD_REP_ERR_PLATFORM:
 202        error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
 203                   reply->option, nbd_opt_lookup(reply->option));
 204        break;
 205
 206    case NBD_REP_ERR_TLS_REQD:
 207        error_setg(errp, "TLS negotiation required before option %" PRIu32
 208                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 209        error_append_hint(errp, "Did you forget a valid tls-creds?\n");
 210        break;
 211
 212    case NBD_REP_ERR_UNKNOWN:
 213        error_setg(errp, "Requested export not available");
 214        break;
 215
 216    case NBD_REP_ERR_SHUTDOWN:
 217        error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
 218                   reply->option, nbd_opt_lookup(reply->option));
 219        break;
 220
 221    case NBD_REP_ERR_BLOCK_SIZE_REQD:
 222        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
 223                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 224        break;
 225
 226    default:
 227        error_setg(errp, "Unknown error code when asking for option %" PRIu32
 228                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 229        break;
 230    }
 231
 232    if (msg) {
 233        error_append_hint(errp, "server reported: %s\n", msg);
 234    }
 235
 236 err:
 237    nbd_send_opt_abort(ioc);
 238    return -1;
 239}
 240
 241/* nbd_receive_list:
 242 * Process another portion of the NBD_OPT_LIST reply, populating any
 243 * name received into *@name. If @description is non-NULL, and the
 244 * server provided a description, that is also populated. The caller
 245 * must eventually call g_free() on success.
 246 * Returns 1 if name and description were set and iteration must continue,
 247 *         0 if iteration is complete (including if OPT_LIST unsupported),
 248 *         -1 with @errp set if an unrecoverable error occurred.
 249 */
 250static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
 251                            Error **errp)
 252{
 253    NBDOptionReply reply;
 254    uint32_t len;
 255    uint32_t namelen;
 256    g_autofree char *local_name = NULL;
 257    g_autofree char *local_desc = NULL;
 258    int error;
 259
 260    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
 261        return -1;
 262    }
 263    error = nbd_handle_reply_err(ioc, &reply, true, errp);
 264    if (error <= 0) {
 265        return error;
 266    }
 267    len = reply.length;
 268
 269    if (reply.type == NBD_REP_ACK) {
 270        if (len != 0) {
 271            error_setg(errp, "length too long for option end");
 272            nbd_send_opt_abort(ioc);
 273            return -1;
 274        }
 275        return 0;
 276    } else if (reply.type != NBD_REP_SERVER) {
 277        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
 278                   reply.type, nbd_rep_lookup(reply.type),
 279                   NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
 280        nbd_send_opt_abort(ioc);
 281        return -1;
 282    }
 283
 284    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
 285        error_setg(errp, "incorrect option length %" PRIu32, len);
 286        nbd_send_opt_abort(ioc);
 287        return -1;
 288    }
 289    if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
 290        nbd_send_opt_abort(ioc);
 291        return -1;
 292    }
 293    len -= sizeof(namelen);
 294    if (len < namelen || namelen > NBD_MAX_STRING_SIZE) {
 295        error_setg(errp, "incorrect name length in server's list response");
 296        nbd_send_opt_abort(ioc);
 297        return -1;
 298    }
 299
 300    local_name = g_malloc(namelen + 1);
 301    if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
 302        nbd_send_opt_abort(ioc);
 303        return -1;
 304    }
 305    local_name[namelen] = '\0';
 306    len -= namelen;
 307    if (len) {
 308        if (len > NBD_MAX_STRING_SIZE) {
 309            error_setg(errp, "incorrect description length in server's "
 310                       "list response");
 311            nbd_send_opt_abort(ioc);
 312            return -1;
 313        }
 314        local_desc = g_malloc(len + 1);
 315        if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
 316            nbd_send_opt_abort(ioc);
 317            return -1;
 318        }
 319        local_desc[len] = '\0';
 320    }
 321
 322    trace_nbd_receive_list(local_name, local_desc ?: "");
 323    *name = g_steal_pointer(&local_name);
 324    if (description) {
 325        *description = g_steal_pointer(&local_desc);
 326    }
 327    return 1;
 328}
 329
 330
 331/*
 332 * nbd_opt_info_or_go:
 333 * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
 334 * Returns -1 if the option proves the export @info->name cannot be
 335 * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
 336 * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
 337 * go (with the rest of @info populated).
 338 */
 339static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
 340                              NBDExportInfo *info, Error **errp)
 341{
 342    ERRP_GUARD();
 343    NBDOptionReply reply;
 344    uint32_t len = strlen(info->name);
 345    uint16_t type;
 346    int error;
 347    char *buf;
 348
 349    /* The protocol requires that the server send NBD_INFO_EXPORT with
 350     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
 351     * flags still 0 is a witness of a broken server. */
 352    info->flags = 0;
 353
 354    assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
 355    trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
 356    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
 357    stl_be_p(buf, len);
 358    memcpy(buf + 4, info->name, len);
 359    /* At most one request, everything else up to server */
 360    stw_be_p(buf + 4 + len, info->request_sizes);
 361    if (info->request_sizes) {
 362        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
 363    }
 364    error = nbd_send_option_request(ioc, opt,
 365                                    4 + len + 2 + 2 * info->request_sizes,
 366                                    buf, errp);
 367    g_free(buf);
 368    if (error < 0) {
 369        return -1;
 370    }
 371
 372    while (1) {
 373        if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 374            return -1;
 375        }
 376        error = nbd_handle_reply_err(ioc, &reply, true, errp);
 377        if (error <= 0) {
 378            return error;
 379        }
 380        len = reply.length;
 381
 382        if (reply.type == NBD_REP_ACK) {
 383            /*
 384             * Server is done sending info, and moved into transmission
 385             * phase for NBD_OPT_GO, but make sure it sent flags
 386             */
 387            if (len) {
 388                error_setg(errp, "server sent invalid NBD_REP_ACK");
 389                return -1;
 390            }
 391            if (!info->flags) {
 392                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
 393                return -1;
 394            }
 395            trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
 396            return 1;
 397        }
 398        if (reply.type != NBD_REP_INFO) {
 399            error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
 400                       reply.type, nbd_rep_lookup(reply.type),
 401                       NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
 402            nbd_send_opt_abort(ioc);
 403            return -1;
 404        }
 405        if (len < sizeof(type)) {
 406            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
 407                       len);
 408            nbd_send_opt_abort(ioc);
 409            return -1;
 410        }
 411        if (nbd_read16(ioc, &type, "info type", errp) < 0) {
 412            nbd_send_opt_abort(ioc);
 413            return -1;
 414        }
 415        len -= sizeof(type);
 416        switch (type) {
 417        case NBD_INFO_EXPORT:
 418            if (len != sizeof(info->size) + sizeof(info->flags)) {
 419                error_setg(errp, "remaining export info len %" PRIu32
 420                           " is unexpected size", len);
 421                nbd_send_opt_abort(ioc);
 422                return -1;
 423            }
 424            if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
 425                nbd_send_opt_abort(ioc);
 426                return -1;
 427            }
 428            if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
 429                nbd_send_opt_abort(ioc);
 430                return -1;
 431            }
 432            if (info->min_block &&
 433                !QEMU_IS_ALIGNED(info->size, info->min_block)) {
 434                error_setg(errp, "export size %" PRIu64 " is not multiple of "
 435                           "minimum block size %" PRIu32, info->size,
 436                           info->min_block);
 437                nbd_send_opt_abort(ioc);
 438                return -1;
 439            }
 440            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
 441            break;
 442
 443        case NBD_INFO_BLOCK_SIZE:
 444            if (len != sizeof(info->min_block) * 3) {
 445                error_setg(errp, "remaining export info len %" PRIu32
 446                           " is unexpected size", len);
 447                nbd_send_opt_abort(ioc);
 448                return -1;
 449            }
 450            if (nbd_read32(ioc, &info->min_block, "info minimum block size",
 451                           errp) < 0) {
 452                nbd_send_opt_abort(ioc);
 453                return -1;
 454            }
 455            if (!is_power_of_2(info->min_block)) {
 456                error_setg(errp, "server minimum block size %" PRIu32
 457                           " is not a power of two", info->min_block);
 458                nbd_send_opt_abort(ioc);
 459                return -1;
 460            }
 461            if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
 462                           errp) < 0)
 463            {
 464                nbd_send_opt_abort(ioc);
 465                return -1;
 466            }
 467            if (!is_power_of_2(info->opt_block) ||
 468                info->opt_block < info->min_block) {
 469                error_setg(errp, "server preferred block size %" PRIu32
 470                           " is not valid", info->opt_block);
 471                nbd_send_opt_abort(ioc);
 472                return -1;
 473            }
 474            if (nbd_read32(ioc, &info->max_block, "info maximum block size",
 475                           errp) < 0)
 476            {
 477                nbd_send_opt_abort(ioc);
 478                return -1;
 479            }
 480            if (info->max_block < info->min_block) {
 481                error_setg(errp, "server maximum block size %" PRIu32
 482                           " is not valid", info->max_block);
 483                nbd_send_opt_abort(ioc);
 484                return -1;
 485            }
 486            trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
 487                                          info->max_block);
 488            break;
 489
 490        default:
 491            /*
 492             * Not worth the bother to check if NBD_INFO_NAME or
 493             * NBD_INFO_DESCRIPTION exceed NBD_MAX_STRING_SIZE.
 494             */
 495            trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
 496            if (nbd_drop(ioc, len, errp) < 0) {
 497                error_prepend(errp, "Failed to read info payload: ");
 498                nbd_send_opt_abort(ioc);
 499                return -1;
 500            }
 501            break;
 502        }
 503    }
 504}
 505
 506/* Return -1 on failure, 0 if wantname is an available export. */
 507static int nbd_receive_query_exports(QIOChannel *ioc,
 508                                     const char *wantname,
 509                                     Error **errp)
 510{
 511    bool list_empty = true;
 512    bool found_export = false;
 513
 514    trace_nbd_receive_query_exports_start(wantname);
 515    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
 516        return -1;
 517    }
 518
 519    while (1) {
 520        char *name;
 521        int ret = nbd_receive_list(ioc, &name, NULL, errp);
 522
 523        if (ret < 0) {
 524            /* Server gave unexpected reply */
 525            return -1;
 526        } else if (ret == 0) {
 527            /* Done iterating. */
 528            if (list_empty) {
 529                /*
 530                 * We don't have enough context to tell a server that
 531                 * sent an empty list apart from a server that does
 532                 * not support the list command; but as this function
 533                 * is just used to trigger a nicer error message
 534                 * before trying NBD_OPT_EXPORT_NAME, assume the
 535                 * export is available.
 536                 */
 537                return 0;
 538            } else if (!found_export) {
 539                error_setg(errp, "No export with name '%s' available",
 540                           wantname);
 541                nbd_send_opt_abort(ioc);
 542                return -1;
 543            }
 544            trace_nbd_receive_query_exports_success(wantname);
 545            return 0;
 546        }
 547        list_empty = false;
 548        if (!strcmp(name, wantname)) {
 549            found_export = true;
 550        }
 551        g_free(name);
 552    }
 553}
 554
 555/*
 556 * nbd_request_simple_option: Send an option request, and parse the reply.
 557 * @strict controls whether ERR_UNSUP or all errors produce 0 status.
 558 * return 1 for successful negotiation,
 559 *        0 if operation is unsupported,
 560 *        -1 with errp set for any other error
 561 */
 562static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
 563                                     Error **errp)
 564{
 565    NBDOptionReply reply;
 566    int error;
 567
 568    if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
 569        return -1;
 570    }
 571
 572    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 573        return -1;
 574    }
 575    error = nbd_handle_reply_err(ioc, &reply, strict, errp);
 576    if (error <= 0) {
 577        return error;
 578    }
 579
 580    if (reply.type != NBD_REP_ACK) {
 581        error_setg(errp, "Server answered option %d (%s) with unexpected "
 582                   "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
 583                   reply.type, nbd_rep_lookup(reply.type));
 584        nbd_send_opt_abort(ioc);
 585        return -1;
 586    }
 587
 588    if (reply.length != 0) {
 589        error_setg(errp, "Option %d ('%s') response length is %" PRIu32
 590                   " (it should be zero)", opt, nbd_opt_lookup(opt),
 591                   reply.length);
 592        nbd_send_opt_abort(ioc);
 593        return -1;
 594    }
 595
 596    return 1;
 597}
 598
 599static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 600                                        QCryptoTLSCreds *tlscreds,
 601                                        const char *hostname, Error **errp)
 602{
 603    int ret;
 604    QIOChannelTLS *tioc;
 605    struct NBDTLSHandshakeData data = { 0 };
 606
 607    ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
 608    if (ret <= 0) {
 609        if (ret == 0) {
 610            error_setg(errp, "Server don't support STARTTLS option");
 611            nbd_send_opt_abort(ioc);
 612        }
 613        return NULL;
 614    }
 615
 616    trace_nbd_receive_starttls_new_client();
 617    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
 618    if (!tioc) {
 619        return NULL;
 620    }
 621    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
 622    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
 623    trace_nbd_receive_starttls_tls_handshake();
 624    qio_channel_tls_handshake(tioc,
 625                              nbd_tls_handshake,
 626                              &data,
 627                              NULL,
 628                              NULL);
 629
 630    if (!data.complete) {
 631        g_main_loop_run(data.loop);
 632    }
 633    g_main_loop_unref(data.loop);
 634    if (data.error) {
 635        error_propagate(errp, data.error);
 636        object_unref(OBJECT(tioc));
 637        return NULL;
 638    }
 639
 640    return QIO_CHANNEL(tioc);
 641}
 642
 643/*
 644 * nbd_send_meta_query:
 645 * Send 0 or 1 set/list meta context queries.
 646 * Return 0 on success, -1 with errp set for any error
 647 */
 648static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
 649                               const char *export, const char *query,
 650                               Error **errp)
 651{
 652    int ret;
 653    uint32_t export_len = strlen(export);
 654    uint32_t queries = !!query;
 655    uint32_t query_len = 0;
 656    uint32_t data_len;
 657    char *data;
 658    char *p;
 659
 660    data_len = sizeof(export_len) + export_len + sizeof(queries);
 661    assert(export_len <= NBD_MAX_STRING_SIZE);
 662    if (query) {
 663        query_len = strlen(query);
 664        data_len += sizeof(query_len) + query_len;
 665        assert(query_len <= NBD_MAX_STRING_SIZE);
 666    } else {
 667        assert(opt == NBD_OPT_LIST_META_CONTEXT);
 668    }
 669    p = data = g_malloc(data_len);
 670
 671    trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
 672    stl_be_p(p, export_len);
 673    memcpy(p += sizeof(export_len), export, export_len);
 674    stl_be_p(p += export_len, queries);
 675    if (query) {
 676        stl_be_p(p += sizeof(queries), query_len);
 677        memcpy(p += sizeof(query_len), query, query_len);
 678    }
 679
 680    ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
 681    g_free(data);
 682    return ret;
 683}
 684
 685/*
 686 * nbd_receive_one_meta_context:
 687 * Called in a loop to receive and trace one set/list meta context reply.
 688 * Pass non-NULL @name or @id to collect results back to the caller, which
 689 * must eventually call g_free().
 690 * return 1 if name is set and iteration must continue,
 691 *        0 if iteration is complete (including if option is unsupported),
 692 *        -1 with errp set for any error
 693 */
 694static int nbd_receive_one_meta_context(QIOChannel *ioc,
 695                                        uint32_t opt,
 696                                        char **name,
 697                                        uint32_t *id,
 698                                        Error **errp)
 699{
 700    int ret;
 701    NBDOptionReply reply;
 702    char *local_name = NULL;
 703    uint32_t local_id;
 704
 705    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
 706        return -1;
 707    }
 708
 709    ret = nbd_handle_reply_err(ioc, &reply, false, errp);
 710    if (ret <= 0) {
 711        return ret;
 712    }
 713
 714    if (reply.type == NBD_REP_ACK) {
 715        if (reply.length != 0) {
 716            error_setg(errp, "Unexpected length to ACK response");
 717            nbd_send_opt_abort(ioc);
 718            return -1;
 719        }
 720        return 0;
 721    } else if (reply.type != NBD_REP_META_CONTEXT) {
 722        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
 723                   reply.type, nbd_rep_lookup(reply.type),
 724                   NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
 725        nbd_send_opt_abort(ioc);
 726        return -1;
 727    }
 728
 729    if (reply.length <= sizeof(local_id) ||
 730        reply.length > NBD_MAX_BUFFER_SIZE) {
 731        error_setg(errp, "Failed to negotiate meta context, server "
 732                   "answered with unexpected length %" PRIu32,
 733                   reply.length);
 734        nbd_send_opt_abort(ioc);
 735        return -1;
 736    }
 737
 738    if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
 739        return -1;
 740    }
 741
 742    reply.length -= sizeof(local_id);
 743    local_name = g_malloc(reply.length + 1);
 744    if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
 745        g_free(local_name);
 746        return -1;
 747    }
 748    local_name[reply.length] = '\0';
 749    trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
 750
 751    if (name) {
 752        *name = local_name;
 753    } else {
 754        g_free(local_name);
 755    }
 756    if (id) {
 757        *id = local_id;
 758    }
 759    return 1;
 760}
 761
 762/*
 763 * nbd_negotiate_simple_meta_context:
 764 * Request the server to set the meta context for export @info->name
 765 * using @info->x_dirty_bitmap with a fallback to "base:allocation",
 766 * setting @info->context_id to the resulting id. Fail if the server
 767 * responds with more than one context or with a context different
 768 * than the query.
 769 * return 1 for successful negotiation,
 770 *        0 if operation is unsupported,
 771 *        -1 with errp set for any other error
 772 */
 773static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
 774                                             NBDExportInfo *info,
 775                                             Error **errp)
 776{
 777    /*
 778     * TODO: Removing the x_dirty_bitmap hack will mean refactoring
 779     * this function to request and store ids for multiple contexts
 780     * (both base:allocation and a dirty bitmap), at which point this
 781     * function should lose the term _simple.
 782     */
 783    int ret;
 784    const char *context = info->x_dirty_bitmap ?: "base:allocation";
 785    bool received = false;
 786    char *name = NULL;
 787
 788    if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
 789                            info->name, context, errp) < 0) {
 790        return -1;
 791    }
 792
 793    ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
 794                                       &name, &info->context_id, errp);
 795    if (ret < 0) {
 796        return -1;
 797    }
 798    if (ret == 1) {
 799        if (strcmp(context, name)) {
 800            error_setg(errp, "Failed to negotiate meta context '%s', server "
 801                       "answered with different context '%s'", context,
 802                       name);
 803            g_free(name);
 804            nbd_send_opt_abort(ioc);
 805            return -1;
 806        }
 807        g_free(name);
 808        received = true;
 809
 810        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
 811                                           NULL, NULL, errp);
 812        if (ret < 0) {
 813            return -1;
 814        }
 815    }
 816    if (ret != 0) {
 817        error_setg(errp, "Server answered with more than one context");
 818        nbd_send_opt_abort(ioc);
 819        return -1;
 820    }
 821    return received;
 822}
 823
 824/*
 825 * nbd_list_meta_contexts:
 826 * Request the server to list all meta contexts for export @info->name.
 827 * return 0 if list is complete (even if empty),
 828 *        -1 with errp set for any error
 829 */
 830static int nbd_list_meta_contexts(QIOChannel *ioc,
 831                                  NBDExportInfo *info,
 832                                  Error **errp)
 833{
 834    int ret;
 835    int seen_any = false;
 836    int seen_qemu = false;
 837
 838    if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
 839                            info->name, NULL, errp) < 0) {
 840        return -1;
 841    }
 842
 843    while (1) {
 844        char *context;
 845
 846        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
 847                                           &context, NULL, errp);
 848        if (ret == 0 && seen_any && !seen_qemu) {
 849            /*
 850             * Work around qemu 3.0 bug: the server forgot to send
 851             * "qemu:" replies to 0 queries. If we saw at least one
 852             * reply (probably base:allocation), but none of them were
 853             * qemu:, then run a more specific query to make sure.
 854             */
 855            seen_qemu = true;
 856            if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
 857                                    info->name, "qemu:", errp) < 0) {
 858                return -1;
 859            }
 860            continue;
 861        }
 862        if (ret <= 0) {
 863            return ret;
 864        }
 865        seen_any = true;
 866        seen_qemu |= strstart(context, "qemu:", NULL);
 867        info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
 868        info->contexts[info->n_contexts - 1] = context;
 869    }
 870}
 871
 872/*
 873 * nbd_start_negotiate:
 874 * Start the handshake to the server.  After a positive return, the server
 875 * is ready to accept additional NBD_OPT requests.
 876 * Returns: negative errno: failure talking to server
 877 *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
 878 *          1: server is newstyle, but can only accept EXPORT_NAME
 879 *          2: server is newstyle, but lacks structured replies
 880 *          3: server is newstyle and set up for structured replies
 881 */
 882static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
 883                               QCryptoTLSCreds *tlscreds,
 884                               const char *hostname, QIOChannel **outioc,
 885                               bool structured_reply, bool *zeroes,
 886                               Error **errp)
 887{
 888    ERRP_GUARD();
 889    uint64_t magic;
 890
 891    trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>");
 892
 893    if (zeroes) {
 894        *zeroes = true;
 895    }
 896    if (outioc) {
 897        *outioc = NULL;
 898    }
 899    if (tlscreds && !outioc) {
 900        error_setg(errp, "Output I/O channel required for TLS");
 901        return -EINVAL;
 902    }
 903
 904    if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
 905        return -EINVAL;
 906    }
 907    trace_nbd_receive_negotiate_magic(magic);
 908
 909    if (magic != NBD_INIT_MAGIC) {
 910        error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
 911        return -EINVAL;
 912    }
 913
 914    if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
 915        return -EINVAL;
 916    }
 917    trace_nbd_receive_negotiate_magic(magic);
 918
 919    if (magic == NBD_OPTS_MAGIC) {
 920        uint32_t clientflags = 0;
 921        uint16_t globalflags;
 922        bool fixedNewStyle = false;
 923
 924        if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
 925            return -EINVAL;
 926        }
 927        trace_nbd_receive_negotiate_server_flags(globalflags);
 928        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
 929            fixedNewStyle = true;
 930            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
 931        }
 932        if (globalflags & NBD_FLAG_NO_ZEROES) {
 933            if (zeroes) {
 934                *zeroes = false;
 935            }
 936            clientflags |= NBD_FLAG_C_NO_ZEROES;
 937        }
 938        /* client requested flags */
 939        clientflags = cpu_to_be32(clientflags);
 940        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
 941            error_prepend(errp, "Failed to send clientflags field: ");
 942            return -EINVAL;
 943        }
 944        if (tlscreds) {
 945            if (fixedNewStyle) {
 946                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
 947                if (!*outioc) {
 948                    return -EINVAL;
 949                }
 950                ioc = *outioc;
 951                if (aio_context) {
 952                    qio_channel_set_blocking(ioc, false, NULL);
 953                    qio_channel_attach_aio_context(ioc, aio_context);
 954                }
 955            } else {
 956                error_setg(errp, "Server does not support STARTTLS");
 957                return -EINVAL;
 958            }
 959        }
 960        if (fixedNewStyle) {
 961            int result = 0;
 962
 963            if (structured_reply) {
 964                result = nbd_request_simple_option(ioc,
 965                                                   NBD_OPT_STRUCTURED_REPLY,
 966                                                   false, errp);
 967                if (result < 0) {
 968                    return -EINVAL;
 969                }
 970            }
 971            return 2 + result;
 972        } else {
 973            return 1;
 974        }
 975    } else if (magic == NBD_CLIENT_MAGIC) {
 976        if (tlscreds) {
 977            error_setg(errp, "Server does not support STARTTLS");
 978            return -EINVAL;
 979        }
 980        return 0;
 981    } else {
 982        error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
 983        return -EINVAL;
 984    }
 985}
 986
 987/*
 988 * nbd_negotiate_finish_oldstyle:
 989 * Populate @info with the size and export flags from an oldstyle server,
 990 * but does not consume 124 bytes of reserved zero padding.
 991 * Returns 0 on success, -1 with @errp set on failure
 992 */
 993static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
 994                                         Error **errp)
 995{
 996    uint32_t oldflags;
 997
 998    if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
 999        return -EINVAL;
1000    }
1001
1002    if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
1003        return -EINVAL;
1004    }
1005    if (oldflags & ~0xffff) {
1006        error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
1007        return -EINVAL;
1008    }
1009    info->flags = oldflags;
1010    return 0;
1011}
1012
1013/*
1014 * nbd_receive_negotiate:
1015 * Connect to server, complete negotiation, and move into transmission phase.
1016 * Returns: negative errno: failure talking to server
1017 *          0: server is connected
1018 */
1019int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
1020                          QCryptoTLSCreds *tlscreds,
1021                          const char *hostname, QIOChannel **outioc,
1022                          NBDExportInfo *info, Error **errp)
1023{
1024    ERRP_GUARD();
1025    int result;
1026    bool zeroes;
1027    bool base_allocation = info->base_allocation;
1028
1029    assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
1030    trace_nbd_receive_negotiate_name(info->name);
1031
1032    result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
1033                                 info->structured_reply, &zeroes, errp);
1034
1035    info->structured_reply = false;
1036    info->base_allocation = false;
1037    if (tlscreds && *outioc) {
1038        ioc = *outioc;
1039    }
1040
1041    switch (result) {
1042    case 3: /* newstyle, with structured replies */
1043        info->structured_reply = true;
1044        if (base_allocation) {
1045            result = nbd_negotiate_simple_meta_context(ioc, info, errp);
1046            if (result < 0) {
1047                return -EINVAL;
1048            }
1049            info->base_allocation = result == 1;
1050        }
1051        /* fall through */
1052    case 2: /* newstyle, try OPT_GO */
1053        /* Try NBD_OPT_GO first - if it works, we are done (it
1054         * also gives us a good message if the server requires
1055         * TLS).  If it is not available, fall back to
1056         * NBD_OPT_LIST for nicer error messages about a missing
1057         * export, then use NBD_OPT_EXPORT_NAME.  */
1058        result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
1059        if (result < 0) {
1060            return -EINVAL;
1061        }
1062        if (result > 0) {
1063            return 0;
1064        }
1065        /* Check our desired export is present in the
1066         * server export list. Since NBD_OPT_EXPORT_NAME
1067         * cannot return an error message, running this
1068         * query gives us better error reporting if the
1069         * export name is not available.
1070         */
1071        if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
1072            return -EINVAL;
1073        }
1074        /* fall through */
1075    case 1: /* newstyle, but limited to EXPORT_NAME */
1076        /* write the export name request */
1077        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
1078                                    errp) < 0) {
1079            return -EINVAL;
1080        }
1081
1082        /* Read the response */
1083        if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
1084            return -EINVAL;
1085        }
1086
1087        if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
1088            return -EINVAL;
1089        }
1090        break;
1091    case 0: /* oldstyle, parse length and flags */
1092        if (*info->name) {
1093            error_setg(errp, "Server does not support non-empty export names");
1094            return -EINVAL;
1095        }
1096        if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
1097            return -EINVAL;
1098        }
1099        break;
1100    default:
1101        return result;
1102    }
1103
1104    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
1105    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
1106        error_prepend(errp, "Failed to read reserved block: ");
1107        return -EINVAL;
1108    }
1109    return 0;
1110}
1111
1112/* Clean up result of nbd_receive_export_list */
1113void nbd_free_export_list(NBDExportInfo *info, int count)
1114{
1115    int i, j;
1116
1117    if (!info) {
1118        return;
1119    }
1120
1121    for (i = 0; i < count; i++) {
1122        g_free(info[i].name);
1123        g_free(info[i].description);
1124        for (j = 0; j < info[i].n_contexts; j++) {
1125            g_free(info[i].contexts[j]);
1126        }
1127        g_free(info[i].contexts);
1128    }
1129    g_free(info);
1130}
1131
1132/*
1133 * nbd_receive_export_list:
1134 * Query details about a server's exports, then disconnect without
1135 * going into transmission phase. Return a count of the exports listed
1136 * in @info by the server, or -1 on error. Caller must free @info using
1137 * nbd_free_export_list().
1138 */
1139int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
1140                            const char *hostname, NBDExportInfo **info,
1141                            Error **errp)
1142{
1143    int result;
1144    int count = 0;
1145    int i;
1146    int rc;
1147    int ret = -1;
1148    NBDExportInfo *array = NULL;
1149    QIOChannel *sioc = NULL;
1150
1151    *info = NULL;
1152    result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
1153                                 NULL, errp);
1154    if (tlscreds && sioc) {
1155        ioc = sioc;
1156    }
1157
1158    switch (result) {
1159    case 2:
1160    case 3:
1161        /* newstyle - use NBD_OPT_LIST to populate array, then try
1162         * NBD_OPT_INFO on each array member. If structured replies
1163         * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
1164        if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
1165            goto out;
1166        }
1167        while (1) {
1168            char *name;
1169            char *desc;
1170
1171            rc = nbd_receive_list(ioc, &name, &desc, errp);
1172            if (rc < 0) {
1173                goto out;
1174            } else if (rc == 0) {
1175                break;
1176            }
1177            array = g_renew(NBDExportInfo, array, ++count);
1178            memset(&array[count - 1], 0, sizeof(*array));
1179            array[count - 1].name = name;
1180            array[count - 1].description = desc;
1181            array[count - 1].structured_reply = result == 3;
1182        }
1183
1184        for (i = 0; i < count; i++) {
1185            array[i].request_sizes = true;
1186            rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
1187            if (rc < 0) {
1188                goto out;
1189            } else if (rc == 0) {
1190                /*
1191                 * Pointless to try rest of loop. If OPT_INFO doesn't work,
1192                 * it's unlikely that meta contexts work either
1193                 */
1194                break;
1195            }
1196
1197            if (result == 3 &&
1198                nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
1199                goto out;
1200            }
1201        }
1202
1203        /* Send NBD_OPT_ABORT as a courtesy before hanging up */
1204        nbd_send_opt_abort(ioc);
1205        break;
1206    case 1: /* newstyle, but limited to EXPORT_NAME */
1207        error_setg(errp, "Server does not support export lists");
1208        /* We can't even send NBD_OPT_ABORT, so merely hang up */
1209        goto out;
1210    case 0: /* oldstyle, parse length and flags */
1211        array = g_new0(NBDExportInfo, 1);
1212        array->name = g_strdup("");
1213        count = 1;
1214
1215        if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
1216            goto out;
1217        }
1218
1219        /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
1220         * errors now that we have the information we wanted. */
1221        if (nbd_drop(ioc, 124, NULL) == 0) {
1222            NBDRequest request = { .type = NBD_CMD_DISC };
1223
1224            nbd_send_request(ioc, &request);
1225        }
1226        break;
1227    default:
1228        goto out;
1229    }
1230
1231    *info = array;
1232    array = NULL;
1233    ret = count;
1234
1235 out:
1236    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1237    qio_channel_close(ioc, NULL);
1238    object_unref(OBJECT(sioc));
1239    nbd_free_export_list(array, count);
1240    return ret;
1241}
1242
1243#ifdef __linux__
1244int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
1245             Error **errp)
1246{
1247    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
1248    unsigned long sectors = info->size / sector_size;
1249
1250    /* FIXME: Once the kernel module is patched to honor block sizes,
1251     * and to advertise that fact to user space, we should update the
1252     * hand-off to the kernel to use any block sizes we learned. */
1253    assert(!info->request_sizes);
1254    if (info->size / sector_size != sectors) {
1255        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
1256                   info->size);
1257        return -E2BIG;
1258    }
1259
1260    trace_nbd_init_set_socket();
1261
1262    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
1263        int serrno = errno;
1264        error_setg(errp, "Failed to set NBD socket");
1265        return -serrno;
1266    }
1267
1268    trace_nbd_init_set_block_size(sector_size);
1269
1270    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
1271        int serrno = errno;
1272        error_setg(errp, "Failed setting NBD block size");
1273        return -serrno;
1274    }
1275
1276    trace_nbd_init_set_size(sectors);
1277    if (info->size % sector_size) {
1278        trace_nbd_init_trailing_bytes(info->size % sector_size);
1279    }
1280
1281    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
1282        int serrno = errno;
1283        error_setg(errp, "Failed setting size (in blocks)");
1284        return -serrno;
1285    }
1286
1287    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
1288        if (errno == ENOTTY) {
1289            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
1290            trace_nbd_init_set_readonly();
1291
1292            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
1293                int serrno = errno;
1294                error_setg(errp, "Failed setting read-only attribute");
1295                return -serrno;
1296            }
1297        } else {
1298            int serrno = errno;
1299            error_setg(errp, "Failed setting flags");
1300            return -serrno;
1301        }
1302    }
1303
1304    trace_nbd_init_finish();
1305
1306    return 0;
1307}
1308
1309int nbd_client(int fd)
1310{
1311    int ret;
1312    int serrno;
1313
1314    trace_nbd_client_loop();
1315
1316    ret = ioctl(fd, NBD_DO_IT);
1317    if (ret < 0 && errno == EPIPE) {
1318        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
1319         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
1320         * that case.
1321         */
1322        ret = 0;
1323    }
1324    serrno = errno;
1325
1326    trace_nbd_client_loop_ret(ret, strerror(serrno));
1327
1328    trace_nbd_client_clear_queue();
1329    ioctl(fd, NBD_CLEAR_QUE);
1330
1331    trace_nbd_client_clear_socket();
1332    ioctl(fd, NBD_CLEAR_SOCK);
1333
1334    errno = serrno;
1335    return ret;
1336}
1337
1338int nbd_disconnect(int fd)
1339{
1340    ioctl(fd, NBD_CLEAR_QUE);
1341    ioctl(fd, NBD_DISCONNECT);
1342    ioctl(fd, NBD_CLEAR_SOCK);
1343    return 0;
1344}
1345
1346#endif /* __linux__ */
1347
1348int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
1349{
1350    uint8_t buf[NBD_REQUEST_SIZE];
1351
1352    trace_nbd_send_request(request->from, request->len, request->handle,
1353                           request->flags, request->type,
1354                           nbd_cmd_lookup(request->type));
1355
1356    stl_be_p(buf, NBD_REQUEST_MAGIC);
1357    stw_be_p(buf + 4, request->flags);
1358    stw_be_p(buf + 6, request->type);
1359    stq_be_p(buf + 8, request->handle);
1360    stq_be_p(buf + 16, request->from);
1361    stl_be_p(buf + 24, request->len);
1362
1363    return nbd_write(ioc, buf, sizeof(buf), NULL);
1364}
1365
1366/* nbd_receive_simple_reply
1367 * Read simple reply except magic field (which should be already read).
1368 * Payload is not read (payload is possible for CMD_READ, but here we even
1369 * don't know whether it take place or not).
1370 */
1371static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
1372                                    Error **errp)
1373{
1374    int ret;
1375
1376    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
1377
1378    ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
1379                   sizeof(*reply) - sizeof(reply->magic), "reply", errp);
1380    if (ret < 0) {
1381        return ret;
1382    }
1383
1384    reply->error = be32_to_cpu(reply->error);
1385    reply->handle = be64_to_cpu(reply->handle);
1386
1387    return 0;
1388}
1389
1390/* nbd_receive_structured_reply_chunk
1391 * Read structured reply chunk except magic field (which should be already
1392 * read).
1393 * Payload is not read.
1394 */
1395static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
1396                                              NBDStructuredReplyChunk *chunk,
1397                                              Error **errp)
1398{
1399    int ret;
1400
1401    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
1402
1403    ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
1404                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
1405                   errp);
1406    if (ret < 0) {
1407        return ret;
1408    }
1409
1410    chunk->flags = be16_to_cpu(chunk->flags);
1411    chunk->type = be16_to_cpu(chunk->type);
1412    chunk->handle = be64_to_cpu(chunk->handle);
1413    chunk->length = be32_to_cpu(chunk->length);
1414
1415    return 0;
1416}
1417
1418/* nbd_read_eof
1419 * Tries to read @size bytes from @ioc.
1420 * Returns 1 on success
1421 *         0 on eof, when no data was read (errp is not set)
1422 *         negative errno on failure (errp is set)
1423 */
1424static inline int coroutine_fn
1425nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
1426             Error **errp)
1427{
1428    bool partial = false;
1429
1430    assert(size);
1431    while (size > 0) {
1432        struct iovec iov = { .iov_base = buffer, .iov_len = size };
1433        ssize_t len;
1434
1435        len = qio_channel_readv(ioc, &iov, 1, errp);
1436        if (len == QIO_CHANNEL_ERR_BLOCK) {
1437            qio_channel_yield(ioc, G_IO_IN);
1438            continue;
1439        } else if (len < 0) {
1440            return -EIO;
1441        } else if (len == 0) {
1442            if (partial) {
1443                error_setg(errp,
1444                           "Unexpected end-of-file before all bytes were read");
1445                return -EIO;
1446            } else {
1447                return 0;
1448            }
1449        }
1450
1451        partial = true;
1452        size -= len;
1453        buffer = (uint8_t*) buffer + len;
1454    }
1455    return 1;
1456}
1457
1458/* nbd_receive_reply
1459 *
1460 * Decreases bs->in_flight while waiting for a new reply. This yield is where
1461 * we wait indefinitely and the coroutine must be able to be safely reentered
1462 * for nbd_client_attach_aio_context().
1463 *
1464 * Returns 1 on success
1465 *         0 on eof, when no data was read (errp is not set)
1466 *         negative errno on failure (errp is set)
1467 */
1468int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
1469                                   NBDReply *reply, Error **errp)
1470{
1471    int ret;
1472    const char *type;
1473
1474    ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
1475    if (ret <= 0) {
1476        return ret;
1477    }
1478
1479    reply->magic = be32_to_cpu(reply->magic);
1480
1481    switch (reply->magic) {
1482    case NBD_SIMPLE_REPLY_MAGIC:
1483        ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
1484        if (ret < 0) {
1485            break;
1486        }
1487        trace_nbd_receive_simple_reply(reply->simple.error,
1488                                       nbd_err_lookup(reply->simple.error),
1489                                       reply->handle);
1490        break;
1491    case NBD_STRUCTURED_REPLY_MAGIC:
1492        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
1493        if (ret < 0) {
1494            break;
1495        }
1496        type = nbd_reply_type_lookup(reply->structured.type);
1497        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
1498                                                 reply->structured.type, type,
1499                                                 reply->structured.handle,
1500                                                 reply->structured.length);
1501        break;
1502    default:
1503        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
1504        return -EINVAL;
1505    }
1506    if (ret < 0) {
1507        return ret;
1508    }
1509
1510    return 1;
1511}
1512
1513