qemu/nbd/client.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2016-2017 Red Hat, Inc.
   3 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   4 *
   5 *  Network Block Device Client Side
   6 *
   7 *  This program is free software; you can redistribute it and/or modify
   8 *  it under the terms of the GNU General Public License as published by
   9 *  the Free Software Foundation; under version 2 of the License.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qapi/error.h"
  22#include "trace.h"
  23#include "nbd-internal.h"
  24
  25static int nbd_errno_to_system_errno(int err)
  26{
  27    int ret;
  28    switch (err) {
  29    case NBD_SUCCESS:
  30        ret = 0;
  31        break;
  32    case NBD_EPERM:
  33        ret = EPERM;
  34        break;
  35    case NBD_EIO:
  36        ret = EIO;
  37        break;
  38    case NBD_ENOMEM:
  39        ret = ENOMEM;
  40        break;
  41    case NBD_ENOSPC:
  42        ret = ENOSPC;
  43        break;
  44    case NBD_ESHUTDOWN:
  45        ret = ESHUTDOWN;
  46        break;
  47    default:
  48        trace_nbd_unknown_error(err);
  49        /* fallthrough */
  50    case NBD_EINVAL:
  51        ret = EINVAL;
  52        break;
  53    }
  54    return ret;
  55}
  56
  57/* File-scope tail-queue head for NBDExport entries, set up with
   * QTAILQ_HEAD_INITIALIZER.  Nothing in the visible part of this file
   * reads or modifies `exports`.
   * NOTE(review): presumably a leftover from server-side code - confirm
   * before removing. */
  58
  59static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
  60
  61/* That's all folks */
  62
  63/* Basic flow for negotiation
  64
  65   Server         Client
  66   Negotiate
  67
  68   or
  69
  70   Server         Client
  71   Negotiate #1
  72                  Option
  73   Negotiate #2
  74
  75   ----
  76
  77   followed by
  78
  79   Server         Client
  80                  Request
  81   Response
  82                  Request
  83   Response
  84                  ...
  85   ...
  86                  Request (type == 2)
  87
  88*/
  89
  90/* Send an option request.
  91 *
  92 * The request is for option @opt, with @data containing @len bytes of
  93 * additional payload for the request (@len may be -1 to treat @data as
  94 * a C string; and @data may be NULL if @len is 0).
  95 * Return 0 if successful, -1 with errp set if it is impossible to
  96 * continue. */
  97static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
  98                                   uint32_t len, const char *data,
  99                                   Error **errp)
 100{
 101    nbd_option req;
 102    QEMU_BUILD_BUG_ON(sizeof(req) != 16);
 103
 104    if (len == -1) {
 105        req.length = len = strlen(data);
 106    }
 107    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
 108
 109    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
 110    stl_be_p(&req.option, opt);
 111    stl_be_p(&req.length, len);
 112
 113    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
 114        error_prepend(errp, "Failed to send option request header: ");
 115        return -1;
 116    }
 117
 118    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
 119        error_prepend(errp, "Failed to send option request data: ");
 120        return -1;
 121    }
 122
 123    return 0;
 124}
 125
 126/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
 127 * not going to attempt further negotiation. */
 128static void nbd_send_opt_abort(QIOChannel *ioc)
 129{
 130    /* Technically, a compliant server is supposed to reply to us; but
 131     * older servers disconnected instead. At any rate, we're allowed
 132     * to disconnect without waiting for the server reply, so we don't
 133     * even care if the request makes it to the server, let alone
 134     * waiting around for whether the server replies. */
 135    nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
 136}
 137
 138
 139/* Receive the header of an option reply, which should match the given
 140 * opt.  Read through the length field, but NOT the length bytes of
 141 * payload. Return 0 if successful, -1 with errp set if it is
 142 * impossible to continue. */
 143static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
 144                                    nbd_opt_reply *reply, Error **errp)
 145{
 146    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
 147    if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
 148        error_prepend(errp, "failed to read option reply: ");
 149        nbd_send_opt_abort(ioc);
 150        return -1;
 151    }
 152    be64_to_cpus(&reply->magic);
 153    be32_to_cpus(&reply->option);
 154    be32_to_cpus(&reply->type);
 155    be32_to_cpus(&reply->length);
 156
 157    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
 158                                   reply->type, nbd_rep_lookup(reply->type),
 159                                   reply->length);
 160
 161    if (reply->magic != NBD_REP_MAGIC) {
 162        error_setg(errp, "Unexpected option reply magic");
 163        nbd_send_opt_abort(ioc);
 164        return -1;
 165    }
 166    if (reply->option != opt) {
 167        error_setg(errp, "Unexpected option type %x expected %x",
 168                   reply->option, opt);
 169        nbd_send_opt_abort(ioc);
 170        return -1;
 171    }
 172    return 0;
 173}
 174
 175/* If reply represents success, return 1 without further action.
 176 * If reply represents an error, consume the optional payload of
 177 * the packet on ioc.  Then return 0 for unsupported (so the client
 178 * can fall back to other approaches), or -1 with errp set for other
 179 * errors.
 180 */
 181static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
 182                                Error **errp)
 183{
 184    char *msg = NULL;
 185    int result = -1;
 186
 187    if (!(reply->type & (1 << 31))) {
 188        return 1;
 189    }
 190
 191    if (reply->length) {
 192        if (reply->length > NBD_MAX_BUFFER_SIZE) {
 193            error_setg(errp, "server error 0x%" PRIx32
 194                       " (%s) message is too long",
 195                       reply->type, nbd_rep_lookup(reply->type));
 196            goto cleanup;
 197        }
 198        msg = g_malloc(reply->length + 1);
 199        if (nbd_read(ioc, msg, reply->length, errp) < 0) {
 200            error_prepend(errp, "failed to read option error 0x%" PRIx32
 201                          " (%s) message: ",
 202                          reply->type, nbd_rep_lookup(reply->type));
 203            goto cleanup;
 204        }
 205        msg[reply->length] = '\0';
 206    }
 207
 208    switch (reply->type) {
 209    case NBD_REP_ERR_UNSUP:
 210        trace_nbd_reply_err_unsup(reply->option, nbd_opt_lookup(reply->option));
 211        result = 0;
 212        goto cleanup;
 213
 214    case NBD_REP_ERR_POLICY:
 215        error_setg(errp, "Denied by server for option %" PRIx32 " (%s)",
 216                   reply->option, nbd_opt_lookup(reply->option));
 217        break;
 218
 219    case NBD_REP_ERR_INVALID:
 220        error_setg(errp, "Invalid data length for option %" PRIx32 " (%s)",
 221                   reply->option, nbd_opt_lookup(reply->option));
 222        break;
 223
 224    case NBD_REP_ERR_PLATFORM:
 225        error_setg(errp, "Server lacks support for option %" PRIx32 " (%s)",
 226                   reply->option, nbd_opt_lookup(reply->option));
 227        break;
 228
 229    case NBD_REP_ERR_TLS_REQD:
 230        error_setg(errp, "TLS negotiation required before option %" PRIx32
 231                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 232        break;
 233
 234    case NBD_REP_ERR_UNKNOWN:
 235        error_setg(errp, "Requested export not available");
 236        break;
 237
 238    case NBD_REP_ERR_SHUTDOWN:
 239        error_setg(errp, "Server shutting down before option %" PRIx32 " (%s)",
 240                   reply->option, nbd_opt_lookup(reply->option));
 241        break;
 242
 243    case NBD_REP_ERR_BLOCK_SIZE_REQD:
 244        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIx32
 245                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 246        break;
 247
 248    default:
 249        error_setg(errp, "Unknown error code when asking for option %" PRIx32
 250                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
 251        break;
 252    }
 253
 254    if (msg) {
 255        error_append_hint(errp, "server reported: %s\n", msg);
 256    }
 257
 258 cleanup:
 259    g_free(msg);
 260    if (result < 0) {
 261        nbd_send_opt_abort(ioc);
 262    }
 263    return result;
 264}
 265
 266/* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
 267 * the current reply matches @want or if the server does not support
 268 * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
 269 * is complete, positive if more replies are expected, or negative
 270 * with @errp set if an unrecoverable error occurred. */
 271static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
 272                            Error **errp)
 273{
 274    nbd_opt_reply reply;
 275    uint32_t len;
 276    uint32_t namelen;
 277    char name[NBD_MAX_NAME_SIZE + 1];
 278    int error;
 279
 280    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
 281        return -1;
 282    }
 283    error = nbd_handle_reply_err(ioc, &reply, errp);
 284    if (error <= 0) {
 285        /* The server did not support NBD_OPT_LIST, so set *match on
 286         * the assumption that any name will be accepted.  */
 287        *match = true;
 288        return error;
 289    }
 290    len = reply.length;
 291
 292    if (reply.type == NBD_REP_ACK) {
 293        if (len != 0) {
 294            error_setg(errp, "length too long for option end");
 295            nbd_send_opt_abort(ioc);
 296            return -1;
 297        }
 298        return 0;
 299    } else if (reply.type != NBD_REP_SERVER) {
 300        error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
 301                   reply.type, NBD_REP_SERVER);
 302        nbd_send_opt_abort(ioc);
 303        return -1;
 304    }
 305
 306    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
 307        error_setg(errp, "incorrect option length %" PRIu32, len);
 308        nbd_send_opt_abort(ioc);
 309        return -1;
 310    }
 311    if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
 312        error_prepend(errp, "failed to read option name length: ");
 313        nbd_send_opt_abort(ioc);
 314        return -1;
 315    }
 316    namelen = be32_to_cpu(namelen);
 317    len -= sizeof(namelen);
 318    if (len < namelen) {
 319        error_setg(errp, "incorrect option name length");
 320        nbd_send_opt_abort(ioc);
 321        return -1;
 322    }
 323    if (namelen != strlen(want)) {
 324        if (nbd_drop(ioc, len, errp) < 0) {
 325            error_prepend(errp,
 326                          "failed to skip export name with wrong length: ");
 327            nbd_send_opt_abort(ioc);
 328            return -1;
 329        }
 330        return 1;
 331    }
 332
 333    assert(namelen < sizeof(name));
 334    if (nbd_read(ioc, name, namelen, errp) < 0) {
 335        error_prepend(errp, "failed to read export name: ");
 336        nbd_send_opt_abort(ioc);
 337        return -1;
 338    }
 339    name[namelen] = '\0';
 340    len -= namelen;
 341    if (nbd_drop(ioc, len, errp) < 0) {
 342        error_prepend(errp, "failed to read export description: ");
 343        nbd_send_opt_abort(ioc);
 344        return -1;
 345    }
 346    if (!strcmp(name, want)) {
 347        *match = true;
 348    }
 349    return 1;
 350}
 351
 352
 353/* Returns -1 if NBD_OPT_GO proves the export @wantname cannot be
 354 * used, 0 if NBD_OPT_GO is unsupported (fall back to NBD_OPT_LIST and
 355 * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
 356 * go (with @info populated). */
 357static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
 358                      NBDExportInfo *info, Error **errp)
 359{
 360    nbd_opt_reply reply;
 361    uint32_t len = strlen(wantname);
 362    uint16_t type;
 363    int error;
 364    char *buf;
 365
 366    /* The protocol requires that the server send NBD_INFO_EXPORT with
 367     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
 368     * flags still 0 is a witness of a broken server. */
 369    info->flags = 0;
 370
 371    trace_nbd_opt_go_start(wantname);
 372    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
 373    stl_be_p(buf, len);
 374    memcpy(buf + 4, wantname, len);
 375    /* At most one request, everything else up to server */
 376    stw_be_p(buf + 4 + len, info->request_sizes);
 377    if (info->request_sizes) {
 378        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
 379    }
 380    error = nbd_send_option_request(ioc, NBD_OPT_GO,
 381                                    4 + len + 2 + 2 * info->request_sizes,
 382                                    buf, errp);
 383    g_free(buf);
 384    if (error < 0) {
 385        return -1;
 386    }
 387
 388    while (1) {
 389        if (nbd_receive_option_reply(ioc, NBD_OPT_GO, &reply, errp) < 0) {
 390            return -1;
 391        }
 392        error = nbd_handle_reply_err(ioc, &reply, errp);
 393        if (error <= 0) {
 394            return error;
 395        }
 396        len = reply.length;
 397
 398        if (reply.type == NBD_REP_ACK) {
 399            /* Server is done sending info and moved into transmission
 400               phase, but make sure it sent flags */
 401            if (len) {
 402                error_setg(errp, "server sent invalid NBD_REP_ACK");
 403                nbd_send_opt_abort(ioc);
 404                return -1;
 405            }
 406            if (!info->flags) {
 407                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
 408                nbd_send_opt_abort(ioc);
 409                return -1;
 410            }
 411            trace_nbd_opt_go_success();
 412            return 1;
 413        }
 414        if (reply.type != NBD_REP_INFO) {
 415            error_setg(errp, "unexpected reply type %" PRIx32
 416                       " (%s), expected %x",
 417                       reply.type, nbd_rep_lookup(reply.type), NBD_REP_INFO);
 418            nbd_send_opt_abort(ioc);
 419            return -1;
 420        }
 421        if (len < sizeof(type)) {
 422            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
 423                       len);
 424            nbd_send_opt_abort(ioc);
 425            return -1;
 426        }
 427        if (nbd_read(ioc, &type, sizeof(type), errp) < 0) {
 428            error_prepend(errp, "failed to read info type: ");
 429            nbd_send_opt_abort(ioc);
 430            return -1;
 431        }
 432        len -= sizeof(type);
 433        be16_to_cpus(&type);
 434        switch (type) {
 435        case NBD_INFO_EXPORT:
 436            if (len != sizeof(info->size) + sizeof(info->flags)) {
 437                error_setg(errp, "remaining export info len %" PRIu32
 438                           " is unexpected size", len);
 439                nbd_send_opt_abort(ioc);
 440                return -1;
 441            }
 442            if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
 443                error_prepend(errp, "failed to read info size: ");
 444                nbd_send_opt_abort(ioc);
 445                return -1;
 446            }
 447            be64_to_cpus(&info->size);
 448            if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
 449                error_prepend(errp, "failed to read info flags: ");
 450                nbd_send_opt_abort(ioc);
 451                return -1;
 452            }
 453            be16_to_cpus(&info->flags);
 454            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
 455            break;
 456
 457        case NBD_INFO_BLOCK_SIZE:
 458            if (len != sizeof(info->min_block) * 3) {
 459                error_setg(errp, "remaining export info len %" PRIu32
 460                           " is unexpected size", len);
 461                nbd_send_opt_abort(ioc);
 462                return -1;
 463            }
 464            if (nbd_read(ioc, &info->min_block, sizeof(info->min_block),
 465                         errp) < 0) {
 466                error_prepend(errp, "failed to read info minimum block size: ");
 467                nbd_send_opt_abort(ioc);
 468                return -1;
 469            }
 470            be32_to_cpus(&info->min_block);
 471            if (!is_power_of_2(info->min_block)) {
 472                error_setg(errp, "server minimum block size %" PRId32
 473                           "is not a power of two", info->min_block);
 474                nbd_send_opt_abort(ioc);
 475                return -1;
 476            }
 477            if (nbd_read(ioc, &info->opt_block, sizeof(info->opt_block),
 478                         errp) < 0) {
 479                error_prepend(errp,
 480                              "failed to read info preferred block size: ");
 481                nbd_send_opt_abort(ioc);
 482                return -1;
 483            }
 484            be32_to_cpus(&info->opt_block);
 485            if (!is_power_of_2(info->opt_block) ||
 486                info->opt_block < info->min_block) {
 487                error_setg(errp, "server preferred block size %" PRId32
 488                           "is not valid", info->opt_block);
 489                nbd_send_opt_abort(ioc);
 490                return -1;
 491            }
 492            if (nbd_read(ioc, &info->max_block, sizeof(info->max_block),
 493                         errp) < 0) {
 494                error_prepend(errp, "failed to read info maximum block size: ");
 495                nbd_send_opt_abort(ioc);
 496                return -1;
 497            }
 498            be32_to_cpus(&info->max_block);
 499            trace_nbd_opt_go_info_block_size(info->min_block, info->opt_block,
 500                                             info->max_block);
 501            break;
 502
 503        default:
 504            trace_nbd_opt_go_info_unknown(type, nbd_info_lookup(type));
 505            if (nbd_drop(ioc, len, errp) < 0) {
 506                error_prepend(errp, "Failed to read info payload: ");
 507                nbd_send_opt_abort(ioc);
 508                return -1;
 509            }
 510            break;
 511        }
 512    }
 513}
 514
 515/* Return -1 on failure, 0 if wantname is an available export. */
 516static int nbd_receive_query_exports(QIOChannel *ioc,
 517                                     const char *wantname,
 518                                     Error **errp)
 519{
 520    bool foundExport = false;
 521
 522    trace_nbd_receive_query_exports_start(wantname);
 523    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
 524        return -1;
 525    }
 526
 527    while (1) {
 528        int ret = nbd_receive_list(ioc, wantname, &foundExport, errp);
 529
 530        if (ret < 0) {
 531            /* Server gave unexpected reply */
 532            return -1;
 533        } else if (ret == 0) {
 534            /* Done iterating. */
 535            if (!foundExport) {
 536                error_setg(errp, "No export with name '%s' available",
 537                           wantname);
 538                nbd_send_opt_abort(ioc);
 539                return -1;
 540            }
 541            trace_nbd_receive_query_exports_success(wantname);
 542            return 0;
 543        }
 544    }
 545}
 546
 547static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 548                                        QCryptoTLSCreds *tlscreds,
 549                                        const char *hostname, Error **errp)
 550{
 551    nbd_opt_reply reply;
 552    QIOChannelTLS *tioc;
 553    struct NBDTLSHandshakeData data = { 0 };
 554
 555    trace_nbd_receive_starttls_request();
 556    if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
 557        return NULL;
 558    }
 559
 560    trace_nbd_receive_starttls_reply();
 561    if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
 562        return NULL;
 563    }
 564
 565    if (reply.type != NBD_REP_ACK) {
 566        error_setg(errp, "Server rejected request to start TLS %" PRIx32,
 567                   reply.type);
 568        nbd_send_opt_abort(ioc);
 569        return NULL;
 570    }
 571
 572    if (reply.length != 0) {
 573        error_setg(errp, "Start TLS response was not zero %" PRIu32,
 574                   reply.length);
 575        nbd_send_opt_abort(ioc);
 576        return NULL;
 577    }
 578
 579    trace_nbd_receive_starttls_new_client();
 580    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
 581    if (!tioc) {
 582        return NULL;
 583    }
 584    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
 585    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
 586    trace_nbd_receive_starttls_tls_handshake();
 587    qio_channel_tls_handshake(tioc,
 588                              nbd_tls_handshake,
 589                              &data,
 590                              NULL);
 591
 592    if (!data.complete) {
 593        g_main_loop_run(data.loop);
 594    }
 595    g_main_loop_unref(data.loop);
 596    if (data.error) {
 597        error_propagate(errp, data.error);
 598        object_unref(OBJECT(tioc));
 599        return NULL;
 600    }
 601
 602    return QIO_CHANNEL(tioc);
 603}
 604
 605
 606int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
 607                          QCryptoTLSCreds *tlscreds, const char *hostname,
 608                          QIOChannel **outioc, NBDExportInfo *info,
 609                          Error **errp)
 610{
 611    char buf[256];
 612    uint64_t magic;
 613    int rc;
 614    bool zeroes = true;
 615
 616    trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
 617
 618    rc = -EINVAL;
 619
 620    if (outioc) {
 621        *outioc = NULL;
 622    }
 623    if (tlscreds && !outioc) {
 624        error_setg(errp, "Output I/O channel required for TLS");
 625        goto fail;
 626    }
 627
 628    if (nbd_read(ioc, buf, 8, errp) < 0) {
 629        error_prepend(errp, "Failed to read data: ");
 630        goto fail;
 631    }
 632
 633    buf[8] = '\0';
 634    if (strlen(buf) == 0) {
 635        error_setg(errp, "Server connection closed unexpectedly");
 636        goto fail;
 637    }
 638
 639    magic = ldq_be_p(buf);
 640    trace_nbd_receive_negotiate_magic(magic);
 641
 642    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
 643        error_setg(errp, "Invalid magic received");
 644        goto fail;
 645    }
 646
 647    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
 648        error_prepend(errp, "Failed to read magic: ");
 649        goto fail;
 650    }
 651    magic = be64_to_cpu(magic);
 652    trace_nbd_receive_negotiate_magic(magic);
 653
 654    if (magic == NBD_OPTS_MAGIC) {
 655        uint32_t clientflags = 0;
 656        uint16_t globalflags;
 657        bool fixedNewStyle = false;
 658
 659        if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
 660            error_prepend(errp, "Failed to read server flags: ");
 661            goto fail;
 662        }
 663        globalflags = be16_to_cpu(globalflags);
 664        trace_nbd_receive_negotiate_server_flags(globalflags);
 665        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
 666            fixedNewStyle = true;
 667            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
 668        }
 669        if (globalflags & NBD_FLAG_NO_ZEROES) {
 670            zeroes = false;
 671            clientflags |= NBD_FLAG_C_NO_ZEROES;
 672        }
 673        /* client requested flags */
 674        clientflags = cpu_to_be32(clientflags);
 675        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
 676            error_prepend(errp, "Failed to send clientflags field: ");
 677            goto fail;
 678        }
 679        if (tlscreds) {
 680            if (fixedNewStyle) {
 681                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
 682                if (!*outioc) {
 683                    goto fail;
 684                }
 685                ioc = *outioc;
 686            } else {
 687                error_setg(errp, "Server does not support STARTTLS");
 688                goto fail;
 689            }
 690        }
 691        if (!name) {
 692            trace_nbd_receive_negotiate_default_name();
 693            name = "";
 694        }
 695        if (fixedNewStyle) {
 696            int result;
 697
 698            /* Try NBD_OPT_GO first - if it works, we are done (it
 699             * also gives us a good message if the server requires
 700             * TLS).  If it is not available, fall back to
 701             * NBD_OPT_LIST for nicer error messages about a missing
 702             * export, then use NBD_OPT_EXPORT_NAME.  */
 703            result = nbd_opt_go(ioc, name, info, errp);
 704            if (result < 0) {
 705                goto fail;
 706            }
 707            if (result > 0) {
 708                return 0;
 709            }
 710            /* Check our desired export is present in the
 711             * server export list. Since NBD_OPT_EXPORT_NAME
 712             * cannot return an error message, running this
 713             * query gives us better error reporting if the
 714             * export name is not available.
 715             */
 716            if (nbd_receive_query_exports(ioc, name, errp) < 0) {
 717                goto fail;
 718            }
 719        }
 720        /* write the export name request */
 721        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
 722                                    errp) < 0) {
 723            goto fail;
 724        }
 725
 726        /* Read the response */
 727        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
 728            error_prepend(errp, "Failed to read export length: ");
 729            goto fail;
 730        }
 731        be64_to_cpus(&info->size);
 732
 733        if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
 734            error_prepend(errp, "Failed to read export flags: ");
 735            goto fail;
 736        }
 737        be16_to_cpus(&info->flags);
 738    } else if (magic == NBD_CLIENT_MAGIC) {
 739        uint32_t oldflags;
 740
 741        if (name) {
 742            error_setg(errp, "Server does not support export names");
 743            goto fail;
 744        }
 745        if (tlscreds) {
 746            error_setg(errp, "Server does not support STARTTLS");
 747            goto fail;
 748        }
 749
 750        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
 751            error_prepend(errp, "Failed to read export length: ");
 752            goto fail;
 753        }
 754        be64_to_cpus(&info->size);
 755
 756        if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
 757            error_prepend(errp, "Failed to read export flags: ");
 758            goto fail;
 759        }
 760        be32_to_cpus(&oldflags);
 761        if (oldflags & ~0xffff) {
 762            error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
 763            goto fail;
 764        }
 765        info->flags = oldflags;
 766    } else {
 767        error_setg(errp, "Bad magic received");
 768        goto fail;
 769    }
 770
 771    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
 772    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
 773        error_prepend(errp, "Failed to read reserved block: ");
 774        goto fail;
 775    }
 776    rc = 0;
 777
 778fail:
 779    return rc;
 780}
 781
 782#ifdef __linux__
 783int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
 784             Error **errp)
 785{
 786    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
 787    unsigned long sectors = info->size / sector_size;
 788
 789    /* FIXME: Once the kernel module is patched to honor block sizes,
 790     * and to advertise that fact to user space, we should update the
 791     * hand-off to the kernel to use any block sizes we learned. */
 792    assert(!info->request_sizes);
 793    if (info->size / sector_size != sectors) {
 794        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
 795                   info->size);
 796        return -E2BIG;
 797    }
 798
 799    trace_nbd_init_set_socket();
 800
 801    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
 802        int serrno = errno;
 803        error_setg(errp, "Failed to set NBD socket");
 804        return -serrno;
 805    }
 806
 807    trace_nbd_init_set_block_size(sector_size);
 808
 809    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
 810        int serrno = errno;
 811        error_setg(errp, "Failed setting NBD block size");
 812        return -serrno;
 813    }
 814
 815    trace_nbd_init_set_size(sectors);
 816    if (info->size % sector_size) {
 817        trace_nbd_init_trailing_bytes(info->size % sector_size);
 818    }
 819
 820    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
 821        int serrno = errno;
 822        error_setg(errp, "Failed setting size (in blocks)");
 823        return -serrno;
 824    }
 825
 826    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
 827        if (errno == ENOTTY) {
 828            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
 829            trace_nbd_init_set_readonly();
 830
 831            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
 832                int serrno = errno;
 833                error_setg(errp, "Failed setting read-only attribute");
 834                return -serrno;
 835            }
 836        } else {
 837            int serrno = errno;
 838            error_setg(errp, "Failed setting flags");
 839            return -serrno;
 840        }
 841    }
 842
 843    trace_nbd_init_finish();
 844
 845    return 0;
 846}
 847
 848int nbd_client(int fd)
 849{
 850    int ret;
 851    int serrno;
 852
 853    trace_nbd_client_loop();
 854
 855    ret = ioctl(fd, NBD_DO_IT);
 856    if (ret < 0 && errno == EPIPE) {
 857        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
 858         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
 859         * that case.
 860         */
 861        ret = 0;
 862    }
 863    serrno = errno;
 864
 865    trace_nbd_client_loop_ret(ret, strerror(serrno));
 866
 867    trace_nbd_client_clear_queue();
 868    ioctl(fd, NBD_CLEAR_QUE);
 869
 870    trace_nbd_client_clear_socket();
 871    ioctl(fd, NBD_CLEAR_SOCK);
 872
 873    errno = serrno;
 874    return ret;
 875}
 876
 877int nbd_disconnect(int fd)
 878{
 879    ioctl(fd, NBD_CLEAR_QUE);
 880    ioctl(fd, NBD_DISCONNECT);
 881    ioctl(fd, NBD_CLEAR_SOCK);
 882    return 0;
 883}
 884
 885#else
 886int nbd_init(int fd, QIOChannelSocket *ioc, NBDExportInfo *info,
 887             Error **errp)
 888{
 889    error_setg(errp, "nbd_init is only supported on Linux");
 890    return -ENOTSUP;
 891}
 892
 /* Non-Linux stand-in for nbd_client(): @fd is ignored and -ENOTSUP is
  * always returned. */
 int nbd_client(int fd)
 {
     (void) fd;

     return -ENOTSUP;
 }
 /* Non-Linux stand-in for nbd_disconnect(): @fd is ignored and -ENOTSUP
  * is always returned. */
 int nbd_disconnect(int fd)
 {
     (void) fd;

     return -ENOTSUP;
 }
 901#endif
 902
 903ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
 904{
 905    uint8_t buf[NBD_REQUEST_SIZE];
 906
 907    trace_nbd_send_request(request->from, request->len, request->handle,
 908                           request->flags, request->type,
 909                           nbd_cmd_lookup(request->type));
 910
 911    stl_be_p(buf, NBD_REQUEST_MAGIC);
 912    stw_be_p(buf + 4, request->flags);
 913    stw_be_p(buf + 6, request->type);
 914    stq_be_p(buf + 8, request->handle);
 915    stq_be_p(buf + 16, request->from);
 916    stl_be_p(buf + 24, request->len);
 917
 918    return nbd_write(ioc, buf, sizeof(buf), NULL);
 919}
 920
 921ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
 922{
 923    uint8_t buf[NBD_REPLY_SIZE];
 924    uint32_t magic;
 925    ssize_t ret;
 926
 927    ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
 928    if (ret <= 0) {
 929        return ret;
 930    }
 931
 932    if (ret != sizeof(buf)) {
 933        error_setg(errp, "read failed");
 934        return -EINVAL;
 935    }
 936
 937    /* Reply
 938       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
 939       [ 4 ..  7]    error   (0 == no error)
 940       [ 7 .. 15]    handle
 941     */
 942
 943    magic = ldl_be_p(buf);
 944    reply->error  = ldl_be_p(buf + 4);
 945    reply->handle = ldq_be_p(buf + 8);
 946
 947    reply->error = nbd_errno_to_system_errno(reply->error);
 948    trace_nbd_receive_reply(magic, reply->error, reply->handle);
 949
 950    if (magic != NBD_REPLY_MAGIC) {
 951        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
 952        return -EINVAL;
 953    }
 954    return sizeof(buf);
 955}
 956
 957