qemu/nbd.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
   3 *
   4 *  Network Block Device
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; under version 2 of the License.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include "nbd.h"
  20
  21#include <errno.h>
  22#include <string.h>
  23#ifndef _WIN32
  24#include <sys/ioctl.h>
  25#endif
  26#if defined(__sun__) || defined(__HAIKU__)
  27#include <sys/ioccom.h>
  28#endif
  29#include <ctype.h>
  30#include <inttypes.h>
  31
  32#include "qemu_socket.h"
  33
  34//#define DEBUG_NBD
  35
  36#ifdef DEBUG_NBD
  37#define TRACE(msg, ...) do { \
  38    LOG(msg, ## __VA_ARGS__); \
  39} while(0)
  40#else
  41#define TRACE(msg, ...) \
  42    do { } while (0)
  43#endif
  44
  45#define LOG(msg, ...) do { \
  46    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
  47            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
  48} while(0)
  49
  50/* This is all part of the "official" NBD API */
  51
  52#define NBD_REPLY_SIZE          (4 + 4 + 8)
  53#define NBD_REQUEST_MAGIC       0x25609513
  54#define NBD_REPLY_MAGIC         0x67446698
  55
  56#define NBD_SET_SOCK            _IO(0xab, 0)
  57#define NBD_SET_BLKSIZE         _IO(0xab, 1)
  58#define NBD_SET_SIZE            _IO(0xab, 2)
  59#define NBD_DO_IT               _IO(0xab, 3)
  60#define NBD_CLEAR_SOCK          _IO(0xab, 4)
  61#define NBD_CLEAR_QUE           _IO(0xab, 5)
  62#define NBD_PRINT_DEBUG         _IO(0xab, 6)
  63#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
  64#define NBD_DISCONNECT          _IO(0xab, 8)
  65
  66#define NBD_OPT_EXPORT_NAME     (1 << 0)
  67
  68/* That's all folks */
  69
  70#define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true)
  71#define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false)
  72
  73size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
  74{
  75    size_t offset = 0;
  76
  77    while (offset < size) {
  78        ssize_t len;
  79
  80        if (do_read) {
  81            len = qemu_recv(fd, buffer + offset, size - offset, 0);
  82        } else {
  83            len = send(fd, buffer + offset, size - offset, 0);
  84        }
  85
  86        if (len == -1)
  87            errno = socket_error();
  88
  89        /* recoverable error */
  90        if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
  91            continue;
  92        }
  93
  94        /* eof */
  95        if (len == 0) {
  96            break;
  97        }
  98
  99        /* unrecoverable error */
 100        if (len == -1) {
 101            return 0;
 102        }
 103
 104        offset += len;
 105    }
 106
 107    return offset;
 108}
 109
 110static void combine_addr(char *buf, size_t len, const char* address,
 111                         uint16_t port)
 112{
 113    /* If the address-part contains a colon, it's an IPv6 IP so needs [] */
 114    if (strstr(address, ":")) {
 115        snprintf(buf, len, "[%s]:%u", address, port);
 116    } else {
 117        snprintf(buf, len, "%s:%u", address, port);
 118    }
 119}
 120
 121int tcp_socket_outgoing(const char *address, uint16_t port)
 122{
 123    char address_and_port[128];
 124    combine_addr(address_and_port, 128, address, port);
 125    return tcp_socket_outgoing_spec(address_and_port);
 126}
 127
 128int tcp_socket_outgoing_spec(const char *address_and_port)
 129{
 130    return inet_connect(address_and_port, SOCK_STREAM);
 131}
 132
 133int tcp_socket_incoming(const char *address, uint16_t port)
 134{
 135    char address_and_port[128];
 136    combine_addr(address_and_port, 128, address, port);
 137    return tcp_socket_incoming_spec(address_and_port);
 138}
 139
 140int tcp_socket_incoming_spec(const char *address_and_port)
 141{
 142    char *ostr  = NULL;
 143    int olen = 0;
 144    return inet_listen(address_and_port, ostr, olen, SOCK_STREAM, 0);
 145}
 146
 147int unix_socket_incoming(const char *path)
 148{
 149    char *ostr = NULL;
 150    int olen = 0;
 151
 152    return unix_listen(path, ostr, olen);
 153}
 154
 155int unix_socket_outgoing(const char *path)
 156{
 157    return unix_connect(path);
 158}
 159
 160/* Basic flow
 161
 162   Server         Client
 163
 164   Negotiate
 165                  Request
 166   Response
 167                  Request
 168   Response
 169                  ...
 170   ...
 171                  Request (type == 2)
 172*/
 173
 174int nbd_negotiate(int csock, off_t size)
 175{
 176    char buf[8 + 8 + 8 + 128];
 177
 178    /* Negotiate
 179        [ 0 ..   7]   passwd   ("NBDMAGIC")
 180        [ 8 ..  15]   magic    (0x00420281861253)
 181        [16 ..  23]   size
 182        [24 .. 151]   reserved (0)
 183     */
 184
 185    TRACE("Beginning negotiation.");
 186    memcpy(buf, "NBDMAGIC", 8);
 187    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
 188    cpu_to_be64w((uint64_t*)(buf + 16), size);
 189    memset(buf + 24, 0, 128);
 190
 191    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
 192        LOG("write failed");
 193        errno = EINVAL;
 194        return -1;
 195    }
 196
 197    TRACE("Negotation succeeded.");
 198
 199    return 0;
 200}
 201
 202int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
 203                          off_t *size, size_t *blocksize)
 204{
 205    char buf[256];
 206    uint64_t magic, s;
 207    uint16_t tmp;
 208
 209    TRACE("Receiving negotation.");
 210
 211    if (read_sync(csock, buf, 8) != 8) {
 212        LOG("read failed");
 213        errno = EINVAL;
 214        return -1;
 215    }
 216
 217    buf[8] = '\0';
 218    if (strlen(buf) == 0) {
 219        LOG("server connection closed");
 220        errno = EINVAL;
 221        return -1;
 222    }
 223
 224    TRACE("Magic is %c%c%c%c%c%c%c%c",
 225          qemu_isprint(buf[0]) ? buf[0] : '.',
 226          qemu_isprint(buf[1]) ? buf[1] : '.',
 227          qemu_isprint(buf[2]) ? buf[2] : '.',
 228          qemu_isprint(buf[3]) ? buf[3] : '.',
 229          qemu_isprint(buf[4]) ? buf[4] : '.',
 230          qemu_isprint(buf[5]) ? buf[5] : '.',
 231          qemu_isprint(buf[6]) ? buf[6] : '.',
 232          qemu_isprint(buf[7]) ? buf[7] : '.');
 233
 234    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
 235        LOG("Invalid magic received");
 236        errno = EINVAL;
 237        return -1;
 238    }
 239
 240    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
 241        LOG("read failed");
 242        errno = EINVAL;
 243        return -1;
 244    }
 245    magic = be64_to_cpu(magic);
 246    TRACE("Magic is 0x%" PRIx64, magic);
 247
 248    if (name) {
 249        uint32_t reserved = 0;
 250        uint32_t opt;
 251        uint32_t namesize;
 252
 253        TRACE("Checking magic (opts_magic)");
 254        if (magic != 0x49484156454F5054LL) {
 255            LOG("Bad magic received");
 256            errno = EINVAL;
 257            return -1;
 258        }
 259        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
 260            LOG("flags read failed");
 261            errno = EINVAL;
 262            return -1;
 263        }
 264        *flags = be16_to_cpu(tmp) << 16;
 265        /* reserved for future use */
 266        if (write_sync(csock, &reserved, sizeof(reserved)) !=
 267            sizeof(reserved)) {
 268            LOG("write failed (reserved)");
 269            errno = EINVAL;
 270            return -1;
 271        }
 272        /* write the export name */
 273        magic = cpu_to_be64(magic);
 274        if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
 275            LOG("write failed (magic)");
 276            errno = EINVAL;
 277            return -1;
 278        }
 279        opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
 280        if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
 281            LOG("write failed (opt)");
 282            errno = EINVAL;
 283            return -1;
 284        }
 285        namesize = cpu_to_be32(strlen(name));
 286        if (write_sync(csock, &namesize, sizeof(namesize)) !=
 287            sizeof(namesize)) {
 288            LOG("write failed (namesize)");
 289            errno = EINVAL;
 290            return -1;
 291        }
 292        if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
 293            LOG("write failed (name)");
 294            errno = EINVAL;
 295            return -1;
 296        }
 297    } else {
 298        TRACE("Checking magic (cli_magic)");
 299
 300        if (magic != 0x00420281861253LL) {
 301            LOG("Bad magic received");
 302            errno = EINVAL;
 303            return -1;
 304        }
 305    }
 306
 307    if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
 308        LOG("read failed");
 309        errno = EINVAL;
 310        return -1;
 311    }
 312    *size = be64_to_cpu(s);
 313    *blocksize = 1024;
 314    TRACE("Size is %" PRIu64, *size);
 315
 316    if (!name) {
 317        if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
 318            LOG("read failed (flags)");
 319            errno = EINVAL;
 320            return -1;
 321        }
 322        *flags = be32_to_cpup(flags);
 323    } else {
 324        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
 325            LOG("read failed (tmp)");
 326            errno = EINVAL;
 327            return -1;
 328        }
 329        *flags |= be32_to_cpu(tmp);
 330    }
 331    if (read_sync(csock, &buf, 124) != 124) {
 332        LOG("read failed (buf)");
 333        errno = EINVAL;
 334        return -1;
 335    }
 336        return 0;
 337}
 338
 339#ifndef _WIN32
 340int nbd_init(int fd, int csock, off_t size, size_t blocksize)
 341{
 342    TRACE("Setting block size to %lu", (unsigned long)blocksize);
 343
 344    if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) {
 345        int serrno = errno;
 346        LOG("Failed setting NBD block size");
 347        errno = serrno;
 348        return -1;
 349    }
 350
 351        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
 352
 353    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
 354        int serrno = errno;
 355        LOG("Failed setting size (in blocks)");
 356        errno = serrno;
 357        return -1;
 358    }
 359
 360    TRACE("Clearing NBD socket");
 361
 362    if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
 363        int serrno = errno;
 364        LOG("Failed clearing NBD socket");
 365        errno = serrno;
 366        return -1;
 367    }
 368
 369    TRACE("Setting NBD socket");
 370
 371    if (ioctl(fd, NBD_SET_SOCK, csock) == -1) {
 372        int serrno = errno;
 373        LOG("Failed to set NBD socket");
 374        errno = serrno;
 375        return -1;
 376    }
 377
 378    TRACE("Negotiation ended");
 379
 380    return 0;
 381}
 382
 383int nbd_disconnect(int fd)
 384{
 385    ioctl(fd, NBD_CLEAR_QUE);
 386    ioctl(fd, NBD_DISCONNECT);
 387    ioctl(fd, NBD_CLEAR_SOCK);
 388    return 0;
 389}
 390
 391int nbd_client(int fd)
 392{
 393    int ret;
 394    int serrno;
 395
 396    TRACE("Doing NBD loop");
 397
 398    ret = ioctl(fd, NBD_DO_IT);
 399    serrno = errno;
 400
 401    TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
 402
 403    TRACE("Clearing NBD queue");
 404    ioctl(fd, NBD_CLEAR_QUE);
 405
 406    TRACE("Clearing NBD socket");
 407    ioctl(fd, NBD_CLEAR_SOCK);
 408
 409    errno = serrno;
 410    return ret;
 411}
 412#else
 413int nbd_init(int fd, int csock, off_t size, size_t blocksize)
 414{
 415    errno = ENOTSUP;
 416    return -1;
 417}
 418
 419int nbd_disconnect(int fd)
 420{
 421    errno = ENOTSUP;
 422    return -1;
 423}
 424
 425int nbd_client(int fd)
 426{
 427    errno = ENOTSUP;
 428    return -1;
 429}
 430#endif
 431
 432int nbd_send_request(int csock, struct nbd_request *request)
 433{
 434    uint8_t buf[4 + 4 + 8 + 8 + 4];
 435
 436    cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
 437    cpu_to_be32w((uint32_t*)(buf + 4), request->type);
 438    cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
 439    cpu_to_be64w((uint64_t*)(buf + 16), request->from);
 440    cpu_to_be32w((uint32_t*)(buf + 24), request->len);
 441
 442    TRACE("Sending request to client: "
 443          "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
 444          request->from, request->len, request->handle, request->type);
 445
 446    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
 447        LOG("writing to socket failed");
 448        errno = EINVAL;
 449        return -1;
 450    }
 451    return 0;
 452}
 453
 454static int nbd_receive_request(int csock, struct nbd_request *request)
 455{
 456    uint8_t buf[4 + 4 + 8 + 8 + 4];
 457    uint32_t magic;
 458
 459    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
 460        LOG("read failed");
 461        errno = EINVAL;
 462        return -1;
 463    }
 464
 465    /* Request
 466       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
 467       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
 468       [ 8 .. 15]   handle
 469       [16 .. 23]   from
 470       [24 .. 27]   len
 471     */
 472
 473    magic = be32_to_cpup((uint32_t*)buf);
 474    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
 475    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
 476    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
 477    request->len   = be32_to_cpup((uint32_t*)(buf + 24));
 478
 479    TRACE("Got request: "
 480          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
 481          magic, request->type, request->from, request->len);
 482
 483    if (magic != NBD_REQUEST_MAGIC) {
 484        LOG("invalid magic (got 0x%x)", magic);
 485        errno = EINVAL;
 486        return -1;
 487    }
 488    return 0;
 489}
 490
 491int nbd_receive_reply(int csock, struct nbd_reply *reply)
 492{
 493    uint8_t buf[NBD_REPLY_SIZE];
 494    uint32_t magic;
 495
 496    memset(buf, 0xAA, sizeof(buf));
 497
 498    if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
 499        LOG("read failed");
 500        errno = EINVAL;
 501        return -1;
 502    }
 503
 504    /* Reply
 505       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
 506       [ 4 ..  7]    error   (0 == no error)
 507       [ 7 .. 15]    handle
 508     */
 509
 510    magic = be32_to_cpup((uint32_t*)buf);
 511    reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
 512    reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
 513
 514    TRACE("Got reply: "
 515          "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
 516          magic, reply->error, reply->handle);
 517
 518    if (magic != NBD_REPLY_MAGIC) {
 519        LOG("invalid magic (got 0x%x)", magic);
 520        errno = EINVAL;
 521        return -1;
 522    }
 523    return 0;
 524}
 525
 526static int nbd_send_reply(int csock, struct nbd_reply *reply)
 527{
 528    uint8_t buf[4 + 4 + 8];
 529
 530    /* Reply
 531       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
 532       [ 4 ..  7]    error   (0 == no error)
 533       [ 7 .. 15]    handle
 534     */
 535    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
 536    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
 537    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
 538
 539    TRACE("Sending response to client");
 540
 541    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
 542        LOG("writing to socket failed");
 543        errno = EINVAL;
 544        return -1;
 545    }
 546    return 0;
 547}
 548
 549int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
 550             off_t *offset, bool readonly, uint8_t *data, int data_size)
 551{
 552    struct nbd_request request;
 553    struct nbd_reply reply;
 554
 555    TRACE("Reading request.");
 556
 557    if (nbd_receive_request(csock, &request) == -1)
 558        return -1;
 559
 560    if (request.len + NBD_REPLY_SIZE > data_size) {
 561        LOG("len (%u) is larger than max len (%u)",
 562            request.len + NBD_REPLY_SIZE, data_size);
 563        errno = EINVAL;
 564        return -1;
 565    }
 566
 567    if ((request.from + request.len) < request.from) {
 568        LOG("integer overflow detected! "
 569            "you're probably being attacked");
 570        errno = EINVAL;
 571        return -1;
 572    }
 573
 574    if ((request.from + request.len) > size) {
 575            LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
 576            ", Offset: %" PRIu64 "\n",
 577                    request.from, request.len, (uint64_t)size, dev_offset);
 578        LOG("requested operation past EOF--bad client?");
 579        errno = EINVAL;
 580        return -1;
 581    }
 582
 583    TRACE("Decoding type");
 584
 585    reply.handle = request.handle;
 586    reply.error = 0;
 587
 588    switch (request.type) {
 589    case NBD_CMD_READ:
 590        TRACE("Request type is READ");
 591
 592        if (bdrv_read(bs, (request.from + dev_offset) / 512,
 593                  data + NBD_REPLY_SIZE,
 594                  request.len / 512) == -1) {
 595            LOG("reading from file failed");
 596            errno = EINVAL;
 597            return -1;
 598        }
 599        *offset += request.len;
 600
 601        TRACE("Read %u byte(s)", request.len);
 602
 603        /* Reply
 604           [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
 605           [ 4 ..  7]    error   (0 == no error)
 606           [ 7 .. 15]    handle
 607         */
 608
 609        cpu_to_be32w((uint32_t*)data, NBD_REPLY_MAGIC);
 610        cpu_to_be32w((uint32_t*)(data + 4), reply.error);
 611        cpu_to_be64w((uint64_t*)(data + 8), reply.handle);
 612
 613        TRACE("Sending data to client");
 614
 615        if (write_sync(csock, data,
 616                   request.len + NBD_REPLY_SIZE) !=
 617                   request.len + NBD_REPLY_SIZE) {
 618            LOG("writing to socket failed");
 619            errno = EINVAL;
 620            return -1;
 621        }
 622        break;
 623    case NBD_CMD_WRITE:
 624        TRACE("Request type is WRITE");
 625
 626        TRACE("Reading %u byte(s)", request.len);
 627
 628        if (read_sync(csock, data, request.len) != request.len) {
 629            LOG("reading from socket failed");
 630            errno = EINVAL;
 631            return -1;
 632        }
 633
 634        if (readonly) {
 635            TRACE("Server is read-only, return error");
 636            reply.error = 1;
 637        } else {
 638            TRACE("Writing to device");
 639
 640            if (bdrv_write(bs, (request.from + dev_offset) / 512,
 641                       data, request.len / 512) == -1) {
 642                LOG("writing to file failed");
 643                errno = EINVAL;
 644                return -1;
 645            }
 646
 647            *offset += request.len;
 648        }
 649
 650        if (nbd_send_reply(csock, &reply) == -1)
 651            return -1;
 652        break;
 653    case NBD_CMD_DISC:
 654        TRACE("Request type is DISCONNECT");
 655        errno = 0;
 656        return 1;
 657    default:
 658        LOG("invalid request type (%u) received", request.type);
 659        errno = EINVAL;
 660        return -1;
 661    }
 662
 663    TRACE("Request/Reply complete");
 664
 665    return 0;
 666}
 667