qemu/block/iscsi.c
<<
>>
Prefs
   1/*
   2 * QEMU Block driver for iSCSI images
   3 *
   4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
   5 * Copyright (c) 2012-2017 Peter Lieven <pl@kamp.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28#include <poll.h>
  29#include <math.h>
  30#include <arpa/inet.h>
  31#include "qemu/config-file.h"
  32#include "qemu/error-report.h"
  33#include "qemu/bitops.h"
  34#include "qemu/bitmap.h"
  35#include "block/block_int.h"
  36#include "scsi/constants.h"
  37#include "qemu/iov.h"
  38#include "qemu/option.h"
  39#include "qemu/uuid.h"
  40#include "qapi/error.h"
  41#include "qapi/qapi-commands-misc.h"
  42#include "qapi/qmp/qdict.h"
  43#include "qapi/qmp/qstring.h"
  44#include "crypto/secret.h"
  45#include "scsi/utils.h"
  46
  47/* Conflict between scsi/utils.h and libiscsi! :( */
  48#define SCSI_XFER_NONE ISCSI_XFER_NONE
  49#include <iscsi/iscsi.h>
  50#include <iscsi/scsi-lowlevel.h>
  51#undef SCSI_XFER_NONE
  52QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
  53
  54#ifdef __linux__
  55#include <scsi/sg.h>
  56#endif
  57
/* Per-LUN driver state; this is what bs->opaque points to in this driver. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context requests are issued on */
    AioContext *aio_context;        /* context for fd handlers, BHs and timers */
    int lun;                        /* LUN number passed to libiscsi task calls */
    enum scsi_inquiry_peripheral_device_type type;
    int block_size;                 /* LUN logical block size in bytes */
    uint64_t num_blocks;            /* number of logical blocks on the LUN */
    int events;                     /* event mask last registered by
                                     * iscsi_set_events() */
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;         /* re-armed every EVENT_INTERVAL ms by
                                     * iscsi_timed_check_events() */
    QemuMutex mutex;                /* held around calls into libiscsi */
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
    /* The allocmap tracks which clusters (pages) on the iSCSI target are
     * allocated and which are not. In case a target returns zeros for
     * unallocated pages (iscsilun->lbprz) we can directly return zeros instead
     * of reading zeros over the wire if a read request falls within an
     * unallocated block. As there are 3 possible states we need 2 bitmaps to
     * track. allocmap_valid keeps track if QEMU's information about a page is
     * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
     * valid information about a page the corresponding allocmap entry should be
     * switched to unallocated as well to force a new lookup of the allocation
     * status as lookups are generally skipped if a page is suspected to be
     * allocated. If an iSCSI target is opened with cache.direct = on the
     * allocmap_valid does not exist turning all cached information invalid so
     * that a fresh lookup is made for any page even if allocmap entry returns
     * it's unallocated. */
    unsigned long *allocmap;
    unsigned long *allocmap_valid;
    long allocmap_size;             /* number of clusters (bits) per bitmap */
    int cluster_size;               /* allocmap granularity in bytes */
    bool use_16_for_rw;             /* use READ16/WRITE16 instead of the
                                     * 10-byte variants */
    bool write_protected;
    bool lbpme;                     /* when false, iscsi_co_block_status()
                                     * reports everything as allocated */
    bool lbprz;                     /* deallocated blocks are reported as
                                     * reading zero */
    bool dpofua;                    /* must be set for FUA writes (asserted) */
    bool has_write_same;
    bool request_timed_out;         /* set on SCSI_STATUS_TIMEOUT; consumed by
                                     * iscsi_timed_check_events() to reconnect */
} IscsiLun;
  98
/* State of one coroutine-based request, shared between the issuing coroutine
 * and the libiscsi completion callback iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    int status;             /* status passed to the completion callback */
    int complete;           /* non-zero once the waiting coroutine may go on */
    int retries;            /* number of failed attempts seen so far */
    int do_retry;           /* callback asks the issuer to resubmit */
    struct scsi_task *task; /* libiscsi task; freed by the issuing function */
    Coroutine *co;          /* coroutine to wake on completion, if any */
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;  /* delays a retry after Busy/TaskSetFull/TimeOut */
    int err_code;           /* result of iscsi_translate_sense() on failure */
    char *err_str;          /* g_strdup()ed error text; issuer g_free()s it */
} IscsiTask;
 111
/* Control block for callback-style (AIO) requests. */
typedef struct IscsiAIOCB {
    BlockAIOCB common;      /* must stay first: BlockAIOCB pointers are cast
                             * to IscsiAIOCB in iscsi_aio_cancel() */
    QEMUBH *bh;             /* completion bottom half, see iscsi_schedule_bh() */
    IscsiLun *iscsilun;
    struct scsi_task *task; /* freed by iscsi_bh_cb() */
    uint8_t *buf;           /* g_free()d by iscsi_bh_cb() */
    int status;             /* -EINPROGRESS while cancellable; value handed to
                             * the completion callback */
    int64_t sector_num;
    int nb_sectors;
    int ret;
#ifdef __linux__
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
 126
/* libiscsi uses time_t so it's enough to process events every second */
#define EVENT_INTERVAL 1000
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* Maximum number of retries; one entry of iscsi_retry_times per attempt. */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* Mean back-off (in ms, fed to exp_random()) for retry attempt N. */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
 133
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors we check the allocation status of the area covered by the
 * request first if the allocation map indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
 142
 143static void
 144iscsi_bh_cb(void *p)
 145{
 146    IscsiAIOCB *acb = p;
 147
 148    qemu_bh_delete(acb->bh);
 149
 150    g_free(acb->buf);
 151    acb->buf = NULL;
 152
 153    acb->common.cb(acb->common.opaque, acb->status);
 154
 155    if (acb->task != NULL) {
 156        scsi_free_scsi_task(acb->task);
 157        acb->task = NULL;
 158    }
 159
 160    qemu_aio_unref(acb);
 161}
 162
 163static void
 164iscsi_schedule_bh(IscsiAIOCB *acb)
 165{
 166    if (acb->bh) {
 167        return;
 168    }
 169    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
 170    qemu_bh_schedule(acb->bh);
 171}
 172
 173static void iscsi_co_generic_bh_cb(void *opaque)
 174{
 175    struct IscsiTask *iTask = opaque;
 176
 177    iTask->complete = 1;
 178    aio_co_wake(iTask->co);
 179}
 180
 181static void iscsi_retry_timer_expired(void *opaque)
 182{
 183    struct IscsiTask *iTask = opaque;
 184    iTask->complete = 1;
 185    if (iTask->co) {
 186        aio_co_wake(iTask->co);
 187    }
 188}
 189
 190static inline unsigned exp_random(double mean)
 191{
 192    return -mean * log((double)rand() / RAND_MAX);
 193}
 194
/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 * libiscsi 1.10.0, together with other constants we need.  Use it as
 * a hint that we have to define them ourselves if needed, to keep the
 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 * the test because SCSI_STATUS_* is an enum.
 *
 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 * introduced in 1.11.0.  If it is present, there is no need to define
 * anything.
 */
#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    !defined(LIBISCSI_API_VERSION)
#define SCSI_STATUS_TASK_SET_FULL                          0x28
#define SCSI_STATUS_TIMEOUT                                0x0f000002
#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
#endif

/* Headers that do not define LIBISCSI_API_VERSION get a fixed fallback value
 * so that the "#if LIBISCSI_API_VERSION >= ..." checks in this file work. */
#ifndef LIBISCSI_API_VERSION
#define LIBISCSI_API_VERSION 20130701
#endif
 217
 218static int iscsi_translate_sense(struct scsi_sense *sense)
 219{
 220    return - scsi_sense_to_errno(sense->key,
 221                                 (sense->ascq & 0xFF00) >> 8,
 222                                 sense->ascq & 0xFF);
 223}
 224
 225/* Called (via iscsi_service) with QemuMutex held.  */
 226static void
 227iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 228                        void *command_data, void *opaque)
 229{
 230    struct IscsiTask *iTask = opaque;
 231    struct scsi_task *task = command_data;
 232
 233    iTask->status = status;
 234    iTask->do_retry = 0;
 235    iTask->task = task;
 236
 237    if (status != SCSI_STATUS_GOOD) {
 238        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
 239            if (status == SCSI_STATUS_CHECK_CONDITION
 240                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
 241                error_report("iSCSI CheckCondition: %s",
 242                             iscsi_get_error(iscsi));
 243                iTask->do_retry = 1;
 244                goto out;
 245            }
 246            if (status == SCSI_STATUS_BUSY ||
 247                status == SCSI_STATUS_TIMEOUT ||
 248                status == SCSI_STATUS_TASK_SET_FULL) {
 249                unsigned retry_time =
 250                    exp_random(iscsi_retry_times[iTask->retries - 1]);
 251                if (status == SCSI_STATUS_TIMEOUT) {
 252                    /* make sure the request is rescheduled AFTER the
 253                     * reconnect is initiated */
 254                    retry_time = EVENT_INTERVAL * 2;
 255                    iTask->iscsilun->request_timed_out = true;
 256                }
 257                error_report("iSCSI Busy/TaskSetFull/TimeOut"
 258                             " (retry #%u in %u ms): %s",
 259                             iTask->retries, retry_time,
 260                             iscsi_get_error(iscsi));
 261                aio_timer_init(iTask->iscsilun->aio_context,
 262                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
 263                               SCALE_MS, iscsi_retry_timer_expired, iTask);
 264                timer_mod(&iTask->retry_timer,
 265                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
 266                iTask->do_retry = 1;
 267                return;
 268            }
 269        }
 270        iTask->err_code = iscsi_translate_sense(&task->sense);
 271        iTask->err_str = g_strdup(iscsi_get_error(iscsi));
 272    }
 273
 274out:
 275    if (iTask->co) {
 276        aio_bh_schedule_oneshot(iTask->iscsilun->aio_context,
 277                                 iscsi_co_generic_bh_cb, iTask);
 278    } else {
 279        iTask->complete = 1;
 280    }
 281}
 282
 283static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 284{
 285    *iTask = (struct IscsiTask) {
 286        .co         = qemu_coroutine_self(),
 287        .iscsilun   = iscsilun,
 288    };
 289}
 290
 291static void
 292iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 293                    void *private_data)
 294{
 295    IscsiAIOCB *acb = private_data;
 296
 297    acb->status = -ECANCELED;
 298    iscsi_schedule_bh(acb);
 299}
 300
 301static void
 302iscsi_aio_cancel(BlockAIOCB *blockacb)
 303{
 304    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
 305    IscsiLun *iscsilun = acb->iscsilun;
 306
 307    if (acb->status != -EINPROGRESS) {
 308        return;
 309    }
 310
 311    /* send a task mgmt call to the target to cancel the task on the target */
 312    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
 313                                     iscsi_abort_task_cb, acb);
 314
 315}
 316
/* AIOCB descriptor for IscsiAIOCB requests: gives the allocation size and
 * the asynchronous cancel hook. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
 321
 322
/* fd handlers, declared early because iscsi_set_events() registers them */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
 325
 326/* Called with QemuMutex held.  */
 327static void
 328iscsi_set_events(IscsiLun *iscsilun)
 329{
 330    struct iscsi_context *iscsi = iscsilun->iscsi;
 331    int ev = iscsi_which_events(iscsi);
 332
 333    if (ev != iscsilun->events) {
 334        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
 335                           false,
 336                           (ev & POLLIN) ? iscsi_process_read : NULL,
 337                           (ev & POLLOUT) ? iscsi_process_write : NULL,
 338                           NULL,
 339                           iscsilun);
 340        iscsilun->events = ev;
 341    }
 342}
 343
 344static void iscsi_timed_check_events(void *opaque)
 345{
 346    IscsiLun *iscsilun = opaque;
 347
 348    /* check for timed out requests */
 349    iscsi_service(iscsilun->iscsi, 0);
 350
 351    if (iscsilun->request_timed_out) {
 352        iscsilun->request_timed_out = false;
 353        iscsi_reconnect(iscsilun->iscsi);
 354    }
 355
 356    /* newer versions of libiscsi may return zero events. Ensure we are able
 357     * to return to service once this situation changes. */
 358    iscsi_set_events(iscsilun);
 359
 360    timer_mod(iscsilun->event_timer,
 361              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 362}
 363
 364static void
 365iscsi_process_read(void *arg)
 366{
 367    IscsiLun *iscsilun = arg;
 368    struct iscsi_context *iscsi = iscsilun->iscsi;
 369
 370    qemu_mutex_lock(&iscsilun->mutex);
 371    iscsi_service(iscsi, POLLIN);
 372    iscsi_set_events(iscsilun);
 373    qemu_mutex_unlock(&iscsilun->mutex);
 374}
 375
 376static void
 377iscsi_process_write(void *arg)
 378{
 379    IscsiLun *iscsilun = arg;
 380    struct iscsi_context *iscsi = iscsilun->iscsi;
 381
 382    qemu_mutex_lock(&iscsilun->mutex);
 383    iscsi_service(iscsi, POLLOUT);
 384    iscsi_set_events(iscsilun);
 385    qemu_mutex_unlock(&iscsilun->mutex);
 386}
 387
 388static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
 389{
 390    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
 391}
 392
 393static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
 394{
 395    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 396}
 397
 398static bool is_byte_request_lun_aligned(int64_t offset, int count,
 399                                        IscsiLun *iscsilun)
 400{
 401    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
 402        error_report("iSCSI misaligned request: "
 403                     "iscsilun->block_size %u, offset %" PRIi64
 404                     ", count %d",
 405                     iscsilun->block_size, offset, count);
 406        return false;
 407    }
 408    return true;
 409}
 410
 411static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
 412                                          IscsiLun *iscsilun)
 413{
 414    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
 415    return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
 416                                       nb_sectors << BDRV_SECTOR_BITS,
 417                                       iscsilun);
 418}
 419
 420static void iscsi_allocmap_free(IscsiLun *iscsilun)
 421{
 422    g_free(iscsilun->allocmap);
 423    g_free(iscsilun->allocmap_valid);
 424    iscsilun->allocmap = NULL;
 425    iscsilun->allocmap_valid = NULL;
 426}
 427
 428
 429static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
 430{
 431    iscsi_allocmap_free(iscsilun);
 432
 433    assert(iscsilun->cluster_size);
 434    iscsilun->allocmap_size =
 435        DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
 436                     iscsilun->cluster_size);
 437
 438    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
 439    if (!iscsilun->allocmap) {
 440        return -ENOMEM;
 441    }
 442
 443    if (open_flags & BDRV_O_NOCACHE) {
 444        /* when cache.direct = on all allocmap entries are
 445         * treated as invalid to force a relookup of the block
 446         * status on every read request */
 447        return 0;
 448    }
 449
 450    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
 451    if (!iscsilun->allocmap_valid) {
 452        /* if we are under memory pressure free the allocmap as well */
 453        iscsi_allocmap_free(iscsilun);
 454        return -ENOMEM;
 455    }
 456
 457    return 0;
 458}
 459
 460static void
 461iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
 462                      int64_t bytes, bool allocated, bool valid)
 463{
 464    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
 465
 466    if (iscsilun->allocmap == NULL) {
 467        return;
 468    }
 469    /* expand to entirely contain all affected clusters */
 470    assert(iscsilun->cluster_size);
 471    cl_num_expanded = offset / iscsilun->cluster_size;
 472    nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
 473                                   iscsilun->cluster_size) - cl_num_expanded;
 474    /* shrink to touch only completely contained clusters */
 475    cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
 476    nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
 477    if (allocated) {
 478        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
 479    } else {
 480        if (nb_cls_shrunk > 0) {
 481            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
 482        }
 483    }
 484
 485    if (iscsilun->allocmap_valid == NULL) {
 486        return;
 487    }
 488    if (valid) {
 489        if (nb_cls_shrunk > 0) {
 490            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
 491        }
 492    } else {
 493        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
 494                     nb_cls_expanded);
 495    }
 496}
 497
 498static void
 499iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
 500                             int64_t bytes)
 501{
 502    iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
 503}
 504
 505static void
 506iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
 507                               int64_t bytes)
 508{
 509    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
 510     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
 511     */
 512    iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
 513}
 514
 515static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
 516                                       int64_t bytes)
 517{
 518    iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
 519}
 520
 521static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
 522{
 523    if (iscsilun->allocmap) {
 524        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
 525    }
 526    if (iscsilun->allocmap_valid) {
 527        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
 528    }
 529}
 530
 531static inline bool
 532iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
 533                            int64_t bytes)
 534{
 535    unsigned long size;
 536    if (iscsilun->allocmap == NULL) {
 537        return true;
 538    }
 539    assert(iscsilun->cluster_size);
 540    size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
 541    return !(find_next_bit(iscsilun->allocmap, size,
 542                           offset / iscsilun->cluster_size) == size);
 543}
 544
 545static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
 546                                           int64_t offset, int64_t bytes)
 547{
 548    unsigned long size;
 549    if (iscsilun->allocmap_valid == NULL) {
 550        return false;
 551    }
 552    assert(iscsilun->cluster_size);
 553    size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
 554    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
 555                               offset / iscsilun->cluster_size) == size);
 556}
 557
 558static int coroutine_fn
 559iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 560                      QEMUIOVector *iov, int flags)
 561{
 562    IscsiLun *iscsilun = bs->opaque;
 563    struct IscsiTask iTask;
 564    uint64_t lba;
 565    uint32_t num_sectors;
 566    bool fua = flags & BDRV_REQ_FUA;
 567    int r = 0;
 568
 569    if (fua) {
 570        assert(iscsilun->dpofua);
 571    }
 572    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 573        return -EINVAL;
 574    }
 575
 576    if (bs->bl.max_transfer) {
 577        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 578    }
 579
 580    lba = sector_qemu2lun(sector_num, iscsilun);
 581    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 582    iscsi_co_init_iscsitask(iscsilun, &iTask);
 583    qemu_mutex_lock(&iscsilun->mutex);
 584retry:
 585    if (iscsilun->use_16_for_rw) {
 586#if LIBISCSI_API_VERSION >= (20160603)
 587        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 588                                            NULL, num_sectors * iscsilun->block_size,
 589                                            iscsilun->block_size, 0, 0, fua, 0, 0,
 590                                            iscsi_co_generic_cb, &iTask,
 591                                            (struct scsi_iovec *)iov->iov, iov->niov);
 592    } else {
 593        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 594                                            NULL, num_sectors * iscsilun->block_size,
 595                                            iscsilun->block_size, 0, 0, fua, 0, 0,
 596                                            iscsi_co_generic_cb, &iTask,
 597                                            (struct scsi_iovec *)iov->iov, iov->niov);
 598    }
 599#else
 600        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
 601                                        NULL, num_sectors * iscsilun->block_size,
 602                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 603                                        iscsi_co_generic_cb, &iTask);
 604    } else {
 605        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
 606                                        NULL, num_sectors * iscsilun->block_size,
 607                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 608                                        iscsi_co_generic_cb, &iTask);
 609    }
 610#endif
 611    if (iTask.task == NULL) {
 612        qemu_mutex_unlock(&iscsilun->mutex);
 613        return -ENOMEM;
 614    }
 615#if LIBISCSI_API_VERSION < (20160603)
 616    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
 617                          iov->niov);
 618#endif
 619    while (!iTask.complete) {
 620        iscsi_set_events(iscsilun);
 621        qemu_mutex_unlock(&iscsilun->mutex);
 622        qemu_coroutine_yield();
 623        qemu_mutex_lock(&iscsilun->mutex);
 624    }
 625
 626    if (iTask.task != NULL) {
 627        scsi_free_scsi_task(iTask.task);
 628        iTask.task = NULL;
 629    }
 630
 631    if (iTask.do_retry) {
 632        iTask.complete = 0;
 633        goto retry;
 634    }
 635
 636    if (iTask.status != SCSI_STATUS_GOOD) {
 637        iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 638                                   nb_sectors * BDRV_SECTOR_SIZE);
 639        error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
 640                     iTask.err_str);
 641        r = iTask.err_code;
 642        goto out_unlock;
 643    }
 644
 645    iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 646                                 nb_sectors * BDRV_SECTOR_SIZE);
 647
 648out_unlock:
 649    qemu_mutex_unlock(&iscsilun->mutex);
 650    g_free(iTask.err_str);
 651    return r;
 652}
 653
 654
 655
 656static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
 657                                              bool want_zero, int64_t offset,
 658                                              int64_t bytes, int64_t *pnum,
 659                                              int64_t *map,
 660                                              BlockDriverState **file)
 661{
 662    IscsiLun *iscsilun = bs->opaque;
 663    struct scsi_get_lba_status *lbas = NULL;
 664    struct scsi_lba_status_descriptor *lbasd = NULL;
 665    struct IscsiTask iTask;
 666    uint64_t lba;
 667    int ret;
 668
 669    iscsi_co_init_iscsitask(iscsilun, &iTask);
 670
 671    assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));
 672
 673    /* default to all sectors allocated */
 674    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 675    if (map) {
 676        *map = offset;
 677    }
 678    *pnum = bytes;
 679
 680    /* LUN does not support logical block provisioning */
 681    if (!iscsilun->lbpme) {
 682        goto out;
 683    }
 684
 685    lba = offset / iscsilun->block_size;
 686
 687    qemu_mutex_lock(&iscsilun->mutex);
 688retry:
 689    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
 690                                  lba, 8 + 16, iscsi_co_generic_cb,
 691                                  &iTask) == NULL) {
 692        ret = -ENOMEM;
 693        goto out_unlock;
 694    }
 695
 696    while (!iTask.complete) {
 697        iscsi_set_events(iscsilun);
 698        qemu_mutex_unlock(&iscsilun->mutex);
 699        qemu_coroutine_yield();
 700        qemu_mutex_lock(&iscsilun->mutex);
 701    }
 702
 703    if (iTask.do_retry) {
 704        if (iTask.task != NULL) {
 705            scsi_free_scsi_task(iTask.task);
 706            iTask.task = NULL;
 707        }
 708        iTask.complete = 0;
 709        goto retry;
 710    }
 711
 712    if (iTask.status != SCSI_STATUS_GOOD) {
 713        /* in case the get_lba_status_callout fails (i.e.
 714         * because the device is busy or the cmd is not
 715         * supported) we pretend all blocks are allocated
 716         * for backwards compatibility */
 717        error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s",
 718                     lba, iTask.err_str);
 719        goto out_unlock;
 720    }
 721
 722    lbas = scsi_datain_unmarshall(iTask.task);
 723    if (lbas == NULL) {
 724        ret = -EIO;
 725        goto out_unlock;
 726    }
 727
 728    lbasd = &lbas->descriptors[0];
 729
 730    if (lba != lbasd->lba) {
 731        ret = -EIO;
 732        goto out_unlock;
 733    }
 734
 735    *pnum = lbasd->num_blocks * iscsilun->block_size;
 736
 737    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
 738        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
 739        ret &= ~BDRV_BLOCK_DATA;
 740        if (iscsilun->lbprz) {
 741            ret |= BDRV_BLOCK_ZERO;
 742        }
 743    }
 744
 745    if (ret & BDRV_BLOCK_ZERO) {
 746        iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
 747    } else {
 748        iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
 749    }
 750
 751    if (*pnum > bytes) {
 752        *pnum = bytes;
 753    }
 754out_unlock:
 755    qemu_mutex_unlock(&iscsilun->mutex);
 756    g_free(iTask.err_str);
 757out:
 758    if (iTask.task != NULL) {
 759        scsi_free_scsi_task(iTask.task);
 760    }
 761    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
 762        *file = bs;
 763    }
 764    return ret;
 765}
 766
 767static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
 768                                       int64_t sector_num, int nb_sectors,
 769                                       QEMUIOVector *iov)
 770{
 771    IscsiLun *iscsilun = bs->opaque;
 772    struct IscsiTask iTask;
 773    uint64_t lba;
 774    uint32_t num_sectors;
 775    int r = 0;
 776
 777    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 778        return -EINVAL;
 779    }
 780
 781    if (bs->bl.max_transfer) {
 782        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 783    }
 784
 785    /* if cache.direct is off and we have a valid entry in our allocation map
 786     * we can skip checking the block status and directly return zeroes if
 787     * the request falls within an unallocated area */
 788    if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 789                                nb_sectors * BDRV_SECTOR_SIZE) &&
 790        !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 791                                     nb_sectors * BDRV_SECTOR_SIZE)) {
 792            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 793            return 0;
 794    }
 795
 796    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
 797        !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 798                                 nb_sectors * BDRV_SECTOR_SIZE) &&
 799        !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
 800                                     nb_sectors * BDRV_SECTOR_SIZE)) {
 801        int64_t pnum;
 802        /* check the block status from the beginning of the cluster
 803         * containing the start sector */
 804        int64_t head;
 805        int ret;
 806
 807        assert(iscsilun->cluster_size);
 808        head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
 809        ret = iscsi_co_block_status(bs, true,
 810                                    sector_num * BDRV_SECTOR_SIZE - head,
 811                                    BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
 812        if (ret < 0) {
 813            return ret;
 814        }
 815        /* if the whole request falls into an unallocated area we can avoid
 816         * reading and directly return zeroes instead */
 817        if (ret & BDRV_BLOCK_ZERO &&
 818            pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
 819            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 820            return 0;
 821        }
 822    }
 823
 824    lba = sector_qemu2lun(sector_num, iscsilun);
 825    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 826
 827    iscsi_co_init_iscsitask(iscsilun, &iTask);
 828    qemu_mutex_lock(&iscsilun->mutex);
 829retry:
 830    if (iscsilun->use_16_for_rw) {
 831#if LIBISCSI_API_VERSION >= (20160603)
 832        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 833                                           num_sectors * iscsilun->block_size,
 834                                           iscsilun->block_size, 0, 0, 0, 0, 0,
 835                                           iscsi_co_generic_cb, &iTask,
 836                                           (struct scsi_iovec *)iov->iov, iov->niov);
 837    } else {
 838        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 839                                           num_sectors * iscsilun->block_size,
 840                                           iscsilun->block_size,
 841                                           0, 0, 0, 0, 0,
 842                                           iscsi_co_generic_cb, &iTask,
 843                                           (struct scsi_iovec *)iov->iov, iov->niov);
 844    }
 845#else
 846        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
 847                                       num_sectors * iscsilun->block_size,
 848                                       iscsilun->block_size, 0, 0, 0, 0, 0,
 849                                       iscsi_co_generic_cb, &iTask);
 850    } else {
 851        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
 852                                       num_sectors * iscsilun->block_size,
 853                                       iscsilun->block_size,
 854                                       0, 0, 0, 0, 0,
 855                                       iscsi_co_generic_cb, &iTask);
 856    }
 857#endif
 858    if (iTask.task == NULL) {
 859        qemu_mutex_unlock(&iscsilun->mutex);
 860        return -ENOMEM;
 861    }
 862#if LIBISCSI_API_VERSION < (20160603)
 863    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
 864#endif
 865    while (!iTask.complete) {
 866        iscsi_set_events(iscsilun);
 867        qemu_mutex_unlock(&iscsilun->mutex);
 868        qemu_coroutine_yield();
 869        qemu_mutex_lock(&iscsilun->mutex);
 870    }
 871
 872    if (iTask.task != NULL) {
 873        scsi_free_scsi_task(iTask.task);
 874        iTask.task = NULL;
 875    }
 876
 877    if (iTask.do_retry) {
 878        iTask.complete = 0;
 879        goto retry;
 880    }
 881
 882    if (iTask.status != SCSI_STATUS_GOOD) {
 883        error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
 884                     lba, iTask.err_str);
 885        r = iTask.err_code;
 886    }
 887
 888    qemu_mutex_unlock(&iscsilun->mutex);
 889    g_free(iTask.err_str);
 890    return r;
 891}
 892
 893static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 894{
 895    IscsiLun *iscsilun = bs->opaque;
 896    struct IscsiTask iTask;
 897    int r = 0;
 898
 899    iscsi_co_init_iscsitask(iscsilun, &iTask);
 900    qemu_mutex_lock(&iscsilun->mutex);
 901retry:
 902    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
 903                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
 904        qemu_mutex_unlock(&iscsilun->mutex);
 905        return -ENOMEM;
 906    }
 907
 908    while (!iTask.complete) {
 909        iscsi_set_events(iscsilun);
 910        qemu_mutex_unlock(&iscsilun->mutex);
 911        qemu_coroutine_yield();
 912        qemu_mutex_lock(&iscsilun->mutex);
 913    }
 914
 915    if (iTask.task != NULL) {
 916        scsi_free_scsi_task(iTask.task);
 917        iTask.task = NULL;
 918    }
 919
 920    if (iTask.do_retry) {
 921        iTask.complete = 0;
 922        goto retry;
 923    }
 924
 925    if (iTask.status != SCSI_STATUS_GOOD) {
 926        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
 927        r = iTask.err_code;
 928    }
 929
 930    qemu_mutex_unlock(&iscsilun->mutex);
 931    g_free(iTask.err_str);
 932    return r;
 933}
 934
 935#ifdef __linux__
 936/* Called (via iscsi_service) with QemuMutex held.  */
 937static void
 938iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 939                     void *command_data, void *opaque)
 940{
 941    IscsiAIOCB *acb = opaque;
 942
 943    g_free(acb->buf);
 944    acb->buf = NULL;
 945
 946    acb->status = 0;
 947    if (status < 0) {
 948        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
 949                     iscsi_get_error(iscsi));
 950        acb->status = iscsi_translate_sense(&acb->task->sense);
 951    }
 952
 953    acb->ioh->driver_status = 0;
 954    acb->ioh->host_status   = 0;
 955    acb->ioh->resid         = 0;
 956    acb->ioh->status        = status;
 957
 958#define SG_ERR_DRIVER_SENSE    0x08
 959
 960    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
 961        int ss;
 962
 963        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
 964
 965        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
 966        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
 967             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
 968        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
 969    }
 970
 971    iscsi_schedule_bh(acb);
 972}
 973
 974static void iscsi_ioctl_bh_completion(void *opaque)
 975{
 976    IscsiAIOCB *acb = opaque;
 977
 978    qemu_bh_delete(acb->bh);
 979    acb->common.cb(acb->common.opaque, acb->ret);
 980    qemu_aio_unref(acb);
 981}
 982
 983static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
 984{
 985    BlockDriverState *bs = acb->common.bs;
 986    IscsiLun *iscsilun = bs->opaque;
 987    int ret = 0;
 988
 989    switch (req) {
 990    case SG_GET_VERSION_NUM:
 991        *(int *)buf = 30000;
 992        break;
 993    case SG_GET_SCSI_ID:
 994        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
 995        break;
 996    default:
 997        ret = -EINVAL;
 998    }
 999    assert(!acb->bh);
1000    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
1001                         iscsi_ioctl_bh_completion, acb);
1002    acb->ret = ret;
1003    qemu_bh_schedule(acb->bh);
1004}
1005
1006static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
1007        unsigned long int req, void *buf,
1008        BlockCompletionFunc *cb, void *opaque)
1009{
1010    IscsiLun *iscsilun = bs->opaque;
1011    struct iscsi_context *iscsi = iscsilun->iscsi;
1012    struct iscsi_data data;
1013    IscsiAIOCB *acb;
1014
1015    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
1016
1017    acb->iscsilun = iscsilun;
1018    acb->bh          = NULL;
1019    acb->status      = -EINPROGRESS;
1020    acb->buf         = NULL;
1021    acb->ioh         = buf;
1022
1023    if (req != SG_IO) {
1024        iscsi_ioctl_handle_emulated(acb, req, buf);
1025        return &acb->common;
1026    }
1027
1028    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
1029        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
1030                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
1031        qemu_aio_unref(acb);
1032        return NULL;
1033    }
1034
1035    acb->task = malloc(sizeof(struct scsi_task));
1036    if (acb->task == NULL) {
1037        error_report("iSCSI: Failed to allocate task for scsi command. %s",
1038                     iscsi_get_error(iscsi));
1039        qemu_aio_unref(acb);
1040        return NULL;
1041    }
1042    memset(acb->task, 0, sizeof(struct scsi_task));
1043
1044    switch (acb->ioh->dxfer_direction) {
1045    case SG_DXFER_TO_DEV:
1046        acb->task->xfer_dir = SCSI_XFER_WRITE;
1047        break;
1048    case SG_DXFER_FROM_DEV:
1049        acb->task->xfer_dir = SCSI_XFER_READ;
1050        break;
1051    default:
1052        acb->task->xfer_dir = SCSI_XFER_NONE;
1053        break;
1054    }
1055
1056    acb->task->cdb_size = acb->ioh->cmd_len;
1057    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
1058    acb->task->expxferlen = acb->ioh->dxfer_len;
1059
1060    data.size = 0;
1061    qemu_mutex_lock(&iscsilun->mutex);
1062    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
1063        if (acb->ioh->iovec_count == 0) {
1064            data.data = acb->ioh->dxferp;
1065            data.size = acb->ioh->dxfer_len;
1066        } else {
1067            scsi_task_set_iov_out(acb->task,
1068                                 (struct scsi_iovec *) acb->ioh->dxferp,
1069                                 acb->ioh->iovec_count);
1070        }
1071    }
1072
1073    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1074                                 iscsi_aio_ioctl_cb,
1075                                 (data.size > 0) ? &data : NULL,
1076                                 acb) != 0) {
1077        qemu_mutex_unlock(&iscsilun->mutex);
1078        scsi_free_scsi_task(acb->task);
1079        qemu_aio_unref(acb);
1080        return NULL;
1081    }
1082
1083    /* tell libiscsi to read straight into the buffer we got from ioctl */
1084    if (acb->task->xfer_dir == SCSI_XFER_READ) {
1085        if (acb->ioh->iovec_count == 0) {
1086            scsi_task_add_data_in_buffer(acb->task,
1087                                         acb->ioh->dxfer_len,
1088                                         acb->ioh->dxferp);
1089        } else {
1090            scsi_task_set_iov_in(acb->task,
1091                                 (struct scsi_iovec *) acb->ioh->dxferp,
1092                                 acb->ioh->iovec_count);
1093        }
1094    }
1095
1096    iscsi_set_events(iscsilun);
1097    qemu_mutex_unlock(&iscsilun->mutex);
1098
1099    return &acb->common;
1100}
1101
1102#endif
1103
1104static int64_t
1105iscsi_getlength(BlockDriverState *bs)
1106{
1107    IscsiLun *iscsilun = bs->opaque;
1108    int64_t len;
1109
1110    len  = iscsilun->num_blocks;
1111    len *= iscsilun->block_size;
1112
1113    return len;
1114}
1115
1116static int
1117coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
1118{
1119    IscsiLun *iscsilun = bs->opaque;
1120    struct IscsiTask iTask;
1121    struct unmap_list list;
1122    int r = 0;
1123
1124    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1125        return -ENOTSUP;
1126    }
1127
1128    if (!iscsilun->lbp.lbpu) {
1129        /* UNMAP is not supported by the target */
1130        return 0;
1131    }
1132
1133    list.lba = offset / iscsilun->block_size;
1134    list.num = bytes / iscsilun->block_size;
1135
1136    iscsi_co_init_iscsitask(iscsilun, &iTask);
1137    qemu_mutex_lock(&iscsilun->mutex);
1138retry:
1139    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1140                         iscsi_co_generic_cb, &iTask) == NULL) {
1141        r = -ENOMEM;
1142        goto out_unlock;
1143    }
1144
1145    while (!iTask.complete) {
1146        iscsi_set_events(iscsilun);
1147        qemu_mutex_unlock(&iscsilun->mutex);
1148        qemu_coroutine_yield();
1149        qemu_mutex_lock(&iscsilun->mutex);
1150    }
1151
1152    if (iTask.task != NULL) {
1153        scsi_free_scsi_task(iTask.task);
1154        iTask.task = NULL;
1155    }
1156
1157    if (iTask.do_retry) {
1158        iTask.complete = 0;
1159        goto retry;
1160    }
1161
1162    iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1163
1164    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1165        /* the target might fail with a check condition if it
1166           is not happy with the alignment of the UNMAP request
1167           we silently fail in this case */
1168        goto out_unlock;
1169    }
1170
1171    if (iTask.status != SCSI_STATUS_GOOD) {
1172        error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s",
1173                     list.lba, iTask.err_str);
1174        r = iTask.err_code;
1175        goto out_unlock;
1176    }
1177
1178out_unlock:
1179    qemu_mutex_unlock(&iscsilun->mutex);
1180    g_free(iTask.err_str);
1181    return r;
1182}
1183
1184static int
1185coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1186                                    int bytes, BdrvRequestFlags flags)
1187{
1188    IscsiLun *iscsilun = bs->opaque;
1189    struct IscsiTask iTask;
1190    uint64_t lba;
1191    uint32_t nb_blocks;
1192    bool use_16_for_ws = iscsilun->use_16_for_rw;
1193    int r = 0;
1194
1195    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1196        return -ENOTSUP;
1197    }
1198
1199    if (flags & BDRV_REQ_MAY_UNMAP) {
1200        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1201            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
1202            use_16_for_ws = true;
1203        }
1204        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1205            /* WRITESAME16 with UNMAP is not supported by the target,
1206             * fall back and try WRITESAME10/16 without UNMAP */
1207            flags &= ~BDRV_REQ_MAY_UNMAP;
1208            use_16_for_ws = iscsilun->use_16_for_rw;
1209        }
1210    }
1211
1212    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1213        /* WRITESAME without UNMAP is not supported by the target */
1214        return -ENOTSUP;
1215    }
1216
1217    lba = offset / iscsilun->block_size;
1218    nb_blocks = bytes / iscsilun->block_size;
1219
1220    if (iscsilun->zeroblock == NULL) {
1221        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1222        if (iscsilun->zeroblock == NULL) {
1223            return -ENOMEM;
1224        }
1225    }
1226
1227    qemu_mutex_lock(&iscsilun->mutex);
1228    iscsi_co_init_iscsitask(iscsilun, &iTask);
1229retry:
1230    if (use_16_for_ws) {
1231        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1232                                            iscsilun->zeroblock, iscsilun->block_size,
1233                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1234                                            0, 0, iscsi_co_generic_cb, &iTask);
1235    } else {
1236        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1237                                            iscsilun->zeroblock, iscsilun->block_size,
1238                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1239                                            0, 0, iscsi_co_generic_cb, &iTask);
1240    }
1241    if (iTask.task == NULL) {
1242        qemu_mutex_unlock(&iscsilun->mutex);
1243        return -ENOMEM;
1244    }
1245
1246    while (!iTask.complete) {
1247        iscsi_set_events(iscsilun);
1248        qemu_mutex_unlock(&iscsilun->mutex);
1249        qemu_coroutine_yield();
1250        qemu_mutex_lock(&iscsilun->mutex);
1251    }
1252
1253    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1254        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1255        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1256         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1257        /* WRITE SAME is not supported by the target */
1258        iscsilun->has_write_same = false;
1259        scsi_free_scsi_task(iTask.task);
1260        r = -ENOTSUP;
1261        goto out_unlock;
1262    }
1263
1264    if (iTask.task != NULL) {
1265        scsi_free_scsi_task(iTask.task);
1266        iTask.task = NULL;
1267    }
1268
1269    if (iTask.do_retry) {
1270        iTask.complete = 0;
1271        goto retry;
1272    }
1273
1274    if (iTask.status != SCSI_STATUS_GOOD) {
1275        iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1276        error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
1277                     lba, iTask.err_str);
1278        r = iTask.err_code;
1279        goto out_unlock;
1280    }
1281
1282    if (flags & BDRV_REQ_MAY_UNMAP) {
1283        iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1284    } else {
1285        iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
1286    }
1287
1288out_unlock:
1289    qemu_mutex_unlock(&iscsilun->mutex);
1290    g_free(iTask.err_str);
1291    return r;
1292}
1293
1294static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
1295                       Error **errp)
1296{
1297    const char *user = NULL;
1298    const char *password = NULL;
1299    const char *secretid;
1300    char *secret = NULL;
1301
1302    user = qemu_opt_get(opts, "user");
1303    if (!user) {
1304        return;
1305    }
1306
1307    secretid = qemu_opt_get(opts, "password-secret");
1308    password = qemu_opt_get(opts, "password");
1309    if (secretid && password) {
1310        error_setg(errp, "'password' and 'password-secret' properties are "
1311                   "mutually exclusive");
1312        return;
1313    }
1314    if (secretid) {
1315        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1316        if (!secret) {
1317            return;
1318        }
1319        password = secret;
1320    } else if (!password) {
1321        error_setg(errp, "CHAP username specified but no password was given");
1322        return;
1323    }
1324
1325    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1326        error_setg(errp, "Failed to set initiator username and password");
1327    }
1328
1329    g_free(secret);
1330}
1331
1332static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
1333                                Error **errp)
1334{
1335    const char *digest = NULL;
1336
1337    digest = qemu_opt_get(opts, "header-digest");
1338    if (!digest) {
1339        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1340    } else if (!strcmp(digest, "crc32c")) {
1341        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1342    } else if (!strcmp(digest, "none")) {
1343        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1344    } else if (!strcmp(digest, "crc32c-none")) {
1345        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1346    } else if (!strcmp(digest, "none-crc32c")) {
1347        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1348    } else {
1349        error_setg(errp, "Invalid header-digest setting : %s", digest);
1350    }
1351}
1352
1353static char *get_initiator_name(QemuOpts *opts)
1354{
1355    const char *name;
1356    char *iscsi_name;
1357    UuidInfo *uuid_info;
1358
1359    name = qemu_opt_get(opts, "initiator-name");
1360    if (name) {
1361        return g_strdup(name);
1362    }
1363
1364    uuid_info = qmp_query_uuid(NULL);
1365    if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1366        name = qemu_get_vm_name();
1367    } else {
1368        name = uuid_info->UUID;
1369    }
1370    iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1371                                 name ? ":" : "", name ? name : "");
1372    qapi_free_UuidInfo(uuid_info);
1373    return iscsi_name;
1374}
1375
1376static void iscsi_nop_timed_event(void *opaque)
1377{
1378    IscsiLun *iscsilun = opaque;
1379
1380    qemu_mutex_lock(&iscsilun->mutex);
1381    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1382        error_report("iSCSI: NOP timeout. Reconnecting...");
1383        iscsilun->request_timed_out = true;
1384    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1385        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1386        goto out;
1387    }
1388
1389    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1390    iscsi_set_events(iscsilun);
1391
1392out:
1393    qemu_mutex_unlock(&iscsilun->mutex);
1394}
1395
1396static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1397{
1398    struct scsi_task *task = NULL;
1399    struct scsi_readcapacity10 *rc10 = NULL;
1400    struct scsi_readcapacity16 *rc16 = NULL;
1401    int retries = ISCSI_CMD_RETRIES; 
1402
1403    do {
1404        if (task != NULL) {
1405            scsi_free_scsi_task(task);
1406            task = NULL;
1407        }
1408
1409        switch (iscsilun->type) {
1410        case TYPE_DISK:
1411            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1412            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1413                rc16 = scsi_datain_unmarshall(task);
1414                if (rc16 == NULL) {
1415                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1416                } else {
1417                    iscsilun->block_size = rc16->block_length;
1418                    iscsilun->num_blocks = rc16->returned_lba + 1;
1419                    iscsilun->lbpme = !!rc16->lbpme;
1420                    iscsilun->lbprz = !!rc16->lbprz;
1421                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1422                }
1423                break;
1424            }
1425            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1426                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1427                break;
1428            }
1429            /* Fall through and try READ CAPACITY(10) instead.  */
1430        case TYPE_ROM:
1431            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1432            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1433                rc10 = scsi_datain_unmarshall(task);
1434                if (rc10 == NULL) {
1435                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1436                } else {
1437                    iscsilun->block_size = rc10->block_size;
1438                    if (rc10->lba == 0) {
1439                        /* blank disk loaded */
1440                        iscsilun->num_blocks = 0;
1441                    } else {
1442                        iscsilun->num_blocks = rc10->lba + 1;
1443                    }
1444                }
1445            }
1446            break;
1447        default:
1448            return;
1449        }
1450    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1451             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1452             && retries-- > 0);
1453
1454    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1455        error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1456    } else if (!iscsilun->block_size ||
1457               iscsilun->block_size % BDRV_SECTOR_SIZE) {
1458        error_setg(errp, "iSCSI: the target returned an invalid "
1459                   "block size of %d.", iscsilun->block_size);
1460    }
1461    if (task) {
1462        scsi_free_scsi_task(task);
1463    }
1464}
1465
1466static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1467                                          int evpd, int pc, void **inq, Error **errp)
1468{
1469    int full_size;
1470    struct scsi_task *task = NULL;
1471    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1472    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1473        goto fail;
1474    }
1475    full_size = scsi_datain_getfullsize(task);
1476    if (full_size > task->datain.size) {
1477        scsi_free_scsi_task(task);
1478
1479        /* we need more data for the full list */
1480        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1481        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1482            goto fail;
1483        }
1484    }
1485
1486    *inq = scsi_datain_unmarshall(task);
1487    if (*inq == NULL) {
1488        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1489        goto fail_with_err;
1490    }
1491
1492    return task;
1493
1494fail:
1495    error_setg(errp, "iSCSI: Inquiry command failed : %s",
1496               iscsi_get_error(iscsi));
1497fail_with_err:
1498    if (task != NULL) {
1499        scsi_free_scsi_task(task);
1500    }
1501    return NULL;
1502}
1503
1504static void iscsi_detach_aio_context(BlockDriverState *bs)
1505{
1506    IscsiLun *iscsilun = bs->opaque;
1507
1508    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1509                       false, NULL, NULL, NULL, NULL);
1510    iscsilun->events = 0;
1511
1512    if (iscsilun->nop_timer) {
1513        timer_del(iscsilun->nop_timer);
1514        timer_free(iscsilun->nop_timer);
1515        iscsilun->nop_timer = NULL;
1516    }
1517    if (iscsilun->event_timer) {
1518        timer_del(iscsilun->event_timer);
1519        timer_free(iscsilun->event_timer);
1520        iscsilun->event_timer = NULL;
1521    }
1522}
1523
1524static void iscsi_attach_aio_context(BlockDriverState *bs,
1525                                     AioContext *new_context)
1526{
1527    IscsiLun *iscsilun = bs->opaque;
1528
1529    iscsilun->aio_context = new_context;
1530    iscsi_set_events(iscsilun);
1531
1532    /* Set up a timer for sending out iSCSI NOPs */
1533    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1534                                        QEMU_CLOCK_REALTIME, SCALE_MS,
1535                                        iscsi_nop_timed_event, iscsilun);
1536    timer_mod(iscsilun->nop_timer,
1537              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1538
1539    /* Set up a timer for periodic calls to iscsi_set_events and to
1540     * scan for command timeout */
1541    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1542                                          QEMU_CLOCK_REALTIME, SCALE_MS,
1543                                          iscsi_timed_check_events, iscsilun);
1544    timer_mod(iscsilun->event_timer,
1545              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1546}
1547
1548static void iscsi_modesense_sync(IscsiLun *iscsilun)
1549{
1550    struct scsi_task *task;
1551    struct scsi_mode_sense *ms = NULL;
1552    iscsilun->write_protected = false;
1553    iscsilun->dpofua = false;
1554
1555    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1556                                 1, SCSI_MODESENSE_PC_CURRENT,
1557                                 0x3F, 0, 255);
1558    if (task == NULL) {
1559        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1560                     iscsi_get_error(iscsilun->iscsi));
1561        goto out;
1562    }
1563
1564    if (task->status != SCSI_STATUS_GOOD) {
1565        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1566        goto out;
1567    }
1568    ms = scsi_datain_unmarshall(task);
1569    if (!ms) {
1570        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1571                     iscsi_get_error(iscsilun->iscsi));
1572        goto out;
1573    }
1574    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1575    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1576
1577out:
1578    if (task) {
1579        scsi_free_scsi_task(task);
1580    }
1581}
1582
1583static void iscsi_parse_iscsi_option(const char *target, QDict *options)
1584{
1585    QemuOptsList *list;
1586    QemuOpts *opts;
1587    const char *user, *password, *password_secret, *initiator_name,
1588               *header_digest, *timeout;
1589
1590    list = qemu_find_opts("iscsi");
1591    if (!list) {
1592        return;
1593    }
1594
1595    opts = qemu_opts_find(list, target);
1596    if (opts == NULL) {
1597        opts = QTAILQ_FIRST(&list->head);
1598        if (!opts) {
1599            return;
1600        }
1601    }
1602
1603    user = qemu_opt_get(opts, "user");
1604    if (user) {
1605        qdict_set_default_str(options, "user", user);
1606    }
1607
1608    password = qemu_opt_get(opts, "password");
1609    if (password) {
1610        qdict_set_default_str(options, "password", password);
1611    }
1612
1613    password_secret = qemu_opt_get(opts, "password-secret");
1614    if (password_secret) {
1615        qdict_set_default_str(options, "password-secret", password_secret);
1616    }
1617
1618    initiator_name = qemu_opt_get(opts, "initiator-name");
1619    if (initiator_name) {
1620        qdict_set_default_str(options, "initiator-name", initiator_name);
1621    }
1622
1623    header_digest = qemu_opt_get(opts, "header-digest");
1624    if (header_digest) {
1625        /* -iscsi takes upper case values, but QAPI only supports lower case
1626         * enum constant names, so we have to convert here. */
1627        char *qapi_value = g_ascii_strdown(header_digest, -1);
1628        qdict_set_default_str(options, "header-digest", qapi_value);
1629        g_free(qapi_value);
1630    }
1631
1632    timeout = qemu_opt_get(opts, "timeout");
1633    if (timeout) {
1634        qdict_set_default_str(options, "timeout", timeout);
1635    }
1636}
1637
1638/*
1639 * We support iscsi url's on the form
1640 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1641 */
1642static void iscsi_parse_filename(const char *filename, QDict *options,
1643                                 Error **errp)
1644{
1645    struct iscsi_url *iscsi_url;
1646    const char *transport_name;
1647    char *lun_str;
1648
1649    iscsi_url = iscsi_parse_full_url(NULL, filename);
1650    if (iscsi_url == NULL) {
1651        error_setg(errp, "Failed to parse URL : %s", filename);
1652        return;
1653    }
1654
1655#if LIBISCSI_API_VERSION >= (20160603)
1656    switch (iscsi_url->transport) {
1657    case TCP_TRANSPORT:
1658        transport_name = "tcp";
1659        break;
1660    case ISER_TRANSPORT:
1661        transport_name = "iser";
1662        break;
1663    default:
1664        error_setg(errp, "Unknown transport type (%d)",
1665                   iscsi_url->transport);
1666        return;
1667    }
1668#else
1669    transport_name = "tcp";
1670#endif
1671
1672    qdict_set_default_str(options, "transport", transport_name);
1673    qdict_set_default_str(options, "portal", iscsi_url->portal);
1674    qdict_set_default_str(options, "target", iscsi_url->target);
1675
1676    lun_str = g_strdup_printf("%d", iscsi_url->lun);
1677    qdict_set_default_str(options, "lun", lun_str);
1678    g_free(lun_str);
1679
1680    /* User/password from -iscsi take precedence over those from the URL */
1681    iscsi_parse_iscsi_option(iscsi_url->target, options);
1682
1683    if (iscsi_url->user[0] != '\0') {
1684        qdict_set_default_str(options, "user", iscsi_url->user);
1685        qdict_set_default_str(options, "password", iscsi_url->passwd);
1686    }
1687
1688    iscsi_destroy_url(iscsi_url);
1689}
1690
1691static QemuOptsList runtime_opts = {
1692    .name = "iscsi",
1693    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1694    .desc = {
1695        {
1696            .name = "transport",
1697            .type = QEMU_OPT_STRING,
1698        },
1699        {
1700            .name = "portal",
1701            .type = QEMU_OPT_STRING,
1702        },
1703        {
1704            .name = "target",
1705            .type = QEMU_OPT_STRING,
1706        },
1707        {
1708            .name = "user",
1709            .type = QEMU_OPT_STRING,
1710        },
1711        {
1712            .name = "password",
1713            .type = QEMU_OPT_STRING,
1714        },
1715        {
1716            .name = "password-secret",
1717            .type = QEMU_OPT_STRING,
1718        },
1719        {
1720            .name = "lun",
1721            .type = QEMU_OPT_NUMBER,
1722        },
1723        {
1724            .name = "initiator-name",
1725            .type = QEMU_OPT_STRING,
1726        },
1727        {
1728            .name = "header-digest",
1729            .type = QEMU_OPT_STRING,
1730        },
1731        {
1732            .name = "timeout",
1733            .type = QEMU_OPT_NUMBER,
1734        },
1735        {
1736            .name = "filename",
1737            .type = QEMU_OPT_STRING,
1738        },
1739        { /* end of list */ }
1740    },
1741};
1742
1743static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1744                      Error **errp)
1745{
1746    IscsiLun *iscsilun = bs->opaque;
1747    struct iscsi_context *iscsi = NULL;
1748    struct scsi_task *task = NULL;
1749    struct scsi_inquiry_standard *inq = NULL;
1750    struct scsi_inquiry_supported_pages *inq_vpd;
1751    char *initiator_name = NULL;
1752    QemuOpts *opts;
1753    Error *local_err = NULL;
1754    const char *transport_name, *portal, *target, *filename;
1755#if LIBISCSI_API_VERSION >= (20160603)
1756    enum iscsi_transport_type transport;
1757#endif
1758    int i, ret = 0, timeout = 0, lun;
1759
1760    /* If we are given a filename, parse the filename, with precedence given to
1761     * filename encoded options */
1762    filename = qdict_get_try_str(options, "filename");
1763    if (filename) {
1764        warn_report("'filename' option specified. "
1765                    "This is an unsupported option, and may be deprecated "
1766                    "in the future");
1767        iscsi_parse_filename(filename, options, &local_err);
1768        if (local_err) {
1769            ret = -EINVAL;
1770            error_propagate(errp, local_err);
1771            goto exit;
1772        }
1773    }
1774
1775    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1776    qemu_opts_absorb_qdict(opts, options, &local_err);
1777    if (local_err) {
1778        error_propagate(errp, local_err);
1779        ret = -EINVAL;
1780        goto out;
1781    }
1782
1783    transport_name = qemu_opt_get(opts, "transport");
1784    portal = qemu_opt_get(opts, "portal");
1785    target = qemu_opt_get(opts, "target");
1786    lun = qemu_opt_get_number(opts, "lun", 0);
1787
1788    if (!transport_name || !portal || !target) {
1789        error_setg(errp, "Need all of transport, portal and target options");
1790        ret = -EINVAL;
1791        goto out;
1792    }
1793
1794    if (!strcmp(transport_name, "tcp")) {
1795#if LIBISCSI_API_VERSION >= (20160603)
1796        transport = TCP_TRANSPORT;
1797    } else if (!strcmp(transport_name, "iser")) {
1798        transport = ISER_TRANSPORT;
1799#else
1800        /* TCP is what older libiscsi versions always use */
1801#endif
1802    } else {
1803        error_setg(errp, "Unknown transport: %s", transport_name);
1804        ret = -EINVAL;
1805        goto out;
1806    }
1807
1808    memset(iscsilun, 0, sizeof(IscsiLun));
1809
1810    initiator_name = get_initiator_name(opts);
1811
1812    iscsi = iscsi_create_context(initiator_name);
1813    if (iscsi == NULL) {
1814        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1815        ret = -ENOMEM;
1816        goto out;
1817    }
1818#if LIBISCSI_API_VERSION >= (20160603)
1819    if (iscsi_init_transport(iscsi, transport)) {
1820        error_setg(errp, ("Error initializing transport."));
1821        ret = -EINVAL;
1822        goto out;
1823    }
1824#endif
1825    if (iscsi_set_targetname(iscsi, target)) {
1826        error_setg(errp, "iSCSI: Failed to set target name.");
1827        ret = -EINVAL;
1828        goto out;
1829    }
1830
1831    /* check if we got CHAP username/password via the options */
1832    apply_chap(iscsi, opts, &local_err);
1833    if (local_err != NULL) {
1834        error_propagate(errp, local_err);
1835        ret = -EINVAL;
1836        goto out;
1837    }
1838
1839    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1840        error_setg(errp, "iSCSI: Failed to set session type to normal.");
1841        ret = -EINVAL;
1842        goto out;
1843    }
1844
1845    /* check if we got HEADER_DIGEST via the options */
1846    apply_header_digest(iscsi, opts, &local_err);
1847    if (local_err != NULL) {
1848        error_propagate(errp, local_err);
1849        ret = -EINVAL;
1850        goto out;
1851    }
1852
1853    /* timeout handling is broken in libiscsi before 1.15.0 */
1854    timeout = qemu_opt_get_number(opts, "timeout", 0);
1855#if LIBISCSI_API_VERSION >= 20150621
1856    iscsi_set_timeout(iscsi, timeout);
1857#else
1858    if (timeout) {
1859        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1860    }
1861#endif
1862
1863    if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
1864        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1865            iscsi_get_error(iscsi));
1866        ret = -EINVAL;
1867        goto out;
1868    }
1869
1870    iscsilun->iscsi = iscsi;
1871    iscsilun->aio_context = bdrv_get_aio_context(bs);
1872    iscsilun->lun = lun;
1873    iscsilun->has_write_same = true;
1874
1875    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1876                            (void **) &inq, errp);
1877    if (task == NULL) {
1878        ret = -EINVAL;
1879        goto out;
1880    }
1881    iscsilun->type = inq->periperal_device_type;
1882    scsi_free_scsi_task(task);
1883    task = NULL;
1884
1885    iscsi_modesense_sync(iscsilun);
1886    if (iscsilun->dpofua) {
1887        bs->supported_write_flags = BDRV_REQ_FUA;
1888    }
1889
1890    /* Check the write protect flag of the LUN if we want to write */
1891    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1892        iscsilun->write_protected) {
1893        error_setg(errp, "Cannot open a write protected LUN as read-write");
1894        ret = -EACCES;
1895        goto out;
1896    }
1897
1898    iscsi_readcapacity_sync(iscsilun, &local_err);
1899    if (local_err != NULL) {
1900        error_propagate(errp, local_err);
1901        ret = -EINVAL;
1902        goto out;
1903    }
1904    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1905
1906    /* We don't have any emulation for devices other than disks and CD-ROMs, so
1907     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1908     * will try to read from the device to guess the image format.
1909     */
1910    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1911        bs->sg = true;
1912    }
1913
1914    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1915                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1916                            (void **) &inq_vpd, errp);
1917    if (task == NULL) {
1918        ret = -EINVAL;
1919        goto out;
1920    }
1921    for (i = 0; i < inq_vpd->num_pages; i++) {
1922        struct scsi_task *inq_task;
1923        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1924        struct scsi_inquiry_block_limits *inq_bl;
1925        switch (inq_vpd->pages[i]) {
1926        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1927            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1928                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1929                                        (void **) &inq_lbp, errp);
1930            if (inq_task == NULL) {
1931                ret = -EINVAL;
1932                goto out;
1933            }
1934            memcpy(&iscsilun->lbp, inq_lbp,
1935                   sizeof(struct scsi_inquiry_logical_block_provisioning));
1936            scsi_free_scsi_task(inq_task);
1937            break;
1938        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1939            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1940                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1941                                    (void **) &inq_bl, errp);
1942            if (inq_task == NULL) {
1943                ret = -EINVAL;
1944                goto out;
1945            }
1946            memcpy(&iscsilun->bl, inq_bl,
1947                   sizeof(struct scsi_inquiry_block_limits));
1948            scsi_free_scsi_task(inq_task);
1949            break;
1950        default:
1951            break;
1952        }
1953    }
1954    scsi_free_scsi_task(task);
1955    task = NULL;
1956
1957    qemu_mutex_init(&iscsilun->mutex);
1958    iscsi_attach_aio_context(bs, iscsilun->aio_context);
1959
1960    /* Guess the internal cluster (page) size of the iscsi target by the means
1961     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1962     * reasonable size */
1963    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1964        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1965        iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
1966            iscsilun->block_size;
1967        if (iscsilun->lbprz) {
1968            ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
1969        }
1970    }
1971
1972    if (iscsilun->lbprz && iscsilun->lbp.lbpws) {
1973        bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1974    }
1975
1976out:
1977    qemu_opts_del(opts);
1978    g_free(initiator_name);
1979    if (task != NULL) {
1980        scsi_free_scsi_task(task);
1981    }
1982
1983    if (ret) {
1984        if (iscsi != NULL) {
1985            if (iscsi_is_logged_in(iscsi)) {
1986                iscsi_logout_sync(iscsi);
1987            }
1988            iscsi_destroy_context(iscsi);
1989        }
1990        memset(iscsilun, 0, sizeof(IscsiLun));
1991    }
1992exit:
1993    return ret;
1994}
1995
1996static void iscsi_close(BlockDriverState *bs)
1997{
1998    IscsiLun *iscsilun = bs->opaque;
1999    struct iscsi_context *iscsi = iscsilun->iscsi;
2000
2001    iscsi_detach_aio_context(bs);
2002    if (iscsi_is_logged_in(iscsi)) {
2003        iscsi_logout_sync(iscsi);
2004    }
2005    iscsi_destroy_context(iscsi);
2006    g_free(iscsilun->zeroblock);
2007    iscsi_allocmap_free(iscsilun);
2008    qemu_mutex_destroy(&iscsilun->mutex);
2009    memset(iscsilun, 0, sizeof(IscsiLun));
2010}
2011
2012static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
2013{
2014    /* We don't actually refresh here, but just return data queried in
2015     * iscsi_open(): iscsi targets don't change their limits. */
2016
2017    IscsiLun *iscsilun = bs->opaque;
2018    uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
2019    unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
2020
2021    assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg);
2022
2023    bs->bl.request_alignment = block_size;
2024
2025    if (iscsilun->bl.max_xfer_len) {
2026        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
2027    }
2028
2029    if (max_xfer_len * block_size < INT_MAX) {
2030        bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
2031    }
2032
2033    if (iscsilun->lbp.lbpu) {
2034        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
2035            bs->bl.max_pdiscard =
2036                iscsilun->bl.max_unmap * iscsilun->block_size;
2037        }
2038        bs->bl.pdiscard_alignment =
2039            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2040    } else {
2041        bs->bl.pdiscard_alignment = iscsilun->block_size;
2042    }
2043
2044    if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
2045        bs->bl.max_pwrite_zeroes =
2046            iscsilun->bl.max_ws_len * iscsilun->block_size;
2047    }
2048    if (iscsilun->lbp.lbpws) {
2049        bs->bl.pwrite_zeroes_alignment =
2050            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2051    } else {
2052        bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
2053    }
2054    if (iscsilun->bl.opt_xfer_len &&
2055        iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
2056        bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
2057                                        iscsilun->block_size);
2058    }
2059}
2060
2061/* Note that this will not re-establish a connection with an iSCSI target - it
2062 * is effectively a NOP.  */
2063static int iscsi_reopen_prepare(BDRVReopenState *state,
2064                                BlockReopenQueue *queue, Error **errp)
2065{
2066    IscsiLun *iscsilun = state->bs->opaque;
2067
2068    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
2069        error_setg(errp, "Cannot open a write protected LUN as read-write");
2070        return -EACCES;
2071    }
2072    return 0;
2073}
2074
2075static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
2076{
2077    IscsiLun *iscsilun = reopen_state->bs->opaque;
2078
2079    /* the cache.direct status might have changed */
2080    if (iscsilun->allocmap != NULL) {
2081        iscsi_allocmap_init(iscsilun, reopen_state->flags);
2082    }
2083}
2084
2085static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
2086                          PreallocMode prealloc, Error **errp)
2087{
2088    IscsiLun *iscsilun = bs->opaque;
2089    Error *local_err = NULL;
2090
2091    if (prealloc != PREALLOC_MODE_OFF) {
2092        error_setg(errp, "Unsupported preallocation mode '%s'",
2093                   PreallocMode_str(prealloc));
2094        return -ENOTSUP;
2095    }
2096
2097    if (iscsilun->type != TYPE_DISK) {
2098        error_setg(errp, "Cannot resize non-disk iSCSI devices");
2099        return -ENOTSUP;
2100    }
2101
2102    iscsi_readcapacity_sync(iscsilun, &local_err);
2103    if (local_err != NULL) {
2104        error_propagate(errp, local_err);
2105        return -EIO;
2106    }
2107
2108    if (offset > iscsi_getlength(bs)) {
2109        error_setg(errp, "Cannot grow iSCSI devices");
2110        return -EINVAL;
2111    }
2112
2113    if (iscsilun->allocmap != NULL) {
2114        iscsi_allocmap_init(iscsilun, bs->open_flags);
2115    }
2116
2117    return 0;
2118}
2119
2120static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts,
2121                                             Error **errp)
2122{
2123    int ret = 0;
2124    int64_t total_size = 0;
2125    BlockDriverState *bs;
2126    IscsiLun *iscsilun = NULL;
2127    QDict *bs_options;
2128    Error *local_err = NULL;
2129
2130    bs = bdrv_new();
2131
2132    /* Read out options */
2133    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2134                              BDRV_SECTOR_SIZE);
2135    bs->opaque = g_new0(struct IscsiLun, 1);
2136    iscsilun = bs->opaque;
2137
2138    bs_options = qdict_new();
2139    iscsi_parse_filename(filename, bs_options, &local_err);
2140    if (local_err) {
2141        error_propagate(errp, local_err);
2142        ret = -EINVAL;
2143    } else {
2144        ret = iscsi_open(bs, bs_options, 0, NULL);
2145    }
2146    QDECREF(bs_options);
2147
2148    if (ret != 0) {
2149        goto out;
2150    }
2151    iscsi_detach_aio_context(bs);
2152    if (iscsilun->type != TYPE_DISK) {
2153        ret = -ENODEV;
2154        goto out;
2155    }
2156    if (bs->total_sectors < total_size) {
2157        ret = -ENOSPC;
2158        goto out;
2159    }
2160
2161    ret = 0;
2162out:
2163    if (iscsilun->iscsi != NULL) {
2164        iscsi_destroy_context(iscsilun->iscsi);
2165    }
2166    g_free(bs->opaque);
2167    bs->opaque = NULL;
2168    bdrv_unref(bs);
2169    return ret;
2170}
2171
2172static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2173{
2174    IscsiLun *iscsilun = bs->opaque;
2175    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
2176    bdi->cluster_size = iscsilun->cluster_size;
2177    return 0;
2178}
2179
2180static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
2181                                                   Error **errp)
2182{
2183    IscsiLun *iscsilun = bs->opaque;
2184    iscsi_allocmap_invalidate(iscsilun);
2185}
2186
2187static QemuOptsList iscsi_create_opts = {
2188    .name = "iscsi-create-opts",
2189    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
2190    .desc = {
2191        {
2192            .name = BLOCK_OPT_SIZE,
2193            .type = QEMU_OPT_SIZE,
2194            .help = "Virtual disk size"
2195        },
2196        { /* end of list */ }
2197    }
2198};
2199
2200static BlockDriver bdrv_iscsi = {
2201    .format_name     = "iscsi",
2202    .protocol_name   = "iscsi",
2203
2204    .instance_size          = sizeof(IscsiLun),
2205    .bdrv_parse_filename    = iscsi_parse_filename,
2206    .bdrv_file_open         = iscsi_open,
2207    .bdrv_close             = iscsi_close,
2208    .bdrv_co_create_opts    = iscsi_co_create_opts,
2209    .create_opts            = &iscsi_create_opts,
2210    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2211    .bdrv_reopen_commit     = iscsi_reopen_commit,
2212    .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
2213
2214    .bdrv_getlength  = iscsi_getlength,
2215    .bdrv_get_info   = iscsi_get_info,
2216    .bdrv_truncate   = iscsi_truncate,
2217    .bdrv_refresh_limits = iscsi_refresh_limits,
2218
2219    .bdrv_co_block_status  = iscsi_co_block_status,
2220    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2221    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2222    .bdrv_co_readv         = iscsi_co_readv,
2223    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2224    .bdrv_co_flush_to_disk = iscsi_co_flush,
2225
2226#ifdef __linux__
2227    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2228#endif
2229
2230    .bdrv_detach_aio_context = iscsi_detach_aio_context,
2231    .bdrv_attach_aio_context = iscsi_attach_aio_context,
2232};
2233
#if LIBISCSI_API_VERSION >= (20160603)
/*
 * Driver table registered under the "iser" format/protocol name.  It uses the
 * same callbacks as bdrv_iscsi and is only built for libiscsi API versions
 * >= 20160603.
 */
static BlockDriver bdrv_iser = {
    .format_name     = "iser",
    .protocol_name   = "iser",
    .instance_size   = sizeof(IscsiLun),

    /* Open, close, create and reopen */
    .bdrv_parse_filename    = iscsi_parse_filename,
    .bdrv_file_open         = iscsi_open,
    .bdrv_close             = iscsi_close,
    .create_opts            = &iscsi_create_opts,
    .bdrv_co_create_opts    = iscsi_co_create_opts,
    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
    .bdrv_reopen_commit     = iscsi_reopen_commit,

    /* Size, info and limits */
    .bdrv_getlength         = iscsi_getlength,
    .bdrv_truncate          = iscsi_truncate,
    .bdrv_get_info          = iscsi_get_info,
    .bdrv_refresh_limits    = iscsi_refresh_limits,

    /* Data path */
    .bdrv_co_readv            = iscsi_co_readv,
    .bdrv_co_writev_flags     = iscsi_co_writev_flags,
    .bdrv_co_pwrite_zeroes    = iscsi_co_pwrite_zeroes,
    .bdrv_co_pdiscard         = iscsi_co_pdiscard,
    .bdrv_co_flush_to_disk    = iscsi_co_flush,
    .bdrv_co_block_status     = iscsi_co_block_status,
    .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,

#ifdef __linux__
    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
#endif

    /* AioContext hooks */
    .bdrv_attach_aio_context = iscsi_attach_aio_context,
    .bdrv_detach_aio_context = iscsi_detach_aio_context,
};
#endif
2269
2270static void iscsi_block_init(void)
2271{
2272    bdrv_register(&bdrv_iscsi);
2273#if LIBISCSI_API_VERSION >= (20160603)
2274    bdrv_register(&bdrv_iser);
2275#endif
2276}
2277
2278block_init(iscsi_block_init);
2279