qemu/block/iscsi.c
<<
>>
Prefs
   1/*
   2 * QEMU Block driver for iSCSI images
   3 *
   4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
   5 * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28#include <poll.h>
  29#include <math.h>
  30#include <arpa/inet.h>
  31#include "qemu-common.h"
  32#include "qemu/config-file.h"
  33#include "qemu/error-report.h"
  34#include "qemu/bitops.h"
  35#include "qemu/bitmap.h"
  36#include "block/block_int.h"
  37#include "block/scsi.h"
  38#include "qemu/iov.h"
  39#include "sysemu/sysemu.h"
  40#include "qmp-commands.h"
  41#include "qapi/qmp/qstring.h"
  42#include "crypto/secret.h"
  43
  44#include <iscsi/iscsi.h>
  45#include <iscsi/scsi-lowlevel.h>
  46
  47#ifdef __linux__
  48#include <scsi/sg.h>
  49#endif
  50
  51typedef struct IscsiLun {
  52    struct iscsi_context *iscsi;
  53    AioContext *aio_context;
  54    int lun;
  55    enum scsi_inquiry_peripheral_device_type type;
  56    int block_size;
  57    uint64_t num_blocks;
  58    int events;
  59    QEMUTimer *nop_timer;
  60    QEMUTimer *event_timer;
  61    struct scsi_inquiry_logical_block_provisioning lbp;
  62    struct scsi_inquiry_block_limits bl;
  63    unsigned char *zeroblock;
  64    /* The allocmap tracks which clusters (pages) on the iSCSI target are
  65     * allocated and which are not. In case a target returns zeros for
  66     * unallocated pages (iscsilun->lprz) we can directly return zeros instead
  67     * of reading zeros over the wire if a read request falls within an
  68     * unallocated block. As there are 3 possible states we need 2 bitmaps to
  69     * track. allocmap_valid keeps track if QEMU's information about a page is
  70     * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
  71     * valid information about a page the corresponding allocmap entry should be
  72     * switched to unallocated as well to force a new lookup of the allocation
  73     * status as lookups are generally skipped if a page is suspect to be
  74     * allocated. If a iSCSI target is opened with cache.direct = on the
  75     * allocmap_valid does not exist turning all cached information invalid so
  76     * that a fresh lookup is made for any page even if allocmap entry returns
  77     * it's unallocated. */
  78    unsigned long *allocmap;
  79    unsigned long *allocmap_valid;
  80    long allocmap_size;
  81    int cluster_sectors;
  82    bool use_16_for_rw;
  83    bool write_protected;
  84    bool lbpme;
  85    bool lbprz;
  86    bool dpofua;
  87    bool has_write_same;
  88    bool request_timed_out;
  89} IscsiLun;
  90
  91typedef struct IscsiTask {
  92    int status;
  93    int complete;
  94    int retries;
  95    int do_retry;
  96    struct scsi_task *task;
  97    Coroutine *co;
  98    QEMUBH *bh;
  99    IscsiLun *iscsilun;
 100    QEMUTimer retry_timer;
 101    int err_code;
 102} IscsiTask;
 103
 104typedef struct IscsiAIOCB {
 105    BlockAIOCB common;
 106    QEMUIOVector *qiov;
 107    QEMUBH *bh;
 108    IscsiLun *iscsilun;
 109    struct scsi_task *task;
 110    uint8_t *buf;
 111    int status;
 112    int64_t sector_num;
 113    int nb_sectors;
 114    int ret;
 115#ifdef __linux__
 116    sg_io_hdr_t *ioh;
 117#endif
 118} IscsiAIOCB;
 119
 120/* libiscsi uses time_t so its enough to process events every second */
 121#define EVENT_INTERVAL 1000
 122#define NOP_INTERVAL 5000
 123#define MAX_NOP_FAILURES 3
 124#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
 125static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
 126
 127/* this threshold is a trade-off knob to choose between
 128 * the potential additional overhead of an extra GET_LBA_STATUS request
 129 * vs. unnecessarily reading a lot of zero sectors over the wire.
 130 * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES
 131 * sectors we check the allocation status of the area covered by the
 132 * request first if the allocationmap indicates that the area might be
 133 * unallocated. */
 134#define ISCSI_CHECKALLOC_THRES 64
 135
 136static void
 137iscsi_bh_cb(void *p)
 138{
 139    IscsiAIOCB *acb = p;
 140
 141    qemu_bh_delete(acb->bh);
 142
 143    g_free(acb->buf);
 144    acb->buf = NULL;
 145
 146    acb->common.cb(acb->common.opaque, acb->status);
 147
 148    if (acb->task != NULL) {
 149        scsi_free_scsi_task(acb->task);
 150        acb->task = NULL;
 151    }
 152
 153    qemu_aio_unref(acb);
 154}
 155
 156static void
 157iscsi_schedule_bh(IscsiAIOCB *acb)
 158{
 159    if (acb->bh) {
 160        return;
 161    }
 162    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
 163    qemu_bh_schedule(acb->bh);
 164}
 165
 166static void iscsi_co_generic_bh_cb(void *opaque)
 167{
 168    struct IscsiTask *iTask = opaque;
 169    iTask->complete = 1;
 170    qemu_bh_delete(iTask->bh);
 171    qemu_coroutine_enter(iTask->co);
 172}
 173
 174static void iscsi_retry_timer_expired(void *opaque)
 175{
 176    struct IscsiTask *iTask = opaque;
 177    iTask->complete = 1;
 178    if (iTask->co) {
 179        qemu_coroutine_enter(iTask->co);
 180    }
 181}
 182
 183static inline unsigned exp_random(double mean)
 184{
 185    return -mean * log((double)rand() / RAND_MAX);
 186}
 187
 188/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 189 * libiscsi 1.10.0, together with other constants we need.  Use it as
 190 * a hint that we have to define them ourselves if needed, to keep the
 191 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 192 * the test because SCSI_STATUS_* is an enum.
 193 *
 194 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 195 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 196 * introduced in 1.11.0.  If it is present, there is no need to define
 197 * anything.
 198 */
 199#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
 200    !defined(LIBISCSI_API_VERSION)
 201#define SCSI_STATUS_TASK_SET_FULL                          0x28
 202#define SCSI_STATUS_TIMEOUT                                0x0f000002
 203#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
 204#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
 205#endif
 206
 207static int iscsi_translate_sense(struct scsi_sense *sense)
 208{
 209    int ret;
 210
 211    switch (sense->key) {
 212    case SCSI_SENSE_NOT_READY:
 213        return -EBUSY;
 214    case SCSI_SENSE_DATA_PROTECTION:
 215        return -EACCES;
 216    case SCSI_SENSE_COMMAND_ABORTED:
 217        return -ECANCELED;
 218    case SCSI_SENSE_ILLEGAL_REQUEST:
 219        /* Parse ASCQ */
 220        break;
 221    default:
 222        return -EIO;
 223    }
 224    switch (sense->ascq) {
 225    case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
 226    case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
 227    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
 228    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
 229        ret = -EINVAL;
 230        break;
 231    case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
 232        ret = -ENOSPC;
 233        break;
 234    case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
 235        ret = -ENOTSUP;
 236        break;
 237    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
 238    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
 239    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
 240        ret = -ENOMEDIUM;
 241        break;
 242    case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
 243        ret = -EACCES;
 244        break;
 245    default:
 246        ret = -EIO;
 247        break;
 248    }
 249    return ret;
 250}
 251
 252static void
 253iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 254                        void *command_data, void *opaque)
 255{
 256    struct IscsiTask *iTask = opaque;
 257    struct scsi_task *task = command_data;
 258
 259    iTask->status = status;
 260    iTask->do_retry = 0;
 261    iTask->task = task;
 262
 263    if (status != SCSI_STATUS_GOOD) {
 264        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
 265            if (status == SCSI_STATUS_CHECK_CONDITION
 266                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
 267                error_report("iSCSI CheckCondition: %s",
 268                             iscsi_get_error(iscsi));
 269                iTask->do_retry = 1;
 270                goto out;
 271            }
 272            if (status == SCSI_STATUS_BUSY ||
 273                status == SCSI_STATUS_TIMEOUT ||
 274                status == SCSI_STATUS_TASK_SET_FULL) {
 275                unsigned retry_time =
 276                    exp_random(iscsi_retry_times[iTask->retries - 1]);
 277                if (status == SCSI_STATUS_TIMEOUT) {
 278                    /* make sure the request is rescheduled AFTER the
 279                     * reconnect is initiated */
 280                    retry_time = EVENT_INTERVAL * 2;
 281                    iTask->iscsilun->request_timed_out = true;
 282                }
 283                error_report("iSCSI Busy/TaskSetFull/TimeOut"
 284                             " (retry #%u in %u ms): %s",
 285                             iTask->retries, retry_time,
 286                             iscsi_get_error(iscsi));
 287                aio_timer_init(iTask->iscsilun->aio_context,
 288                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
 289                               SCALE_MS, iscsi_retry_timer_expired, iTask);
 290                timer_mod(&iTask->retry_timer,
 291                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
 292                iTask->do_retry = 1;
 293                return;
 294            }
 295        }
 296        iTask->err_code = iscsi_translate_sense(&task->sense);
 297        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
 298    }
 299
 300out:
 301    if (iTask->co) {
 302        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
 303                               iscsi_co_generic_bh_cb, iTask);
 304        qemu_bh_schedule(iTask->bh);
 305    } else {
 306        iTask->complete = 1;
 307    }
 308}
 309
 310static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 311{
 312    *iTask = (struct IscsiTask) {
 313        .co         = qemu_coroutine_self(),
 314        .iscsilun   = iscsilun,
 315    };
 316}
 317
 318static void
 319iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 320                    void *private_data)
 321{
 322    IscsiAIOCB *acb = private_data;
 323
 324    acb->status = -ECANCELED;
 325    iscsi_schedule_bh(acb);
 326}
 327
 328static void
 329iscsi_aio_cancel(BlockAIOCB *blockacb)
 330{
 331    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
 332    IscsiLun *iscsilun = acb->iscsilun;
 333
 334    if (acb->status != -EINPROGRESS) {
 335        return;
 336    }
 337
 338    /* send a task mgmt call to the target to cancel the task on the target */
 339    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
 340                                     iscsi_abort_task_cb, acb);
 341
 342}
 343
 344static const AIOCBInfo iscsi_aiocb_info = {
 345    .aiocb_size         = sizeof(IscsiAIOCB),
 346    .cancel_async       = iscsi_aio_cancel,
 347};
 348
 349
 350static void iscsi_process_read(void *arg);
 351static void iscsi_process_write(void *arg);
 352
 353static void
 354iscsi_set_events(IscsiLun *iscsilun)
 355{
 356    struct iscsi_context *iscsi = iscsilun->iscsi;
 357    int ev = iscsi_which_events(iscsi);
 358
 359    if (ev != iscsilun->events) {
 360        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
 361                           false,
 362                           (ev & POLLIN) ? iscsi_process_read : NULL,
 363                           (ev & POLLOUT) ? iscsi_process_write : NULL,
 364                           iscsilun);
 365        iscsilun->events = ev;
 366    }
 367}
 368
 369static void iscsi_timed_check_events(void *opaque)
 370{
 371    IscsiLun *iscsilun = opaque;
 372
 373    /* check for timed out requests */
 374    iscsi_service(iscsilun->iscsi, 0);
 375
 376    if (iscsilun->request_timed_out) {
 377        iscsilun->request_timed_out = false;
 378        iscsi_reconnect(iscsilun->iscsi);
 379    }
 380
 381    /* newer versions of libiscsi may return zero events. Ensure we are able
 382     * to return to service once this situation changes. */
 383    iscsi_set_events(iscsilun);
 384
 385    timer_mod(iscsilun->event_timer,
 386              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 387}
 388
 389static void
 390iscsi_process_read(void *arg)
 391{
 392    IscsiLun *iscsilun = arg;
 393    struct iscsi_context *iscsi = iscsilun->iscsi;
 394
 395    iscsi_service(iscsi, POLLIN);
 396    iscsi_set_events(iscsilun);
 397}
 398
 399static void
 400iscsi_process_write(void *arg)
 401{
 402    IscsiLun *iscsilun = arg;
 403    struct iscsi_context *iscsi = iscsilun->iscsi;
 404
 405    iscsi_service(iscsi, POLLOUT);
 406    iscsi_set_events(iscsilun);
 407}
 408
 409static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
 410{
 411    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
 412}
 413
 414static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
 415{
 416    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 417}
 418
 419static bool is_byte_request_lun_aligned(int64_t offset, int count,
 420                                        IscsiLun *iscsilun)
 421{
 422    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
 423        error_report("iSCSI misaligned request: "
 424                     "iscsilun->block_size %u, offset %" PRIi64
 425                     ", count %d",
 426                     iscsilun->block_size, offset, count);
 427        return false;
 428    }
 429    return true;
 430}
 431
 432static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
 433                                          IscsiLun *iscsilun)
 434{
 435    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
 436    return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
 437                                       nb_sectors << BDRV_SECTOR_BITS,
 438                                       iscsilun);
 439}
 440
 441static void iscsi_allocmap_free(IscsiLun *iscsilun)
 442{
 443    g_free(iscsilun->allocmap);
 444    g_free(iscsilun->allocmap_valid);
 445    iscsilun->allocmap = NULL;
 446    iscsilun->allocmap_valid = NULL;
 447}
 448
 449
 450static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
 451{
 452    iscsi_allocmap_free(iscsilun);
 453
 454    iscsilun->allocmap_size =
 455        DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
 456                     iscsilun->cluster_sectors);
 457
 458    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
 459    if (!iscsilun->allocmap) {
 460        return -ENOMEM;
 461    }
 462
 463    if (open_flags & BDRV_O_NOCACHE) {
 464        /* in case that cache.direct = on all allocmap entries are
 465         * treated as invalid to force a relookup of the block
 466         * status on every read request */
 467        return 0;
 468    }
 469
 470    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
 471    if (!iscsilun->allocmap_valid) {
 472        /* if we are under memory pressure free the allocmap as well */
 473        iscsi_allocmap_free(iscsilun);
 474        return -ENOMEM;
 475    }
 476
 477    return 0;
 478}
 479
 480static void
 481iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
 482                      int nb_sectors, bool allocated, bool valid)
 483{
 484    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
 485
 486    if (iscsilun->allocmap == NULL) {
 487        return;
 488    }
 489    /* expand to entirely contain all affected clusters */
 490    cl_num_expanded = sector_num / iscsilun->cluster_sectors;
 491    nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
 492                                   iscsilun->cluster_sectors) - cl_num_expanded;
 493    /* shrink to touch only completely contained clusters */
 494    cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
 495    nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
 496                      - cl_num_shrunk;
 497    if (allocated) {
 498        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
 499    } else {
 500        bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
 501    }
 502
 503    if (iscsilun->allocmap_valid == NULL) {
 504        return;
 505    }
 506    if (valid) {
 507        bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
 508    } else {
 509        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
 510                     nb_cls_expanded);
 511    }
 512}
 513
 514static void
 515iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
 516                             int nb_sectors)
 517{
 518    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
 519}
 520
 521static void
 522iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
 523                               int nb_sectors)
 524{
 525    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
 526     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
 527     */
 528    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
 529}
 530
 531static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
 532                                       int nb_sectors)
 533{
 534    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
 535}
 536
 537static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
 538{
 539    if (iscsilun->allocmap) {
 540        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
 541    }
 542    if (iscsilun->allocmap_valid) {
 543        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
 544    }
 545}
 546
 547static inline bool
 548iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
 549                            int nb_sectors)
 550{
 551    unsigned long size;
 552    if (iscsilun->allocmap == NULL) {
 553        return true;
 554    }
 555    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 556    return !(find_next_bit(iscsilun->allocmap, size,
 557                           sector_num / iscsilun->cluster_sectors) == size);
 558}
 559
 560static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
 561                                           int64_t sector_num, int nb_sectors)
 562{
 563    unsigned long size;
 564    if (iscsilun->allocmap_valid == NULL) {
 565        return false;
 566    }
 567    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 568    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
 569                               sector_num / iscsilun->cluster_sectors) == size);
 570}
 571
 572static int coroutine_fn
 573iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 574                      QEMUIOVector *iov, int flags)
 575{
 576    IscsiLun *iscsilun = bs->opaque;
 577    struct IscsiTask iTask;
 578    uint64_t lba;
 579    uint32_t num_sectors;
 580    bool fua = flags & BDRV_REQ_FUA;
 581
 582    if (fua) {
 583        assert(iscsilun->dpofua);
 584    }
 585    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 586        return -EINVAL;
 587    }
 588
 589    if (bs->bl.max_transfer) {
 590        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 591    }
 592
 593    lba = sector_qemu2lun(sector_num, iscsilun);
 594    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 595    iscsi_co_init_iscsitask(iscsilun, &iTask);
 596retry:
 597    if (iscsilun->use_16_for_rw) {
 598        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
 599                                        NULL, num_sectors * iscsilun->block_size,
 600                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 601                                        iscsi_co_generic_cb, &iTask);
 602    } else {
 603        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
 604                                        NULL, num_sectors * iscsilun->block_size,
 605                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 606                                        iscsi_co_generic_cb, &iTask);
 607    }
 608    if (iTask.task == NULL) {
 609        return -ENOMEM;
 610    }
 611    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
 612                          iov->niov);
 613    while (!iTask.complete) {
 614        iscsi_set_events(iscsilun);
 615        qemu_coroutine_yield();
 616    }
 617
 618    if (iTask.task != NULL) {
 619        scsi_free_scsi_task(iTask.task);
 620        iTask.task = NULL;
 621    }
 622
 623    if (iTask.do_retry) {
 624        iTask.complete = 0;
 625        goto retry;
 626    }
 627
 628    if (iTask.status != SCSI_STATUS_GOOD) {
 629        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
 630        return iTask.err_code;
 631    }
 632
 633    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
 634
 635    return 0;
 636}
 637
 638
 639
 640static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
 641                                                  int64_t sector_num,
 642                                                  int nb_sectors, int *pnum,
 643                                                  BlockDriverState **file)
 644{
 645    IscsiLun *iscsilun = bs->opaque;
 646    struct scsi_get_lba_status *lbas = NULL;
 647    struct scsi_lba_status_descriptor *lbasd = NULL;
 648    struct IscsiTask iTask;
 649    int64_t ret;
 650
 651    iscsi_co_init_iscsitask(iscsilun, &iTask);
 652
 653    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 654        ret = -EINVAL;
 655        goto out;
 656    }
 657
 658    /* default to all sectors allocated */
 659    ret = BDRV_BLOCK_DATA;
 660    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
 661    *pnum = nb_sectors;
 662
 663    /* LUN does not support logical block provisioning */
 664    if (!iscsilun->lbpme) {
 665        goto out;
 666    }
 667
 668retry:
 669    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
 670                                  sector_qemu2lun(sector_num, iscsilun),
 671                                  8 + 16, iscsi_co_generic_cb,
 672                                  &iTask) == NULL) {
 673        ret = -ENOMEM;
 674        goto out;
 675    }
 676
 677    while (!iTask.complete) {
 678        iscsi_set_events(iscsilun);
 679        qemu_coroutine_yield();
 680    }
 681
 682    if (iTask.do_retry) {
 683        if (iTask.task != NULL) {
 684            scsi_free_scsi_task(iTask.task);
 685            iTask.task = NULL;
 686        }
 687        iTask.complete = 0;
 688        goto retry;
 689    }
 690
 691    if (iTask.status != SCSI_STATUS_GOOD) {
 692        /* in case the get_lba_status_callout fails (i.e.
 693         * because the device is busy or the cmd is not
 694         * supported) we pretend all blocks are allocated
 695         * for backwards compatibility */
 696        goto out;
 697    }
 698
 699    lbas = scsi_datain_unmarshall(iTask.task);
 700    if (lbas == NULL) {
 701        ret = -EIO;
 702        goto out;
 703    }
 704
 705    lbasd = &lbas->descriptors[0];
 706
 707    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
 708        ret = -EIO;
 709        goto out;
 710    }
 711
 712    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
 713
 714    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
 715        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
 716        ret &= ~BDRV_BLOCK_DATA;
 717        if (iscsilun->lbprz) {
 718            ret |= BDRV_BLOCK_ZERO;
 719        }
 720    }
 721
 722    if (ret & BDRV_BLOCK_ZERO) {
 723        iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
 724    } else {
 725        iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
 726    }
 727
 728    if (*pnum > nb_sectors) {
 729        *pnum = nb_sectors;
 730    }
 731out:
 732    if (iTask.task != NULL) {
 733        scsi_free_scsi_task(iTask.task);
 734    }
 735    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
 736        *file = bs;
 737    }
 738    return ret;
 739}
 740
 741static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
 742                                       int64_t sector_num, int nb_sectors,
 743                                       QEMUIOVector *iov)
 744{
 745    IscsiLun *iscsilun = bs->opaque;
 746    struct IscsiTask iTask;
 747    uint64_t lba;
 748    uint32_t num_sectors;
 749
 750    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 751        return -EINVAL;
 752    }
 753
 754    if (bs->bl.max_transfer) {
 755        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 756    }
 757
 758    /* if cache.direct is off and we have a valid entry in our allocation map
 759     * we can skip checking the block status and directly return zeroes if
 760     * the request falls within an unallocated area */
 761    if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
 762        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
 763            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 764            return 0;
 765    }
 766
 767    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
 768        !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
 769        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
 770        int pnum;
 771        BlockDriverState *file;
 772        /* check the block status from the beginning of the cluster
 773         * containing the start sector */
 774        int64_t ret = iscsi_co_get_block_status(bs,
 775                          sector_num - sector_num % iscsilun->cluster_sectors,
 776                          BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
 777        if (ret < 0) {
 778            return ret;
 779        }
 780        /* if the whole request falls into an unallocated area we can avoid
 781         * to read and directly return zeroes instead */
 782        if (ret & BDRV_BLOCK_ZERO &&
 783            pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
 784            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 785            return 0;
 786        }
 787    }
 788
 789    lba = sector_qemu2lun(sector_num, iscsilun);
 790    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 791
 792    iscsi_co_init_iscsitask(iscsilun, &iTask);
 793retry:
 794    if (iscsilun->use_16_for_rw) {
 795        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
 796                                       num_sectors * iscsilun->block_size,
 797                                       iscsilun->block_size, 0, 0, 0, 0, 0,
 798                                       iscsi_co_generic_cb, &iTask);
 799    } else {
 800        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
 801                                       num_sectors * iscsilun->block_size,
 802                                       iscsilun->block_size,
 803                                       0, 0, 0, 0, 0,
 804                                       iscsi_co_generic_cb, &iTask);
 805    }
 806    if (iTask.task == NULL) {
 807        return -ENOMEM;
 808    }
 809    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
 810
 811    while (!iTask.complete) {
 812        iscsi_set_events(iscsilun);
 813        qemu_coroutine_yield();
 814    }
 815
 816    if (iTask.task != NULL) {
 817        scsi_free_scsi_task(iTask.task);
 818        iTask.task = NULL;
 819    }
 820
 821    if (iTask.do_retry) {
 822        iTask.complete = 0;
 823        goto retry;
 824    }
 825
 826    if (iTask.status != SCSI_STATUS_GOOD) {
 827        return iTask.err_code;
 828    }
 829
 830    return 0;
 831}
 832
 833static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 834{
 835    IscsiLun *iscsilun = bs->opaque;
 836    struct IscsiTask iTask;
 837
 838    iscsi_co_init_iscsitask(iscsilun, &iTask);
 839retry:
 840    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
 841                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
 842        return -ENOMEM;
 843    }
 844
 845    while (!iTask.complete) {
 846        iscsi_set_events(iscsilun);
 847        qemu_coroutine_yield();
 848    }
 849
 850    if (iTask.task != NULL) {
 851        scsi_free_scsi_task(iTask.task);
 852        iTask.task = NULL;
 853    }
 854
 855    if (iTask.do_retry) {
 856        iTask.complete = 0;
 857        goto retry;
 858    }
 859
 860    if (iTask.status != SCSI_STATUS_GOOD) {
 861        return iTask.err_code;
 862    }
 863
 864    return 0;
 865}
 866
 867#ifdef __linux__
 868static void
 869iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 870                     void *command_data, void *opaque)
 871{
 872    IscsiAIOCB *acb = opaque;
 873
 874    g_free(acb->buf);
 875    acb->buf = NULL;
 876
 877    acb->status = 0;
 878    if (status < 0) {
 879        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
 880                     iscsi_get_error(iscsi));
 881        acb->status = iscsi_translate_sense(&acb->task->sense);
 882    }
 883
 884    acb->ioh->driver_status = 0;
 885    acb->ioh->host_status   = 0;
 886    acb->ioh->resid         = 0;
 887    acb->ioh->status        = status;
 888
 889#define SG_ERR_DRIVER_SENSE    0x08
 890
 891    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
 892        int ss;
 893
 894        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
 895
 896        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
 897        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
 898             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
 899        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
 900    }
 901
 902    iscsi_schedule_bh(acb);
 903}
 904
 905static void iscsi_ioctl_bh_completion(void *opaque)
 906{
 907    IscsiAIOCB *acb = opaque;
 908
 909    qemu_bh_delete(acb->bh);
 910    acb->common.cb(acb->common.opaque, acb->ret);
 911    qemu_aio_unref(acb);
 912}
 913
 914static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
 915{
 916    BlockDriverState *bs = acb->common.bs;
 917    IscsiLun *iscsilun = bs->opaque;
 918    int ret = 0;
 919
 920    switch (req) {
 921    case SG_GET_VERSION_NUM:
 922        *(int *)buf = 30000;
 923        break;
 924    case SG_GET_SCSI_ID:
 925        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
 926        break;
 927    default:
 928        ret = -EINVAL;
 929    }
 930    assert(!acb->bh);
 931    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
 932                         iscsi_ioctl_bh_completion, acb);
 933    acb->ret = ret;
 934    qemu_bh_schedule(acb->bh);
 935}
 936
 937static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 938        unsigned long int req, void *buf,
 939        BlockCompletionFunc *cb, void *opaque)
 940{
 941    IscsiLun *iscsilun = bs->opaque;
 942    struct iscsi_context *iscsi = iscsilun->iscsi;
 943    struct iscsi_data data;
 944    IscsiAIOCB *acb;
 945
 946    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
 947
 948    acb->iscsilun = iscsilun;
 949    acb->bh          = NULL;
 950    acb->status      = -EINPROGRESS;
 951    acb->buf         = NULL;
 952    acb->ioh         = buf;
 953
 954    if (req != SG_IO) {
 955        iscsi_ioctl_handle_emulated(acb, req, buf);
 956        return &acb->common;
 957    }
 958
 959    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
 960        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
 961                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
 962        qemu_aio_unref(acb);
 963        return NULL;
 964    }
 965
 966    acb->task = malloc(sizeof(struct scsi_task));
 967    if (acb->task == NULL) {
 968        error_report("iSCSI: Failed to allocate task for scsi command. %s",
 969                     iscsi_get_error(iscsi));
 970        qemu_aio_unref(acb);
 971        return NULL;
 972    }
 973    memset(acb->task, 0, sizeof(struct scsi_task));
 974
 975    switch (acb->ioh->dxfer_direction) {
 976    case SG_DXFER_TO_DEV:
 977        acb->task->xfer_dir = SCSI_XFER_WRITE;
 978        break;
 979    case SG_DXFER_FROM_DEV:
 980        acb->task->xfer_dir = SCSI_XFER_READ;
 981        break;
 982    default:
 983        acb->task->xfer_dir = SCSI_XFER_NONE;
 984        break;
 985    }
 986
 987    acb->task->cdb_size = acb->ioh->cmd_len;
 988    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
 989    acb->task->expxferlen = acb->ioh->dxfer_len;
 990
 991    data.size = 0;
 992    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
 993        if (acb->ioh->iovec_count == 0) {
 994            data.data = acb->ioh->dxferp;
 995            data.size = acb->ioh->dxfer_len;
 996        } else {
 997            scsi_task_set_iov_out(acb->task,
 998                                 (struct scsi_iovec *) acb->ioh->dxferp,
 999                                 acb->ioh->iovec_count);
1000        }
1001    }
1002
1003    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1004                                 iscsi_aio_ioctl_cb,
1005                                 (data.size > 0) ? &data : NULL,
1006                                 acb) != 0) {
1007        scsi_free_scsi_task(acb->task);
1008        qemu_aio_unref(acb);
1009        return NULL;
1010    }
1011
1012    /* tell libiscsi to read straight into the buffer we got from ioctl */
1013    if (acb->task->xfer_dir == SCSI_XFER_READ) {
1014        if (acb->ioh->iovec_count == 0) {
1015            scsi_task_add_data_in_buffer(acb->task,
1016                                         acb->ioh->dxfer_len,
1017                                         acb->ioh->dxferp);
1018        } else {
1019            scsi_task_set_iov_in(acb->task,
1020                                 (struct scsi_iovec *) acb->ioh->dxferp,
1021                                 acb->ioh->iovec_count);
1022        }
1023    }
1024
1025    iscsi_set_events(iscsilun);
1026
1027    return &acb->common;
1028}
1029
1030#endif
1031
1032static int64_t
1033iscsi_getlength(BlockDriverState *bs)
1034{
1035    IscsiLun *iscsilun = bs->opaque;
1036    int64_t len;
1037
1038    len  = iscsilun->num_blocks;
1039    len *= iscsilun->block_size;
1040
1041    return len;
1042}
1043
1044static int
1045coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
1046{
1047    IscsiLun *iscsilun = bs->opaque;
1048    struct IscsiTask iTask;
1049    struct unmap_list list;
1050
1051    assert(is_byte_request_lun_aligned(offset, count, iscsilun));
1052
1053    if (!iscsilun->lbp.lbpu) {
1054        /* UNMAP is not supported by the target */
1055        return 0;
1056    }
1057
1058    list.lba = offset / iscsilun->block_size;
1059    list.num = count / iscsilun->block_size;
1060
1061    iscsi_co_init_iscsitask(iscsilun, &iTask);
1062retry:
1063    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1064                         iscsi_co_generic_cb, &iTask) == NULL) {
1065        return -ENOMEM;
1066    }
1067
1068    while (!iTask.complete) {
1069        iscsi_set_events(iscsilun);
1070        qemu_coroutine_yield();
1071    }
1072
1073    if (iTask.task != NULL) {
1074        scsi_free_scsi_task(iTask.task);
1075        iTask.task = NULL;
1076    }
1077
1078    if (iTask.do_retry) {
1079        iTask.complete = 0;
1080        goto retry;
1081    }
1082
1083    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1084        /* the target might fail with a check condition if it
1085           is not happy with the alignment of the UNMAP request
1086           we silently fail in this case */
1087        return 0;
1088    }
1089
1090    if (iTask.status != SCSI_STATUS_GOOD) {
1091        return iTask.err_code;
1092    }
1093
1094    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1095                               count >> BDRV_SECTOR_BITS);
1096
1097    return 0;
1098}
1099
1100static int
1101coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1102                                    int count, BdrvRequestFlags flags)
1103{
1104    IscsiLun *iscsilun = bs->opaque;
1105    struct IscsiTask iTask;
1106    uint64_t lba;
1107    uint32_t nb_blocks;
1108    bool use_16_for_ws = iscsilun->use_16_for_rw;
1109
1110    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
1111        return -ENOTSUP;
1112    }
1113
1114    if (flags & BDRV_REQ_MAY_UNMAP) {
1115        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1116            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
1117            use_16_for_ws = true;
1118        }
1119        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1120            /* WRITESAME16 with UNMAP is not supported by the target,
1121             * fall back and try WRITESAME10/16 without UNMAP */
1122            flags &= ~BDRV_REQ_MAY_UNMAP;
1123            use_16_for_ws = iscsilun->use_16_for_rw;
1124        }
1125    }
1126
1127    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1128        /* WRITESAME without UNMAP is not supported by the target */
1129        return -ENOTSUP;
1130    }
1131
1132    lba = offset / iscsilun->block_size;
1133    nb_blocks = count / iscsilun->block_size;
1134
1135    if (iscsilun->zeroblock == NULL) {
1136        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1137        if (iscsilun->zeroblock == NULL) {
1138            return -ENOMEM;
1139        }
1140    }
1141
1142    iscsi_co_init_iscsitask(iscsilun, &iTask);
1143retry:
1144    if (use_16_for_ws) {
1145        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1146                                            iscsilun->zeroblock, iscsilun->block_size,
1147                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1148                                            0, 0, iscsi_co_generic_cb, &iTask);
1149    } else {
1150        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1151                                            iscsilun->zeroblock, iscsilun->block_size,
1152                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1153                                            0, 0, iscsi_co_generic_cb, &iTask);
1154    }
1155    if (iTask.task == NULL) {
1156        return -ENOMEM;
1157    }
1158
1159    while (!iTask.complete) {
1160        iscsi_set_events(iscsilun);
1161        qemu_coroutine_yield();
1162    }
1163
1164    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1165        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1166        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1167         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1168        /* WRITE SAME is not supported by the target */
1169        iscsilun->has_write_same = false;
1170        scsi_free_scsi_task(iTask.task);
1171        return -ENOTSUP;
1172    }
1173
1174    if (iTask.task != NULL) {
1175        scsi_free_scsi_task(iTask.task);
1176        iTask.task = NULL;
1177    }
1178
1179    if (iTask.do_retry) {
1180        iTask.complete = 0;
1181        goto retry;
1182    }
1183
1184    if (iTask.status != SCSI_STATUS_GOOD) {
1185        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1186                                   count >> BDRV_SECTOR_BITS);
1187        return iTask.err_code;
1188    }
1189
1190    if (flags & BDRV_REQ_MAY_UNMAP) {
1191        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1192                                   count >> BDRV_SECTOR_BITS);
1193    } else {
1194        iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
1195                                     count >> BDRV_SECTOR_BITS);
1196    }
1197
1198    return 0;
1199}
1200
1201static void parse_chap(struct iscsi_context *iscsi, const char *target,
1202                       Error **errp)
1203{
1204    QemuOptsList *list;
1205    QemuOpts *opts;
1206    const char *user = NULL;
1207    const char *password = NULL;
1208    const char *secretid;
1209    char *secret = NULL;
1210
1211    list = qemu_find_opts("iscsi");
1212    if (!list) {
1213        return;
1214    }
1215
1216    opts = qemu_opts_find(list, target);
1217    if (opts == NULL) {
1218        opts = QTAILQ_FIRST(&list->head);
1219        if (!opts) {
1220            return;
1221        }
1222    }
1223
1224    user = qemu_opt_get(opts, "user");
1225    if (!user) {
1226        return;
1227    }
1228
1229    secretid = qemu_opt_get(opts, "password-secret");
1230    password = qemu_opt_get(opts, "password");
1231    if (secretid && password) {
1232        error_setg(errp, "'password' and 'password-secret' properties are "
1233                   "mutually exclusive");
1234        return;
1235    }
1236    if (secretid) {
1237        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1238        if (!secret) {
1239            return;
1240        }
1241        password = secret;
1242    } else if (!password) {
1243        error_setg(errp, "CHAP username specified but no password was given");
1244        return;
1245    }
1246
1247    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1248        error_setg(errp, "Failed to set initiator username and password");
1249    }
1250
1251    g_free(secret);
1252}
1253
1254static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1255                                Error **errp)
1256{
1257    QemuOptsList *list;
1258    QemuOpts *opts;
1259    const char *digest = NULL;
1260
1261    list = qemu_find_opts("iscsi");
1262    if (!list) {
1263        return;
1264    }
1265
1266    opts = qemu_opts_find(list, target);
1267    if (opts == NULL) {
1268        opts = QTAILQ_FIRST(&list->head);
1269        if (!opts) {
1270            return;
1271        }
1272    }
1273
1274    digest = qemu_opt_get(opts, "header-digest");
1275    if (!digest) {
1276        return;
1277    }
1278
1279    if (!strcmp(digest, "CRC32C")) {
1280        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1281    } else if (!strcmp(digest, "NONE")) {
1282        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1283    } else if (!strcmp(digest, "CRC32C-NONE")) {
1284        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1285    } else if (!strcmp(digest, "NONE-CRC32C")) {
1286        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1287    } else {
1288        error_setg(errp, "Invalid header-digest setting : %s", digest);
1289    }
1290}
1291
1292static char *parse_initiator_name(const char *target)
1293{
1294    QemuOptsList *list;
1295    QemuOpts *opts;
1296    const char *name;
1297    char *iscsi_name;
1298    UuidInfo *uuid_info;
1299
1300    list = qemu_find_opts("iscsi");
1301    if (list) {
1302        opts = qemu_opts_find(list, target);
1303        if (!opts) {
1304            opts = QTAILQ_FIRST(&list->head);
1305        }
1306        if (opts) {
1307            name = qemu_opt_get(opts, "initiator-name");
1308            if (name) {
1309                return g_strdup(name);
1310            }
1311        }
1312    }
1313
1314    uuid_info = qmp_query_uuid(NULL);
1315    if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1316        name = qemu_get_vm_name();
1317    } else {
1318        name = uuid_info->UUID;
1319    }
1320    iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1321                                 name ? ":" : "", name ? name : "");
1322    qapi_free_UuidInfo(uuid_info);
1323    return iscsi_name;
1324}
1325
1326static int parse_timeout(const char *target)
1327{
1328    QemuOptsList *list;
1329    QemuOpts *opts;
1330    const char *timeout;
1331
1332    list = qemu_find_opts("iscsi");
1333    if (list) {
1334        opts = qemu_opts_find(list, target);
1335        if (!opts) {
1336            opts = QTAILQ_FIRST(&list->head);
1337        }
1338        if (opts) {
1339            timeout = qemu_opt_get(opts, "timeout");
1340            if (timeout) {
1341                return atoi(timeout);
1342            }
1343        }
1344    }
1345
1346    return 0;
1347}
1348
1349static void iscsi_nop_timed_event(void *opaque)
1350{
1351    IscsiLun *iscsilun = opaque;
1352
1353    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1354        error_report("iSCSI: NOP timeout. Reconnecting...");
1355        iscsilun->request_timed_out = true;
1356    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1357        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1358        return;
1359    }
1360
1361    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1362    iscsi_set_events(iscsilun);
1363}
1364
1365static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1366{
1367    struct scsi_task *task = NULL;
1368    struct scsi_readcapacity10 *rc10 = NULL;
1369    struct scsi_readcapacity16 *rc16 = NULL;
1370    int retries = ISCSI_CMD_RETRIES; 
1371
1372    do {
1373        if (task != NULL) {
1374            scsi_free_scsi_task(task);
1375            task = NULL;
1376        }
1377
1378        switch (iscsilun->type) {
1379        case TYPE_DISK:
1380            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1381            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1382                rc16 = scsi_datain_unmarshall(task);
1383                if (rc16 == NULL) {
1384                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1385                } else {
1386                    iscsilun->block_size = rc16->block_length;
1387                    iscsilun->num_blocks = rc16->returned_lba + 1;
1388                    iscsilun->lbpme = !!rc16->lbpme;
1389                    iscsilun->lbprz = !!rc16->lbprz;
1390                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1391                }
1392                break;
1393            }
1394            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1395                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1396                break;
1397            }
1398            /* Fall through and try READ CAPACITY(10) instead.  */
1399        case TYPE_ROM:
1400            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1401            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1402                rc10 = scsi_datain_unmarshall(task);
1403                if (rc10 == NULL) {
1404                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1405                } else {
1406                    iscsilun->block_size = rc10->block_size;
1407                    if (rc10->lba == 0) {
1408                        /* blank disk loaded */
1409                        iscsilun->num_blocks = 0;
1410                    } else {
1411                        iscsilun->num_blocks = rc10->lba + 1;
1412                    }
1413                }
1414            }
1415            break;
1416        default:
1417            return;
1418        }
1419    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1420             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1421             && retries-- > 0);
1422
1423    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1424        error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1425    } else if (!iscsilun->block_size ||
1426               iscsilun->block_size % BDRV_SECTOR_SIZE) {
1427        error_setg(errp, "iSCSI: the target returned an invalid "
1428                   "block size of %d.", iscsilun->block_size);
1429    }
1430    if (task) {
1431        scsi_free_scsi_task(task);
1432    }
1433}
1434
1435/* TODO Convert to fine grained options */
1436static QemuOptsList runtime_opts = {
1437    .name = "iscsi",
1438    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1439    .desc = {
1440        {
1441            .name = "filename",
1442            .type = QEMU_OPT_STRING,
1443            .help = "URL to the iscsi image",
1444        },
1445        { /* end of list */ }
1446    },
1447};
1448
1449static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1450                                          int evpd, int pc, void **inq, Error **errp)
1451{
1452    int full_size;
1453    struct scsi_task *task = NULL;
1454    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1455    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1456        goto fail;
1457    }
1458    full_size = scsi_datain_getfullsize(task);
1459    if (full_size > task->datain.size) {
1460        scsi_free_scsi_task(task);
1461
1462        /* we need more data for the full list */
1463        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1464        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1465            goto fail;
1466        }
1467    }
1468
1469    *inq = scsi_datain_unmarshall(task);
1470    if (*inq == NULL) {
1471        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1472        goto fail_with_err;
1473    }
1474
1475    return task;
1476
1477fail:
1478    error_setg(errp, "iSCSI: Inquiry command failed : %s",
1479               iscsi_get_error(iscsi));
1480fail_with_err:
1481    if (task != NULL) {
1482        scsi_free_scsi_task(task);
1483    }
1484    return NULL;
1485}
1486
1487static void iscsi_detach_aio_context(BlockDriverState *bs)
1488{
1489    IscsiLun *iscsilun = bs->opaque;
1490
1491    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1492                       false, NULL, NULL, NULL);
1493    iscsilun->events = 0;
1494
1495    if (iscsilun->nop_timer) {
1496        timer_del(iscsilun->nop_timer);
1497        timer_free(iscsilun->nop_timer);
1498        iscsilun->nop_timer = NULL;
1499    }
1500    if (iscsilun->event_timer) {
1501        timer_del(iscsilun->event_timer);
1502        timer_free(iscsilun->event_timer);
1503        iscsilun->event_timer = NULL;
1504    }
1505}
1506
1507static void iscsi_attach_aio_context(BlockDriverState *bs,
1508                                     AioContext *new_context)
1509{
1510    IscsiLun *iscsilun = bs->opaque;
1511
1512    iscsilun->aio_context = new_context;
1513    iscsi_set_events(iscsilun);
1514
1515    /* Set up a timer for sending out iSCSI NOPs */
1516    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1517                                        QEMU_CLOCK_REALTIME, SCALE_MS,
1518                                        iscsi_nop_timed_event, iscsilun);
1519    timer_mod(iscsilun->nop_timer,
1520              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1521
1522    /* Set up a timer for periodic calls to iscsi_set_events and to
1523     * scan for command timeout */
1524    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1525                                          QEMU_CLOCK_REALTIME, SCALE_MS,
1526                                          iscsi_timed_check_events, iscsilun);
1527    timer_mod(iscsilun->event_timer,
1528              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1529}
1530
1531static void iscsi_modesense_sync(IscsiLun *iscsilun)
1532{
1533    struct scsi_task *task;
1534    struct scsi_mode_sense *ms = NULL;
1535    iscsilun->write_protected = false;
1536    iscsilun->dpofua = false;
1537
1538    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1539                                 1, SCSI_MODESENSE_PC_CURRENT,
1540                                 0x3F, 0, 255);
1541    if (task == NULL) {
1542        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1543                     iscsi_get_error(iscsilun->iscsi));
1544        goto out;
1545    }
1546
1547    if (task->status != SCSI_STATUS_GOOD) {
1548        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1549        goto out;
1550    }
1551    ms = scsi_datain_unmarshall(task);
1552    if (!ms) {
1553        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1554                     iscsi_get_error(iscsilun->iscsi));
1555        goto out;
1556    }
1557    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1558    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1559
1560out:
1561    if (task) {
1562        scsi_free_scsi_task(task);
1563    }
1564}
1565
1566/*
1567 * We support iscsi url's on the form
1568 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1569 */
1570static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1571                      Error **errp)
1572{
1573    IscsiLun *iscsilun = bs->opaque;
1574    struct iscsi_context *iscsi = NULL;
1575    struct iscsi_url *iscsi_url = NULL;
1576    struct scsi_task *task = NULL;
1577    struct scsi_inquiry_standard *inq = NULL;
1578    struct scsi_inquiry_supported_pages *inq_vpd;
1579    char *initiator_name = NULL;
1580    QemuOpts *opts;
1581    Error *local_err = NULL;
1582    const char *filename;
1583    int i, ret = 0, timeout = 0;
1584
1585    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1586    qemu_opts_absorb_qdict(opts, options, &local_err);
1587    if (local_err) {
1588        error_propagate(errp, local_err);
1589        ret = -EINVAL;
1590        goto out;
1591    }
1592
1593    filename = qemu_opt_get(opts, "filename");
1594
1595    iscsi_url = iscsi_parse_full_url(iscsi, filename);
1596    if (iscsi_url == NULL) {
1597        error_setg(errp, "Failed to parse URL : %s", filename);
1598        ret = -EINVAL;
1599        goto out;
1600    }
1601
1602    memset(iscsilun, 0, sizeof(IscsiLun));
1603
1604    initiator_name = parse_initiator_name(iscsi_url->target);
1605
1606    iscsi = iscsi_create_context(initiator_name);
1607    if (iscsi == NULL) {
1608        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1609        ret = -ENOMEM;
1610        goto out;
1611    }
1612
1613    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1614        error_setg(errp, "iSCSI: Failed to set target name.");
1615        ret = -EINVAL;
1616        goto out;
1617    }
1618
1619    if (iscsi_url->user[0] != '\0') {
1620        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1621                                              iscsi_url->passwd);
1622        if (ret != 0) {
1623            error_setg(errp, "Failed to set initiator username and password");
1624            ret = -EINVAL;
1625            goto out;
1626        }
1627    }
1628
1629    /* check if we got CHAP username/password via the options */
1630    parse_chap(iscsi, iscsi_url->target, &local_err);
1631    if (local_err != NULL) {
1632        error_propagate(errp, local_err);
1633        ret = -EINVAL;
1634        goto out;
1635    }
1636
1637    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1638        error_setg(errp, "iSCSI: Failed to set session type to normal.");
1639        ret = -EINVAL;
1640        goto out;
1641    }
1642
1643    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1644
1645    /* check if we got HEADER_DIGEST via the options */
1646    parse_header_digest(iscsi, iscsi_url->target, &local_err);
1647    if (local_err != NULL) {
1648        error_propagate(errp, local_err);
1649        ret = -EINVAL;
1650        goto out;
1651    }
1652
1653    /* timeout handling is broken in libiscsi before 1.15.0 */
1654    timeout = parse_timeout(iscsi_url->target);
1655#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
1656    iscsi_set_timeout(iscsi, timeout);
1657#else
1658    if (timeout) {
1659        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1660    }
1661#endif
1662
1663    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1664        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1665            iscsi_get_error(iscsi));
1666        ret = -EINVAL;
1667        goto out;
1668    }
1669
1670    iscsilun->iscsi = iscsi;
1671    iscsilun->aio_context = bdrv_get_aio_context(bs);
1672    iscsilun->lun   = iscsi_url->lun;
1673    iscsilun->has_write_same = true;
1674
1675    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1676                            (void **) &inq, errp);
1677    if (task == NULL) {
1678        ret = -EINVAL;
1679        goto out;
1680    }
1681    iscsilun->type = inq->periperal_device_type;
1682    scsi_free_scsi_task(task);
1683    task = NULL;
1684
1685    iscsi_modesense_sync(iscsilun);
1686    if (iscsilun->dpofua) {
1687        bs->supported_write_flags = BDRV_REQ_FUA;
1688    }
1689    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1690
1691    /* Check the write protect flag of the LUN if we want to write */
1692    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1693        iscsilun->write_protected) {
1694        error_setg(errp, "Cannot open a write protected LUN as read-write");
1695        ret = -EACCES;
1696        goto out;
1697    }
1698
1699    iscsi_readcapacity_sync(iscsilun, &local_err);
1700    if (local_err != NULL) {
1701        error_propagate(errp, local_err);
1702        ret = -EINVAL;
1703        goto out;
1704    }
1705    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1706
1707    /* We don't have any emulation for devices other than disks and CD-ROMs, so
1708     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1709     * will try to read from the device to guess the image format.
1710     */
1711    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1712        bs->sg = true;
1713    }
1714
1715    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1716                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1717                            (void **) &inq_vpd, errp);
1718    if (task == NULL) {
1719        ret = -EINVAL;
1720        goto out;
1721    }
1722    for (i = 0; i < inq_vpd->num_pages; i++) {
1723        struct scsi_task *inq_task;
1724        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1725        struct scsi_inquiry_block_limits *inq_bl;
1726        switch (inq_vpd->pages[i]) {
1727        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1728            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1729                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1730                                        (void **) &inq_lbp, errp);
1731            if (inq_task == NULL) {
1732                ret = -EINVAL;
1733                goto out;
1734            }
1735            memcpy(&iscsilun->lbp, inq_lbp,
1736                   sizeof(struct scsi_inquiry_logical_block_provisioning));
1737            scsi_free_scsi_task(inq_task);
1738            break;
1739        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1740            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1741                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1742                                    (void **) &inq_bl, errp);
1743            if (inq_task == NULL) {
1744                ret = -EINVAL;
1745                goto out;
1746            }
1747            memcpy(&iscsilun->bl, inq_bl,
1748                   sizeof(struct scsi_inquiry_block_limits));
1749            scsi_free_scsi_task(inq_task);
1750            break;
1751        default:
1752            break;
1753        }
1754    }
1755    scsi_free_scsi_task(task);
1756    task = NULL;
1757
1758    iscsi_attach_aio_context(bs, iscsilun->aio_context);
1759
1760    /* Guess the internal cluster (page) size of the iscsi target by the means
1761     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1762     * reasonable size */
1763    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1764        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1765        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1766                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
1767        if (iscsilun->lbprz) {
1768            ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
1769        }
1770    }
1771
1772out:
1773    qemu_opts_del(opts);
1774    g_free(initiator_name);
1775    if (iscsi_url != NULL) {
1776        iscsi_destroy_url(iscsi_url);
1777    }
1778    if (task != NULL) {
1779        scsi_free_scsi_task(task);
1780    }
1781
1782    if (ret) {
1783        if (iscsi != NULL) {
1784            if (iscsi_is_logged_in(iscsi)) {
1785                iscsi_logout_sync(iscsi);
1786            }
1787            iscsi_destroy_context(iscsi);
1788        }
1789        memset(iscsilun, 0, sizeof(IscsiLun));
1790    }
1791    return ret;
1792}
1793
1794static void iscsi_close(BlockDriverState *bs)
1795{
1796    IscsiLun *iscsilun = bs->opaque;
1797    struct iscsi_context *iscsi = iscsilun->iscsi;
1798
1799    iscsi_detach_aio_context(bs);
1800    if (iscsi_is_logged_in(iscsi)) {
1801        iscsi_logout_sync(iscsi);
1802    }
1803    iscsi_destroy_context(iscsi);
1804    g_free(iscsilun->zeroblock);
1805    iscsi_allocmap_free(iscsilun);
1806    memset(iscsilun, 0, sizeof(IscsiLun));
1807}
1808
1809static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1810{
1811    /* We don't actually refresh here, but just return data queried in
1812     * iscsi_open(): iscsi targets don't change their limits. */
1813
1814    IscsiLun *iscsilun = bs->opaque;
1815    uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1816
1817    bs->bl.request_alignment = iscsilun->block_size;
1818
1819    if (iscsilun->bl.max_xfer_len) {
1820        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1821    }
1822
1823    if (max_xfer_len * iscsilun->block_size < INT_MAX) {
1824        bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
1825    }
1826
1827    if (iscsilun->lbp.lbpu) {
1828        if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
1829            bs->bl.max_pdiscard =
1830                iscsilun->bl.max_unmap * iscsilun->block_size;
1831        }
1832        bs->bl.pdiscard_alignment =
1833            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1834    } else {
1835        bs->bl.pdiscard_alignment = iscsilun->block_size;
1836    }
1837
1838    if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
1839        bs->bl.max_pwrite_zeroes =
1840            iscsilun->bl.max_ws_len * iscsilun->block_size;
1841    }
1842    if (iscsilun->lbp.lbpws) {
1843        bs->bl.pwrite_zeroes_alignment =
1844            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1845    } else {
1846        bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
1847    }
1848    if (iscsilun->bl.opt_xfer_len &&
1849        iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
1850        bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
1851                                        iscsilun->block_size);
1852    }
1853}
1854
1855/* Note that this will not re-establish a connection with an iSCSI target - it
1856 * is effectively a NOP.  */
1857static int iscsi_reopen_prepare(BDRVReopenState *state,
1858                                BlockReopenQueue *queue, Error **errp)
1859{
1860    IscsiLun *iscsilun = state->bs->opaque;
1861
1862    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1863        error_setg(errp, "Cannot open a write protected LUN as read-write");
1864        return -EACCES;
1865    }
1866    return 0;
1867}
1868
1869static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
1870{
1871    IscsiLun *iscsilun = reopen_state->bs->opaque;
1872
1873    /* the cache.direct status might have changed */
1874    if (iscsilun->allocmap != NULL) {
1875        iscsi_allocmap_init(iscsilun, reopen_state->flags);
1876    }
1877}
1878
1879static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1880{
1881    IscsiLun *iscsilun = bs->opaque;
1882    Error *local_err = NULL;
1883
1884    if (iscsilun->type != TYPE_DISK) {
1885        return -ENOTSUP;
1886    }
1887
1888    iscsi_readcapacity_sync(iscsilun, &local_err);
1889    if (local_err != NULL) {
1890        error_free(local_err);
1891        return -EIO;
1892    }
1893
1894    if (offset > iscsi_getlength(bs)) {
1895        return -EINVAL;
1896    }
1897
1898    if (iscsilun->allocmap != NULL) {
1899        iscsi_allocmap_init(iscsilun, bs->open_flags);
1900    }
1901
1902    return 0;
1903}
1904
1905static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1906{
1907    int ret = 0;
1908    int64_t total_size = 0;
1909    BlockDriverState *bs;
1910    IscsiLun *iscsilun = NULL;
1911    QDict *bs_options;
1912
1913    bs = bdrv_new();
1914
1915    /* Read out options */
1916    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1917                              BDRV_SECTOR_SIZE);
1918    bs->opaque = g_new0(struct IscsiLun, 1);
1919    iscsilun = bs->opaque;
1920
1921    bs_options = qdict_new();
1922    qdict_put(bs_options, "filename", qstring_from_str(filename));
1923    ret = iscsi_open(bs, bs_options, 0, NULL);
1924    QDECREF(bs_options);
1925
1926    if (ret != 0) {
1927        goto out;
1928    }
1929    iscsi_detach_aio_context(bs);
1930    if (iscsilun->type != TYPE_DISK) {
1931        ret = -ENODEV;
1932        goto out;
1933    }
1934    if (bs->total_sectors < total_size) {
1935        ret = -ENOSPC;
1936        goto out;
1937    }
1938
1939    ret = 0;
1940out:
1941    if (iscsilun->iscsi != NULL) {
1942        iscsi_destroy_context(iscsilun->iscsi);
1943    }
1944    g_free(bs->opaque);
1945    bs->opaque = NULL;
1946    bdrv_unref(bs);
1947    return ret;
1948}
1949
1950static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1951{
1952    IscsiLun *iscsilun = bs->opaque;
1953    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1954    bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1955    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1956    return 0;
1957}
1958
1959static void iscsi_invalidate_cache(BlockDriverState *bs,
1960                                   Error **errp)
1961{
1962    IscsiLun *iscsilun = bs->opaque;
1963    iscsi_allocmap_invalidate(iscsilun);
1964}
1965
1966static QemuOptsList iscsi_create_opts = {
1967    .name = "iscsi-create-opts",
1968    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1969    .desc = {
1970        {
1971            .name = BLOCK_OPT_SIZE,
1972            .type = QEMU_OPT_SIZE,
1973            .help = "Virtual disk size"
1974        },
1975        { /* end of list */ }
1976    }
1977};
1978
1979static BlockDriver bdrv_iscsi = {
1980    .format_name     = "iscsi",
1981    .protocol_name   = "iscsi",
1982
1983    .instance_size   = sizeof(IscsiLun),
1984    .bdrv_needs_filename = true,
1985    .bdrv_file_open  = iscsi_open,
1986    .bdrv_close      = iscsi_close,
1987    .bdrv_create     = iscsi_create,
1988    .create_opts     = &iscsi_create_opts,
1989    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
1990    .bdrv_reopen_commit    = iscsi_reopen_commit,
1991    .bdrv_invalidate_cache = iscsi_invalidate_cache,
1992
1993    .bdrv_getlength  = iscsi_getlength,
1994    .bdrv_get_info   = iscsi_get_info,
1995    .bdrv_truncate   = iscsi_truncate,
1996    .bdrv_refresh_limits = iscsi_refresh_limits,
1997
1998    .bdrv_co_get_block_status = iscsi_co_get_block_status,
1999    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2000    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2001    .bdrv_co_readv         = iscsi_co_readv,
2002    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2003    .bdrv_co_flush_to_disk = iscsi_co_flush,
2004
2005#ifdef __linux__
2006    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2007#endif
2008
2009    .bdrv_detach_aio_context = iscsi_detach_aio_context,
2010    .bdrv_attach_aio_context = iscsi_attach_aio_context,
2011};
2012
2013static QemuOptsList qemu_iscsi_opts = {
2014    .name = "iscsi",
2015    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
2016    .desc = {
2017        {
2018            .name = "user",
2019            .type = QEMU_OPT_STRING,
2020            .help = "username for CHAP authentication to target",
2021        },{
2022            .name = "password",
2023            .type = QEMU_OPT_STRING,
2024            .help = "password for CHAP authentication to target",
2025        },{
2026            .name = "password-secret",
2027            .type = QEMU_OPT_STRING,
2028            .help = "ID of the secret providing password for CHAP "
2029                    "authentication to target",
2030        },{
2031            .name = "header-digest",
2032            .type = QEMU_OPT_STRING,
2033            .help = "HeaderDigest setting. "
2034                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
2035        },{
2036            .name = "initiator-name",
2037            .type = QEMU_OPT_STRING,
2038            .help = "Initiator iqn name to use when connecting",
2039        },{
2040            .name = "timeout",
2041            .type = QEMU_OPT_NUMBER,
2042            .help = "Request timeout in seconds (default 0 = no timeout)",
2043        },
2044        { /* end of list */ }
2045    },
2046};
2047
2048static void iscsi_block_init(void)
2049{
2050    bdrv_register(&bdrv_iscsi);
2051    qemu_add_opts(&qemu_iscsi_opts);
2052}
2053
2054block_init(iscsi_block_init);
2055