qemu/block/iscsi.c
<<
>>
Prefs
   1/*
   2 * QEMU Block driver for iSCSI images
   3 *
   4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
   5 * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28#include <poll.h>
  29#include <math.h>
  30#include <arpa/inet.h>
  31#include "qemu-common.h"
  32#include "qemu/config-file.h"
  33#include "qemu/error-report.h"
  34#include "qemu/bitops.h"
  35#include "qemu/bitmap.h"
  36#include "block/block_int.h"
  37#include "block/scsi.h"
  38#include "qemu/iov.h"
  39#include "qemu/uuid.h"
  40#include "qmp-commands.h"
  41#include "qapi/qmp/qstring.h"
  42#include "crypto/secret.h"
  43
  44#include <iscsi/iscsi.h>
  45#include <iscsi/scsi-lowlevel.h>
  46
  47#ifdef __linux__
  48#include <scsi/sg.h>
  49#endif
  50
/* Per-LUN driver state, stored in bs->opaque of the block device. */
typedef struct IscsiLun {
    /* libiscsi connection context every request of this LUN is issued on */
    struct iscsi_context *iscsi;
    /* AioContext that bottom halves, timers and the socket fd handlers
     * of this LUN are attached to */
    AioContext *aio_context;
    /* LUN number handed to the libiscsi task functions */
    int lun;
    enum scsi_inquiry_peripheral_device_type type;
    /* size of one LUN block in bytes; used to convert between LUN blocks
     * and QEMU sectors of BDRV_SECTOR_SIZE bytes */
    int block_size;
    /* capacity of the LUN in blocks of block_size bytes */
    uint64_t num_blocks;
    /* event mask last registered by iscsi_set_events() */
    int events;
    QEMUTimer *nop_timer;
    /* re-armed every EVENT_INTERVAL ms by iscsi_timed_check_events() */
    QEMUTimer *event_timer;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
    /* The allocmap tracks which clusters (pages) on the iSCSI target are
     * allocated and which are not. In case a target returns zeros for
     * unallocated pages (iscsilun->lbprz) we can directly return zeros instead
     * of reading zeros over the wire if a read request falls within an
     * unallocated block. As there are 3 possible states we need 2 bitmaps to
     * track. allocmap_valid keeps track if QEMU's information about a page is
     * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
     * valid information about a page the corresponding allocmap entry should be
     * switched to unallocated as well to force a new lookup of the allocation
     * status as lookups are generally skipped if a page is suspected to be
     * allocated. If an iSCSI target is opened with cache.direct = on the
     * allocmap_valid does not exist turning all cached information invalid so
     * that a fresh lookup is made for any page even if the allocmap entry says
     * it is unallocated. */
    unsigned long *allocmap;
    unsigned long *allocmap_valid;
    /* number of bits (one per cluster) in each of the two bitmaps above */
    long allocmap_size;
    /* size of one allocmap cluster in QEMU sectors */
    int cluster_sectors;
    /* use the 16-byte READ/WRITE commands instead of the 10-byte ones */
    bool use_16_for_rw;
    bool write_protected;
    /* when false the LUN is treated as not supporting logical block
     * provisioning and every sector is reported as allocated */
    bool lbpme;
    /* unallocated blocks are reported as reading back zeroes */
    bool lbprz;
    /* must be set for a write carrying BDRV_REQ_FUA to be accepted */
    bool dpofua;
    bool has_write_same;
    /* set by the completion callback on SCSI_STATUS_TIMEOUT; makes
     * iscsi_timed_check_events() trigger a reconnect */
    bool request_timed_out;
} IscsiLun;
  90
/* Bookkeeping for one request issued from a coroutine (or polled
 * synchronously when co is NULL). Filled in by iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    /* SCSI status reported to the completion callback */
    int status;
    /* set to 1 once the waiter may continue (completion or retry timer) */
    int complete;
    /* number of failed attempts seen so far */
    int retries;
    /* set to 1 when the caller is expected to resubmit the command */
    int do_retry;
    /* libiscsi task handed to the completion callback; freed by the caller */
    struct scsi_task *task;
    /* coroutine to re-enter on completion, may be NULL */
    Coroutine *co;
    IscsiLun *iscsilun;
    /* delays the resubmission after Busy/TaskSetFull/Timeout */
    QEMUTimer retry_timer;
    /* negative errno derived from the sense data on final failure */
    int err_code;
} IscsiTask;
 102
/* State of one callback-style (AIO) request. */
typedef struct IscsiAIOCB {
    BlockAIOCB common;
    QEMUIOVector *qiov;
    /* bottom half that delivers the completion, see iscsi_schedule_bh() */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* libiscsi task of the request; released in iscsi_bh_cb() */
    struct scsi_task *task;
    /* temporary buffer, released with g_free() on completion */
    uint8_t *buf;
    /* result passed to the completion callback; -EINPROGRESS is what
     * iscsi_aio_cancel() treats as "still running" */
    int status;
    int64_t sector_num;
    int nb_sectors;
    int ret;
#ifdef __linux__
    /* SG_IO header whose status fields are filled in by the ioctl callback */
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
 118
/* libiscsi uses time_t so it's enough to process events every second */
#define EVENT_INTERVAL 1000
/* NOTE(review): presumably milliseconds like EVENT_INTERVAL; the users of
 * the two NOP constants are not part of this section - confirm */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* maximum number of retries: one per entry of iscsi_retry_times */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean back-off in ms for the n-th retry; the actual delay is drawn from
 * exp_random() with this mean */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};

/* this threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors we check the allocation status of the area covered by the
 * request first if the allocation map indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
 134
 135static void
 136iscsi_bh_cb(void *p)
 137{
 138    IscsiAIOCB *acb = p;
 139
 140    qemu_bh_delete(acb->bh);
 141
 142    g_free(acb->buf);
 143    acb->buf = NULL;
 144
 145    acb->common.cb(acb->common.opaque, acb->status);
 146
 147    if (acb->task != NULL) {
 148        scsi_free_scsi_task(acb->task);
 149        acb->task = NULL;
 150    }
 151
 152    qemu_aio_unref(acb);
 153}
 154
 155static void
 156iscsi_schedule_bh(IscsiAIOCB *acb)
 157{
 158    if (acb->bh) {
 159        return;
 160    }
 161    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
 162    qemu_bh_schedule(acb->bh);
 163}
 164
 165static void iscsi_co_generic_bh_cb(void *opaque)
 166{
 167    struct IscsiTask *iTask = opaque;
 168    iTask->complete = 1;
 169    qemu_coroutine_enter(iTask->co);
 170}
 171
 172static void iscsi_retry_timer_expired(void *opaque)
 173{
 174    struct IscsiTask *iTask = opaque;
 175    iTask->complete = 1;
 176    if (iTask->co) {
 177        qemu_coroutine_enter(iTask->co);
 178    }
 179}
 180
 181static inline unsigned exp_random(double mean)
 182{
 183    return -mean * log((double)rand() / RAND_MAX);
 184}
 185
/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 * libiscsi 1.10.0, together with other constants we need.  Use it as
 * a hint that we have to define them ourselves if needed, to keep the
 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 * the test because SCSI_STATUS_* is an enum.
 *
 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 * introduced in 1.11.0.  If it is present, there is no need to define
 * anything.
 */
#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    !defined(LIBISCSI_API_VERSION)
#define SCSI_STATUS_TASK_SET_FULL                          0x28
#define SCSI_STATUS_TIMEOUT                                0x0f000002
#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
#endif

/* Fallback for library headers that do not provide the macro, so that the
 * "#if LIBISCSI_API_VERSION ..." tests in this file always have a value to
 * compare against. The fallback is lower than the 20160603 threshold used
 * by the read/write paths, i.e. it selects their non-iov code. */
#ifndef LIBISCSI_API_VERSION
#define LIBISCSI_API_VERSION 20130701
#endif
 208
 209static int iscsi_translate_sense(struct scsi_sense *sense)
 210{
 211    int ret;
 212
 213    switch (sense->key) {
 214    case SCSI_SENSE_NOT_READY:
 215        return -EBUSY;
 216    case SCSI_SENSE_DATA_PROTECTION:
 217        return -EACCES;
 218    case SCSI_SENSE_COMMAND_ABORTED:
 219        return -ECANCELED;
 220    case SCSI_SENSE_ILLEGAL_REQUEST:
 221        /* Parse ASCQ */
 222        break;
 223    default:
 224        return -EIO;
 225    }
 226    switch (sense->ascq) {
 227    case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
 228    case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
 229    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
 230    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
 231        ret = -EINVAL;
 232        break;
 233    case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
 234        ret = -ENOSPC;
 235        break;
 236    case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
 237        ret = -ENOTSUP;
 238        break;
 239    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
 240    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
 241    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
 242        ret = -ENOMEDIUM;
 243        break;
 244    case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
 245        ret = -EACCES;
 246        break;
 247    default:
 248        ret = -EIO;
 249        break;
 250    }
 251    return ret;
 252}
 253
/*
 * Completion callback shared by the coroutine based requests.
 *
 * @status:       SCSI status of the command
 * @command_data: the struct scsi_task of the command
 * @opaque:       the IscsiTask of the request
 *
 * Records status and task in the IscsiTask and decides how to continue:
 *  - success: wake the waiter;
 *  - CHECK CONDITION with UNIT ATTENTION sense: ask for an immediate retry
 *    (do_retry = 1) and wake the waiter;
 *  - Busy / TaskSetFull / Timeout: ask for a retry but only wake the waiter
 *    from the retry timer after a back-off delay;
 *  - any other failure, or the retry budget being used up: translate the
 *    sense data to err_code and wake the waiter.
 */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        /* retries is incremented for every failed attempt, including the
         * one that finally exceeds the budget */
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_CHECK_CONDITION
                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
                error_report("iSCSI CheckCondition: %s",
                             iscsi_get_error(iscsi));
                iTask->do_retry = 1;
                goto out;
            }
            if (status == SCSI_STATUS_BUSY ||
                status == SCSI_STATUS_TIMEOUT ||
                status == SCSI_STATUS_TASK_SET_FULL) {
                /* retries was already incremented above, hence the - 1 */
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                if (status == SCSI_STATUS_TIMEOUT) {
                    /* make sure the request is rescheduled AFTER the
                     * reconnect is initiated */
                    retry_time = EVENT_INTERVAL * 2;
                    iTask->iscsilun->request_timed_out = true;
                }
                error_report("iSCSI Busy/TaskSetFull/TimeOut"
                             " (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                /* the waiter is woken by iscsi_retry_timer_expired(), not
                 * from here */
                return;
            }
        }
        iTask->err_code = iscsi_translate_sense(&task->sense);
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    }

out:
    if (iTask->co) {
        /* resume the coroutine from a bottom half rather than from within
         * this callback */
        aio_bh_schedule_oneshot(iTask->iscsilun->aio_context,
                                 iscsi_co_generic_bh_cb, iTask);
    } else {
        iTask->complete = 1;
    }
}
 310
 311static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 312{
 313    *iTask = (struct IscsiTask) {
 314        .co         = qemu_coroutine_self(),
 315        .iscsilun   = iscsilun,
 316    };
 317}
 318
 319static void
 320iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 321                    void *private_data)
 322{
 323    IscsiAIOCB *acb = private_data;
 324
 325    acb->status = -ECANCELED;
 326    iscsi_schedule_bh(acb);
 327}
 328
 329static void
 330iscsi_aio_cancel(BlockAIOCB *blockacb)
 331{
 332    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
 333    IscsiLun *iscsilun = acb->iscsilun;
 334
 335    if (acb->status != -EINPROGRESS) {
 336        return;
 337    }
 338
 339    /* send a task mgmt call to the target to cancel the task on the target */
 340    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
 341                                     iscsi_abort_task_cb, acb);
 342
 343}
 344
/* AIOCB description for requests of this driver: size of the per-request
 * state and the asynchronous cancel hook */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};


/* fd handlers; declared here because iscsi_set_events() installs them
 * before their definitions appear */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
 353
 354static void
 355iscsi_set_events(IscsiLun *iscsilun)
 356{
 357    struct iscsi_context *iscsi = iscsilun->iscsi;
 358    int ev = iscsi_which_events(iscsi);
 359
 360    if (ev != iscsilun->events) {
 361        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
 362                           false,
 363                           (ev & POLLIN) ? iscsi_process_read : NULL,
 364                           (ev & POLLOUT) ? iscsi_process_write : NULL,
 365                           iscsilun);
 366        iscsilun->events = ev;
 367    }
 368}
 369
 370static void iscsi_timed_check_events(void *opaque)
 371{
 372    IscsiLun *iscsilun = opaque;
 373
 374    /* check for timed out requests */
 375    iscsi_service(iscsilun->iscsi, 0);
 376
 377    if (iscsilun->request_timed_out) {
 378        iscsilun->request_timed_out = false;
 379        iscsi_reconnect(iscsilun->iscsi);
 380    }
 381
 382    /* newer versions of libiscsi may return zero events. Ensure we are able
 383     * to return to service once this situation changes. */
 384    iscsi_set_events(iscsilun);
 385
 386    timer_mod(iscsilun->event_timer,
 387              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 388}
 389
 390static void
 391iscsi_process_read(void *arg)
 392{
 393    IscsiLun *iscsilun = arg;
 394    struct iscsi_context *iscsi = iscsilun->iscsi;
 395
 396    iscsi_service(iscsi, POLLIN);
 397    iscsi_set_events(iscsilun);
 398}
 399
 400static void
 401iscsi_process_write(void *arg)
 402{
 403    IscsiLun *iscsilun = arg;
 404    struct iscsi_context *iscsi = iscsilun->iscsi;
 405
 406    iscsi_service(iscsi, POLLOUT);
 407    iscsi_set_events(iscsilun);
 408}
 409
 410static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
 411{
 412    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
 413}
 414
 415static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
 416{
 417    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 418}
 419
 420static bool is_byte_request_lun_aligned(int64_t offset, int count,
 421                                        IscsiLun *iscsilun)
 422{
 423    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
 424        error_report("iSCSI misaligned request: "
 425                     "iscsilun->block_size %u, offset %" PRIi64
 426                     ", count %d",
 427                     iscsilun->block_size, offset, count);
 428        return false;
 429    }
 430    return true;
 431}
 432
 433static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
 434                                          IscsiLun *iscsilun)
 435{
 436    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
 437    return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
 438                                       nb_sectors << BDRV_SECTOR_BITS,
 439                                       iscsilun);
 440}
 441
 442static void iscsi_allocmap_free(IscsiLun *iscsilun)
 443{
 444    g_free(iscsilun->allocmap);
 445    g_free(iscsilun->allocmap_valid);
 446    iscsilun->allocmap = NULL;
 447    iscsilun->allocmap_valid = NULL;
 448}
 449
 450
 451static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
 452{
 453    iscsi_allocmap_free(iscsilun);
 454
 455    iscsilun->allocmap_size =
 456        DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
 457                     iscsilun->cluster_sectors);
 458
 459    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
 460    if (!iscsilun->allocmap) {
 461        return -ENOMEM;
 462    }
 463
 464    if (open_flags & BDRV_O_NOCACHE) {
 465        /* in case that cache.direct = on all allocmap entries are
 466         * treated as invalid to force a relookup of the block
 467         * status on every read request */
 468        return 0;
 469    }
 470
 471    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
 472    if (!iscsilun->allocmap_valid) {
 473        /* if we are under memory pressure free the allocmap as well */
 474        iscsi_allocmap_free(iscsilun);
 475        return -ENOMEM;
 476    }
 477
 478    return 0;
 479}
 480
 481static void
 482iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
 483                      int nb_sectors, bool allocated, bool valid)
 484{
 485    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
 486
 487    if (iscsilun->allocmap == NULL) {
 488        return;
 489    }
 490    /* expand to entirely contain all affected clusters */
 491    cl_num_expanded = sector_num / iscsilun->cluster_sectors;
 492    nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
 493                                   iscsilun->cluster_sectors) - cl_num_expanded;
 494    /* shrink to touch only completely contained clusters */
 495    cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
 496    nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
 497                      - cl_num_shrunk;
 498    if (allocated) {
 499        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
 500    } else {
 501        if (nb_cls_shrunk > 0) {
 502            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
 503        }
 504    }
 505
 506    if (iscsilun->allocmap_valid == NULL) {
 507        return;
 508    }
 509    if (valid) {
 510        if (nb_cls_shrunk > 0) {
 511            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
 512        }
 513    } else {
 514        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
 515                     nb_cls_expanded);
 516    }
 517}
 518
 519static void
 520iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
 521                             int nb_sectors)
 522{
 523    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
 524}
 525
 526static void
 527iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
 528                               int nb_sectors)
 529{
 530    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
 531     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
 532     */
 533    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
 534}
 535
 536static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
 537                                       int nb_sectors)
 538{
 539    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
 540}
 541
 542static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
 543{
 544    if (iscsilun->allocmap) {
 545        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
 546    }
 547    if (iscsilun->allocmap_valid) {
 548        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
 549    }
 550}
 551
 552static inline bool
 553iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
 554                            int nb_sectors)
 555{
 556    unsigned long size;
 557    if (iscsilun->allocmap == NULL) {
 558        return true;
 559    }
 560    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 561    return !(find_next_bit(iscsilun->allocmap, size,
 562                           sector_num / iscsilun->cluster_sectors) == size);
 563}
 564
 565static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
 566                                           int64_t sector_num, int nb_sectors)
 567{
 568    unsigned long size;
 569    if (iscsilun->allocmap_valid == NULL) {
 570        return false;
 571    }
 572    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 573    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
 574                               sector_num / iscsilun->cluster_sectors) == size);
 575}
 576
/*
 * Write @nb_sectors sectors starting at @sector_num from @iov.
 *
 * @flags: BDRV_REQ_FUA requests a forced-unit-access write; this requires
 *         iscsilun->dpofua.
 *
 * Returns 0 on success, -EINVAL for a request that is not aligned to the
 * LUN block size, -ENOMEM if the command could not be created, or the
 * errno derived from the sense data of a failed command. The allocation
 * map is marked allocated for the range on success and invalid on failure.
 */
static int coroutine_fn
iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      QEMUIOVector *iov, int flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
    bool fua = flags & BDRV_REQ_FUA;

    if (fua) {
        assert(iscsilun->dpofua);
    }
    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (bs->bl.max_transfer) {
        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    }

    /* from here on the request is expressed in LUN blocks */
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    /* Depending on the libiscsi version the I/O vector is either passed
     * when the command is created (*_iov_task) or attached to the task
     * afterwards, see scsi_task_set_iov_out() below. Note that the braces
     * of the if/else are closed inside each preprocessor branch. */
    if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    }
#else
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
#endif
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
#if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
#endif
    /* sleep until the completion callback or the retry timer wakes us */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    /* the task is released before a possible retry creates a new one */
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        /* the state of the range on the target is unknown after a failure */
        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
        return iTask.err_code;
    }

    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);

    return 0;
}
 659
 660
 661
 662static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
 663                                                  int64_t sector_num,
 664                                                  int nb_sectors, int *pnum,
 665                                                  BlockDriverState **file)
 666{
 667    IscsiLun *iscsilun = bs->opaque;
 668    struct scsi_get_lba_status *lbas = NULL;
 669    struct scsi_lba_status_descriptor *lbasd = NULL;
 670    struct IscsiTask iTask;
 671    int64_t ret;
 672
 673    iscsi_co_init_iscsitask(iscsilun, &iTask);
 674
 675    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 676        ret = -EINVAL;
 677        goto out;
 678    }
 679
 680    /* default to all sectors allocated */
 681    ret = BDRV_BLOCK_DATA;
 682    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
 683    *pnum = nb_sectors;
 684
 685    /* LUN does not support logical block provisioning */
 686    if (!iscsilun->lbpme) {
 687        goto out;
 688    }
 689
 690retry:
 691    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
 692                                  sector_qemu2lun(sector_num, iscsilun),
 693                                  8 + 16, iscsi_co_generic_cb,
 694                                  &iTask) == NULL) {
 695        ret = -ENOMEM;
 696        goto out;
 697    }
 698
 699    while (!iTask.complete) {
 700        iscsi_set_events(iscsilun);
 701        qemu_coroutine_yield();
 702    }
 703
 704    if (iTask.do_retry) {
 705        if (iTask.task != NULL) {
 706            scsi_free_scsi_task(iTask.task);
 707            iTask.task = NULL;
 708        }
 709        iTask.complete = 0;
 710        goto retry;
 711    }
 712
 713    if (iTask.status != SCSI_STATUS_GOOD) {
 714        /* in case the get_lba_status_callout fails (i.e.
 715         * because the device is busy or the cmd is not
 716         * supported) we pretend all blocks are allocated
 717         * for backwards compatibility */
 718        goto out;
 719    }
 720
 721    lbas = scsi_datain_unmarshall(iTask.task);
 722    if (lbas == NULL) {
 723        ret = -EIO;
 724        goto out;
 725    }
 726
 727    lbasd = &lbas->descriptors[0];
 728
 729    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
 730        ret = -EIO;
 731        goto out;
 732    }
 733
 734    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
 735
 736    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
 737        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
 738        ret &= ~BDRV_BLOCK_DATA;
 739        if (iscsilun->lbprz) {
 740            ret |= BDRV_BLOCK_ZERO;
 741        }
 742    }
 743
 744    if (ret & BDRV_BLOCK_ZERO) {
 745        iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
 746    } else {
 747        iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
 748    }
 749
 750    if (*pnum > nb_sectors) {
 751        *pnum = nb_sectors;
 752    }
 753out:
 754    if (iTask.task != NULL) {
 755        scsi_free_scsi_task(iTask.task);
 756    }
 757    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
 758        *file = bs;
 759    }
 760    return ret;
 761}
 762
/*
 * Read @nb_sectors sectors starting at @sector_num into @iov.
 *
 * Reads that fall entirely into an area known (or found, for requests of
 * at least ISCSI_CHECKALLOC_THRES sectors) to be unallocated and reading
 * back as zeroes are answered locally without talking to the target.
 *
 * Returns 0 on success, -EINVAL for a request that is not aligned to the
 * LUN block size, -ENOMEM if the command could not be created, or the
 * errno of a failed block status lookup or read command.
 */
static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors,
                                       QEMUIOVector *iov)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;

    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (bs->bl.max_transfer) {
        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    }

    /* if cache.direct is off and we have a valid entry in our allocation map
     * we can skip checking the block status and directly return zeroes if
     * the request falls within an unallocated area */
    if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
    }

    /* large request over an area that might be unallocated but for which
     * we have no valid information: ask the target first */
    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
        !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int pnum;
        BlockDriverState *file;
        /* check the block status from the beginning of the cluster
         * containing the start sector */
        int64_t ret = iscsi_co_get_block_status(bs,
                          sector_num - sector_num % iscsilun->cluster_sectors,
                          BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
        if (ret < 0) {
            return ret;
        }
        /* if the whole request falls into an unallocated area we can avoid
         * to read and directly return zeroes instead */
        if (ret & BDRV_BLOCK_ZERO &&
            pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
        }
    }

    /* from here on the request is expressed in LUN blocks */
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    /* Depending on the libiscsi version the I/O vector is either passed
     * when the command is created (*_iov_task) or attached to the task
     * afterwards, see scsi_task_set_iov_in() below. Note that the braces
     * of the if/else are closed inside each preprocessor branch. */
    if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size, 0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size,
                                           0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    }
#else
        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size, 0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size,
                                       0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
    }
#endif
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
#if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
#endif
    /* sleep until the completion callback or the retry timer wakes us */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    /* the task is released before a possible retry creates a new one */
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
    }

    return 0;
}
 871
 872static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 873{
 874    IscsiLun *iscsilun = bs->opaque;
 875    struct IscsiTask iTask;
 876
 877    iscsi_co_init_iscsitask(iscsilun, &iTask);
 878retry:
 879    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
 880                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
 881        return -ENOMEM;
 882    }
 883
 884    while (!iTask.complete) {
 885        iscsi_set_events(iscsilun);
 886        qemu_coroutine_yield();
 887    }
 888
 889    if (iTask.task != NULL) {
 890        scsi_free_scsi_task(iTask.task);
 891        iTask.task = NULL;
 892    }
 893
 894    if (iTask.do_retry) {
 895        iTask.complete = 0;
 896        goto retry;
 897    }
 898
 899    if (iTask.status != SCSI_STATUS_GOOD) {
 900        return iTask.err_code;
 901    }
 902
 903    return 0;
 904}
 905
 906#ifdef __linux__
 907static void
 908iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 909                     void *command_data, void *opaque)
 910{
 911    IscsiAIOCB *acb = opaque;
 912
 913    g_free(acb->buf);
 914    acb->buf = NULL;
 915
 916    acb->status = 0;
 917    if (status < 0) {
 918        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
 919                     iscsi_get_error(iscsi));
 920        acb->status = iscsi_translate_sense(&acb->task->sense);
 921    }
 922
 923    acb->ioh->driver_status = 0;
 924    acb->ioh->host_status   = 0;
 925    acb->ioh->resid         = 0;
 926    acb->ioh->status        = status;
 927
 928#define SG_ERR_DRIVER_SENSE    0x08
 929
 930    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
 931        int ss;
 932
 933        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
 934
 935        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
 936        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
 937             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
 938        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
 939    }
 940
 941    iscsi_schedule_bh(acb);
 942}
 943
 944static void iscsi_ioctl_bh_completion(void *opaque)
 945{
 946    IscsiAIOCB *acb = opaque;
 947
 948    qemu_bh_delete(acb->bh);
 949    acb->common.cb(acb->common.opaque, acb->ret);
 950    qemu_aio_unref(acb);
 951}
 952
 953static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
 954{
 955    BlockDriverState *bs = acb->common.bs;
 956    IscsiLun *iscsilun = bs->opaque;
 957    int ret = 0;
 958
 959    switch (req) {
 960    case SG_GET_VERSION_NUM:
 961        *(int *)buf = 30000;
 962        break;
 963    case SG_GET_SCSI_ID:
 964        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
 965        break;
 966    default:
 967        ret = -EINVAL;
 968    }
 969    assert(!acb->bh);
 970    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
 971                         iscsi_ioctl_bh_completion, acb);
 972    acb->ret = ret;
 973    qemu_bh_schedule(acb->bh);
 974}
 975
 976static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 977        unsigned long int req, void *buf,
 978        BlockCompletionFunc *cb, void *opaque)
 979{
 980    IscsiLun *iscsilun = bs->opaque;
 981    struct iscsi_context *iscsi = iscsilun->iscsi;
 982    struct iscsi_data data;
 983    IscsiAIOCB *acb;
 984
 985    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
 986
 987    acb->iscsilun = iscsilun;
 988    acb->bh          = NULL;
 989    acb->status      = -EINPROGRESS;
 990    acb->buf         = NULL;
 991    acb->ioh         = buf;
 992
 993    if (req != SG_IO) {
 994        iscsi_ioctl_handle_emulated(acb, req, buf);
 995        return &acb->common;
 996    }
 997
 998    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
 999        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
1000                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
1001        qemu_aio_unref(acb);
1002        return NULL;
1003    }
1004
1005    acb->task = malloc(sizeof(struct scsi_task));
1006    if (acb->task == NULL) {
1007        error_report("iSCSI: Failed to allocate task for scsi command. %s",
1008                     iscsi_get_error(iscsi));
1009        qemu_aio_unref(acb);
1010        return NULL;
1011    }
1012    memset(acb->task, 0, sizeof(struct scsi_task));
1013
1014    switch (acb->ioh->dxfer_direction) {
1015    case SG_DXFER_TO_DEV:
1016        acb->task->xfer_dir = SCSI_XFER_WRITE;
1017        break;
1018    case SG_DXFER_FROM_DEV:
1019        acb->task->xfer_dir = SCSI_XFER_READ;
1020        break;
1021    default:
1022        acb->task->xfer_dir = SCSI_XFER_NONE;
1023        break;
1024    }
1025
1026    acb->task->cdb_size = acb->ioh->cmd_len;
1027    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
1028    acb->task->expxferlen = acb->ioh->dxfer_len;
1029
1030    data.size = 0;
1031    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
1032        if (acb->ioh->iovec_count == 0) {
1033            data.data = acb->ioh->dxferp;
1034            data.size = acb->ioh->dxfer_len;
1035        } else {
1036            scsi_task_set_iov_out(acb->task,
1037                                 (struct scsi_iovec *) acb->ioh->dxferp,
1038                                 acb->ioh->iovec_count);
1039        }
1040    }
1041
1042    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1043                                 iscsi_aio_ioctl_cb,
1044                                 (data.size > 0) ? &data : NULL,
1045                                 acb) != 0) {
1046        scsi_free_scsi_task(acb->task);
1047        qemu_aio_unref(acb);
1048        return NULL;
1049    }
1050
1051    /* tell libiscsi to read straight into the buffer we got from ioctl */
1052    if (acb->task->xfer_dir == SCSI_XFER_READ) {
1053        if (acb->ioh->iovec_count == 0) {
1054            scsi_task_add_data_in_buffer(acb->task,
1055                                         acb->ioh->dxfer_len,
1056                                         acb->ioh->dxferp);
1057        } else {
1058            scsi_task_set_iov_in(acb->task,
1059                                 (struct scsi_iovec *) acb->ioh->dxferp,
1060                                 acb->ioh->iovec_count);
1061        }
1062    }
1063
1064    iscsi_set_events(iscsilun);
1065
1066    return &acb->common;
1067}
1068
1069#endif
1070
1071static int64_t
1072iscsi_getlength(BlockDriverState *bs)
1073{
1074    IscsiLun *iscsilun = bs->opaque;
1075    int64_t len;
1076
1077    len  = iscsilun->num_blocks;
1078    len *= iscsilun->block_size;
1079
1080    return len;
1081}
1082
1083static int
1084coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
1085{
1086    IscsiLun *iscsilun = bs->opaque;
1087    struct IscsiTask iTask;
1088    struct unmap_list list;
1089
1090    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
1091        return -ENOTSUP;
1092    }
1093
1094    if (!iscsilun->lbp.lbpu) {
1095        /* UNMAP is not supported by the target */
1096        return 0;
1097    }
1098
1099    list.lba = offset / iscsilun->block_size;
1100    list.num = count / iscsilun->block_size;
1101
1102    iscsi_co_init_iscsitask(iscsilun, &iTask);
1103retry:
1104    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1105                         iscsi_co_generic_cb, &iTask) == NULL) {
1106        return -ENOMEM;
1107    }
1108
1109    while (!iTask.complete) {
1110        iscsi_set_events(iscsilun);
1111        qemu_coroutine_yield();
1112    }
1113
1114    if (iTask.task != NULL) {
1115        scsi_free_scsi_task(iTask.task);
1116        iTask.task = NULL;
1117    }
1118
1119    if (iTask.do_retry) {
1120        iTask.complete = 0;
1121        goto retry;
1122    }
1123
1124    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1125        /* the target might fail with a check condition if it
1126           is not happy with the alignment of the UNMAP request
1127           we silently fail in this case */
1128        return 0;
1129    }
1130
1131    if (iTask.status != SCSI_STATUS_GOOD) {
1132        return iTask.err_code;
1133    }
1134
1135    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1136                               count >> BDRV_SECTOR_BITS);
1137
1138    return 0;
1139}
1140
1141static int
1142coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1143                                    int count, BdrvRequestFlags flags)
1144{
1145    IscsiLun *iscsilun = bs->opaque;
1146    struct IscsiTask iTask;
1147    uint64_t lba;
1148    uint32_t nb_blocks;
1149    bool use_16_for_ws = iscsilun->use_16_for_rw;
1150
1151    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
1152        return -ENOTSUP;
1153    }
1154
1155    if (flags & BDRV_REQ_MAY_UNMAP) {
1156        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1157            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
1158            use_16_for_ws = true;
1159        }
1160        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1161            /* WRITESAME16 with UNMAP is not supported by the target,
1162             * fall back and try WRITESAME10/16 without UNMAP */
1163            flags &= ~BDRV_REQ_MAY_UNMAP;
1164            use_16_for_ws = iscsilun->use_16_for_rw;
1165        }
1166    }
1167
1168    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1169        /* WRITESAME without UNMAP is not supported by the target */
1170        return -ENOTSUP;
1171    }
1172
1173    lba = offset / iscsilun->block_size;
1174    nb_blocks = count / iscsilun->block_size;
1175
1176    if (iscsilun->zeroblock == NULL) {
1177        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1178        if (iscsilun->zeroblock == NULL) {
1179            return -ENOMEM;
1180        }
1181    }
1182
1183    iscsi_co_init_iscsitask(iscsilun, &iTask);
1184retry:
1185    if (use_16_for_ws) {
1186        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1187                                            iscsilun->zeroblock, iscsilun->block_size,
1188                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1189                                            0, 0, iscsi_co_generic_cb, &iTask);
1190    } else {
1191        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1192                                            iscsilun->zeroblock, iscsilun->block_size,
1193                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1194                                            0, 0, iscsi_co_generic_cb, &iTask);
1195    }
1196    if (iTask.task == NULL) {
1197        return -ENOMEM;
1198    }
1199
1200    while (!iTask.complete) {
1201        iscsi_set_events(iscsilun);
1202        qemu_coroutine_yield();
1203    }
1204
1205    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1206        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1207        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1208         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1209        /* WRITE SAME is not supported by the target */
1210        iscsilun->has_write_same = false;
1211        scsi_free_scsi_task(iTask.task);
1212        return -ENOTSUP;
1213    }
1214
1215    if (iTask.task != NULL) {
1216        scsi_free_scsi_task(iTask.task);
1217        iTask.task = NULL;
1218    }
1219
1220    if (iTask.do_retry) {
1221        iTask.complete = 0;
1222        goto retry;
1223    }
1224
1225    if (iTask.status != SCSI_STATUS_GOOD) {
1226        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1227                                   count >> BDRV_SECTOR_BITS);
1228        return iTask.err_code;
1229    }
1230
1231    if (flags & BDRV_REQ_MAY_UNMAP) {
1232        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1233                                   count >> BDRV_SECTOR_BITS);
1234    } else {
1235        iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
1236                                     count >> BDRV_SECTOR_BITS);
1237    }
1238
1239    return 0;
1240}
1241
1242static void parse_chap(struct iscsi_context *iscsi, const char *target,
1243                       Error **errp)
1244{
1245    QemuOptsList *list;
1246    QemuOpts *opts;
1247    const char *user = NULL;
1248    const char *password = NULL;
1249    const char *secretid;
1250    char *secret = NULL;
1251
1252    list = qemu_find_opts("iscsi");
1253    if (!list) {
1254        return;
1255    }
1256
1257    opts = qemu_opts_find(list, target);
1258    if (opts == NULL) {
1259        opts = QTAILQ_FIRST(&list->head);
1260        if (!opts) {
1261            return;
1262        }
1263    }
1264
1265    user = qemu_opt_get(opts, "user");
1266    if (!user) {
1267        return;
1268    }
1269
1270    secretid = qemu_opt_get(opts, "password-secret");
1271    password = qemu_opt_get(opts, "password");
1272    if (secretid && password) {
1273        error_setg(errp, "'password' and 'password-secret' properties are "
1274                   "mutually exclusive");
1275        return;
1276    }
1277    if (secretid) {
1278        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1279        if (!secret) {
1280            return;
1281        }
1282        password = secret;
1283    } else if (!password) {
1284        error_setg(errp, "CHAP username specified but no password was given");
1285        return;
1286    }
1287
1288    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1289        error_setg(errp, "Failed to set initiator username and password");
1290    }
1291
1292    g_free(secret);
1293}
1294
1295static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1296                                Error **errp)
1297{
1298    QemuOptsList *list;
1299    QemuOpts *opts;
1300    const char *digest = NULL;
1301
1302    list = qemu_find_opts("iscsi");
1303    if (!list) {
1304        return;
1305    }
1306
1307    opts = qemu_opts_find(list, target);
1308    if (opts == NULL) {
1309        opts = QTAILQ_FIRST(&list->head);
1310        if (!opts) {
1311            return;
1312        }
1313    }
1314
1315    digest = qemu_opt_get(opts, "header-digest");
1316    if (!digest) {
1317        return;
1318    }
1319
1320    if (!strcmp(digest, "CRC32C")) {
1321        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1322    } else if (!strcmp(digest, "NONE")) {
1323        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1324    } else if (!strcmp(digest, "CRC32C-NONE")) {
1325        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1326    } else if (!strcmp(digest, "NONE-CRC32C")) {
1327        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1328    } else {
1329        error_setg(errp, "Invalid header-digest setting : %s", digest);
1330    }
1331}
1332
1333static char *parse_initiator_name(const char *target)
1334{
1335    QemuOptsList *list;
1336    QemuOpts *opts;
1337    const char *name;
1338    char *iscsi_name;
1339    UuidInfo *uuid_info;
1340
1341    list = qemu_find_opts("iscsi");
1342    if (list) {
1343        opts = qemu_opts_find(list, target);
1344        if (!opts) {
1345            opts = QTAILQ_FIRST(&list->head);
1346        }
1347        if (opts) {
1348            name = qemu_opt_get(opts, "initiator-name");
1349            if (name) {
1350                return g_strdup(name);
1351            }
1352        }
1353    }
1354
1355    uuid_info = qmp_query_uuid(NULL);
1356    if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1357        name = qemu_get_vm_name();
1358    } else {
1359        name = uuid_info->UUID;
1360    }
1361    iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1362                                 name ? ":" : "", name ? name : "");
1363    qapi_free_UuidInfo(uuid_info);
1364    return iscsi_name;
1365}
1366
1367static int parse_timeout(const char *target)
1368{
1369    QemuOptsList *list;
1370    QemuOpts *opts;
1371    const char *timeout;
1372
1373    list = qemu_find_opts("iscsi");
1374    if (list) {
1375        opts = qemu_opts_find(list, target);
1376        if (!opts) {
1377            opts = QTAILQ_FIRST(&list->head);
1378        }
1379        if (opts) {
1380            timeout = qemu_opt_get(opts, "timeout");
1381            if (timeout) {
1382                return atoi(timeout);
1383            }
1384        }
1385    }
1386
1387    return 0;
1388}
1389
1390static void iscsi_nop_timed_event(void *opaque)
1391{
1392    IscsiLun *iscsilun = opaque;
1393
1394    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1395        error_report("iSCSI: NOP timeout. Reconnecting...");
1396        iscsilun->request_timed_out = true;
1397    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1398        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1399        return;
1400    }
1401
1402    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1403    iscsi_set_events(iscsilun);
1404}
1405
1406static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1407{
1408    struct scsi_task *task = NULL;
1409    struct scsi_readcapacity10 *rc10 = NULL;
1410    struct scsi_readcapacity16 *rc16 = NULL;
1411    int retries = ISCSI_CMD_RETRIES; 
1412
1413    do {
1414        if (task != NULL) {
1415            scsi_free_scsi_task(task);
1416            task = NULL;
1417        }
1418
1419        switch (iscsilun->type) {
1420        case TYPE_DISK:
1421            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1422            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1423                rc16 = scsi_datain_unmarshall(task);
1424                if (rc16 == NULL) {
1425                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1426                } else {
1427                    iscsilun->block_size = rc16->block_length;
1428                    iscsilun->num_blocks = rc16->returned_lba + 1;
1429                    iscsilun->lbpme = !!rc16->lbpme;
1430                    iscsilun->lbprz = !!rc16->lbprz;
1431                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1432                }
1433                break;
1434            }
1435            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1436                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1437                break;
1438            }
1439            /* Fall through and try READ CAPACITY(10) instead.  */
1440        case TYPE_ROM:
1441            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1442            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1443                rc10 = scsi_datain_unmarshall(task);
1444                if (rc10 == NULL) {
1445                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1446                } else {
1447                    iscsilun->block_size = rc10->block_size;
1448                    if (rc10->lba == 0) {
1449                        /* blank disk loaded */
1450                        iscsilun->num_blocks = 0;
1451                    } else {
1452                        iscsilun->num_blocks = rc10->lba + 1;
1453                    }
1454                }
1455            }
1456            break;
1457        default:
1458            return;
1459        }
1460    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1461             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1462             && retries-- > 0);
1463
1464    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1465        error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1466    } else if (!iscsilun->block_size ||
1467               iscsilun->block_size % BDRV_SECTOR_SIZE) {
1468        error_setg(errp, "iSCSI: the target returned an invalid "
1469                   "block size of %d.", iscsilun->block_size);
1470    }
1471    if (task) {
1472        scsi_free_scsi_task(task);
1473    }
1474}
1475
1476/* TODO Convert to fine grained options */
1477static QemuOptsList runtime_opts = {
1478    .name = "iscsi",
1479    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1480    .desc = {
1481        {
1482            .name = "filename",
1483            .type = QEMU_OPT_STRING,
1484            .help = "URL to the iscsi image",
1485        },
1486        { /* end of list */ }
1487    },
1488};
1489
1490static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1491                                          int evpd, int pc, void **inq, Error **errp)
1492{
1493    int full_size;
1494    struct scsi_task *task = NULL;
1495    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1496    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1497        goto fail;
1498    }
1499    full_size = scsi_datain_getfullsize(task);
1500    if (full_size > task->datain.size) {
1501        scsi_free_scsi_task(task);
1502
1503        /* we need more data for the full list */
1504        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1505        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1506            goto fail;
1507        }
1508    }
1509
1510    *inq = scsi_datain_unmarshall(task);
1511    if (*inq == NULL) {
1512        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1513        goto fail_with_err;
1514    }
1515
1516    return task;
1517
1518fail:
1519    error_setg(errp, "iSCSI: Inquiry command failed : %s",
1520               iscsi_get_error(iscsi));
1521fail_with_err:
1522    if (task != NULL) {
1523        scsi_free_scsi_task(task);
1524    }
1525    return NULL;
1526}
1527
1528static void iscsi_detach_aio_context(BlockDriverState *bs)
1529{
1530    IscsiLun *iscsilun = bs->opaque;
1531
1532    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1533                       false, NULL, NULL, NULL);
1534    iscsilun->events = 0;
1535
1536    if (iscsilun->nop_timer) {
1537        timer_del(iscsilun->nop_timer);
1538        timer_free(iscsilun->nop_timer);
1539        iscsilun->nop_timer = NULL;
1540    }
1541    if (iscsilun->event_timer) {
1542        timer_del(iscsilun->event_timer);
1543        timer_free(iscsilun->event_timer);
1544        iscsilun->event_timer = NULL;
1545    }
1546}
1547
1548static void iscsi_attach_aio_context(BlockDriverState *bs,
1549                                     AioContext *new_context)
1550{
1551    IscsiLun *iscsilun = bs->opaque;
1552
1553    iscsilun->aio_context = new_context;
1554    iscsi_set_events(iscsilun);
1555
1556    /* Set up a timer for sending out iSCSI NOPs */
1557    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1558                                        QEMU_CLOCK_REALTIME, SCALE_MS,
1559                                        iscsi_nop_timed_event, iscsilun);
1560    timer_mod(iscsilun->nop_timer,
1561              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1562
1563    /* Set up a timer for periodic calls to iscsi_set_events and to
1564     * scan for command timeout */
1565    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1566                                          QEMU_CLOCK_REALTIME, SCALE_MS,
1567                                          iscsi_timed_check_events, iscsilun);
1568    timer_mod(iscsilun->event_timer,
1569              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1570}
1571
1572static void iscsi_modesense_sync(IscsiLun *iscsilun)
1573{
1574    struct scsi_task *task;
1575    struct scsi_mode_sense *ms = NULL;
1576    iscsilun->write_protected = false;
1577    iscsilun->dpofua = false;
1578
1579    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1580                                 1, SCSI_MODESENSE_PC_CURRENT,
1581                                 0x3F, 0, 255);
1582    if (task == NULL) {
1583        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1584                     iscsi_get_error(iscsilun->iscsi));
1585        goto out;
1586    }
1587
1588    if (task->status != SCSI_STATUS_GOOD) {
1589        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1590        goto out;
1591    }
1592    ms = scsi_datain_unmarshall(task);
1593    if (!ms) {
1594        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1595                     iscsi_get_error(iscsilun->iscsi));
1596        goto out;
1597    }
1598    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1599    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1600
1601out:
1602    if (task) {
1603        scsi_free_scsi_task(task);
1604    }
1605}
1606
1607/*
1608 * We support iscsi url's on the form
1609 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1610 */
1611static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1612                      Error **errp)
1613{
1614    IscsiLun *iscsilun = bs->opaque;
1615    struct iscsi_context *iscsi = NULL;
1616    struct iscsi_url *iscsi_url = NULL;
1617    struct scsi_task *task = NULL;
1618    struct scsi_inquiry_standard *inq = NULL;
1619    struct scsi_inquiry_supported_pages *inq_vpd;
1620    char *initiator_name = NULL;
1621    QemuOpts *opts;
1622    Error *local_err = NULL;
1623    const char *filename;
1624    int i, ret = 0, timeout = 0;
1625
1626    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1627    qemu_opts_absorb_qdict(opts, options, &local_err);
1628    if (local_err) {
1629        error_propagate(errp, local_err);
1630        ret = -EINVAL;
1631        goto out;
1632    }
1633
1634    filename = qemu_opt_get(opts, "filename");
1635
1636    iscsi_url = iscsi_parse_full_url(iscsi, filename);
1637    if (iscsi_url == NULL) {
1638        error_setg(errp, "Failed to parse URL : %s", filename);
1639        ret = -EINVAL;
1640        goto out;
1641    }
1642
1643    memset(iscsilun, 0, sizeof(IscsiLun));
1644
1645    initiator_name = parse_initiator_name(iscsi_url->target);
1646
1647    iscsi = iscsi_create_context(initiator_name);
1648    if (iscsi == NULL) {
1649        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1650        ret = -ENOMEM;
1651        goto out;
1652    }
1653#if LIBISCSI_API_VERSION >= (20160603)
1654    if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
1655        error_setg(errp, ("Error initializing transport."));
1656        ret = -EINVAL;
1657        goto out;
1658    }
1659#endif
1660    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1661        error_setg(errp, "iSCSI: Failed to set target name.");
1662        ret = -EINVAL;
1663        goto out;
1664    }
1665
1666    if (iscsi_url->user[0] != '\0') {
1667        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1668                                              iscsi_url->passwd);
1669        if (ret != 0) {
1670            error_setg(errp, "Failed to set initiator username and password");
1671            ret = -EINVAL;
1672            goto out;
1673        }
1674    }
1675
1676    /* check if we got CHAP username/password via the options */
1677    parse_chap(iscsi, iscsi_url->target, &local_err);
1678    if (local_err != NULL) {
1679        error_propagate(errp, local_err);
1680        ret = -EINVAL;
1681        goto out;
1682    }
1683
1684    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1685        error_setg(errp, "iSCSI: Failed to set session type to normal.");
1686        ret = -EINVAL;
1687        goto out;
1688    }
1689
1690    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1691
1692    /* check if we got HEADER_DIGEST via the options */
1693    parse_header_digest(iscsi, iscsi_url->target, &local_err);
1694    if (local_err != NULL) {
1695        error_propagate(errp, local_err);
1696        ret = -EINVAL;
1697        goto out;
1698    }
1699
1700    /* timeout handling is broken in libiscsi before 1.15.0 */
1701    timeout = parse_timeout(iscsi_url->target);
1702#if LIBISCSI_API_VERSION >= 20150621
1703    iscsi_set_timeout(iscsi, timeout);
1704#else
1705    if (timeout) {
1706        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1707    }
1708#endif
1709
1710    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1711        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1712            iscsi_get_error(iscsi));
1713        ret = -EINVAL;
1714        goto out;
1715    }
1716
1717    iscsilun->iscsi = iscsi;
1718    iscsilun->aio_context = bdrv_get_aio_context(bs);
1719    iscsilun->lun   = iscsi_url->lun;
1720    iscsilun->has_write_same = true;
1721
1722    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1723                            (void **) &inq, errp);
1724    if (task == NULL) {
1725        ret = -EINVAL;
1726        goto out;
1727    }
1728    iscsilun->type = inq->periperal_device_type;
1729    scsi_free_scsi_task(task);
1730    task = NULL;
1731
1732    iscsi_modesense_sync(iscsilun);
1733    if (iscsilun->dpofua) {
1734        bs->supported_write_flags = BDRV_REQ_FUA;
1735    }
1736    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1737
1738    /* Check the write protect flag of the LUN if we want to write */
1739    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1740        iscsilun->write_protected) {
1741        error_setg(errp, "Cannot open a write protected LUN as read-write");
1742        ret = -EACCES;
1743        goto out;
1744    }
1745
1746    iscsi_readcapacity_sync(iscsilun, &local_err);
1747    if (local_err != NULL) {
1748        error_propagate(errp, local_err);
1749        ret = -EINVAL;
1750        goto out;
1751    }
1752    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1753
1754    /* We don't have any emulation for devices other than disks and CD-ROMs, so
1755     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1756     * will try to read from the device to guess the image format.
1757     */
1758    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1759        bs->sg = true;
1760    }
1761
1762    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1763                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1764                            (void **) &inq_vpd, errp);
1765    if (task == NULL) {
1766        ret = -EINVAL;
1767        goto out;
1768    }
1769    for (i = 0; i < inq_vpd->num_pages; i++) {
1770        struct scsi_task *inq_task;
1771        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1772        struct scsi_inquiry_block_limits *inq_bl;
1773        switch (inq_vpd->pages[i]) {
1774        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1775            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1776                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1777                                        (void **) &inq_lbp, errp);
1778            if (inq_task == NULL) {
1779                ret = -EINVAL;
1780                goto out;
1781            }
1782            memcpy(&iscsilun->lbp, inq_lbp,
1783                   sizeof(struct scsi_inquiry_logical_block_provisioning));
1784            scsi_free_scsi_task(inq_task);
1785            break;
1786        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1787            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1788                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1789                                    (void **) &inq_bl, errp);
1790            if (inq_task == NULL) {
1791                ret = -EINVAL;
1792                goto out;
1793            }
1794            memcpy(&iscsilun->bl, inq_bl,
1795                   sizeof(struct scsi_inquiry_block_limits));
1796            scsi_free_scsi_task(inq_task);
1797            break;
1798        default:
1799            break;
1800        }
1801    }
1802    scsi_free_scsi_task(task);
1803    task = NULL;
1804
1805    iscsi_attach_aio_context(bs, iscsilun->aio_context);
1806
1807    /* Guess the internal cluster (page) size of the iscsi target by the means
1808     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1809     * reasonable size */
1810    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1811        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1812        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1813                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
1814        if (iscsilun->lbprz) {
1815            ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
1816        }
1817    }
1818
1819out:
1820    qemu_opts_del(opts);
1821    g_free(initiator_name);
1822    if (iscsi_url != NULL) {
1823        iscsi_destroy_url(iscsi_url);
1824    }
1825    if (task != NULL) {
1826        scsi_free_scsi_task(task);
1827    }
1828
1829    if (ret) {
1830        if (iscsi != NULL) {
1831            if (iscsi_is_logged_in(iscsi)) {
1832                iscsi_logout_sync(iscsi);
1833            }
1834            iscsi_destroy_context(iscsi);
1835        }
1836        memset(iscsilun, 0, sizeof(IscsiLun));
1837    }
1838    return ret;
1839}
1840
1841static void iscsi_close(BlockDriverState *bs)
1842{
1843    IscsiLun *iscsilun = bs->opaque;
1844    struct iscsi_context *iscsi = iscsilun->iscsi;
1845
1846    iscsi_detach_aio_context(bs);
1847    if (iscsi_is_logged_in(iscsi)) {
1848        iscsi_logout_sync(iscsi);
1849    }
1850    iscsi_destroy_context(iscsi);
1851    g_free(iscsilun->zeroblock);
1852    iscsi_allocmap_free(iscsilun);
1853    memset(iscsilun, 0, sizeof(IscsiLun));
1854}
1855
1856static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1857{
1858    /* We don't actually refresh here, but just return data queried in
1859     * iscsi_open(): iscsi targets don't change their limits. */
1860
1861    IscsiLun *iscsilun = bs->opaque;
1862    uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1863    unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
1864
1865    assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg);
1866
1867    bs->bl.request_alignment = block_size;
1868
1869    if (iscsilun->bl.max_xfer_len) {
1870        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1871    }
1872
1873    if (max_xfer_len * block_size < INT_MAX) {
1874        bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
1875    }
1876
1877    if (iscsilun->lbp.lbpu) {
1878        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
1879            bs->bl.max_pdiscard =
1880                iscsilun->bl.max_unmap * iscsilun->block_size;
1881        }
1882        bs->bl.pdiscard_alignment =
1883            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1884    } else {
1885        bs->bl.pdiscard_alignment = iscsilun->block_size;
1886    }
1887
1888    if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
1889        bs->bl.max_pwrite_zeroes =
1890            iscsilun->bl.max_ws_len * iscsilun->block_size;
1891    }
1892    if (iscsilun->lbp.lbpws) {
1893        bs->bl.pwrite_zeroes_alignment =
1894            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1895    } else {
1896        bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
1897    }
1898    if (iscsilun->bl.opt_xfer_len &&
1899        iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
1900        bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
1901                                        iscsilun->block_size);
1902    }
1903}
1904
1905/* Note that this will not re-establish a connection with an iSCSI target - it
1906 * is effectively a NOP.  */
1907static int iscsi_reopen_prepare(BDRVReopenState *state,
1908                                BlockReopenQueue *queue, Error **errp)
1909{
1910    IscsiLun *iscsilun = state->bs->opaque;
1911
1912    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1913        error_setg(errp, "Cannot open a write protected LUN as read-write");
1914        return -EACCES;
1915    }
1916    return 0;
1917}
1918
1919static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
1920{
1921    IscsiLun *iscsilun = reopen_state->bs->opaque;
1922
1923    /* the cache.direct status might have changed */
1924    if (iscsilun->allocmap != NULL) {
1925        iscsi_allocmap_init(iscsilun, reopen_state->flags);
1926    }
1927}
1928
1929static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1930{
1931    IscsiLun *iscsilun = bs->opaque;
1932    Error *local_err = NULL;
1933
1934    if (iscsilun->type != TYPE_DISK) {
1935        return -ENOTSUP;
1936    }
1937
1938    iscsi_readcapacity_sync(iscsilun, &local_err);
1939    if (local_err != NULL) {
1940        error_free(local_err);
1941        return -EIO;
1942    }
1943
1944    if (offset > iscsi_getlength(bs)) {
1945        return -EINVAL;
1946    }
1947
1948    if (iscsilun->allocmap != NULL) {
1949        iscsi_allocmap_init(iscsilun, bs->open_flags);
1950    }
1951
1952    return 0;
1953}
1954
1955static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1956{
1957    int ret = 0;
1958    int64_t total_size = 0;
1959    BlockDriverState *bs;
1960    IscsiLun *iscsilun = NULL;
1961    QDict *bs_options;
1962
1963    bs = bdrv_new();
1964
1965    /* Read out options */
1966    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1967                              BDRV_SECTOR_SIZE);
1968    bs->opaque = g_new0(struct IscsiLun, 1);
1969    iscsilun = bs->opaque;
1970
1971    bs_options = qdict_new();
1972    qdict_put(bs_options, "filename", qstring_from_str(filename));
1973    ret = iscsi_open(bs, bs_options, 0, NULL);
1974    QDECREF(bs_options);
1975
1976    if (ret != 0) {
1977        goto out;
1978    }
1979    iscsi_detach_aio_context(bs);
1980    if (iscsilun->type != TYPE_DISK) {
1981        ret = -ENODEV;
1982        goto out;
1983    }
1984    if (bs->total_sectors < total_size) {
1985        ret = -ENOSPC;
1986        goto out;
1987    }
1988
1989    ret = 0;
1990out:
1991    if (iscsilun->iscsi != NULL) {
1992        iscsi_destroy_context(iscsilun->iscsi);
1993    }
1994    g_free(bs->opaque);
1995    bs->opaque = NULL;
1996    bdrv_unref(bs);
1997    return ret;
1998}
1999
2000static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2001{
2002    IscsiLun *iscsilun = bs->opaque;
2003    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
2004    bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
2005    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
2006    return 0;
2007}
2008
2009static void iscsi_invalidate_cache(BlockDriverState *bs,
2010                                   Error **errp)
2011{
2012    IscsiLun *iscsilun = bs->opaque;
2013    iscsi_allocmap_invalidate(iscsilun);
2014}
2015
2016static QemuOptsList iscsi_create_opts = {
2017    .name = "iscsi-create-opts",
2018    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
2019    .desc = {
2020        {
2021            .name = BLOCK_OPT_SIZE,
2022            .type = QEMU_OPT_SIZE,
2023            .help = "Virtual disk size"
2024        },
2025        { /* end of list */ }
2026    }
2027};
2028
2029static BlockDriver bdrv_iscsi = {
2030    .format_name     = "iscsi",
2031    .protocol_name   = "iscsi",
2032
2033    .instance_size   = sizeof(IscsiLun),
2034    .bdrv_needs_filename = true,
2035    .bdrv_file_open  = iscsi_open,
2036    .bdrv_close      = iscsi_close,
2037    .bdrv_create     = iscsi_create,
2038    .create_opts     = &iscsi_create_opts,
2039    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
2040    .bdrv_reopen_commit    = iscsi_reopen_commit,
2041    .bdrv_invalidate_cache = iscsi_invalidate_cache,
2042
2043    .bdrv_getlength  = iscsi_getlength,
2044    .bdrv_get_info   = iscsi_get_info,
2045    .bdrv_truncate   = iscsi_truncate,
2046    .bdrv_refresh_limits = iscsi_refresh_limits,
2047
2048    .bdrv_co_get_block_status = iscsi_co_get_block_status,
2049    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2050    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2051    .bdrv_co_readv         = iscsi_co_readv,
2052    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2053    .bdrv_co_flush_to_disk = iscsi_co_flush,
2054
2055#ifdef __linux__
2056    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2057#endif
2058
2059    .bdrv_detach_aio_context = iscsi_detach_aio_context,
2060    .bdrv_attach_aio_context = iscsi_attach_aio_context,
2061};
2062
#if LIBISCSI_API_VERSION >= (20160603)
/*
 * Block driver registered for the "iser" protocol. It uses the same
 * callbacks as bdrv_iscsi and is only built when LIBISCSI_API_VERSION
 * is at least 20160603.
 */
static BlockDriver bdrv_iser = {
    /* Identification */
    .format_name                = "iser",
    .protocol_name              = "iser",
    .instance_size              = sizeof(IscsiLun),
    .bdrv_needs_filename        = true,

    /* Lifecycle: open, close, create, reopen */
    .bdrv_file_open             = iscsi_open,
    .bdrv_close                 = iscsi_close,
    .bdrv_create                = iscsi_create,
    .create_opts                = &iscsi_create_opts,
    .bdrv_reopen_prepare        = iscsi_reopen_prepare,
    .bdrv_reopen_commit         = iscsi_reopen_commit,

    /* Size, info and limits */
    .bdrv_getlength             = iscsi_getlength,
    .bdrv_truncate              = iscsi_truncate,
    .bdrv_get_info              = iscsi_get_info,
    .bdrv_refresh_limits        = iscsi_refresh_limits,
    .bdrv_invalidate_cache      = iscsi_invalidate_cache,

    /* Coroutine I/O paths */
    .bdrv_co_readv              = iscsi_co_readv,
    .bdrv_co_writev_flags       = iscsi_co_writev_flags,
    .bdrv_co_flush_to_disk      = iscsi_co_flush,
    .bdrv_co_pdiscard           = iscsi_co_pdiscard,
    .bdrv_co_pwrite_zeroes      = iscsi_co_pwrite_zeroes,
    .bdrv_co_get_block_status   = iscsi_co_get_block_status,

#ifdef __linux__
    /* ioctl passthrough is only built on Linux */
    .bdrv_aio_ioctl             = iscsi_aio_ioctl,
#endif

    /* AioContext handling */
    .bdrv_attach_aio_context    = iscsi_attach_aio_context,
    .bdrv_detach_aio_context    = iscsi_detach_aio_context,
};
#endif
2098
2099static void iscsi_block_init(void)
2100{
2101    bdrv_register(&bdrv_iscsi);
2102#if LIBISCSI_API_VERSION >= (20160603)
2103    bdrv_register(&bdrv_iser);
2104#endif
2105}
2106
2107block_init(iscsi_block_init);
2108