qemu/block/iscsi.c
<<
>>
Prefs
   1/*
   2 * QEMU Block driver for iSCSI images
   3 *
   4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
   5 * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28#include <poll.h>
  29#include <math.h>
  30#include <arpa/inet.h>
  31#include "qemu-common.h"
  32#include "qemu/config-file.h"
  33#include "qemu/error-report.h"
  34#include "qemu/bitops.h"
  35#include "qemu/bitmap.h"
  36#include "block/block_int.h"
  37#include "scsi/constants.h"
  38#include "qemu/iov.h"
  39#include "qemu/uuid.h"
  40#include "qmp-commands.h"
  41#include "qapi/qmp/qstring.h"
  42#include "crypto/secret.h"
  43#include "scsi/utils.h"
  44
  45/* Conflict between scsi/utils.h and libiscsi! :( */
  46#define SCSI_XFER_NONE ISCSI_XFER_NONE
  47#include <iscsi/iscsi.h>
  48#include <iscsi/scsi-lowlevel.h>
  49#undef SCSI_XFER_NONE
  50QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
  51
  52#ifdef __linux__
  53#include <scsi/sg.h>
  54#endif
  55
  56typedef struct IscsiLun {
  57    struct iscsi_context *iscsi;
  58    AioContext *aio_context;
  59    int lun;
  60    enum scsi_inquiry_peripheral_device_type type;
  61    int block_size;
  62    uint64_t num_blocks;
  63    int events;
  64    QEMUTimer *nop_timer;
  65    QEMUTimer *event_timer;
  66    QemuMutex mutex;
  67    struct scsi_inquiry_logical_block_provisioning lbp;
  68    struct scsi_inquiry_block_limits bl;
  69    unsigned char *zeroblock;
  70    /* The allocmap tracks which clusters (pages) on the iSCSI target are
  71     * allocated and which are not. In case a target returns zeros for
  72     * unallocated pages (iscsilun->lprz) we can directly return zeros instead
  73     * of reading zeros over the wire if a read request falls within an
  74     * unallocated block. As there are 3 possible states we need 2 bitmaps to
  75     * track. allocmap_valid keeps track if QEMU's information about a page is
  76     * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
  77     * valid information about a page the corresponding allocmap entry should be
  78     * switched to unallocated as well to force a new lookup of the allocation
  79     * status as lookups are generally skipped if a page is suspect to be
  80     * allocated. If a iSCSI target is opened with cache.direct = on the
  81     * allocmap_valid does not exist turning all cached information invalid so
  82     * that a fresh lookup is made for any page even if allocmap entry returns
  83     * it's unallocated. */
  84    unsigned long *allocmap;
  85    unsigned long *allocmap_valid;
  86    long allocmap_size;
  87    int cluster_sectors;
  88    bool use_16_for_rw;
  89    bool write_protected;
  90    bool lbpme;
  91    bool lbprz;
  92    bool dpofua;
  93    bool has_write_same;
  94    bool request_timed_out;
  95} IscsiLun;
  96
  97typedef struct IscsiTask {
  98    int status;
  99    int complete;
 100    int retries;
 101    int do_retry;
 102    struct scsi_task *task;
 103    Coroutine *co;
 104    IscsiLun *iscsilun;
 105    QEMUTimer retry_timer;
 106    int err_code;
 107} IscsiTask;
 108
 109typedef struct IscsiAIOCB {
 110    BlockAIOCB common;
 111    QEMUBH *bh;
 112    IscsiLun *iscsilun;
 113    struct scsi_task *task;
 114    uint8_t *buf;
 115    int status;
 116    int64_t sector_num;
 117    int nb_sectors;
 118    int ret;
 119#ifdef __linux__
 120    sg_io_hdr_t *ioh;
 121#endif
 122} IscsiAIOCB;
 123
 124/* libiscsi uses time_t so its enough to process events every second */
 125#define EVENT_INTERVAL 1000
 126#define NOP_INTERVAL 5000
 127#define MAX_NOP_FAILURES 3
 128#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
 129static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
 130
 131/* this threshold is a trade-off knob to choose between
 132 * the potential additional overhead of an extra GET_LBA_STATUS request
 133 * vs. unnecessarily reading a lot of zero sectors over the wire.
 134 * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES
 135 * sectors we check the allocation status of the area covered by the
 136 * request first if the allocationmap indicates that the area might be
 137 * unallocated. */
 138#define ISCSI_CHECKALLOC_THRES 64
 139
 140static void
 141iscsi_bh_cb(void *p)
 142{
 143    IscsiAIOCB *acb = p;
 144
 145    qemu_bh_delete(acb->bh);
 146
 147    g_free(acb->buf);
 148    acb->buf = NULL;
 149
 150    acb->common.cb(acb->common.opaque, acb->status);
 151
 152    if (acb->task != NULL) {
 153        scsi_free_scsi_task(acb->task);
 154        acb->task = NULL;
 155    }
 156
 157    qemu_aio_unref(acb);
 158}
 159
 160static void
 161iscsi_schedule_bh(IscsiAIOCB *acb)
 162{
 163    if (acb->bh) {
 164        return;
 165    }
 166    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
 167    qemu_bh_schedule(acb->bh);
 168}
 169
 170static void iscsi_co_generic_bh_cb(void *opaque)
 171{
 172    struct IscsiTask *iTask = opaque;
 173
 174    iTask->complete = 1;
 175    aio_co_wake(iTask->co);
 176}
 177
 178static void iscsi_retry_timer_expired(void *opaque)
 179{
 180    struct IscsiTask *iTask = opaque;
 181    iTask->complete = 1;
 182    if (iTask->co) {
 183        aio_co_wake(iTask->co);
 184    }
 185}
 186
 187static inline unsigned exp_random(double mean)
 188{
 189    return -mean * log((double)rand() / RAND_MAX);
 190}
 191
 192/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 193 * libiscsi 1.10.0, together with other constants we need.  Use it as
 194 * a hint that we have to define them ourselves if needed, to keep the
 195 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 196 * the test because SCSI_STATUS_* is an enum.
 197 *
 198 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 199 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 200 * introduced in 1.11.0.  If it is present, there is no need to define
 201 * anything.
 202 */
 203#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
 204    !defined(LIBISCSI_API_VERSION)
 205#define SCSI_STATUS_TASK_SET_FULL                          0x28
 206#define SCSI_STATUS_TIMEOUT                                0x0f000002
 207#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
 208#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
 209#endif
 210
 211#ifndef LIBISCSI_API_VERSION
 212#define LIBISCSI_API_VERSION 20130701
 213#endif
 214
 215static int iscsi_translate_sense(struct scsi_sense *sense)
 216{
 217    return - scsi_sense_to_errno(sense->key,
 218                                 (sense->ascq & 0xFF00) >> 8,
 219                                 sense->ascq & 0xFF);
 220}
 221
 222/* Called (via iscsi_service) with QemuMutex held.  */
 223static void
 224iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 225                        void *command_data, void *opaque)
 226{
 227    struct IscsiTask *iTask = opaque;
 228    struct scsi_task *task = command_data;
 229
 230    iTask->status = status;
 231    iTask->do_retry = 0;
 232    iTask->task = task;
 233
 234    if (status != SCSI_STATUS_GOOD) {
 235        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
 236            if (status == SCSI_STATUS_CHECK_CONDITION
 237                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
 238                error_report("iSCSI CheckCondition: %s",
 239                             iscsi_get_error(iscsi));
 240                iTask->do_retry = 1;
 241                goto out;
 242            }
 243            if (status == SCSI_STATUS_BUSY ||
 244                status == SCSI_STATUS_TIMEOUT ||
 245                status == SCSI_STATUS_TASK_SET_FULL) {
 246                unsigned retry_time =
 247                    exp_random(iscsi_retry_times[iTask->retries - 1]);
 248                if (status == SCSI_STATUS_TIMEOUT) {
 249                    /* make sure the request is rescheduled AFTER the
 250                     * reconnect is initiated */
 251                    retry_time = EVENT_INTERVAL * 2;
 252                    iTask->iscsilun->request_timed_out = true;
 253                }
 254                error_report("iSCSI Busy/TaskSetFull/TimeOut"
 255                             " (retry #%u in %u ms): %s",
 256                             iTask->retries, retry_time,
 257                             iscsi_get_error(iscsi));
 258                aio_timer_init(iTask->iscsilun->aio_context,
 259                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
 260                               SCALE_MS, iscsi_retry_timer_expired, iTask);
 261                timer_mod(&iTask->retry_timer,
 262                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
 263                iTask->do_retry = 1;
 264                return;
 265            }
 266        }
 267        iTask->err_code = iscsi_translate_sense(&task->sense);
 268        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
 269    }
 270
 271out:
 272    if (iTask->co) {
 273        aio_bh_schedule_oneshot(iTask->iscsilun->aio_context,
 274                                 iscsi_co_generic_bh_cb, iTask);
 275    } else {
 276        iTask->complete = 1;
 277    }
 278}
 279
 280static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 281{
 282    *iTask = (struct IscsiTask) {
 283        .co         = qemu_coroutine_self(),
 284        .iscsilun   = iscsilun,
 285    };
 286}
 287
 288static void
 289iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 290                    void *private_data)
 291{
 292    IscsiAIOCB *acb = private_data;
 293
 294    acb->status = -ECANCELED;
 295    iscsi_schedule_bh(acb);
 296}
 297
 298static void
 299iscsi_aio_cancel(BlockAIOCB *blockacb)
 300{
 301    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
 302    IscsiLun *iscsilun = acb->iscsilun;
 303
 304    if (acb->status != -EINPROGRESS) {
 305        return;
 306    }
 307
 308    /* send a task mgmt call to the target to cancel the task on the target */
 309    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
 310                                     iscsi_abort_task_cb, acb);
 311
 312}
 313
 314static const AIOCBInfo iscsi_aiocb_info = {
 315    .aiocb_size         = sizeof(IscsiAIOCB),
 316    .cancel_async       = iscsi_aio_cancel,
 317};
 318
 319
 320static void iscsi_process_read(void *arg);
 321static void iscsi_process_write(void *arg);
 322
 323/* Called with QemuMutex held.  */
 324static void
 325iscsi_set_events(IscsiLun *iscsilun)
 326{
 327    struct iscsi_context *iscsi = iscsilun->iscsi;
 328    int ev = iscsi_which_events(iscsi);
 329
 330    if (ev != iscsilun->events) {
 331        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
 332                           false,
 333                           (ev & POLLIN) ? iscsi_process_read : NULL,
 334                           (ev & POLLOUT) ? iscsi_process_write : NULL,
 335                           NULL,
 336                           iscsilun);
 337        iscsilun->events = ev;
 338    }
 339}
 340
 341static void iscsi_timed_check_events(void *opaque)
 342{
 343    IscsiLun *iscsilun = opaque;
 344
 345    /* check for timed out requests */
 346    iscsi_service(iscsilun->iscsi, 0);
 347
 348    if (iscsilun->request_timed_out) {
 349        iscsilun->request_timed_out = false;
 350        iscsi_reconnect(iscsilun->iscsi);
 351    }
 352
 353    /* newer versions of libiscsi may return zero events. Ensure we are able
 354     * to return to service once this situation changes. */
 355    iscsi_set_events(iscsilun);
 356
 357    timer_mod(iscsilun->event_timer,
 358              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 359}
 360
 361static void
 362iscsi_process_read(void *arg)
 363{
 364    IscsiLun *iscsilun = arg;
 365    struct iscsi_context *iscsi = iscsilun->iscsi;
 366
 367    qemu_mutex_lock(&iscsilun->mutex);
 368    iscsi_service(iscsi, POLLIN);
 369    iscsi_set_events(iscsilun);
 370    qemu_mutex_unlock(&iscsilun->mutex);
 371}
 372
 373static void
 374iscsi_process_write(void *arg)
 375{
 376    IscsiLun *iscsilun = arg;
 377    struct iscsi_context *iscsi = iscsilun->iscsi;
 378
 379    qemu_mutex_lock(&iscsilun->mutex);
 380    iscsi_service(iscsi, POLLOUT);
 381    iscsi_set_events(iscsilun);
 382    qemu_mutex_unlock(&iscsilun->mutex);
 383}
 384
 385static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
 386{
 387    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
 388}
 389
 390static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
 391{
 392    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 393}
 394
 395static bool is_byte_request_lun_aligned(int64_t offset, int count,
 396                                        IscsiLun *iscsilun)
 397{
 398    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
 399        error_report("iSCSI misaligned request: "
 400                     "iscsilun->block_size %u, offset %" PRIi64
 401                     ", count %d",
 402                     iscsilun->block_size, offset, count);
 403        return false;
 404    }
 405    return true;
 406}
 407
 408static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
 409                                          IscsiLun *iscsilun)
 410{
 411    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
 412    return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
 413                                       nb_sectors << BDRV_SECTOR_BITS,
 414                                       iscsilun);
 415}
 416
 417static void iscsi_allocmap_free(IscsiLun *iscsilun)
 418{
 419    g_free(iscsilun->allocmap);
 420    g_free(iscsilun->allocmap_valid);
 421    iscsilun->allocmap = NULL;
 422    iscsilun->allocmap_valid = NULL;
 423}
 424
 425
 426static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
 427{
 428    iscsi_allocmap_free(iscsilun);
 429
 430    iscsilun->allocmap_size =
 431        DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
 432                     iscsilun->cluster_sectors);
 433
 434    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
 435    if (!iscsilun->allocmap) {
 436        return -ENOMEM;
 437    }
 438
 439    if (open_flags & BDRV_O_NOCACHE) {
 440        /* in case that cache.direct = on all allocmap entries are
 441         * treated as invalid to force a relookup of the block
 442         * status on every read request */
 443        return 0;
 444    }
 445
 446    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
 447    if (!iscsilun->allocmap_valid) {
 448        /* if we are under memory pressure free the allocmap as well */
 449        iscsi_allocmap_free(iscsilun);
 450        return -ENOMEM;
 451    }
 452
 453    return 0;
 454}
 455
 456static void
 457iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
 458                      int nb_sectors, bool allocated, bool valid)
 459{
 460    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
 461
 462    if (iscsilun->allocmap == NULL) {
 463        return;
 464    }
 465    /* expand to entirely contain all affected clusters */
 466    cl_num_expanded = sector_num / iscsilun->cluster_sectors;
 467    nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
 468                                   iscsilun->cluster_sectors) - cl_num_expanded;
 469    /* shrink to touch only completely contained clusters */
 470    cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
 471    nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
 472                      - cl_num_shrunk;
 473    if (allocated) {
 474        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
 475    } else {
 476        if (nb_cls_shrunk > 0) {
 477            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
 478        }
 479    }
 480
 481    if (iscsilun->allocmap_valid == NULL) {
 482        return;
 483    }
 484    if (valid) {
 485        if (nb_cls_shrunk > 0) {
 486            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
 487        }
 488    } else {
 489        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
 490                     nb_cls_expanded);
 491    }
 492}
 493
 494static void
 495iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
 496                             int nb_sectors)
 497{
 498    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
 499}
 500
 501static void
 502iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
 503                               int nb_sectors)
 504{
 505    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
 506     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
 507     */
 508    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
 509}
 510
 511static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
 512                                       int nb_sectors)
 513{
 514    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
 515}
 516
 517static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
 518{
 519    if (iscsilun->allocmap) {
 520        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
 521    }
 522    if (iscsilun->allocmap_valid) {
 523        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
 524    }
 525}
 526
 527static inline bool
 528iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
 529                            int nb_sectors)
 530{
 531    unsigned long size;
 532    if (iscsilun->allocmap == NULL) {
 533        return true;
 534    }
 535    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 536    return !(find_next_bit(iscsilun->allocmap, size,
 537                           sector_num / iscsilun->cluster_sectors) == size);
 538}
 539
 540static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
 541                                           int64_t sector_num, int nb_sectors)
 542{
 543    unsigned long size;
 544    if (iscsilun->allocmap_valid == NULL) {
 545        return false;
 546    }
 547    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
 548    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
 549                               sector_num / iscsilun->cluster_sectors) == size);
 550}
 551
 552static int coroutine_fn
 553iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 554                      QEMUIOVector *iov, int flags)
 555{
 556    IscsiLun *iscsilun = bs->opaque;
 557    struct IscsiTask iTask;
 558    uint64_t lba;
 559    uint32_t num_sectors;
 560    bool fua = flags & BDRV_REQ_FUA;
 561    int r = 0;
 562
 563    if (fua) {
 564        assert(iscsilun->dpofua);
 565    }
 566    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 567        return -EINVAL;
 568    }
 569
 570    if (bs->bl.max_transfer) {
 571        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 572    }
 573
 574    lba = sector_qemu2lun(sector_num, iscsilun);
 575    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 576    iscsi_co_init_iscsitask(iscsilun, &iTask);
 577    qemu_mutex_lock(&iscsilun->mutex);
 578retry:
 579    if (iscsilun->use_16_for_rw) {
 580#if LIBISCSI_API_VERSION >= (20160603)
 581        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 582                                            NULL, num_sectors * iscsilun->block_size,
 583                                            iscsilun->block_size, 0, 0, fua, 0, 0,
 584                                            iscsi_co_generic_cb, &iTask,
 585                                            (struct scsi_iovec *)iov->iov, iov->niov);
 586    } else {
 587        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 588                                            NULL, num_sectors * iscsilun->block_size,
 589                                            iscsilun->block_size, 0, 0, fua, 0, 0,
 590                                            iscsi_co_generic_cb, &iTask,
 591                                            (struct scsi_iovec *)iov->iov, iov->niov);
 592    }
 593#else
 594        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
 595                                        NULL, num_sectors * iscsilun->block_size,
 596                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 597                                        iscsi_co_generic_cb, &iTask);
 598    } else {
 599        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
 600                                        NULL, num_sectors * iscsilun->block_size,
 601                                        iscsilun->block_size, 0, 0, fua, 0, 0,
 602                                        iscsi_co_generic_cb, &iTask);
 603    }
 604#endif
 605    if (iTask.task == NULL) {
 606        qemu_mutex_unlock(&iscsilun->mutex);
 607        return -ENOMEM;
 608    }
 609#if LIBISCSI_API_VERSION < (20160603)
 610    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
 611                          iov->niov);
 612#endif
 613    while (!iTask.complete) {
 614        iscsi_set_events(iscsilun);
 615        qemu_mutex_unlock(&iscsilun->mutex);
 616        qemu_coroutine_yield();
 617        qemu_mutex_lock(&iscsilun->mutex);
 618    }
 619
 620    if (iTask.task != NULL) {
 621        scsi_free_scsi_task(iTask.task);
 622        iTask.task = NULL;
 623    }
 624
 625    if (iTask.do_retry) {
 626        iTask.complete = 0;
 627        goto retry;
 628    }
 629
 630    if (iTask.status != SCSI_STATUS_GOOD) {
 631        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
 632        r = iTask.err_code;
 633        goto out_unlock;
 634    }
 635
 636    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
 637
 638out_unlock:
 639    qemu_mutex_unlock(&iscsilun->mutex);
 640    return r;
 641}
 642
 643
 644
 645static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
 646                                                  int64_t sector_num,
 647                                                  int nb_sectors, int *pnum,
 648                                                  BlockDriverState **file)
 649{
 650    IscsiLun *iscsilun = bs->opaque;
 651    struct scsi_get_lba_status *lbas = NULL;
 652    struct scsi_lba_status_descriptor *lbasd = NULL;
 653    struct IscsiTask iTask;
 654    int64_t ret;
 655
 656    iscsi_co_init_iscsitask(iscsilun, &iTask);
 657
 658    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 659        ret = -EINVAL;
 660        goto out;
 661    }
 662
 663    /* default to all sectors allocated */
 664    ret = BDRV_BLOCK_DATA;
 665    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
 666    *pnum = nb_sectors;
 667
 668    /* LUN does not support logical block provisioning */
 669    if (!iscsilun->lbpme) {
 670        goto out;
 671    }
 672
 673    qemu_mutex_lock(&iscsilun->mutex);
 674retry:
 675    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
 676                                  sector_qemu2lun(sector_num, iscsilun),
 677                                  8 + 16, iscsi_co_generic_cb,
 678                                  &iTask) == NULL) {
 679        ret = -ENOMEM;
 680        goto out_unlock;
 681    }
 682
 683    while (!iTask.complete) {
 684        iscsi_set_events(iscsilun);
 685        qemu_mutex_unlock(&iscsilun->mutex);
 686        qemu_coroutine_yield();
 687        qemu_mutex_lock(&iscsilun->mutex);
 688    }
 689
 690    if (iTask.do_retry) {
 691        if (iTask.task != NULL) {
 692            scsi_free_scsi_task(iTask.task);
 693            iTask.task = NULL;
 694        }
 695        iTask.complete = 0;
 696        goto retry;
 697    }
 698
 699    if (iTask.status != SCSI_STATUS_GOOD) {
 700        /* in case the get_lba_status_callout fails (i.e.
 701         * because the device is busy or the cmd is not
 702         * supported) we pretend all blocks are allocated
 703         * for backwards compatibility */
 704        goto out_unlock;
 705    }
 706
 707    lbas = scsi_datain_unmarshall(iTask.task);
 708    if (lbas == NULL) {
 709        ret = -EIO;
 710        goto out_unlock;
 711    }
 712
 713    lbasd = &lbas->descriptors[0];
 714
 715    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
 716        ret = -EIO;
 717        goto out_unlock;
 718    }
 719
 720    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
 721
 722    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
 723        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
 724        ret &= ~BDRV_BLOCK_DATA;
 725        if (iscsilun->lbprz) {
 726            ret |= BDRV_BLOCK_ZERO;
 727        }
 728    }
 729
 730    if (ret & BDRV_BLOCK_ZERO) {
 731        iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
 732    } else {
 733        iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
 734    }
 735
 736    if (*pnum > nb_sectors) {
 737        *pnum = nb_sectors;
 738    }
 739out_unlock:
 740    qemu_mutex_unlock(&iscsilun->mutex);
 741out:
 742    if (iTask.task != NULL) {
 743        scsi_free_scsi_task(iTask.task);
 744    }
 745    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
 746        *file = bs;
 747    }
 748    return ret;
 749}
 750
 751static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
 752                                       int64_t sector_num, int nb_sectors,
 753                                       QEMUIOVector *iov)
 754{
 755    IscsiLun *iscsilun = bs->opaque;
 756    struct IscsiTask iTask;
 757    uint64_t lba;
 758    uint32_t num_sectors;
 759
 760    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 761        return -EINVAL;
 762    }
 763
 764    if (bs->bl.max_transfer) {
 765        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
 766    }
 767
 768    /* if cache.direct is off and we have a valid entry in our allocation map
 769     * we can skip checking the block status and directly return zeroes if
 770     * the request falls within an unallocated area */
 771    if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
 772        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
 773            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 774            return 0;
 775    }
 776
 777    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
 778        !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
 779        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
 780        int pnum;
 781        BlockDriverState *file;
 782        /* check the block status from the beginning of the cluster
 783         * containing the start sector */
 784        int64_t ret = iscsi_co_get_block_status(bs,
 785                          sector_num - sector_num % iscsilun->cluster_sectors,
 786                          BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
 787        if (ret < 0) {
 788            return ret;
 789        }
 790        /* if the whole request falls into an unallocated area we can avoid
 791         * to read and directly return zeroes instead */
 792        if (ret & BDRV_BLOCK_ZERO &&
 793            pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
 794            qemu_iovec_memset(iov, 0, 0x00, iov->size);
 795            return 0;
 796        }
 797    }
 798
 799    lba = sector_qemu2lun(sector_num, iscsilun);
 800    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 801
 802    iscsi_co_init_iscsitask(iscsilun, &iTask);
 803    qemu_mutex_lock(&iscsilun->mutex);
 804retry:
 805    if (iscsilun->use_16_for_rw) {
 806#if LIBISCSI_API_VERSION >= (20160603)
 807        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 808                                           num_sectors * iscsilun->block_size,
 809                                           iscsilun->block_size, 0, 0, 0, 0, 0,
 810                                           iscsi_co_generic_cb, &iTask,
 811                                           (struct scsi_iovec *)iov->iov, iov->niov);
 812    } else {
 813        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
 814                                           num_sectors * iscsilun->block_size,
 815                                           iscsilun->block_size,
 816                                           0, 0, 0, 0, 0,
 817                                           iscsi_co_generic_cb, &iTask,
 818                                           (struct scsi_iovec *)iov->iov, iov->niov);
 819    }
 820#else
 821        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
 822                                       num_sectors * iscsilun->block_size,
 823                                       iscsilun->block_size, 0, 0, 0, 0, 0,
 824                                       iscsi_co_generic_cb, &iTask);
 825    } else {
 826        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
 827                                       num_sectors * iscsilun->block_size,
 828                                       iscsilun->block_size,
 829                                       0, 0, 0, 0, 0,
 830                                       iscsi_co_generic_cb, &iTask);
 831    }
 832#endif
 833    if (iTask.task == NULL) {
 834        qemu_mutex_unlock(&iscsilun->mutex);
 835        return -ENOMEM;
 836    }
 837#if LIBISCSI_API_VERSION < (20160603)
 838    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
 839#endif
 840    while (!iTask.complete) {
 841        iscsi_set_events(iscsilun);
 842        qemu_mutex_unlock(&iscsilun->mutex);
 843        qemu_coroutine_yield();
 844        qemu_mutex_lock(&iscsilun->mutex);
 845    }
 846
 847    if (iTask.task != NULL) {
 848        scsi_free_scsi_task(iTask.task);
 849        iTask.task = NULL;
 850    }
 851
 852    if (iTask.do_retry) {
 853        iTask.complete = 0;
 854        goto retry;
 855    }
 856    qemu_mutex_unlock(&iscsilun->mutex);
 857
 858    if (iTask.status != SCSI_STATUS_GOOD) {
 859        return iTask.err_code;
 860    }
 861
 862    return 0;
 863}
 864
 865static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 866{
 867    IscsiLun *iscsilun = bs->opaque;
 868    struct IscsiTask iTask;
 869
 870    iscsi_co_init_iscsitask(iscsilun, &iTask);
 871    qemu_mutex_lock(&iscsilun->mutex);
 872retry:
 873    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
 874                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
 875        qemu_mutex_unlock(&iscsilun->mutex);
 876        return -ENOMEM;
 877    }
 878
 879    while (!iTask.complete) {
 880        iscsi_set_events(iscsilun);
 881        qemu_mutex_unlock(&iscsilun->mutex);
 882        qemu_coroutine_yield();
 883        qemu_mutex_lock(&iscsilun->mutex);
 884    }
 885
 886    if (iTask.task != NULL) {
 887        scsi_free_scsi_task(iTask.task);
 888        iTask.task = NULL;
 889    }
 890
 891    if (iTask.do_retry) {
 892        iTask.complete = 0;
 893        goto retry;
 894    }
 895    qemu_mutex_unlock(&iscsilun->mutex);
 896
 897    if (iTask.status != SCSI_STATUS_GOOD) {
 898        return iTask.err_code;
 899    }
 900
 901    return 0;
 902}
 903
 904#ifdef __linux__
 905/* Called (via iscsi_service) with QemuMutex held.  */
 906static void
 907iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 908                     void *command_data, void *opaque)
 909{
 910    IscsiAIOCB *acb = opaque;
 911
 912    g_free(acb->buf);
 913    acb->buf = NULL;
 914
 915    acb->status = 0;
 916    if (status < 0) {
 917        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
 918                     iscsi_get_error(iscsi));
 919        acb->status = iscsi_translate_sense(&acb->task->sense);
 920    }
 921
 922    acb->ioh->driver_status = 0;
 923    acb->ioh->host_status   = 0;
 924    acb->ioh->resid         = 0;
 925    acb->ioh->status        = status;
 926
 927#define SG_ERR_DRIVER_SENSE    0x08
 928
 929    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
 930        int ss;
 931
 932        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
 933
 934        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
 935        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
 936             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
 937        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
 938    }
 939
 940    iscsi_schedule_bh(acb);
 941}
 942
 943static void iscsi_ioctl_bh_completion(void *opaque)
 944{
 945    IscsiAIOCB *acb = opaque;
 946
 947    qemu_bh_delete(acb->bh);
 948    acb->common.cb(acb->common.opaque, acb->ret);
 949    qemu_aio_unref(acb);
 950}
 951
 952static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
 953{
 954    BlockDriverState *bs = acb->common.bs;
 955    IscsiLun *iscsilun = bs->opaque;
 956    int ret = 0;
 957
 958    switch (req) {
 959    case SG_GET_VERSION_NUM:
 960        *(int *)buf = 30000;
 961        break;
 962    case SG_GET_SCSI_ID:
 963        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
 964        break;
 965    default:
 966        ret = -EINVAL;
 967    }
 968    assert(!acb->bh);
 969    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
 970                         iscsi_ioctl_bh_completion, acb);
 971    acb->ret = ret;
 972    qemu_bh_schedule(acb->bh);
 973}
 974
 975static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 976        unsigned long int req, void *buf,
 977        BlockCompletionFunc *cb, void *opaque)
 978{
 979    IscsiLun *iscsilun = bs->opaque;
 980    struct iscsi_context *iscsi = iscsilun->iscsi;
 981    struct iscsi_data data;
 982    IscsiAIOCB *acb;
 983
 984    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
 985
 986    acb->iscsilun = iscsilun;
 987    acb->bh          = NULL;
 988    acb->status      = -EINPROGRESS;
 989    acb->buf         = NULL;
 990    acb->ioh         = buf;
 991
 992    if (req != SG_IO) {
 993        iscsi_ioctl_handle_emulated(acb, req, buf);
 994        return &acb->common;
 995    }
 996
 997    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
 998        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
 999                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
1000        qemu_aio_unref(acb);
1001        return NULL;
1002    }
1003
1004    acb->task = malloc(sizeof(struct scsi_task));
1005    if (acb->task == NULL) {
1006        error_report("iSCSI: Failed to allocate task for scsi command. %s",
1007                     iscsi_get_error(iscsi));
1008        qemu_aio_unref(acb);
1009        return NULL;
1010    }
1011    memset(acb->task, 0, sizeof(struct scsi_task));
1012
1013    switch (acb->ioh->dxfer_direction) {
1014    case SG_DXFER_TO_DEV:
1015        acb->task->xfer_dir = SCSI_XFER_WRITE;
1016        break;
1017    case SG_DXFER_FROM_DEV:
1018        acb->task->xfer_dir = SCSI_XFER_READ;
1019        break;
1020    default:
1021        acb->task->xfer_dir = SCSI_XFER_NONE;
1022        break;
1023    }
1024
1025    acb->task->cdb_size = acb->ioh->cmd_len;
1026    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
1027    acb->task->expxferlen = acb->ioh->dxfer_len;
1028
1029    data.size = 0;
1030    qemu_mutex_lock(&iscsilun->mutex);
1031    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
1032        if (acb->ioh->iovec_count == 0) {
1033            data.data = acb->ioh->dxferp;
1034            data.size = acb->ioh->dxfer_len;
1035        } else {
1036            scsi_task_set_iov_out(acb->task,
1037                                 (struct scsi_iovec *) acb->ioh->dxferp,
1038                                 acb->ioh->iovec_count);
1039        }
1040    }
1041
1042    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1043                                 iscsi_aio_ioctl_cb,
1044                                 (data.size > 0) ? &data : NULL,
1045                                 acb) != 0) {
1046        qemu_mutex_unlock(&iscsilun->mutex);
1047        scsi_free_scsi_task(acb->task);
1048        qemu_aio_unref(acb);
1049        return NULL;
1050    }
1051
1052    /* tell libiscsi to read straight into the buffer we got from ioctl */
1053    if (acb->task->xfer_dir == SCSI_XFER_READ) {
1054        if (acb->ioh->iovec_count == 0) {
1055            scsi_task_add_data_in_buffer(acb->task,
1056                                         acb->ioh->dxfer_len,
1057                                         acb->ioh->dxferp);
1058        } else {
1059            scsi_task_set_iov_in(acb->task,
1060                                 (struct scsi_iovec *) acb->ioh->dxferp,
1061                                 acb->ioh->iovec_count);
1062        }
1063    }
1064
1065    iscsi_set_events(iscsilun);
1066    qemu_mutex_unlock(&iscsilun->mutex);
1067
1068    return &acb->common;
1069}
1070
1071#endif
1072
1073static int64_t
1074iscsi_getlength(BlockDriverState *bs)
1075{
1076    IscsiLun *iscsilun = bs->opaque;
1077    int64_t len;
1078
1079    len  = iscsilun->num_blocks;
1080    len *= iscsilun->block_size;
1081
1082    return len;
1083}
1084
1085static int
1086coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
1087{
1088    IscsiLun *iscsilun = bs->opaque;
1089    struct IscsiTask iTask;
1090    struct unmap_list list;
1091    int r = 0;
1092
1093    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1094        return -ENOTSUP;
1095    }
1096
1097    if (!iscsilun->lbp.lbpu) {
1098        /* UNMAP is not supported by the target */
1099        return 0;
1100    }
1101
1102    list.lba = offset / iscsilun->block_size;
1103    list.num = bytes / iscsilun->block_size;
1104
1105    iscsi_co_init_iscsitask(iscsilun, &iTask);
1106    qemu_mutex_lock(&iscsilun->mutex);
1107retry:
1108    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1109                         iscsi_co_generic_cb, &iTask) == NULL) {
1110        r = -ENOMEM;
1111        goto out_unlock;
1112    }
1113
1114    while (!iTask.complete) {
1115        iscsi_set_events(iscsilun);
1116        qemu_mutex_unlock(&iscsilun->mutex);
1117        qemu_coroutine_yield();
1118        qemu_mutex_lock(&iscsilun->mutex);
1119    }
1120
1121    if (iTask.task != NULL) {
1122        scsi_free_scsi_task(iTask.task);
1123        iTask.task = NULL;
1124    }
1125
1126    if (iTask.do_retry) {
1127        iTask.complete = 0;
1128        goto retry;
1129    }
1130
1131    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1132        /* the target might fail with a check condition if it
1133           is not happy with the alignment of the UNMAP request
1134           we silently fail in this case */
1135        goto out_unlock;
1136    }
1137
1138    if (iTask.status != SCSI_STATUS_GOOD) {
1139        r = iTask.err_code;
1140        goto out_unlock;
1141    }
1142
1143    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1144                               bytes >> BDRV_SECTOR_BITS);
1145
1146out_unlock:
1147    qemu_mutex_unlock(&iscsilun->mutex);
1148    return r;
1149}
1150
1151static int
1152coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1153                                    int bytes, BdrvRequestFlags flags)
1154{
1155    IscsiLun *iscsilun = bs->opaque;
1156    struct IscsiTask iTask;
1157    uint64_t lba;
1158    uint32_t nb_blocks;
1159    bool use_16_for_ws = iscsilun->use_16_for_rw;
1160    int r = 0;
1161
1162    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1163        return -ENOTSUP;
1164    }
1165
1166    if (flags & BDRV_REQ_MAY_UNMAP) {
1167        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1168            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
1169            use_16_for_ws = true;
1170        }
1171        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1172            /* WRITESAME16 with UNMAP is not supported by the target,
1173             * fall back and try WRITESAME10/16 without UNMAP */
1174            flags &= ~BDRV_REQ_MAY_UNMAP;
1175            use_16_for_ws = iscsilun->use_16_for_rw;
1176        }
1177    }
1178
1179    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1180        /* WRITESAME without UNMAP is not supported by the target */
1181        return -ENOTSUP;
1182    }
1183
1184    lba = offset / iscsilun->block_size;
1185    nb_blocks = bytes / iscsilun->block_size;
1186
1187    if (iscsilun->zeroblock == NULL) {
1188        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1189        if (iscsilun->zeroblock == NULL) {
1190            return -ENOMEM;
1191        }
1192    }
1193
1194    qemu_mutex_lock(&iscsilun->mutex);
1195    iscsi_co_init_iscsitask(iscsilun, &iTask);
1196retry:
1197    if (use_16_for_ws) {
1198        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1199                                            iscsilun->zeroblock, iscsilun->block_size,
1200                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1201                                            0, 0, iscsi_co_generic_cb, &iTask);
1202    } else {
1203        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1204                                            iscsilun->zeroblock, iscsilun->block_size,
1205                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1206                                            0, 0, iscsi_co_generic_cb, &iTask);
1207    }
1208    if (iTask.task == NULL) {
1209        qemu_mutex_unlock(&iscsilun->mutex);
1210        return -ENOMEM;
1211    }
1212
1213    while (!iTask.complete) {
1214        iscsi_set_events(iscsilun);
1215        qemu_mutex_unlock(&iscsilun->mutex);
1216        qemu_coroutine_yield();
1217        qemu_mutex_lock(&iscsilun->mutex);
1218    }
1219
1220    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1221        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1222        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1223         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1224        /* WRITE SAME is not supported by the target */
1225        iscsilun->has_write_same = false;
1226        scsi_free_scsi_task(iTask.task);
1227        r = -ENOTSUP;
1228        goto out_unlock;
1229    }
1230
1231    if (iTask.task != NULL) {
1232        scsi_free_scsi_task(iTask.task);
1233        iTask.task = NULL;
1234    }
1235
1236    if (iTask.do_retry) {
1237        iTask.complete = 0;
1238        goto retry;
1239    }
1240
1241    if (iTask.status != SCSI_STATUS_GOOD) {
1242        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1243                                   bytes >> BDRV_SECTOR_BITS);
1244        r = iTask.err_code;
1245        goto out_unlock;
1246    }
1247
1248    if (flags & BDRV_REQ_MAY_UNMAP) {
1249        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1250                                   bytes >> BDRV_SECTOR_BITS);
1251    } else {
1252        iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
1253                                     bytes >> BDRV_SECTOR_BITS);
1254    }
1255
1256out_unlock:
1257    qemu_mutex_unlock(&iscsilun->mutex);
1258    return r;
1259}
1260
1261static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
1262                       Error **errp)
1263{
1264    const char *user = NULL;
1265    const char *password = NULL;
1266    const char *secretid;
1267    char *secret = NULL;
1268
1269    user = qemu_opt_get(opts, "user");
1270    if (!user) {
1271        return;
1272    }
1273
1274    secretid = qemu_opt_get(opts, "password-secret");
1275    password = qemu_opt_get(opts, "password");
1276    if (secretid && password) {
1277        error_setg(errp, "'password' and 'password-secret' properties are "
1278                   "mutually exclusive");
1279        return;
1280    }
1281    if (secretid) {
1282        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1283        if (!secret) {
1284            return;
1285        }
1286        password = secret;
1287    } else if (!password) {
1288        error_setg(errp, "CHAP username specified but no password was given");
1289        return;
1290    }
1291
1292    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1293        error_setg(errp, "Failed to set initiator username and password");
1294    }
1295
1296    g_free(secret);
1297}
1298
1299static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
1300                                Error **errp)
1301{
1302    const char *digest = NULL;
1303
1304    digest = qemu_opt_get(opts, "header-digest");
1305    if (!digest) {
1306        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1307    } else if (!strcmp(digest, "crc32c")) {
1308        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1309    } else if (!strcmp(digest, "none")) {
1310        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1311    } else if (!strcmp(digest, "crc32c-none")) {
1312        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1313    } else if (!strcmp(digest, "none-crc32c")) {
1314        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1315    } else {
1316        error_setg(errp, "Invalid header-digest setting : %s", digest);
1317    }
1318}
1319
1320static char *get_initiator_name(QemuOpts *opts)
1321{
1322    const char *name;
1323    char *iscsi_name;
1324    UuidInfo *uuid_info;
1325
1326    name = qemu_opt_get(opts, "initiator-name");
1327    if (name) {
1328        return g_strdup(name);
1329    }
1330
1331    uuid_info = qmp_query_uuid(NULL);
1332    if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1333        name = qemu_get_vm_name();
1334    } else {
1335        name = uuid_info->UUID;
1336    }
1337    iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1338                                 name ? ":" : "", name ? name : "");
1339    qapi_free_UuidInfo(uuid_info);
1340    return iscsi_name;
1341}
1342
1343static void iscsi_nop_timed_event(void *opaque)
1344{
1345    IscsiLun *iscsilun = opaque;
1346
1347    qemu_mutex_lock(&iscsilun->mutex);
1348    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1349        error_report("iSCSI: NOP timeout. Reconnecting...");
1350        iscsilun->request_timed_out = true;
1351    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1352        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1353        goto out;
1354    }
1355
1356    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1357    iscsi_set_events(iscsilun);
1358
1359out:
1360    qemu_mutex_unlock(&iscsilun->mutex);
1361}
1362
1363static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1364{
1365    struct scsi_task *task = NULL;
1366    struct scsi_readcapacity10 *rc10 = NULL;
1367    struct scsi_readcapacity16 *rc16 = NULL;
1368    int retries = ISCSI_CMD_RETRIES; 
1369
1370    do {
1371        if (task != NULL) {
1372            scsi_free_scsi_task(task);
1373            task = NULL;
1374        }
1375
1376        switch (iscsilun->type) {
1377        case TYPE_DISK:
1378            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1379            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1380                rc16 = scsi_datain_unmarshall(task);
1381                if (rc16 == NULL) {
1382                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1383                } else {
1384                    iscsilun->block_size = rc16->block_length;
1385                    iscsilun->num_blocks = rc16->returned_lba + 1;
1386                    iscsilun->lbpme = !!rc16->lbpme;
1387                    iscsilun->lbprz = !!rc16->lbprz;
1388                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1389                }
1390                break;
1391            }
1392            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1393                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1394                break;
1395            }
1396            /* Fall through and try READ CAPACITY(10) instead.  */
1397        case TYPE_ROM:
1398            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1399            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1400                rc10 = scsi_datain_unmarshall(task);
1401                if (rc10 == NULL) {
1402                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1403                } else {
1404                    iscsilun->block_size = rc10->block_size;
1405                    if (rc10->lba == 0) {
1406                        /* blank disk loaded */
1407                        iscsilun->num_blocks = 0;
1408                    } else {
1409                        iscsilun->num_blocks = rc10->lba + 1;
1410                    }
1411                }
1412            }
1413            break;
1414        default:
1415            return;
1416        }
1417    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1418             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1419             && retries-- > 0);
1420
1421    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1422        error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1423    } else if (!iscsilun->block_size ||
1424               iscsilun->block_size % BDRV_SECTOR_SIZE) {
1425        error_setg(errp, "iSCSI: the target returned an invalid "
1426                   "block size of %d.", iscsilun->block_size);
1427    }
1428    if (task) {
1429        scsi_free_scsi_task(task);
1430    }
1431}
1432
1433static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1434                                          int evpd, int pc, void **inq, Error **errp)
1435{
1436    int full_size;
1437    struct scsi_task *task = NULL;
1438    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1439    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1440        goto fail;
1441    }
1442    full_size = scsi_datain_getfullsize(task);
1443    if (full_size > task->datain.size) {
1444        scsi_free_scsi_task(task);
1445
1446        /* we need more data for the full list */
1447        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1448        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1449            goto fail;
1450        }
1451    }
1452
1453    *inq = scsi_datain_unmarshall(task);
1454    if (*inq == NULL) {
1455        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1456        goto fail_with_err;
1457    }
1458
1459    return task;
1460
1461fail:
1462    error_setg(errp, "iSCSI: Inquiry command failed : %s",
1463               iscsi_get_error(iscsi));
1464fail_with_err:
1465    if (task != NULL) {
1466        scsi_free_scsi_task(task);
1467    }
1468    return NULL;
1469}
1470
1471static void iscsi_detach_aio_context(BlockDriverState *bs)
1472{
1473    IscsiLun *iscsilun = bs->opaque;
1474
1475    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1476                       false, NULL, NULL, NULL, NULL);
1477    iscsilun->events = 0;
1478
1479    if (iscsilun->nop_timer) {
1480        timer_del(iscsilun->nop_timer);
1481        timer_free(iscsilun->nop_timer);
1482        iscsilun->nop_timer = NULL;
1483    }
1484    if (iscsilun->event_timer) {
1485        timer_del(iscsilun->event_timer);
1486        timer_free(iscsilun->event_timer);
1487        iscsilun->event_timer = NULL;
1488    }
1489}
1490
1491static void iscsi_attach_aio_context(BlockDriverState *bs,
1492                                     AioContext *new_context)
1493{
1494    IscsiLun *iscsilun = bs->opaque;
1495
1496    iscsilun->aio_context = new_context;
1497    iscsi_set_events(iscsilun);
1498
1499    /* Set up a timer for sending out iSCSI NOPs */
1500    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1501                                        QEMU_CLOCK_REALTIME, SCALE_MS,
1502                                        iscsi_nop_timed_event, iscsilun);
1503    timer_mod(iscsilun->nop_timer,
1504              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1505
1506    /* Set up a timer for periodic calls to iscsi_set_events and to
1507     * scan for command timeout */
1508    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1509                                          QEMU_CLOCK_REALTIME, SCALE_MS,
1510                                          iscsi_timed_check_events, iscsilun);
1511    timer_mod(iscsilun->event_timer,
1512              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1513}
1514
1515static void iscsi_modesense_sync(IscsiLun *iscsilun)
1516{
1517    struct scsi_task *task;
1518    struct scsi_mode_sense *ms = NULL;
1519    iscsilun->write_protected = false;
1520    iscsilun->dpofua = false;
1521
1522    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1523                                 1, SCSI_MODESENSE_PC_CURRENT,
1524                                 0x3F, 0, 255);
1525    if (task == NULL) {
1526        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1527                     iscsi_get_error(iscsilun->iscsi));
1528        goto out;
1529    }
1530
1531    if (task->status != SCSI_STATUS_GOOD) {
1532        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1533        goto out;
1534    }
1535    ms = scsi_datain_unmarshall(task);
1536    if (!ms) {
1537        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1538                     iscsi_get_error(iscsilun->iscsi));
1539        goto out;
1540    }
1541    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1542    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1543
1544out:
1545    if (task) {
1546        scsi_free_scsi_task(task);
1547    }
1548}
1549
1550static void iscsi_parse_iscsi_option(const char *target, QDict *options)
1551{
1552    QemuOptsList *list;
1553    QemuOpts *opts;
1554    const char *user, *password, *password_secret, *initiator_name,
1555               *header_digest, *timeout;
1556
1557    list = qemu_find_opts("iscsi");
1558    if (!list) {
1559        return;
1560    }
1561
1562    opts = qemu_opts_find(list, target);
1563    if (opts == NULL) {
1564        opts = QTAILQ_FIRST(&list->head);
1565        if (!opts) {
1566            return;
1567        }
1568    }
1569
1570    user = qemu_opt_get(opts, "user");
1571    if (user) {
1572        qdict_set_default_str(options, "user", user);
1573    }
1574
1575    password = qemu_opt_get(opts, "password");
1576    if (password) {
1577        qdict_set_default_str(options, "password", password);
1578    }
1579
1580    password_secret = qemu_opt_get(opts, "password-secret");
1581    if (password_secret) {
1582        qdict_set_default_str(options, "password-secret", password_secret);
1583    }
1584
1585    initiator_name = qemu_opt_get(opts, "initiator-name");
1586    if (initiator_name) {
1587        qdict_set_default_str(options, "initiator-name", initiator_name);
1588    }
1589
1590    header_digest = qemu_opt_get(opts, "header-digest");
1591    if (header_digest) {
1592        /* -iscsi takes upper case values, but QAPI only supports lower case
1593         * enum constant names, so we have to convert here. */
1594        char *qapi_value = g_ascii_strdown(header_digest, -1);
1595        qdict_set_default_str(options, "header-digest", qapi_value);
1596        g_free(qapi_value);
1597    }
1598
1599    timeout = qemu_opt_get(opts, "timeout");
1600    if (timeout) {
1601        qdict_set_default_str(options, "timeout", timeout);
1602    }
1603}
1604
1605/*
1606 * We support iscsi url's on the form
1607 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1608 */
1609static void iscsi_parse_filename(const char *filename, QDict *options,
1610                                 Error **errp)
1611{
1612    struct iscsi_url *iscsi_url;
1613    const char *transport_name;
1614    char *lun_str;
1615
1616    iscsi_url = iscsi_parse_full_url(NULL, filename);
1617    if (iscsi_url == NULL) {
1618        error_setg(errp, "Failed to parse URL : %s", filename);
1619        return;
1620    }
1621
1622#if LIBISCSI_API_VERSION >= (20160603)
1623    switch (iscsi_url->transport) {
1624    case TCP_TRANSPORT:
1625        transport_name = "tcp";
1626        break;
1627    case ISER_TRANSPORT:
1628        transport_name = "iser";
1629        break;
1630    default:
1631        error_setg(errp, "Unknown transport type (%d)",
1632                   iscsi_url->transport);
1633        return;
1634    }
1635#else
1636    transport_name = "tcp";
1637#endif
1638
1639    qdict_set_default_str(options, "transport", transport_name);
1640    qdict_set_default_str(options, "portal", iscsi_url->portal);
1641    qdict_set_default_str(options, "target", iscsi_url->target);
1642
1643    lun_str = g_strdup_printf("%d", iscsi_url->lun);
1644    qdict_set_default_str(options, "lun", lun_str);
1645    g_free(lun_str);
1646
1647    /* User/password from -iscsi take precedence over those from the URL */
1648    iscsi_parse_iscsi_option(iscsi_url->target, options);
1649
1650    if (iscsi_url->user[0] != '\0') {
1651        qdict_set_default_str(options, "user", iscsi_url->user);
1652        qdict_set_default_str(options, "password", iscsi_url->passwd);
1653    }
1654
1655    iscsi_destroy_url(iscsi_url);
1656}
1657
1658static QemuOptsList runtime_opts = {
1659    .name = "iscsi",
1660    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1661    .desc = {
1662        {
1663            .name = "transport",
1664            .type = QEMU_OPT_STRING,
1665        },
1666        {
1667            .name = "portal",
1668            .type = QEMU_OPT_STRING,
1669        },
1670        {
1671            .name = "target",
1672            .type = QEMU_OPT_STRING,
1673        },
1674        {
1675            .name = "user",
1676            .type = QEMU_OPT_STRING,
1677        },
1678        {
1679            .name = "password",
1680            .type = QEMU_OPT_STRING,
1681        },
1682        {
1683            .name = "password-secret",
1684            .type = QEMU_OPT_STRING,
1685        },
1686        {
1687            .name = "lun",
1688            .type = QEMU_OPT_NUMBER,
1689        },
1690        {
1691            .name = "initiator-name",
1692            .type = QEMU_OPT_STRING,
1693        },
1694        {
1695            .name = "header-digest",
1696            .type = QEMU_OPT_STRING,
1697        },
1698        {
1699            .name = "timeout",
1700            .type = QEMU_OPT_NUMBER,
1701        },
1702        {
1703            .name = "filename",
1704            .type = QEMU_OPT_STRING,
1705        },
1706        { /* end of list */ }
1707    },
1708};
1709
1710static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1711                      Error **errp)
1712{
1713    IscsiLun *iscsilun = bs->opaque;
1714    struct iscsi_context *iscsi = NULL;
1715    struct scsi_task *task = NULL;
1716    struct scsi_inquiry_standard *inq = NULL;
1717    struct scsi_inquiry_supported_pages *inq_vpd;
1718    char *initiator_name = NULL;
1719    QemuOpts *opts;
1720    Error *local_err = NULL;
1721    const char *transport_name, *portal, *target, *filename;
1722#if LIBISCSI_API_VERSION >= (20160603)
1723    enum iscsi_transport_type transport;
1724#endif
1725    int i, ret = 0, timeout = 0, lun;
1726
1727    /* If we are given a filename, parse the filename, with precedence given to
1728     * filename encoded options */
1729    filename = qdict_get_try_str(options, "filename");
1730    if (filename) {
1731        warn_report("'filename' option specified. "
1732                    "This is an unsupported option, and may be deprecated "
1733                    "in the future");
1734        iscsi_parse_filename(filename, options, &local_err);
1735        if (local_err) {
1736            ret = -EINVAL;
1737            error_propagate(errp, local_err);
1738            goto exit;
1739        }
1740    }
1741
1742    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1743    qemu_opts_absorb_qdict(opts, options, &local_err);
1744    if (local_err) {
1745        error_propagate(errp, local_err);
1746        ret = -EINVAL;
1747        goto out;
1748    }
1749
1750    transport_name = qemu_opt_get(opts, "transport");
1751    portal = qemu_opt_get(opts, "portal");
1752    target = qemu_opt_get(opts, "target");
1753    lun = qemu_opt_get_number(opts, "lun", 0);
1754
1755    if (!transport_name || !portal || !target) {
1756        error_setg(errp, "Need all of transport, portal and target options");
1757        ret = -EINVAL;
1758        goto out;
1759    }
1760
1761    if (!strcmp(transport_name, "tcp")) {
1762#if LIBISCSI_API_VERSION >= (20160603)
1763        transport = TCP_TRANSPORT;
1764    } else if (!strcmp(transport_name, "iser")) {
1765        transport = ISER_TRANSPORT;
1766#else
1767        /* TCP is what older libiscsi versions always use */
1768#endif
1769    } else {
1770        error_setg(errp, "Unknown transport: %s", transport_name);
1771        ret = -EINVAL;
1772        goto out;
1773    }
1774
1775    memset(iscsilun, 0, sizeof(IscsiLun));
1776
1777    initiator_name = get_initiator_name(opts);
1778
1779    iscsi = iscsi_create_context(initiator_name);
1780    if (iscsi == NULL) {
1781        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1782        ret = -ENOMEM;
1783        goto out;
1784    }
1785#if LIBISCSI_API_VERSION >= (20160603)
1786    if (iscsi_init_transport(iscsi, transport)) {
1787        error_setg(errp, ("Error initializing transport."));
1788        ret = -EINVAL;
1789        goto out;
1790    }
1791#endif
1792    if (iscsi_set_targetname(iscsi, target)) {
1793        error_setg(errp, "iSCSI: Failed to set target name.");
1794        ret = -EINVAL;
1795        goto out;
1796    }
1797
1798    /* check if we got CHAP username/password via the options */
1799    apply_chap(iscsi, opts, &local_err);
1800    if (local_err != NULL) {
1801        error_propagate(errp, local_err);
1802        ret = -EINVAL;
1803        goto out;
1804    }
1805
1806    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1807        error_setg(errp, "iSCSI: Failed to set session type to normal.");
1808        ret = -EINVAL;
1809        goto out;
1810    }
1811
1812    /* check if we got HEADER_DIGEST via the options */
1813    apply_header_digest(iscsi, opts, &local_err);
1814    if (local_err != NULL) {
1815        error_propagate(errp, local_err);
1816        ret = -EINVAL;
1817        goto out;
1818    }
1819
1820    /* timeout handling is broken in libiscsi before 1.15.0 */
1821    timeout = qemu_opt_get_number(opts, "timeout", 0);
1822#if LIBISCSI_API_VERSION >= 20150621
1823    iscsi_set_timeout(iscsi, timeout);
1824#else
1825    if (timeout) {
1826        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1827    }
1828#endif
1829
1830    if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
1831        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1832            iscsi_get_error(iscsi));
1833        ret = -EINVAL;
1834        goto out;
1835    }
1836
1837    iscsilun->iscsi = iscsi;
1838    iscsilun->aio_context = bdrv_get_aio_context(bs);
1839    iscsilun->lun = lun;
1840    iscsilun->has_write_same = true;
1841
1842    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1843                            (void **) &inq, errp);
1844    if (task == NULL) {
1845        ret = -EINVAL;
1846        goto out;
1847    }
1848    iscsilun->type = inq->periperal_device_type;
1849    scsi_free_scsi_task(task);
1850    task = NULL;
1851
1852    iscsi_modesense_sync(iscsilun);
1853    if (iscsilun->dpofua) {
1854        bs->supported_write_flags = BDRV_REQ_FUA;
1855    }
1856    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1857
1858    /* Check the write protect flag of the LUN if we want to write */
1859    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1860        iscsilun->write_protected) {
1861        error_setg(errp, "Cannot open a write protected LUN as read-write");
1862        ret = -EACCES;
1863        goto out;
1864    }
1865
1866    iscsi_readcapacity_sync(iscsilun, &local_err);
1867    if (local_err != NULL) {
1868        error_propagate(errp, local_err);
1869        ret = -EINVAL;
1870        goto out;
1871    }
1872    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1873
1874    /* We don't have any emulation for devices other than disks and CD-ROMs, so
1875     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1876     * will try to read from the device to guess the image format.
1877     */
1878    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1879        bs->sg = true;
1880    }
1881
1882    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1883                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1884                            (void **) &inq_vpd, errp);
1885    if (task == NULL) {
1886        ret = -EINVAL;
1887        goto out;
1888    }
1889    for (i = 0; i < inq_vpd->num_pages; i++) {
1890        struct scsi_task *inq_task;
1891        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1892        struct scsi_inquiry_block_limits *inq_bl;
1893        switch (inq_vpd->pages[i]) {
1894        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1895            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1896                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1897                                        (void **) &inq_lbp, errp);
1898            if (inq_task == NULL) {
1899                ret = -EINVAL;
1900                goto out;
1901            }
1902            memcpy(&iscsilun->lbp, inq_lbp,
1903                   sizeof(struct scsi_inquiry_logical_block_provisioning));
1904            scsi_free_scsi_task(inq_task);
1905            break;
1906        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1907            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1908                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1909                                    (void **) &inq_bl, errp);
1910            if (inq_task == NULL) {
1911                ret = -EINVAL;
1912                goto out;
1913            }
1914            memcpy(&iscsilun->bl, inq_bl,
1915                   sizeof(struct scsi_inquiry_block_limits));
1916            scsi_free_scsi_task(inq_task);
1917            break;
1918        default:
1919            break;
1920        }
1921    }
1922    scsi_free_scsi_task(task);
1923    task = NULL;
1924
1925    qemu_mutex_init(&iscsilun->mutex);
1926    iscsi_attach_aio_context(bs, iscsilun->aio_context);
1927
1928    /* Guess the internal cluster (page) size of the iscsi target by the means
1929     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1930     * reasonable size */
1931    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1932        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1933        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1934                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
1935        if (iscsilun->lbprz) {
1936            ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
1937        }
1938    }
1939
1940out:
1941    qemu_opts_del(opts);
1942    g_free(initiator_name);
1943    if (task != NULL) {
1944        scsi_free_scsi_task(task);
1945    }
1946
1947    if (ret) {
1948        if (iscsi != NULL) {
1949            if (iscsi_is_logged_in(iscsi)) {
1950                iscsi_logout_sync(iscsi);
1951            }
1952            iscsi_destroy_context(iscsi);
1953        }
1954        memset(iscsilun, 0, sizeof(IscsiLun));
1955    }
1956exit:
1957    return ret;
1958}
1959
1960static void iscsi_close(BlockDriverState *bs)
1961{
1962    IscsiLun *iscsilun = bs->opaque;
1963    struct iscsi_context *iscsi = iscsilun->iscsi;
1964
1965    iscsi_detach_aio_context(bs);
1966    if (iscsi_is_logged_in(iscsi)) {
1967        iscsi_logout_sync(iscsi);
1968    }
1969    iscsi_destroy_context(iscsi);
1970    g_free(iscsilun->zeroblock);
1971    iscsi_allocmap_free(iscsilun);
1972    qemu_mutex_destroy(&iscsilun->mutex);
1973    memset(iscsilun, 0, sizeof(IscsiLun));
1974}
1975
1976static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1977{
1978    /* We don't actually refresh here, but just return data queried in
1979     * iscsi_open(): iscsi targets don't change their limits. */
1980
1981    IscsiLun *iscsilun = bs->opaque;
1982    uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1983    unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
1984
1985    assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg);
1986
1987    bs->bl.request_alignment = block_size;
1988
1989    if (iscsilun->bl.max_xfer_len) {
1990        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1991    }
1992
1993    if (max_xfer_len * block_size < INT_MAX) {
1994        bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
1995    }
1996
1997    if (iscsilun->lbp.lbpu) {
1998        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
1999            bs->bl.max_pdiscard =
2000                iscsilun->bl.max_unmap * iscsilun->block_size;
2001        }
2002        bs->bl.pdiscard_alignment =
2003            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2004    } else {
2005        bs->bl.pdiscard_alignment = iscsilun->block_size;
2006    }
2007
2008    if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
2009        bs->bl.max_pwrite_zeroes =
2010            iscsilun->bl.max_ws_len * iscsilun->block_size;
2011    }
2012    if (iscsilun->lbp.lbpws) {
2013        bs->bl.pwrite_zeroes_alignment =
2014            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2015    } else {
2016        bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
2017    }
2018    if (iscsilun->bl.opt_xfer_len &&
2019        iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
2020        bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
2021                                        iscsilun->block_size);
2022    }
2023}
2024
2025/* Note that this will not re-establish a connection with an iSCSI target - it
2026 * is effectively a NOP.  */
2027static int iscsi_reopen_prepare(BDRVReopenState *state,
2028                                BlockReopenQueue *queue, Error **errp)
2029{
2030    IscsiLun *iscsilun = state->bs->opaque;
2031
2032    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
2033        error_setg(errp, "Cannot open a write protected LUN as read-write");
2034        return -EACCES;
2035    }
2036    return 0;
2037}
2038
2039static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
2040{
2041    IscsiLun *iscsilun = reopen_state->bs->opaque;
2042
2043    /* the cache.direct status might have changed */
2044    if (iscsilun->allocmap != NULL) {
2045        iscsi_allocmap_init(iscsilun, reopen_state->flags);
2046    }
2047}
2048
2049static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
2050                          PreallocMode prealloc, Error **errp)
2051{
2052    IscsiLun *iscsilun = bs->opaque;
2053    Error *local_err = NULL;
2054
2055    if (prealloc != PREALLOC_MODE_OFF) {
2056        error_setg(errp, "Unsupported preallocation mode '%s'",
2057                   PreallocMode_str(prealloc));
2058        return -ENOTSUP;
2059    }
2060
2061    if (iscsilun->type != TYPE_DISK) {
2062        error_setg(errp, "Cannot resize non-disk iSCSI devices");
2063        return -ENOTSUP;
2064    }
2065
2066    iscsi_readcapacity_sync(iscsilun, &local_err);
2067    if (local_err != NULL) {
2068        error_propagate(errp, local_err);
2069        return -EIO;
2070    }
2071
2072    if (offset > iscsi_getlength(bs)) {
2073        error_setg(errp, "Cannot grow iSCSI devices");
2074        return -EINVAL;
2075    }
2076
2077    if (iscsilun->allocmap != NULL) {
2078        iscsi_allocmap_init(iscsilun, bs->open_flags);
2079    }
2080
2081    return 0;
2082}
2083
2084static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
2085{
2086    int ret = 0;
2087    int64_t total_size = 0;
2088    BlockDriverState *bs;
2089    IscsiLun *iscsilun = NULL;
2090    QDict *bs_options;
2091    Error *local_err = NULL;
2092
2093    bs = bdrv_new();
2094
2095    /* Read out options */
2096    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2097                              BDRV_SECTOR_SIZE);
2098    bs->opaque = g_new0(struct IscsiLun, 1);
2099    iscsilun = bs->opaque;
2100
2101    bs_options = qdict_new();
2102    iscsi_parse_filename(filename, bs_options, &local_err);
2103    if (local_err) {
2104        error_propagate(errp, local_err);
2105        ret = -EINVAL;
2106    } else {
2107        ret = iscsi_open(bs, bs_options, 0, NULL);
2108    }
2109    QDECREF(bs_options);
2110
2111    if (ret != 0) {
2112        goto out;
2113    }
2114    iscsi_detach_aio_context(bs);
2115    if (iscsilun->type != TYPE_DISK) {
2116        ret = -ENODEV;
2117        goto out;
2118    }
2119    if (bs->total_sectors < total_size) {
2120        ret = -ENOSPC;
2121        goto out;
2122    }
2123
2124    ret = 0;
2125out:
2126    if (iscsilun->iscsi != NULL) {
2127        iscsi_destroy_context(iscsilun->iscsi);
2128    }
2129    g_free(bs->opaque);
2130    bs->opaque = NULL;
2131    bdrv_unref(bs);
2132    return ret;
2133}
2134
2135static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2136{
2137    IscsiLun *iscsilun = bs->opaque;
2138    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
2139    bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
2140    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
2141    return 0;
2142}
2143
2144static void iscsi_invalidate_cache(BlockDriverState *bs,
2145                                   Error **errp)
2146{
2147    IscsiLun *iscsilun = bs->opaque;
2148    iscsi_allocmap_invalidate(iscsilun);
2149}
2150
2151static QemuOptsList iscsi_create_opts = {
2152    .name = "iscsi-create-opts",
2153    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
2154    .desc = {
2155        {
2156            .name = BLOCK_OPT_SIZE,
2157            .type = QEMU_OPT_SIZE,
2158            .help = "Virtual disk size"
2159        },
2160        { /* end of list */ }
2161    }
2162};
2163
2164static BlockDriver bdrv_iscsi = {
2165    .format_name     = "iscsi",
2166    .protocol_name   = "iscsi",
2167
2168    .instance_size          = sizeof(IscsiLun),
2169    .bdrv_parse_filename    = iscsi_parse_filename,
2170    .bdrv_file_open         = iscsi_open,
2171    .bdrv_close             = iscsi_close,
2172    .bdrv_create            = iscsi_create,
2173    .create_opts            = &iscsi_create_opts,
2174    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2175    .bdrv_reopen_commit     = iscsi_reopen_commit,
2176    .bdrv_invalidate_cache  = iscsi_invalidate_cache,
2177
2178    .bdrv_getlength  = iscsi_getlength,
2179    .bdrv_get_info   = iscsi_get_info,
2180    .bdrv_truncate   = iscsi_truncate,
2181    .bdrv_refresh_limits = iscsi_refresh_limits,
2182
2183    .bdrv_co_get_block_status = iscsi_co_get_block_status,
2184    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2185    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2186    .bdrv_co_readv         = iscsi_co_readv,
2187    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2188    .bdrv_co_flush_to_disk = iscsi_co_flush,
2189
2190#ifdef __linux__
2191    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2192#endif
2193
2194    .bdrv_detach_aio_context = iscsi_detach_aio_context,
2195    .bdrv_attach_aio_context = iscsi_attach_aio_context,
2196};
2197
#if LIBISCSI_API_VERSION >= (20160603)
/*
 * Block driver table for the "iser" protocol.  It uses the same callbacks
 * as the "iscsi" driver and is only built when LIBISCSI_API_VERSION is at
 * least 20160603.
 */
static BlockDriver bdrv_iser = {
    .format_name     = "iser",
    .protocol_name   = "iser",
    .instance_size   = sizeof(IscsiLun),

    /* Node lifecycle */
    .bdrv_parse_filename    = iscsi_parse_filename,
    .bdrv_file_open         = iscsi_open,
    .bdrv_close             = iscsi_close,
    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
    .bdrv_reopen_commit     = iscsi_reopen_commit,

    /* Image creation */
    .create_opts            = &iscsi_create_opts,
    .bdrv_create            = iscsi_create,

    /* Size, limits and metadata */
    .bdrv_getlength         = iscsi_getlength,
    .bdrv_truncate          = iscsi_truncate,
    .bdrv_get_info          = iscsi_get_info,
    .bdrv_refresh_limits    = iscsi_refresh_limits,
    .bdrv_invalidate_cache  = iscsi_invalidate_cache,

    /* Data path */
    .bdrv_co_readv            = iscsi_co_readv,
    .bdrv_co_writev_flags     = iscsi_co_writev_flags,
    .bdrv_co_pwrite_zeroes    = iscsi_co_pwrite_zeroes,
    .bdrv_co_pdiscard         = iscsi_co_pdiscard,
    .bdrv_co_flush_to_disk    = iscsi_co_flush,
    .bdrv_co_get_block_status = iscsi_co_get_block_status,

#ifdef __linux__
    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
#endif

    /* AioContext handling */
    .bdrv_attach_aio_context = iscsi_attach_aio_context,
    .bdrv_detach_aio_context = iscsi_detach_aio_context,
};
#endif
2233
2234static void iscsi_block_init(void)
2235{
2236    bdrv_register(&bdrv_iscsi);
2237#if LIBISCSI_API_VERSION >= (20160603)
2238    bdrv_register(&bdrv_iser);
2239#endif
2240}
2241
2242block_init(iscsi_block_init);
2243