qemu/hw/block/nvme-dif.c
/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "hw/block/block.h"
#include "sysemu/dma.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "trace.h"
#include "nvme.h"
#include "nvme-dif.h"

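/*
 * For Type 1 protection, a reference tag check requires the command's
 * reference tag to match the lower 32 bits of the starting LBA; reject the
 * command with Invalid Protection Information otherwise.
 */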
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
                           uint32_t reftag)
{
    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
        (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    return NVME_SUCCESS;
}

/*
 * Table-driven CRC16 T10 DIF computation for the guard field; from the
 * Linux kernel (crypto/crct10dif_common.c).
 */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
                           size_t len)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
    }

    return crc;
}

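/*
 * With the Protection Information Action (PRACT) bit set on a write, the
 * controller generates the protection information itself: for each logical
 * block, compute the guard CRC over the data (and over any metadata bytes
 * preceding the tuple) and store guard, application tag and reference tag in
 * the DIF tuple. The tuple occupies the last eight bytes of the per-block
 * metadata unless the namespace is formatted with PI in the first eight
 * bytes. For Types 1 and 2, the reference tag increments per block.
 */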
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t reftag)
{
    uint8_t *end = buf + len;
    size_t lsize = nvme_lsize(ns);
    size_t msize = nvme_msize(ns);
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag,
                                          reftag);

    for (; buf < end; buf += lsize, mbuf += msize) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint16_t crc = crc_t10dif(0x0, buf, lsize);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        dif->guard = cpu_to_be16(crc);
        dif->apptag = cpu_to_be16(apptag);
        dif->reftag = cpu_to_be32(reftag);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            reftag++;
        }
    }
}

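/*
 * Check a single DIF tuple against the checks requested in the PRCHK bits of
 * the command's control field. Checking is disabled for a block whose tuple
 * carries the escape values: an application tag of 0xffff (Types 1 and 2),
 * or an application tag of 0xffff together with a reference tag of
 * 0xffffffff (Type 3).
 */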
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint16_t ctrl, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
        uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns));

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}

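/*
 * Verify protection information for every logical block in the data and
 * metadata bounce buffers. For Types 1 and 2, the expected reference tag
 * increments per block.
 */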
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint16_t ctrl,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t reftag)
{
    uint8_t *end = buf + len;
    size_t lsize = nvme_lsize(ns);
    size_t msize = nvme_msize(ns);
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, ctrl, slba, reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil);

    for (; buf < end; buf += lsize, mbuf += msize) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
                                appmask, reftag);
        if (status) {
            return status;
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            reftag++;
        }
    }

    return NVME_SUCCESS;
}

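/*
 * Logical blocks that have never been written read back as zeroes from the
 * backing device, including their metadata, so their tuples would not carry
 * valid application or reference tags. Find the zeroed ranges via
 * bdrv_block_status() and fill the corresponding DIF tuples with 0xff (the
 * escape values) so that checking is disabled for those blocks.
 */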
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    size_t msize = nvme_msize(ns);
    size_t lsize = nvme_lsize(ns);
    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    int64_t bytes = (mlen / msize) * lsize;
    int64_t pnum = 0;

    Error *err = NULL;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    do {
        int ret;

        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            mbufp = mbuf + moffset;
            mlen = (pnum / lsize) * msize;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += msize) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        moffset += (pnum / lsize) * msize;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}

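/* final completion: tear down the bounce buffers and complete the request */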
static void nvme_dif_rw_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));

    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_rw_complete_cb(req, ret);
}

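/*
 * Read path, step 3: both data and metadata have been read into the bounce
 * buffers. Mangle the metadata of unwritten blocks, run the end-to-end
 * checks, then copy the data (and, unless PRACT strips a tuple-only metadata
 * area, the metadata) to the host.
 */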
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint16_t ctrl = le16_to_cpu(rw->control);
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
                                   appmask, reftag);

    if (ret) {
        goto out;
    }

    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
                            slba, apptag, appmask, reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}

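/*
 * Read path, step 2: the data read has completed; allocate a bounce buffer
 * for the metadata and submit the metadata read.
 */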
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_dif_rw_check_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

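/*
 * Write path: the data write has completed; submit the metadata write from
 * the bounce buffer.
 */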
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
                                 nvme_dif_rw_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

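/*
 * Entry point for reads and writes (including Write Zeroes) to namespaces
 * formatted with protection information. Sets up bounce buffers for data and
 * metadata and chains the AIO callbacks above; on writes, either generates
 * protection information (PRACT) or verifies what the host supplied.
 */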
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint16_t ctrl = le16_to_cpu(rw->control);
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl));

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        /* Write Zeroes must not request any protection checks */
        if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            size_t msize = nvme_msize(ns);
            int16_t pil = msize - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, ctrl, slba, reftag);
            if (status) {
                goto err;
            }

            /* protection information must be written, so do not unmap */
            flags = 0;

            /*
             * The guard CRC over an all-zero block (including any zeroed
             * metadata bytes preceding the tuple) is zero, so g_malloc0()
             * already sets the guard fields correctly.
             */
            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            for (; mbuf < end; mbuf += msize) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    /*
     * In extended LBA namespaces the metadata travels in-line with the data,
     * unless PRACT elides a tuple-only metadata area.
     */
    if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        goto err;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    if (!(pract && nvme_msize(ns) == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, ctrl, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
                                slba, apptag, appmask, reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;

err:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}