/* qemu/hw/nvme/dif.c */
/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"

#include "nvme.h"
#include "trace.h"
  17
  18uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
  19                           uint32_t reftag)
  20{
  21    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
  22        (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
  23        return NVME_INVALID_PROT_INFO | NVME_DNR;
  24    }
  25
  26    return NVME_SUCCESS;
  27}
  28
  29/* from Linux kernel (crypto/crct10dif_common.c) */
  30static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
  31                           size_t len)
  32{
  33    unsigned int i;
  34
  35    for (i = 0; i < len; i++) {
  36        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
  37    }
  38
  39    return crc;
  40}
  41
  42void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
  43                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
  44                                 uint32_t *reftag)
  45{
  46    uint8_t *end = buf + len;
  47    int16_t pil = 0;
  48
  49    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
  50        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
  51    }
  52
  53    trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
  54                                          apptag, *reftag);
  55
  56    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
  57        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
  58        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
  59
  60        if (pil) {
  61            crc = crc_t10dif(crc, mbuf, pil);
  62        }
  63
  64        dif->guard = cpu_to_be16(crc);
  65        dif->apptag = cpu_to_be16(apptag);
  66        dif->reftag = cpu_to_be32(*reftag);
  67
  68        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
  69            (*reftag)++;
  70        }
  71    }
  72}
  73
/*
 * Check one protection information tuple against the checks requested in
 * prinfo, using the expected apptag/appmask/reftag values.
 *
 * Returns NVME_SUCCESS when all requested checks pass or when checking is
 * disabled by the escape values stored in the tuple; otherwise returns the
 * corresponding end-to-end error status.
 */
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint8_t prinfo, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    /*
     * Escape values disable checking for a block: for Types 1 and 2 an
     * application tag of all ones suffices; for Type 3 BOTH the reference
     * tag and the application tag must be all ones — hence the Type 3 case
     * falls through into the apptag test only when the reftag is all ones.
     */
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        /*
         * The guard tag is the CRC over the data block plus the pil
         * metadata bytes that precede the tuple (if any).
         */
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        /* only the bits selected by appmask participate in the comparison */
        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}
 131
 132uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
 133                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
 134                        uint64_t slba, uint16_t apptag,
 135                        uint16_t appmask, uint32_t *reftag)
 136{
 137    uint8_t *end = buf + len;
 138    int16_t pil = 0;
 139    uint16_t status;
 140
 141    status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
 142    if (status) {
 143        return status;
 144    }
 145
 146    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
 147        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
 148    }
 149
 150    trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
 151
 152    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
 153        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
 154
 155        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
 156                                appmask, *reftag);
 157        if (status) {
 158            return status;
 159        }
 160
 161        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
 162            (*reftag)++;
 163        }
 164    }
 165
 166    return NVME_SUCCESS;
 167}
 168
/*
 * Overwrite the protection information bytes with the all-ones escape
 * value for every logical block that is unwritten (reads back as zeroes)
 * in the underlying block device.
 *
 * Zero-filled blocks would otherwise fail the guard check; the escape
 * value disables checking for them instead (see nvme_dif_prchk).
 *
 * Returns NVME_SUCCESS, or NVME_INTERNAL_DEV_ERROR if the block status
 * query fails.
 */
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    /* number of data-area bytes corresponding to the metadata buffer */
    int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
    int64_t pnum = 0;

    Error *err = NULL;


    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    /* walk the data range extent by extent as reported by the block layer */
    do {
        int ret;

        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            /* fill the DIF tuple of every block in this extent with 0xff */
            mbufp = mbuf + moffset;
            mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += ns->lbaf.ms) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        /* advance the metadata cursor in step with the data offset */
        moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}
 222
 223static void nvme_dif_rw_cb(void *opaque, int ret)
 224{
 225    NvmeBounceContext *ctx = opaque;
 226    NvmeRequest *req = ctx->req;
 227    NvmeNamespace *ns = req->ns;
 228    BlockBackend *blk = ns->blkconf.blk;
 229
 230    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));
 231
 232    qemu_iovec_destroy(&ctx->data.iov);
 233    g_free(ctx->data.bounce);
 234
 235    qemu_iovec_destroy(&ctx->mdata.iov);
 236    g_free(ctx->mdata.bounce);
 237
 238    g_free(ctx);
 239
 240    nvme_rw_complete_cb(req, ret);
 241}
 242
/*
 * Completion callback for the metadata read of a DIF read command: runs
 * the end-to-end checks over the bounced data and metadata and copies both
 * back to the host, then hands off to nvme_dif_rw_cb for teardown and
 * request completion.
 */
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
                                   reftag);

    if (ret) {
        goto out;
    }

    /* disable checking for any blocks that are unwritten on the device */
    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                            slba, apptag, appmask, &reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    /*
     * When PRACT is set and the metadata consists of the eight protection
     * information bytes alone, the metadata is stripped and not
     * transferred to the host.
     */
    if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}
 299
 300static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
 301{
 302    NvmeBounceContext *ctx = opaque;
 303    NvmeRequest *req = ctx->req;
 304    NvmeNamespace *ns = req->ns;
 305    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 306    uint64_t slba = le64_to_cpu(rw->slba);
 307    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
 308    size_t mlen = nvme_m2b(ns, nlb);
 309    uint64_t offset = nvme_moff(ns, slba);
 310    BlockBackend *blk = ns->blkconf.blk;
 311
 312    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
 313
 314    if (ret) {
 315        goto out;
 316    }
 317
 318    ctx->mdata.bounce = g_malloc(mlen);
 319
 320    qemu_iovec_reset(&ctx->mdata.iov);
 321    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
 322
 323    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
 324                                nvme_dif_rw_check_cb, ctx);
 325    return;
 326
 327out:
 328    nvme_dif_rw_cb(ctx, ret);
 329}
 330
 331static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
 332{
 333    NvmeBounceContext *ctx = opaque;
 334    NvmeRequest *req = ctx->req;
 335    NvmeNamespace *ns = req->ns;
 336    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 337    uint64_t slba = le64_to_cpu(rw->slba);
 338    uint64_t offset = nvme_moff(ns, slba);
 339    BlockBackend *blk = ns->blkconf.blk;
 340
 341    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
 342
 343    if (ret) {
 344        goto out;
 345    }
 346
 347    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
 348                                 nvme_dif_rw_cb, ctx);
 349    return;
 350
 351out:
 352    nvme_dif_rw_cb(ctx, ret);
 353}
 354
/*
 * Entry point for read/write/write-zeroes commands on namespaces with
 * end-to-end data protection enabled.
 *
 * Data and metadata are staged through bounce buffers so that protection
 * information can be generated (PRACT on write), checked (reads and
 * non-PRACT writes), or synthesized for write zeroes.  Returns
 * NVME_NO_COMPLETE when an asynchronous block operation was submitted, or
 * an error status (callers complete the request).
 */
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(prinfo & NVME_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, prinfo);

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        /* write zeroes transfers no data, so the PRCHK bits are invalid */
        if (prinfo & NVME_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, prinfo, slba, reftag);
            if (status) {
                goto err;
            }

            /* metadata must be written explicitly, so do not unmap */
            flags = 0;

            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            /*
             * Synthesize protection information for the zeroed blocks; the
             * guard field is left zero (the CRC of an all-zero block with a
             * zero seed), matching the zero-filled bounce buffer.
             */
            for (; mbuf < end; mbuf += ns->lbaf.ms) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    /*
     * For extended LBAs the host buffer interleaves metadata with data,
     * except when PRACT is set and the metadata is only the eight
     * protection bytes (which the controller generates/strips itself).
     */
    if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        goto err;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        /* metadata is read in nvme_dif_rw_mdata_in_cb, checks follow */
        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    /* write path: bounce the host data (and metadata, unless generated) */
    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    if (!(pract && ns->lbaf.ms == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, prinfo, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, &reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                                slba, apptag, appmask, &reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;

err:
    /* qemu_iovec_destroy and g_free are safe on the zero-initialized ctx */
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}
 510