/* qemu/hw/nvme/nvme.h */
   1/*
   2 * QEMU NVM Express
   3 *
   4 * Copyright (c) 2012 Intel Corporation
   5 * Copyright (c) 2021 Minwoo Im
   6 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
   7 *
   8 * Authors:
   9 *   Keith Busch            <kbusch@kernel.org>
  10 *   Klaus Jensen           <k.jensen@samsung.com>
  11 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
  12 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
  13 *   Minwoo Im              <minwoo.im.dev@gmail.com>
  14 *
  15 * This code is licensed under the GNU GPL v2 or later.
  16 */
  17
  18#ifndef HW_NVME_NVME_H
  19#define HW_NVME_NVME_H
  20
  21#include "qemu/uuid.h"
  22#include "hw/pci/pci.h"
  23#include "hw/block/block.h"
  24
  25#include "block/nvme.h"
  26
  27#define NVME_MAX_CONTROLLERS 256
  28#define NVME_MAX_NAMESPACES  256
  29#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
  30
  31QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
  32
  33typedef struct NvmeCtrl NvmeCtrl;
  34typedef struct NvmeNamespace NvmeNamespace;
  35
  36#define TYPE_NVME_BUS "nvme-bus"
  37OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
  38
/*
 * Bus that nvme-ns devices attach to; instantiated both by the
 * nvme-subsys device and by the nvme controller itself (see the
 * NvmeSubsystem and NvmeCtrl 'bus' members below).
 */
typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;
  42
  43#define TYPE_NVME_SUBSYS "nvme-subsys"
  44#define NVME_SUBSYS(obj) \
  45    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
  46#define SUBSYS_SLOT_RSVD (void *)0xFFFF
  47
/*
 * NVM subsystem state: the set of controllers registered with the
 * subsystem and the (shared) namespaces it contains.
 */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];    /* NVMe Qualified Name of the subsystem */
    char        *serial;

    /* a slot may hold SUBSYS_SLOT_RSVD to mark a reserved controller id */
    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    /* indexed by NSID (1-based); entry 0 is unused */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;  /* user-supplied NQN; subnqn presumably derives from it */
    } params;
} NvmeSubsystem;
  61
  62int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
  63void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
  64
  65static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
  66                                         uint32_t cntlid)
  67{
  68    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
  69        return NULL;
  70    }
  71
  72    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
  73        return NULL;
  74    }
  75
  76    return subsys->ctrls[cntlid];
  77}
  78
  79static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
  80                                            uint32_t nsid)
  81{
  82    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
  83        return NULL;
  84    }
  85
  86    return subsys->namespaces[nsid];
  87}
  88
  89#define TYPE_NVME_NS "nvme-ns"
  90#define NVME_NS(obj) \
  91    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
  92
/*
 * Runtime state of a single zone: the descriptor reported to the host
 * plus a private write pointer advanced as writes complete.
 */
typedef struct NvmeZone {
    NvmeZoneDescr   d;      /* zone descriptor (zslba, zcap, zs, ...) */
    uint64_t        w_ptr;  /* internal write pointer */
    QTAILQ_ENTRY(NvmeZone) entry;   /* linkage on the per-state zone lists */
} NvmeZone;
  98
/* User-configurable properties of an nvme-ns device. */
typedef struct NvmeNamespaceParams {
    bool     detached;      /* presumably: start not attached to a controller */
    bool     shared;        /* namespace may be shared through the subsystem */
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default; /* eui64 was defaulted (see NVME_EUI64_DEFAULT)
                             * rather than set explicitly -- TODO confirm */

    /* metadata and end-to-end protection information (MS/MSET/PI/PIL/PIF) */
    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    /* Copy command limits (MSSRL/MCL/MSRC identify fields) */
    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    /* zoned namespace configuration */
    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;      /* zone size in bytes */
    uint64_t zone_cap_bs;       /* zone capacity in bytes */
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size; /* zone descriptor extension size, bytes */

    /* Zone Random Write Area (ZRWA) configuration */
    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;
 129
/*
 * Runtime state of an nvme-ns device: the identify data reported to the
 * host and, for zoned namespaces, the zone bookkeeping.
 */
typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;      /* data size in bytes */
    int64_t      moff;      /* byte offset of metadata in the backing image */
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;      /* currently selected LBA format */
    unsigned int nlbaf;     /* number of supported LBA formats */
    size_t       lbasz;     /* logical block size in bytes */
    const uint32_t *iocs;   /* supported-commands table for the active CSI */
    uint8_t      csi;       /* command set identifier */
    uint16_t     status;    /* NOTE(review): looks like an NVMe status code
                             * marking the ns unusable -- confirm in nvme-ns.c */
    int          attached;  /* presumably a count of attached controllers */
    uint8_t      pif;       /* protection information format */

    /* ZRWA limits in effect for this namespace */
    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* zoned command set state */
    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;     /* explicitly opened zones */
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;     /* implicitly opened zones */
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;          /* zone size in logical blocks */
    uint64_t        zone_capacity;      /* zone capacity in logical blocks */
    uint32_t        zone_size_log2;     /* log2(zone_size) when a power of
                                         * two -- TODO confirm fallback value */
    uint8_t         *zd_extensions;     /* zone descriptor extension buffers */
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;   /* presumably the Error Recovery feature value */
    } features;
} NvmeNamespace;
 175
 176static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 177{
 178    if (ns) {
 179        return ns->params.nsid;
 180    }
 181
 182    return 0;
 183}
 184
 185static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
 186{
 187    return lba << ns->lbaf.ds;
 188}
 189
 190static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
 191{
 192    return ns->lbaf.ms * lba;
 193}
 194
 195static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
 196{
 197    return ns->moff + nvme_m2b(ns, lba);
 198}
 199
 200static inline bool nvme_ns_ext(NvmeNamespace *ns)
 201{
 202    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
 203}
 204
 205static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
 206{
 207    return zone->d.zs >> 4;
 208}
 209
/* Store @state into the upper nibble of the zone descriptor 'zs' field. */
static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}
 214
 215static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
 216{
 217    return zone->d.zslba + ns->zone_size;
 218}
 219
 220static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
 221{
 222    return zone->d.zslba + zone->d.zcap;
 223}
 224
 225static inline bool nvme_wp_is_valid(NvmeZone *zone)
 226{
 227    uint8_t st = nvme_get_zone_state(zone);
 228
 229    return st != NVME_ZONE_STATE_FULL &&
 230           st != NVME_ZONE_STATE_READ_ONLY &&
 231           st != NVME_ZONE_STATE_OFFLINE;
 232}
 233
 234static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
 235                                             uint32_t zone_idx)
 236{
 237    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
 238}
 239
 240static inline void nvme_aor_inc_open(NvmeNamespace *ns)
 241{
 242    assert(ns->nr_open_zones >= 0);
 243    if (ns->params.max_open_zones) {
 244        ns->nr_open_zones++;
 245        assert(ns->nr_open_zones <= ns->params.max_open_zones);
 246    }
 247}
 248
 249static inline void nvme_aor_dec_open(NvmeNamespace *ns)
 250{
 251    if (ns->params.max_open_zones) {
 252        assert(ns->nr_open_zones > 0);
 253        ns->nr_open_zones--;
 254    }
 255    assert(ns->nr_open_zones >= 0);
 256}
 257
 258static inline void nvme_aor_inc_active(NvmeNamespace *ns)
 259{
 260    assert(ns->nr_active_zones >= 0);
 261    if (ns->params.max_active_zones) {
 262        ns->nr_active_zones++;
 263        assert(ns->nr_active_zones <= ns->params.max_active_zones);
 264    }
 265}
 266
 267static inline void nvme_aor_dec_active(NvmeNamespace *ns)
 268{
 269    if (ns->params.max_active_zones) {
 270        assert(ns->nr_active_zones > 0);
 271        ns->nr_active_zones--;
 272        assert(ns->nr_active_zones >= ns->nr_open_zones);
 273    }
 274    assert(ns->nr_active_zones >= 0);
 275}
 276
 277void nvme_ns_init_format(NvmeNamespace *ns);
 278int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
 279void nvme_ns_drain(NvmeNamespace *ns);
 280void nvme_ns_shutdown(NvmeNamespace *ns);
 281void nvme_ns_cleanup(NvmeNamespace *ns);
 282
/*
 * A queued asynchronous event, waiting on the aer_queue (see NvmeCtrl)
 * until it can be delivered.
 */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;   /* result dword reported in the AER completion */
} NvmeAsyncEvent;
 287
/* Flag bits for NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 1 << 0, /* the sg union member has been initialized */
    NVME_SG_DMA   = 1 << 1, /* union holds qsg (DMA addresses); else iov */
};
 292
/*
 * Scatter/gather state for a transfer: either a QEMUSGList of guest DMA
 * addresses or a QEMUIOVector, discriminated by NVME_SG_DMA in @flags.
 */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;
 301
/* Direction of a data transfer between host memory and the device. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;
 306
/* Per-command state, tracked from submission until the CQE is posted. */
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;        /* submission queue of origin */
    struct NvmeNamespace    *ns;        /* targeted namespace, if any */
    BlockAIOCB              *aiocb;     /* in-flight block-layer request */
    uint16_t                status;     /* NVMe status for the completion */
    void                    *opaque;    /* per-command private data */
    NvmeCqe                 cqe;        /* completion entry to be posted */
    NvmeCmd                 cmd;        /* copy of the submitted command */
    BlockAcctCookie         acct;       /* block accounting cookie */
    NvmeSg                  sg;         /* data scatter/gather state */
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
 319
/*
 * Bounce buffers for data and metadata of a request; presumably used by
 * commands that must inspect the data (e.g. Verify) -- confirm in nvme.c.
 */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;    /* buffer backing @iov */
    } data, mdata;          /* data and metadata halves of the transfer */
} NvmeBounceContext;
 328
 329static inline const char *nvme_adm_opc_str(uint8_t opc)
 330{
 331    switch (opc) {
 332    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
 333    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
 334    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
 335    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
 336    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
 337    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
 338    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
 339    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
 340    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
 341    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
 342    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
 343    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
 344    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
 345    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
 346    default:                            return "NVME_ADM_CMD_UNKNOWN";
 347    }
 348}
 349
 350static inline const char *nvme_io_opc_str(uint8_t opc)
 351{
 352    switch (opc) {
 353    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
 354    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
 355    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
 356    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
 357    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
 358    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
 359    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
 360    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
 361    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
 362    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
 363    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
 364    default:                        return "NVME_NVM_CMD_UNKNOWN";
 365    }
 366}
 367
/* Submission queue state. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;       /* completion queue this SQ posts to */
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;       /* number of entries */
    uint64_t    dma_addr;   /* guest physical base address of the queue */
    uint64_t    db_addr;    /* shadow doorbell address (Doorbell Buffer
                             * Config) -- TODO confirm */
    uint64_t    ei_addr;    /* event index address for the shadow doorbell */
    QEMUTimer   *timer;
    EventNotifier notifier; /* used when ioeventfd is enabled */
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;    /* presumably a preallocated request pool */
    QTAILQ_HEAD(, NvmeRequest) req_list;        /* available requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list;    /* outstanding requests */
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;
 386
/* Completion queue state. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;          /* current phase tag value */
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;         /* interrupt vector */
    uint32_t    size;           /* number of entries */
    uint64_t    dma_addr;       /* guest physical base address of the queue */
    uint64_t    db_addr;        /* shadow doorbell address (Doorbell Buffer
                                 * Config) -- TODO confirm */
    uint64_t    ei_addr;        /* event index address for the shadow doorbell */
    QEMUTimer   *timer;
    EventNotifier notifier;     /* used when ioeventfd is enabled */
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;      /* SQs posting to this CQ */
    QTAILQ_HEAD(, NvmeRequest) req_list;    /* requests pending completion */
} NvmeCQueue;
 405
 406#define TYPE_NVME "nvme"
 407#define NVME(obj) \
 408        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
 409
/* User-configurable properties of the nvme controller device. */
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;   /* Controller Memory Buffer size in MiB */
    uint8_t  aerl;          /* Asynchronous Event Request Limit (AERL) */
    uint32_t aer_max_queued;
    uint8_t  mdts;          /* Maximum Data Transfer Size (MDTS) */
    uint8_t  vsl;
    bool     use_intel_id;  /* presumably selects legacy Intel PCI ids */
    uint8_t  zasl;          /* Zone Append Size Limit (ZASL) */
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;

    /* SR-IOV virtualization resource limits */
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;
 431
/* State of an NVMe controller (one PCI function). */
typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;           /* shadow of the BAR0 register file */
    NvmeParams   params;
    NvmeBus      bus;           /* bus for directly attached nvme-ns devices */

    uint16_t    cntlid;         /* controller id within the subsystem */
    bool        qs_created;     /* I/O queues have been created */
    uint32_t    page_size;
    uint16_t    page_bits;      /* log2(page_size) -- TODO confirm */
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;

    /* shadow doorbell buffers (Doorbell Buffer Config command) */
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    /* Controller Memory Buffer */
    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;  /* controller memory space enabled? -- confirm */
        hwaddr       cba;   /* controller base address */
    } cmb;

    /* Persistent Memory Region */
    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    /* asynchronous event request (AER) bookkeeping */
    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace IDs start at 1, so the changed-namespace bitmap is 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;  /* presumably backs a drive= shorthand ns */
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];   /* 1-based NSID */
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    /* values managed via Get/Set Features */
    struct {
        struct {
            uint16_t temp_thresh_hi;    /* over-temperature threshold */
            uint16_t temp_thresh_low;   /* under-temperature threshold */
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    /* SR-IOV primary/secondary controller capabilities */
    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;
 513
/* Scope of a controller reset; see the reset handling in nvme/ctrl.c. */
typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,  /* presumably a PCI function level reset */
    NVME_RESET_CONTROLLER = 1,  /* presumably a controller reset (CC.EN) */
} NvmeResetType;
 518
 519static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
 520{
 521    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
 522        return NULL;
 523    }
 524
 525    return n->namespaces[nsid];
 526}
 527
 528static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
 529{
 530    NvmeSQueue *sq = req->sq;
 531    NvmeCtrl *n = sq->ctrl;
 532
 533    return n->cq[sq->cqid];
 534}
 535
 536static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
 537{
 538    NvmeSQueue *sq = req->sq;
 539    return sq->ctrl;
 540}
 541
 542static inline uint16_t nvme_cid(NvmeRequest *req)
 543{
 544    if (!req) {
 545        return 0xffff;
 546    }
 547
 548    return le16_to_cpu(req->cqe.cid);
 549}
 550
 551static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
 552{
 553    PCIDevice *pci_dev = &n->parent_obj;
 554    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
 555
 556    if (pci_is_vf(pci_dev)) {
 557        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
 558    }
 559
 560    return NULL;
 561}
 562
 563static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
 564                                                      uint16_t cntlid)
 565{
 566    NvmeSecCtrlList *list = &n->sec_ctrl_list;
 567    uint8_t i;
 568
 569    for (i = 0; i < list->numcntl; i++) {
 570        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
 571            return &list->sec[i];
 572        }
 573    }
 574
 575    return NULL;
 576}
 577
 578void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
 579uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
 580                          NvmeTxDirection dir, NvmeRequest *req);
 581uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
 582                           NvmeTxDirection dir, NvmeRequest *req);
 583void nvme_rw_complete_cb(void *opaque, int ret);
 584uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
 585                       NvmeCmd *cmd);
 586
 587#endif /* HW_NVME_NVME_H */
 588