1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#ifndef HW_NVME_INTERNAL_H
19#define HW_NVME_INTERNAL_H
20
21#include "qemu/uuid.h"
22#include "hw/pci/pci.h"
23#include "hw/block/block.h"
24
25#include "block/nvme.h"
26
27#define NVME_MAX_CONTROLLERS 32
28#define NVME_MAX_NAMESPACES 256
29#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30
31QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32
/*
 * Forward declarations: NvmeSubsystem stores pointers to both types before
 * their full definitions appear further down in this header.
 */
typedef struct NvmeCtrl      NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
35
36#define TYPE_NVME_BUS "nvme-bus"
37OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
38
39typedef struct NvmeBus {
40 BusState parent_bus;
41} NvmeBus;
42
43#define TYPE_NVME_SUBSYS "nvme-subsys"
44#define NVME_SUBSYS(obj) \
45 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
46
47typedef struct NvmeSubsystem {
48 DeviceState parent_obj;
49 NvmeBus bus;
50 uint8_t subnqn[256];
51
52 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
53 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
54
55 struct {
56 char *nqn;
57 } params;
58} NvmeSubsystem;
59
60int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
61void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
62
63static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
64 uint32_t cntlid)
65{
66 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
67 return NULL;
68 }
69
70 return subsys->ctrls[cntlid];
71}
72
73static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
74 uint32_t nsid)
75{
76 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
77 return NULL;
78 }
79
80 return subsys->namespaces[nsid];
81}
82
/* Type name of the namespace device and the checked cast to NvmeNamespace. */
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
86
87typedef struct NvmeZone {
88 NvmeZoneDescr d;
89 uint64_t w_ptr;
90 QTAILQ_ENTRY(NvmeZone) entry;
91} NvmeZone;
92
93typedef struct NvmeNamespaceParams {
94 bool detached;
95 bool shared;
96 uint32_t nsid;
97 QemuUUID uuid;
98 uint64_t eui64;
99 bool eui64_default;
100
101 uint16_t ms;
102 uint8_t mset;
103 uint8_t pi;
104 uint8_t pil;
105
106 uint16_t mssrl;
107 uint32_t mcl;
108 uint8_t msrc;
109
110 bool zoned;
111 bool cross_zone_read;
112 uint64_t zone_size_bs;
113 uint64_t zone_cap_bs;
114 uint32_t max_active_zones;
115 uint32_t max_open_zones;
116 uint32_t zd_extension_size;
117} NvmeNamespaceParams;
118
119typedef struct NvmeNamespace {
120 DeviceState parent_obj;
121 BlockConf blkconf;
122 int32_t bootindex;
123 int64_t size;
124 int64_t moff;
125 NvmeIdNs id_ns;
126 NvmeLBAF lbaf;
127 size_t lbasz;
128 const uint32_t *iocs;
129 uint8_t csi;
130 uint16_t status;
131 int attached;
132
133 QTAILQ_ENTRY(NvmeNamespace) entry;
134
135 NvmeIdNsZoned *id_ns_zoned;
136 NvmeZone *zone_array;
137 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
138 QTAILQ_HEAD(, NvmeZone) imp_open_zones;
139 QTAILQ_HEAD(, NvmeZone) closed_zones;
140 QTAILQ_HEAD(, NvmeZone) full_zones;
141 uint32_t num_zones;
142 uint64_t zone_size;
143 uint64_t zone_capacity;
144 uint32_t zone_size_log2;
145 uint8_t *zd_extensions;
146 int32_t nr_open_zones;
147 int32_t nr_active_zones;
148
149 NvmeNamespaceParams params;
150
151 struct {
152 uint32_t err_rec;
153 } features;
154} NvmeNamespace;
155
156static inline uint32_t nvme_nsid(NvmeNamespace *ns)
157{
158 if (ns) {
159 return ns->params.nsid;
160 }
161
162 return 0;
163}
164
165static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
166{
167 return lba << ns->lbaf.ds;
168}
169
170static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
171{
172 return ns->lbaf.ms * lba;
173}
174
175static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
176{
177 return ns->moff + nvme_m2b(ns, lba);
178}
179
180static inline bool nvme_ns_ext(NvmeNamespace *ns)
181{
182 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
183}
184
185static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
186{
187 return zone->d.zs >> 4;
188}
189
190static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
191{
192 zone->d.zs = state << 4;
193}
194
195static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
196{
197 return zone->d.zslba + ns->zone_size;
198}
199
200static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
201{
202 return zone->d.zslba + zone->d.zcap;
203}
204
205static inline bool nvme_wp_is_valid(NvmeZone *zone)
206{
207 uint8_t st = nvme_get_zone_state(zone);
208
209 return st != NVME_ZONE_STATE_FULL &&
210 st != NVME_ZONE_STATE_READ_ONLY &&
211 st != NVME_ZONE_STATE_OFFLINE;
212}
213
214static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
215 uint32_t zone_idx)
216{
217 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
218}
219
220static inline void nvme_aor_inc_open(NvmeNamespace *ns)
221{
222 assert(ns->nr_open_zones >= 0);
223 if (ns->params.max_open_zones) {
224 ns->nr_open_zones++;
225 assert(ns->nr_open_zones <= ns->params.max_open_zones);
226 }
227}
228
229static inline void nvme_aor_dec_open(NvmeNamespace *ns)
230{
231 if (ns->params.max_open_zones) {
232 assert(ns->nr_open_zones > 0);
233 ns->nr_open_zones--;
234 }
235 assert(ns->nr_open_zones >= 0);
236}
237
238static inline void nvme_aor_inc_active(NvmeNamespace *ns)
239{
240 assert(ns->nr_active_zones >= 0);
241 if (ns->params.max_active_zones) {
242 ns->nr_active_zones++;
243 assert(ns->nr_active_zones <= ns->params.max_active_zones);
244 }
245}
246
247static inline void nvme_aor_dec_active(NvmeNamespace *ns)
248{
249 if (ns->params.max_active_zones) {
250 assert(ns->nr_active_zones > 0);
251 ns->nr_active_zones--;
252 assert(ns->nr_active_zones >= ns->nr_open_zones);
253 }
254 assert(ns->nr_active_zones >= 0);
255}
256
257void nvme_ns_init_format(NvmeNamespace *ns);
258int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
259void nvme_ns_drain(NvmeNamespace *ns);
260void nvme_ns_shutdown(NvmeNamespace *ns);
261void nvme_ns_cleanup(NvmeNamespace *ns);
262
263typedef struct NvmeAsyncEvent {
264 QTAILQ_ENTRY(NvmeAsyncEvent) entry;
265 NvmeAerResult result;
266} NvmeAsyncEvent;
267
/* Bit flags; NOTE(review): presumably the values stored in NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 0x1,
    NVME_SG_DMA   = 0x2,
};
272
273typedef struct NvmeSg {
274 int flags;
275
276 union {
277 QEMUSGList qsg;
278 QEMUIOVector iov;
279 };
280} NvmeSg;
281
/* Transfer direction argument of nvme_bounce_data() / nvme_bounce_mdata(). */
enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
};

typedef enum NvmeTxDirection NvmeTxDirection;
286
287typedef struct NvmeRequest {
288 struct NvmeSQueue *sq;
289 struct NvmeNamespace *ns;
290 BlockAIOCB *aiocb;
291 uint16_t status;
292 void *opaque;
293 NvmeCqe cqe;
294 NvmeCmd cmd;
295 BlockAcctCookie acct;
296 NvmeSg sg;
297 QTAILQ_ENTRY(NvmeRequest)entry;
298} NvmeRequest;
299
300typedef struct NvmeBounceContext {
301 NvmeRequest *req;
302
303 struct {
304 QEMUIOVector iov;
305 uint8_t *bounce;
306 } data, mdata;
307} NvmeBounceContext;
308
309static inline const char *nvme_adm_opc_str(uint8_t opc)
310{
311 switch (opc) {
312 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
313 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
314 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
315 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
316 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
317 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
318 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
319 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
320 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
321 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
322 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
323 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
324 default: return "NVME_ADM_CMD_UNKNOWN";
325 }
326}
327
328static inline const char *nvme_io_opc_str(uint8_t opc)
329{
330 switch (opc) {
331 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
332 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
333 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
334 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
335 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
336 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
337 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
338 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
339 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
340 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
341 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
342 default: return "NVME_NVM_CMD_UNKNOWN";
343 }
344}
345
346typedef struct NvmeSQueue {
347 struct NvmeCtrl *ctrl;
348 uint16_t sqid;
349 uint16_t cqid;
350 uint32_t head;
351 uint32_t tail;
352 uint32_t size;
353 uint64_t dma_addr;
354 QEMUTimer *timer;
355 NvmeRequest *io_req;
356 QTAILQ_HEAD(, NvmeRequest) req_list;
357 QTAILQ_HEAD(, NvmeRequest) out_req_list;
358 QTAILQ_ENTRY(NvmeSQueue) entry;
359} NvmeSQueue;
360
361typedef struct NvmeCQueue {
362 struct NvmeCtrl *ctrl;
363 uint8_t phase;
364 uint16_t cqid;
365 uint16_t irq_enabled;
366 uint32_t head;
367 uint32_t tail;
368 uint32_t vector;
369 uint32_t size;
370 uint64_t dma_addr;
371 QEMUTimer *timer;
372 QTAILQ_HEAD(, NvmeSQueue) sq_list;
373 QTAILQ_HEAD(, NvmeRequest) req_list;
374} NvmeCQueue;
375
/* Type name of the controller device and the checked cast to NvmeCtrl. */
#define TYPE_NVME "nvme"
#define NVME(obj) OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
379
typedef struct NvmeParams NvmeParams;

/*
 * Controller configuration values, embedded in NvmeCtrl as `params`.
 * NOTE(review): the meaning of the individual fields is not established by
 * this header; consult the code that sets and validates them.
 */
struct NvmeParams {
    char *serial;
    uint32_t num_queues;
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t aerl;
    uint32_t aer_max_queued;
    uint8_t mdts;
    uint8_t vsl;
    bool use_intel_id;
    uint8_t zasl;
    bool auto_transition_zones;
    bool legacy_cmb;
};
395
396typedef struct NvmeCtrl {
397 PCIDevice parent_obj;
398 MemoryRegion bar0;
399 MemoryRegion iomem;
400 NvmeBar bar;
401 NvmeParams params;
402 NvmeBus bus;
403
404 uint16_t cntlid;
405 bool qs_created;
406 uint32_t page_size;
407 uint16_t page_bits;
408 uint16_t max_prp_ents;
409 uint16_t cqe_size;
410 uint16_t sqe_size;
411 uint32_t reg_size;
412 uint32_t max_q_ents;
413 uint8_t outstanding_aers;
414 uint32_t irq_status;
415 int cq_pending;
416 uint64_t host_timestamp;
417 uint64_t timestamp_set_qemu_clock_ms;
418 uint64_t starttime_ms;
419 uint16_t temperature;
420 uint8_t smart_critical_warning;
421
422 struct {
423 MemoryRegion mem;
424 uint8_t *buf;
425 bool cmse;
426 hwaddr cba;
427 } cmb;
428
429 struct {
430 HostMemoryBackend *dev;
431 bool cmse;
432 hwaddr cba;
433 } pmr;
434
435 uint8_t aer_mask;
436 NvmeRequest **aer_reqs;
437 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
438 int aer_queued;
439
440 uint32_t dmrsl;
441
442
443#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
444 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
445
446 NvmeSubsystem *subsys;
447
448 NvmeNamespace namespace;
449 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
450 NvmeSQueue **sq;
451 NvmeCQueue **cq;
452 NvmeSQueue admin_sq;
453 NvmeCQueue admin_cq;
454 NvmeIdCtrl id_ctrl;
455
456 struct {
457 struct {
458 uint16_t temp_thresh_hi;
459 uint16_t temp_thresh_low;
460 };
461 uint32_t async_config;
462 } features;
463} NvmeCtrl;
464
465static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
466{
467 if (!nsid || nsid > NVME_MAX_NAMESPACES) {
468 return NULL;
469 }
470
471 return n->namespaces[nsid];
472}
473
474static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
475{
476 NvmeSQueue *sq = req->sq;
477 NvmeCtrl *n = sq->ctrl;
478
479 return n->cq[sq->cqid];
480}
481
482static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
483{
484 NvmeSQueue *sq = req->sq;
485 return sq->ctrl;
486}
487
488static inline uint16_t nvme_cid(NvmeRequest *req)
489{
490 if (!req) {
491 return 0xffff;
492 }
493
494 return le16_to_cpu(req->cqe.cid);
495}
496
497void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
498uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
499 NvmeTxDirection dir, NvmeRequest *req);
500uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
501 NvmeTxDirection dir, NvmeRequest *req);
502void nvme_rw_complete_cb(void *opaque, int ret);
503uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
504 NvmeCmd *cmd);
505
506
/*
 * Byte-wise lookup table for a 16-bit CRC with generator polynomial 0x8BB7,
 * processed most significant bit first.  Entry i is the remainder of
 * (i << 8) after eight shift/xor steps, so entry 1 is the polynomial itself.
 *
 * NOTE(review): the name and polynomial match the T10 DIF guard CRC;
 * confirm the provenance of the table before relicensing or regenerating.
 */
static const uint16_t t10_dif_crc_table[256] = {
    0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
    0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
    0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
    0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
    0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
    0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
    0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
    0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
    0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
    0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
    0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
    0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
    0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
    0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
    0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
    0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
    0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
    0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
    0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
    0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
    0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
    0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
    0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
    0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
    0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
    0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
    0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
    0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
    0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
    0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
    0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
    0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};
541
542uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
543 uint32_t reftag);
544uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
545 uint64_t slba);
546void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
547 uint8_t *mbuf, size_t mlen, uint16_t apptag,
548 uint32_t *reftag);
549uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
550 uint8_t *mbuf, size_t mlen, uint8_t prinfo,
551 uint64_t slba, uint16_t apptag,
552 uint16_t appmask, uint32_t *reftag);
553uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
554
555
556#endif
557