/*
 * QEMU NVM Express controller — internal definitions.
 *
 * NOTE(review): the original copyright/license header appears to have been
 * lost (only bare line numbers remained); restore it from the upstream file.
 */
18#ifndef HW_NVME_INTERNAL_H
19#define HW_NVME_INTERNAL_H
20
21#include "qemu/uuid.h"
22#include "hw/pci/pci.h"
23#include "hw/block/block.h"
24
25#include "block/nvme.h"
26
27#define NVME_MAX_CONTROLLERS 32
28#define NVME_MAX_NAMESPACES 256
29#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30
31QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32
33typedef struct NvmeCtrl NvmeCtrl;
34typedef struct NvmeNamespace NvmeNamespace;
35
36#define TYPE_NVME_BUS "nvme-bus"
37OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
38
/* Bus that namespace devices attach to (see TYPE_NVME_BUS above). */
typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;
42
43#define TYPE_NVME_SUBSYS "nvme-subsys"
44#define NVME_SUBSYS(obj) \
45 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
46
/*
 * An NVMe subsystem: groups up to NVME_MAX_CONTROLLERS controllers and the
 * namespaces they may share, identified by an NVMe Qualified Name (subnqn).
 */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus bus;
    uint8_t subnqn[256];    /* subsystem NVMe Qualified Name */

    NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];      /* indexed by cntlid */
    /* indexed by nsid; slot 0 is unused (nsid 0 is invalid) */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;  /* user-configured NQN; presumably a device property — confirm */
    } params;
} NvmeSubsystem;
59
60int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
61void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
62
63static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
64 uint32_t cntlid)
65{
66 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
67 return NULL;
68 }
69
70 return subsys->ctrls[cntlid];
71}
72
73static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
74 uint32_t nsid)
75{
76 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
77 return NULL;
78 }
79
80 return subsys->namespaces[nsid];
81}
82
83#define TYPE_NVME_NS "nvme-ns"
84#define NVME_NS(obj) \
85 OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
86
/* Runtime state of one zone of a zoned namespace. */
typedef struct NvmeZone {
    NvmeZoneDescr d;              /* on-the-wire zone descriptor */
    uint64_t w_ptr;               /* internal write pointer (may lead d.wp) */
    QTAILQ_ENTRY(NvmeZone) entry; /* linkage on one of the per-state lists */
} NvmeZone;
92
/*
 * User-configurable namespace parameters. Field names largely follow the
 * corresponding NVMe specification fields (ms, pi, mssrl, ...).
 */
typedef struct NvmeNamespaceParams {
    bool detached;     /* start detached from the controller */
    bool shared;       /* namespace is shared across the subsystem */
    uint32_t nsid;     /* namespace identifier; 0 presumably means auto-assign — confirm */
    QemuUUID uuid;
    uint64_t eui64;
    bool eui64_default; /* eui64 was defaulted (see NVME_EUI64_DEFAULT) */

    /* metadata / end-to-end protection settings */
    uint16_t ms;       /* metadata bytes per logical block */
    uint8_t mset;
    uint8_t pi;
    uint8_t pil;
    uint8_t pif;

    /* copy command limits */
    uint16_t mssrl;
    uint32_t mcl;
    uint8_t msrc;

    /* zoned namespace configuration */
    bool zoned;
    bool cross_zone_read;
    uint64_t zone_size_bs;       /* zone size in bytes */
    uint64_t zone_cap_bs;        /* zone capacity in bytes */
    uint32_t max_active_zones;   /* 0 means no limit */
    uint32_t max_open_zones;     /* 0 means no limit */
    uint32_t zd_extension_size;  /* per-zone descriptor extension size, bytes */

    /* zone random write area configuration */
    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;
123
/* An NVMe namespace device backed by a block device (blkconf). */
typedef struct NvmeNamespace {
    DeviceState parent_obj;
    BlockConf blkconf;
    int32_t bootindex;
    int64_t size;        /* data size in bytes */
    int64_t moff;        /* byte offset of the metadata area in the backend */
    NvmeIdNs id_ns;      /* Identify Namespace data structure */
    NvmeIdNsNvm id_ns_nvm;
    NvmeLBAF lbaf;       /* currently selected LBA format */
    unsigned int nlbaf;  /* number of supported LBA formats */
    size_t lbasz;        /* logical block size in bytes */
    const uint32_t *iocs; /* supported I/O command set opcodes */
    uint8_t csi;         /* command set identifier */
    uint16_t status;
    int attached;        /* number of controllers this namespace is attached to — confirm */
    uint8_t pif;         /* protection information format */

    /* zone random write area state */
    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* zoned command set state */
    NvmeIdNsZoned *id_ns_zoned;
    NvmeZone *zone_array;                    /* all zones, indexed by zone number */
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;  /* explicitly opened zones */
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;  /* implicitly opened zones */
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t num_zones;
    uint64_t zone_size;        /* zone size in logical blocks */
    uint64_t zone_capacity;    /* zone capacity in logical blocks */
    uint32_t zone_size_log2;   /* log2(zone_size) if it is a power of two, else 0 — confirm */
    uint8_t *zd_extensions;    /* zone descriptor extensions, num_zones slots */
    int32_t nr_open_zones;     /* tracked only if params.max_open_zones != 0 */
    int32_t nr_active_zones;   /* tracked only if params.max_active_zones != 0 */

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;      /* Error Recovery feature value */
    } features;
} NvmeNamespace;
169
170static inline uint32_t nvme_nsid(NvmeNamespace *ns)
171{
172 if (ns) {
173 return ns->params.nsid;
174 }
175
176 return 0;
177}
178
179static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
180{
181 return lba << ns->lbaf.ds;
182}
183
184static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
185{
186 return ns->lbaf.ms * lba;
187}
188
189static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
190{
191 return ns->moff + nvme_m2b(ns, lba);
192}
193
194static inline bool nvme_ns_ext(NvmeNamespace *ns)
195{
196 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
197}
198
199static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
200{
201 return zone->d.zs >> 4;
202}
203
204static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
205{
206 zone->d.zs = state << 4;
207}
208
209static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
210{
211 return zone->d.zslba + ns->zone_size;
212}
213
214static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
215{
216 return zone->d.zslba + zone->d.zcap;
217}
218
219static inline bool nvme_wp_is_valid(NvmeZone *zone)
220{
221 uint8_t st = nvme_get_zone_state(zone);
222
223 return st != NVME_ZONE_STATE_FULL &&
224 st != NVME_ZONE_STATE_READ_ONLY &&
225 st != NVME_ZONE_STATE_OFFLINE;
226}
227
228static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
229 uint32_t zone_idx)
230{
231 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
232}
233
234static inline void nvme_aor_inc_open(NvmeNamespace *ns)
235{
236 assert(ns->nr_open_zones >= 0);
237 if (ns->params.max_open_zones) {
238 ns->nr_open_zones++;
239 assert(ns->nr_open_zones <= ns->params.max_open_zones);
240 }
241}
242
243static inline void nvme_aor_dec_open(NvmeNamespace *ns)
244{
245 if (ns->params.max_open_zones) {
246 assert(ns->nr_open_zones > 0);
247 ns->nr_open_zones--;
248 }
249 assert(ns->nr_open_zones >= 0);
250}
251
252static inline void nvme_aor_inc_active(NvmeNamespace *ns)
253{
254 assert(ns->nr_active_zones >= 0);
255 if (ns->params.max_active_zones) {
256 ns->nr_active_zones++;
257 assert(ns->nr_active_zones <= ns->params.max_active_zones);
258 }
259}
260
261static inline void nvme_aor_dec_active(NvmeNamespace *ns)
262{
263 if (ns->params.max_active_zones) {
264 assert(ns->nr_active_zones > 0);
265 ns->nr_active_zones--;
266 assert(ns->nr_active_zones >= ns->nr_open_zones);
267 }
268 assert(ns->nr_active_zones >= 0);
269}
270
271void nvme_ns_init_format(NvmeNamespace *ns);
272int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
273void nvme_ns_drain(NvmeNamespace *ns);
274void nvme_ns_shutdown(NvmeNamespace *ns);
275void nvme_ns_cleanup(NvmeNamespace *ns);
276
/* A queued asynchronous event, delivered via an outstanding AER command. */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;
281
/* Flags for NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 1 << 0, /* the sg list/iovec has been initialized */
    NVME_SG_DMA = 1 << 1,   /* union holds qsg (DMA); otherwise iov */
};
286
/*
 * Scatter/gather description of a transfer; either a DMA sg list (qsg,
 * when NVME_SG_DMA is set) or an iovec (iov).
 */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList qsg;
        QEMUIOVector iov;
    };
} NvmeSg;
295
/* Direction of a host<->device data transfer. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;
300
/* State of one in-flight command, from submission to completion. */
typedef struct NvmeRequest {
    struct NvmeSQueue *sq;     /* submission queue the command arrived on */
    struct NvmeNamespace *ns;  /* target namespace, if any */
    BlockAIOCB *aiocb;         /* outstanding block-layer request */
    uint16_t status;           /* completion status code */
    void *opaque;              /* per-command private data */
    NvmeCqe cqe;               /* completion queue entry being built */
    NvmeCmd cmd;               /* copy of the submitted command */
    BlockAcctCookie acct;      /* block accounting cookie */
    NvmeSg sg;                 /* mapped data buffers */
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
313
/*
 * Bounce buffers for data and metadata of a request; presumably used by
 * commands that must inspect the payload (e.g. compare/verify) — confirm.
 */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;
322
323static inline const char *nvme_adm_opc_str(uint8_t opc)
324{
325 switch (opc) {
326 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
327 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
328 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
329 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
330 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
331 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
332 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
333 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
334 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
335 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
336 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
337 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
338 default: return "NVME_ADM_CMD_UNKNOWN";
339 }
340}
341
342static inline const char *nvme_io_opc_str(uint8_t opc)
343{
344 switch (opc) {
345 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
346 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
347 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
348 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
349 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
350 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
351 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
352 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
353 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
354 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
355 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
356 default: return "NVME_NVM_CMD_UNKNOWN";
357 }
358}
359
/* A submission queue and its in-flight request bookkeeping. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t sqid;
    uint16_t cqid;       /* completion queue this SQ posts to */
    uint32_t head;
    uint32_t tail;
    uint32_t size;       /* number of entries */
    uint64_t dma_addr;   /* guest-physical base address of the queue */
    QEMUTimer *timer;    /* presumably defers queue processing — confirm */
    NvmeRequest *io_req; /* preallocated request pool for this queue */
    QTAILQ_HEAD(, NvmeRequest) req_list;     /* free requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list; /* outstanding requests */
    QTAILQ_ENTRY(NvmeSQueue) entry;          /* linkage on the CQ's sq_list */
} NvmeSQueue;
374
/* A completion queue, its interrupt configuration, and attached SQs. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t phase;        /* current phase tag */
    uint16_t cqid;
    uint16_t irq_enabled;
    uint32_t head;
    uint32_t tail;
    uint32_t vector;      /* MSI-X vector (when in use) */
    uint32_t size;        /* number of entries */
    uint64_t dma_addr;    /* guest-physical base address of the queue */
    QEMUTimer *timer;     /* presumably defers completion posting — confirm */
    QTAILQ_HEAD(, NvmeSQueue) sq_list;   /* submission queues posting here */
    QTAILQ_HEAD(, NvmeRequest) req_list; /* requests awaiting completion */
} NvmeCQueue;
389
390#define TYPE_NVME "nvme"
391#define NVME(obj) \
392 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
393
/* User-configurable controller parameters (device properties). */
typedef struct NvmeParams {
    char *serial;             /* serial number string */
    uint32_t num_queues;      /* deprecated in favor of max_ioqpairs — confirm */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;     /* Controller Memory Buffer size in MiB */
    uint8_t aerl;             /* Async Event Request Limit (0's based) */
    uint32_t aer_max_queued;  /* cap on internally queued async events */
    uint8_t mdts;             /* Maximum Data Transfer Size (log2 units) */
    uint8_t vsl;
    bool use_intel_id;        /* use the legacy Intel PCI vendor/device ID */
    uint8_t zasl;             /* Zone Append Size Limit */
    bool auto_transition_zones;
    bool legacy_cmb;
} NvmeParams;
409
/* The NVMe controller PCI device and all of its runtime state. */
typedef struct NvmeCtrl {
    PCIDevice parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar bar;          /* shadow of the controller register set */
    NvmeParams params;
    NvmeBus bus;          /* bus that nvme-ns devices attach to */

    uint16_t cntlid;      /* controller id within the subsystem */
    bool qs_created;      /* I/O queues have been allocated */
    uint32_t page_size;
    uint16_t page_bits;   /* log2(page_size) */
    uint16_t max_prp_ents;
    uint16_t cqe_size;
    uint16_t sqe_size;
    uint32_t reg_size;
    uint32_t max_q_ents;
    uint8_t outstanding_aers;
    uint32_t irq_status;
    int cq_pending;
    /* timestamp state for the Timestamp feature */
    uint64_t host_timestamp;                 /* latest timestamp set by the host */
    uint64_t timestamp_set_qemu_clock_ms;    /* QEMU clock time at that moment */
    uint64_t starttime_ms;
    uint16_t temperature;
    uint8_t smart_critical_warning;

    /* Controller Memory Buffer */
    struct {
        MemoryRegion mem;
        uint8_t *buf;
        bool cmse;    /* CMB memory space enabled */
        hwaddr cba;   /* CMB base address */
    } cmb;

    /* Persistent Memory Region */
    struct {
        HostMemoryBackend *dev;
        bool cmse;    /* PMR memory space enabled */
        hwaddr cba;   /* PMR base address */
    } pmr;

    /* asynchronous event bookkeeping */
    uint8_t aer_mask;
    NvmeRequest **aer_reqs;  /* outstanding AER commands */
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int aer_queued;

    uint32_t dmrsl;


/* bitmap of namespaces with changed identify info; slot 0 unused */
#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem *subsys;

    NvmeNamespace namespace;  /* implicit namespace for drive= shorthand — confirm */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by nsid */
    NvmeSQueue **sq;          /* indexed by sqid; [0] is the admin SQ */
    NvmeCQueue **cq;          /* indexed by cqid; [0] is the admin CQ */
    NvmeSQueue admin_sq;
    NvmeCQueue admin_cq;
    NvmeIdCtrl id_ctrl;       /* Identify Controller data structure */

    /* current values of the Features this controller supports */
    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t async_config;
        NvmeHostBehaviorSupport hbs;
    } features;
} NvmeCtrl;
480
481static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
482{
483 if (!nsid || nsid > NVME_MAX_NAMESPACES) {
484 return NULL;
485 }
486
487 return n->namespaces[nsid];
488}
489
490static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
491{
492 NvmeSQueue *sq = req->sq;
493 NvmeCtrl *n = sq->ctrl;
494
495 return n->cq[sq->cqid];
496}
497
498static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
499{
500 NvmeSQueue *sq = req->sq;
501 return sq->ctrl;
502}
503
504static inline uint16_t nvme_cid(NvmeRequest *req)
505{
506 if (!req) {
507 return 0xffff;
508 }
509
510 return le16_to_cpu(req->cqe.cid);
511}
512
513void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
514uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
515 NvmeTxDirection dir, NvmeRequest *req);
516uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
517 NvmeTxDirection dir, NvmeRequest *req);
518void nvme_rw_complete_cb(void *opaque, int ret);
519uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
520 NvmeCmd *cmd);
521
522#endif
523