/*
 * QEMU NVM Express Controller
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/units.h"
#include "qemu/range.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
#include "hw/pci/pcie_sriov.h"
#include "migration/vmstate.h"

#include "nvme.h"
#include "dif.h"
#include "trace.h"

#define NVME_MAX_IOQPAIRS 0xffff
#define NVME_DB_SIZE 4
#define NVME_SPEC_VER 0x00010400
#define NVME_CMB_BIR 2
#define NVME_PMR_BIR 4
#define NVME_TEMPERATURE 0x143
#define NVME_TEMPERATURE_WARNING 0x157
#define NVME_TEMPERATURE_CRITICAL 0x175
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
#define NVME_MAX_VFS 127
#define NVME_VF_RES_GRANULARITY 1
#define NVME_VF_OFFSET 0x1
#define NVME_VF_STRIDE 1

#define NVME_GUEST_ERR(trace, fmt, ...) \
    do { \
        (trace_##trace)(__VA_ARGS__); \
        qemu_log_mask(LOG_GUEST_ERROR, #trace \
                      " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
    } while (0)
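
/*
 * Illustrative use of NVME_GUEST_ERR (a sketch; the trace event name is
 * assumed to be declared in this device's trace-events file):
 *
 *     NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings,
 *                    "number of mappings exceeds %d", IOV_MAX);
 *
 * A single invocation both fires the trace event and emits a
 * LOG_GUEST_ERROR message prefixed with the event name and the calling
 * function.
 */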

static const bool nvme_feature_support[NVME_FID_MAX] = {
    [NVME_ARBITRATION] = true,
    [NVME_POWER_MANAGEMENT] = true,
    [NVME_TEMPERATURE_THRESHOLD] = true,
    [NVME_ERROR_RECOVERY] = true,
    [NVME_VOLATILE_WRITE_CACHE] = true,
    [NVME_NUMBER_OF_QUEUES] = true,
    [NVME_INTERRUPT_COALESCING] = true,
    [NVME_INTERRUPT_VECTOR_CONF] = true,
    [NVME_WRITE_ATOMICITY] = true,
    [NVME_ASYNCHRONOUS_EVENT_CONF] = true,
    [NVME_TIMESTAMP] = true,
    [NVME_HOST_BEHAVIOR_SUPPORT] = true,
    [NVME_COMMAND_SET_PROFILE] = true,
};

static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
    [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE,
    [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
    [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
    [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
    [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
    [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
    [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
    [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE,
};

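/*
 * Per-opcode entries for the Commands Supported and Effects log.
 * NVME_CMD_EFF_CSUPP marks an opcode as supported, NVME_CMD_EFF_LBCC marks
 * commands that may change logical block content, and NVME_CMD_EFF_NIC
 * marks commands that may change the namespace inventory.
 */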
static const uint32_t nvme_cse_acs[256] = {
    [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
    [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};

static const uint32_t nvme_cse_iocs_none[256];

static const uint32_t nvme_cse_iocs_nvm[256] = {
    [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
};

static const uint32_t nvme_cse_iocs_zoned[256] = {
    [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
};

static void nvme_process_sq(void *opaque);
static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);

static uint16_t nvme_sqid(NvmeRequest *req)
{
    return le16_to_cpu(req->sq->sqid);
}

static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
                                   NvmeZoneState state)
{
    if (QTAILQ_IN_USE(zone, entry)) {
        switch (nvme_get_zone_state(zone)) {
        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
            QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
            QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_CLOSED:
            QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_FULL:
            QTAILQ_REMOVE(&ns->full_zones, zone, entry);
            /* fall through */
        default:
            ;
        }
    }

    nvme_set_zone_state(zone, state);

    switch (state) {
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_CLOSED:
        QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_FULL:
        QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry);
        /* fall through */
    case NVME_ZONE_STATE_READ_ONLY:
        break;
    default:
        zone->d.za = 0;
    }
}

static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
                                         uint32_t opn, uint32_t zrwa)
{
    if (ns->params.max_active_zones != 0 &&
        ns->nr_active_zones + act > ns->params.max_active_zones) {
        trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
    }

    if (ns->params.max_open_zones != 0 &&
        ns->nr_open_zones + opn > ns->params.max_open_zones) {
        trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
    }

    if (zrwa > ns->zns.numzrwa) {
        return NVME_NOZRWA | NVME_DNR;
    }

    return NVME_SUCCESS;
}

static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
{
    return nvme_zns_check_resources(ns, act, opn, 0);
}

static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi, lo;

    if (!n->cmb.cmse) {
        return false;
    }

    lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;
    hi = lo + int128_get64(n->cmb.mem.size);

    return addr >= lo && addr < hi;
}

static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
{
    hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;
    return &n->cmb.buf[addr - base];
}

static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi;

    if (!n->pmr.cmse) {
        return false;
    }

    hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size);

    return addr >= n->pmr.cba && addr < hi;
}

static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr)
{
    return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba);
}

static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi, lo;

    /*
     * The purpose of this check is to guard against invalid "local" access
     * to the iomem (i.e. the controller registers), so check against the
     * range covered by the 'bar0' MemoryRegion.
     */
    lo = n->bar0.addr;
    hi = lo + int128_get64(n->bar0.size);

    return addr >= lo && addr < hi;
}

static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
{
    hwaddr hi = addr + size - 1;
    if (hi < addr) {
        return 1;
    }

    if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
        memcpy(buf, nvme_addr_to_cmb(n, addr), size);
        return 0;
    }

    if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
        memcpy(buf, nvme_addr_to_pmr(n, addr), size);
        return 0;
    }

    return pci_dma_read(&n->parent_obj, addr, buf, size);
}

static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
{
    hwaddr hi = addr + size - 1;
    if (hi < addr) {
        return 1;
    }

    if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
        memcpy(nvme_addr_to_cmb(n, addr), buf, size);
        return 0;
    }

    if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
        memcpy(nvme_addr_to_pmr(n, addr), buf, size);
        return 0;
    }

    return pci_dma_write(&n->parent_obj, addr, buf, size);
}

static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
{
    return nsid &&
        (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES);
}

static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
{
    return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
}

static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
{
    return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
}

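/*
 * Completion queue entries carry a Phase Tag bit that flips on every
 * wrap-around of the queue. The host compares the Phase Tag of the entry at
 * its head against the phase it expects, so it can detect newly posted
 * entries without reading a device register. For example, with
 * cq->size == 4, entries posted at tails 0..3 carry phase 1; after the
 * wrap below, entries for the next pass are posted with phase 0.
 */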
static void nvme_inc_cq_tail(NvmeCQueue *cq)
{
    cq->tail++;
    if (cq->tail >= cq->size) {
        cq->tail = 0;
        cq->phase = !cq->phase;
    }
}

static void nvme_inc_sq_head(NvmeSQueue *sq)
{
    sq->head = (sq->head + 1) % sq->size;
}

static uint8_t nvme_cq_full(NvmeCQueue *cq)
{
    return (cq->tail + 1) % cq->size == cq->head;
}

static uint8_t nvme_sq_empty(NvmeSQueue *sq)
{
    return sq->head == sq->tail;
}

static void nvme_irq_check(NvmeCtrl *n)
{
    uint32_t intms = ldl_le_p(&n->bar.intms);

    if (msix_enabled(&(n->parent_obj))) {
        return;
    }
    if (~intms & n->irq_status) {
        pci_irq_assert(&n->parent_obj);
    } else {
        pci_irq_deassert(&n->parent_obj);
    }
}

static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
{
    if (cq->irq_enabled) {
        if (msix_enabled(&(n->parent_obj))) {
            trace_pci_nvme_irq_msix(cq->vector);
            msix_notify(&(n->parent_obj), cq->vector);
        } else {
            trace_pci_nvme_irq_pin();
            assert(cq->vector < 32);
            n->irq_status |= 1 << cq->vector;
            nvme_irq_check(n);
        }
    } else {
        trace_pci_nvme_irq_masked();
    }
}

static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
{
    if (cq->irq_enabled) {
        if (msix_enabled(&(n->parent_obj))) {
            return;
        } else {
            assert(cq->vector < 32);
            if (!n->cq_pending) {
                n->irq_status &= ~(1 << cq->vector);
            }
            nvme_irq_check(n);
        }
    }
}

static void nvme_req_clear(NvmeRequest *req)
{
    req->ns = NULL;
    req->opaque = NULL;
    req->aiocb = NULL;
    memset(&req->cqe, 0x0, sizeof(req->cqe));
    req->status = NVME_SUCCESS;
}

static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
{
    if (dma) {
        pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
        sg->flags = NVME_SG_DMA;
    } else {
        qemu_iovec_init(&sg->iov, 0);
    }

    sg->flags |= NVME_SG_ALLOC;
}

static inline void nvme_sg_unmap(NvmeSg *sg)
{
    if (!(sg->flags & NVME_SG_ALLOC)) {
        return;
    }

    if (sg->flags & NVME_SG_DMA) {
        qemu_sglist_destroy(&sg->qsg);
    } else {
        qemu_iovec_destroy(&sg->iov);
    }

    memset(sg, 0x0, sizeof(*sg));
}

/*
 * When metadata is transferred as extended LBAs, the DPTR mapped into `sg`
 * holds both data and metadata. This function splits the data and metadata
 * into two separate QSG/IOVs.
 */
static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data,
                          NvmeSg *mdata)
{
    NvmeSg *dst = data;
    uint32_t trans_len, count = ns->lbasz;
    uint64_t offset = 0;
    bool dma = sg->flags & NVME_SG_DMA;
    size_t sge_len;
    size_t sg_len = dma ? sg->qsg.size : sg->iov.size;
    int sg_idx = 0;

    assert(sg->flags & NVME_SG_ALLOC);

    while (sg_len) {
        sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;

        trans_len = MIN(sg_len, count);
        trans_len = MIN(trans_len, sge_len - offset);

        if (dst) {
            if (dma) {
                qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset,
                                trans_len);
            } else {
                qemu_iovec_add(&dst->iov,
                               sg->iov.iov[sg_idx].iov_base + offset,
                               trans_len);
            }
        }

        sg_len -= trans_len;
        count -= trans_len;
        offset += trans_len;

        if (count == 0) {
            dst = (dst == data) ? mdata : data;
            count = (dst == data) ? ns->lbasz : ns->lbaf.ms;
        }

        if (sge_len == offset) {
            offset = 0;
            sg_idx++;
        }
    }
}
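
/*
 * Worked example (illustrative values): with lbasz = 512 and lbaf.ms = 8,
 * an extended-LBA buffer covering two blocks is laid out as 512B data, 8B
 * metadata, 512B data, 8B metadata. The split above walks that layout and
 * produces a 1024-byte data QSG/IOV and a 16-byte metadata QSG/IOV.
 */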

static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
                                  size_t len)
{
    if (!len) {
        return NVME_SUCCESS;
    }

    trace_pci_nvme_map_addr_cmb(addr, len);

    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
        return NVME_DATA_TRAS_ERROR;
    }

    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);

    return NVME_SUCCESS;
}

static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
                                  size_t len)
{
    if (!len) {
        return NVME_SUCCESS;
    }

    if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) {
        return NVME_DATA_TRAS_ERROR;
    }

    qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len);

    return NVME_SUCCESS;
}

static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
{
    bool cmb = false, pmr = false;

    if (!len) {
        return NVME_SUCCESS;
    }

    trace_pci_nvme_map_addr(addr, len);

    if (nvme_addr_is_iomem(n, addr)) {
        return NVME_DATA_TRAS_ERROR;
    }

    if (nvme_addr_is_cmb(n, addr)) {
        cmb = true;
    } else if (nvme_addr_is_pmr(n, addr)) {
        pmr = true;
    }

    if (cmb || pmr) {
        if (sg->flags & NVME_SG_DMA) {
            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
        }

        if (sg->iov.niov + 1 > IOV_MAX) {
            goto max_mappings_exceeded;
        }

        if (cmb) {
            return nvme_map_addr_cmb(n, &sg->iov, addr, len);
        } else {
            return nvme_map_addr_pmr(n, &sg->iov, addr, len);
        }
    }

    if (!(sg->flags & NVME_SG_DMA)) {
        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
    }

    if (sg->qsg.nsg + 1 > IOV_MAX) {
        goto max_mappings_exceeded;
    }

    qemu_sglist_add(&sg->qsg, addr, len);

    return NVME_SUCCESS;

max_mappings_exceeded:
    NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings,
                   "number of mappings exceeds 1024");
    return NVME_INTERNAL_DEV_ERROR | NVME_DNR;
}

static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
{
    return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
}

static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
                             uint64_t prp2, uint32_t len)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;
    uint16_t status;
    int ret;

    trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);

    nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1));

    status = nvme_map_addr(n, sg, prp1, trans_len);
    if (status) {
        goto unmap;
    }

    len -= trans_len;
    if (len) {
        if (len > n->page_size) {
            uint64_t prp_list[n->max_prp_ents];
            uint32_t nents, prp_trans;
            int i = 0;

            /*
             * The first PRP list entry, pointed to by PRP2, may contain an
             * offset; calculate the number of entries that fit in the page
             * starting at that offset.
             */
            nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3;
            prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
            ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
            if (ret) {
                trace_pci_nvme_err_addr_read(prp2);
                status = NVME_DATA_TRAS_ERROR;
                goto unmap;
            }
            while (len != 0) {
                uint64_t prp_ent = le64_to_cpu(prp_list[i]);

                if (i == nents - 1 && len > n->page_size) {
                    if (unlikely(prp_ent & (n->page_size - 1))) {
                        trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
                        status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
                        goto unmap;
                    }

                    i = 0;
                    nents = (len + n->page_size - 1) >> n->page_bits;
                    nents = MIN(nents, n->max_prp_ents);
                    prp_trans = nents * sizeof(uint64_t);
                    ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
                                         prp_trans);
                    if (ret) {
                        trace_pci_nvme_err_addr_read(prp_ent);
                        status = NVME_DATA_TRAS_ERROR;
                        goto unmap;
                    }
                    prp_ent = le64_to_cpu(prp_list[i]);
                }

                if (unlikely(prp_ent & (n->page_size - 1))) {
                    trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
                    status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
                    goto unmap;
                }

                trans_len = MIN(len, n->page_size);
                status = nvme_map_addr(n, sg, prp_ent, trans_len);
                if (status) {
                    goto unmap;
                }

                len -= trans_len;
                i++;
            }
        } else {
            if (unlikely(prp2 & (n->page_size - 1))) {
                trace_pci_nvme_err_invalid_prp2_align(prp2);
                status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
                goto unmap;
            }
            status = nvme_map_addr(n, sg, prp2, len);
            if (status) {
                goto unmap;
            }
        }
    }

    return NVME_SUCCESS;

unmap:
    nvme_sg_unmap(sg);
    return status;
}
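
/*
 * PRP mapping example (illustrative numbers): with page_size = 4096, a
 * 10000-byte transfer with a page-aligned PRP1 maps 4096 bytes through
 * PRP1. The remaining 5904 bytes exceed one page, so PRP2 points to a PRP
 * list whose first two entries map 4096 and 1808 bytes respectively. Had
 * the remainder been at most one page, PRP2 would have been used as a
 * direct data pointer instead of a list pointer.
 */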

/*
 * Map 'nsgld' data descriptors from 'segment'. The function subtracts the
 * number of bytes mapped from '*len'.
 */
static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
                                  NvmeSglDescriptor *segment, uint64_t nsgld,
                                  size_t *len, NvmeCmd *cmd)
{
    dma_addr_t addr, trans_len;
    uint32_t dlen;
    uint16_t status;

    for (int i = 0; i < nsgld; i++) {
        uint8_t type = NVME_SGL_TYPE(segment[i].type);

        switch (type) {
        case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
            break;
        case NVME_SGL_DESCR_TYPE_SEGMENT:
        case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
            return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
        default:
            return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
        }

        dlen = le32_to_cpu(segment[i].len);

        if (!dlen) {
            continue;
        }

        if (*len == 0) {
            /*
             * All data has been mapped, but the SGL contains additional
             * segments and/or descriptors. The controller might accept
             * ignoring the rest of the SGL.
             */
            uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls);
            if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
                break;
            }

            trace_pci_nvme_err_invalid_sgl_excess_length(dlen);
            return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        }

        trans_len = MIN(*len, dlen);

        addr = le64_to_cpu(segment[i].addr);

        if (UINT64_MAX - addr < dlen) {
            return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        }

        status = nvme_map_addr(n, sg, addr, trans_len);
        if (status) {
            return status;
        }

        *len -= trans_len;
    }

    return NVME_SUCCESS;
}

static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
                             size_t len, NvmeCmd *cmd)
{
    /*
     * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
     * dynamically allocating a potentially huge SGL. The spec allows the SGL
     * to be larger (as in number of bytes required to describe the SGL
     * descriptors and segment chain) than the command transfer size, so it
     * is not bounded by MDTS.
     */
    const int SEG_CHUNK_SIZE = 256;

    NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
    uint64_t nsgld;
    uint32_t seg_len;
    uint16_t status;
    hwaddr addr;
    int ret;

    sgld = &sgl;
    addr = le64_to_cpu(sgl.addr);

    trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len);

    nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));

    /*
     * If the entire transfer can be described with a single data block it
     * can be mapped directly.
     */
    if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
        status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd);
        if (status) {
            goto unmap;
        }

        goto out;
    }

    for (;;) {
        switch (NVME_SGL_TYPE(sgld->type)) {
        case NVME_SGL_DESCR_TYPE_SEGMENT:
        case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
            break;
        default:
            return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
        }

        seg_len = le32_to_cpu(sgld->len);

        /* check the length of the (Last) Segment descriptor */
        if (!seg_len || seg_len & 0xf) {
            return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
        }

        if (UINT64_MAX - addr < seg_len) {
            return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        }

        nsgld = seg_len / sizeof(NvmeSglDescriptor);

        while (nsgld > SEG_CHUNK_SIZE) {
            if (nvme_addr_read(n, addr, segment, sizeof(segment))) {
                trace_pci_nvme_err_addr_read(addr);
                status = NVME_DATA_TRAS_ERROR;
                goto unmap;
            }

            status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
                                       &len, cmd);
            if (status) {
                goto unmap;
            }

            nsgld -= SEG_CHUNK_SIZE;
            addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor);
        }

        ret = nvme_addr_read(n, addr, segment, nsgld *
                             sizeof(NvmeSglDescriptor));
        if (ret) {
            trace_pci_nvme_err_addr_read(addr);
            status = NVME_DATA_TRAS_ERROR;
            goto unmap;
        }

        last_sgld = &segment[nsgld - 1];

        /*
         * If the segment ends with a Data Block, then we are done mapping;
         * a Data Block may only appear as the final descriptor of the chain.
         */
        if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
            status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
            if (status) {
                goto unmap;
            }

            goto out;
        }

        /*
         * If the last descriptor was not a Data Block, then the current
         * segment must not be a Last Segment.
         */
        if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
            status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
            goto unmap;
        }

        sgld = last_sgld;
        addr = le64_to_cpu(sgld->addr);

        /*
         * Do not map the last descriptor; it will be a Segment or Last
         * Segment descriptor and is handled by the next iteration.
         */
        status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd);
        if (status) {
            goto unmap;
        }
    }

out:
    /* if there is any residual left in len, the SGL was too short */
    if (len) {
        status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        goto unmap;
    }

    return NVME_SUCCESS;

unmap:
    nvme_sg_unmap(sg);
    return status;
}

uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd)
{
    uint64_t prp1, prp2;

    switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
    case NVME_PSDT_PRP:
        prp1 = le64_to_cpu(cmd->dptr.prp1);
        prp2 = le64_to_cpu(cmd->dptr.prp2);

        return nvme_map_prp(n, sg, prp1, prp2, len);
    case NVME_PSDT_SGL_MPTR_CONTIGUOUS:
    case NVME_PSDT_SGL_MPTR_SGL:
        return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd);
    default:
        return NVME_INVALID_FIELD;
    }
}
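
/*
 * Note: PSDT (PRP or SGL for Data Transfer) is a two-bit field in the
 * command's flags byte. 0x0 selects PRPs; 0x1 and 0x2 select SGLs and
 * differ only in how the metadata pointer (MPTR) is interpreted, which is
 * why both SGL cases share one branch above.
 */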

static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                              NvmeCmd *cmd)
{
    int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags);
    hwaddr mptr = le64_to_cpu(cmd->mptr);
    uint16_t status;

    if (psdt == NVME_PSDT_SGL_MPTR_SGL) {
        NvmeSglDescriptor sgl;

        if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) {
            return NVME_DATA_TRAS_ERROR;
        }

        status = nvme_map_sgl(n, sg, sgl, len, cmd);
        if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) {
            status = NVME_MD_SGL_LEN_INVALID | NVME_DNR;
        }

        return status;
    }

    nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr));
    status = nvme_map_addr(n, sg, mptr, len);
    if (status) {
        nvme_sg_unmap(sg);
    }

    return status;
}

static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
    bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
    size_t len = nvme_l2b(ns, nlb);
    uint16_t status;

    if (nvme_ns_ext(ns) &&
        !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
        NvmeSg sg;

        len += nvme_m2b(ns, nlb);

        status = nvme_map_dptr(n, &sg, len, &req->cmd);
        if (status) {
            return status;
        }

        nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
        nvme_sg_split(&sg, ns, &req->sg, NULL);
        nvme_sg_unmap(&sg);

        return NVME_SUCCESS;
    }

    return nvme_map_dptr(n, &req->sg, len, &req->cmd);
}

static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    size_t len = nvme_m2b(ns, nlb);
    uint16_t status;

    if (nvme_ns_ext(ns)) {
        NvmeSg sg;

        len += nvme_l2b(ns, nlb);

        status = nvme_map_dptr(n, &sg, len, &req->cmd);
        if (status) {
            return status;
        }

        nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
        nvme_sg_split(&sg, ns, NULL, &req->sg);
        nvme_sg_unmap(&sg);

        return NVME_SUCCESS;
    }

    return nvme_map_mptr(n, &req->sg, len, &req->cmd);
}
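
/*
 * For extended-LBA namespaces (nvme_ns_ext), data and metadata share one
 * host buffer described by DPTR, so both nvme_map_data and nvme_map_mdata
 * map the full interleaved buffer and then split off the half they need.
 * For separate-buffer namespaces, metadata instead comes from MPTR.
 */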

static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr,
                                    uint32_t len, uint32_t bytes,
                                    int32_t skip_bytes, int64_t offset,
                                    NvmeTxDirection dir)
{
    hwaddr addr;
    uint32_t trans_len, count = bytes;
    bool dma = sg->flags & NVME_SG_DMA;
    int64_t sge_len;
    int sg_idx = 0;
    int ret;

    assert(sg->flags & NVME_SG_ALLOC);

    while (len) {
        sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;

        if (sge_len - offset < 0) {
            offset -= sge_len;
            sg_idx++;
            continue;
        }

        if (sge_len == offset) {
            offset = 0;
            sg_idx++;
            continue;
        }

        trans_len = MIN(len, count);
        trans_len = MIN(trans_len, sge_len - offset);

        if (dma) {
            addr = sg->qsg.sg[sg_idx].base + offset;
        } else {
            addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset;
        }

        if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
            ret = nvme_addr_read(n, addr, ptr, trans_len);
        } else {
            ret = nvme_addr_write(n, addr, ptr, trans_len);
        }

        if (ret) {
            return NVME_DATA_TRAS_ERROR;
        }

        ptr += trans_len;
        len -= trans_len;
        count -= trans_len;
        offset += trans_len;

        if (count == 0) {
            count = bytes;
            offset += skip_bytes;
        }
    }

    return NVME_SUCCESS;
}
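
/*
 * Example (illustrative values): copying only the metadata out of an
 * extended-LBA buffer uses bytes = lbaf.ms and skip_bytes = lbasz with an
 * initial offset of lbasz, so with lbasz = 512 and ms = 8 the walk above
 * skips each 512-byte data block and gathers each 8-byte metadata chunk;
 * see nvme_bounce_mdata below.
 */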

static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, void *ptr, uint32_t len,
                        NvmeTxDirection dir)
{
    assert(sg->flags & NVME_SG_ALLOC);

    if (sg->flags & NVME_SG_DMA) {
        const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
        dma_addr_t residual;

        if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
            dma_buf_write(ptr, len, &residual, &sg->qsg, attrs);
        } else {
            dma_buf_read(ptr, len, &residual, &sg->qsg, attrs);
        }

        if (unlikely(residual)) {
            trace_pci_nvme_err_invalid_dma();
            return NVME_INVALID_FIELD | NVME_DNR;
        }
    } else {
        size_t bytes;

        if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
            bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len);
        } else {
            bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len);
        }

        if (unlikely(bytes != len)) {
            trace_pci_nvme_err_invalid_dma();
            return NVME_INVALID_FIELD | NVME_DNR;
        }
    }

    return NVME_SUCCESS;
}

static inline uint16_t nvme_c2h(NvmeCtrl *n, void *ptr, uint32_t len,
                                NvmeRequest *req)
{
    uint16_t status;

    status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
    if (status) {
        return status;
    }

    return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE);
}

static inline uint16_t nvme_h2c(NvmeCtrl *n, void *ptr, uint32_t len,
                                NvmeRequest *req)
{
    uint16_t status;

    status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
    if (status) {
        return status;
    }

    return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
}

uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
    bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);

    if (nvme_ns_ext(ns) &&
        !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
        return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
                                   ns->lbaf.ms, 0, dir);
    }

    return nvme_tx(n, &req->sg, ptr, len, dir);
}

uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    uint16_t status;

    if (nvme_ns_ext(ns)) {
        return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms,
                                   ns->lbasz, ns->lbasz, dir);
    }

    nvme_sg_unmap(&req->sg);

    status = nvme_map_mptr(n, &req->sg, len, &req->cmd);
    if (status) {
        return status;
    }

    return nvme_tx(n, &req->sg, ptr, len, dir);
}

static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
                                 BlockCompletionFunc *cb, NvmeRequest *req)
{
    assert(req->sg.flags & NVME_SG_ALLOC);

    if (req->sg.flags & NVME_SG_DMA) {
        req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
                                  cb, req);
    } else {
        req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
    }
}

static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
                                  BlockCompletionFunc *cb, NvmeRequest *req)
{
    assert(req->sg.flags & NVME_SG_ALLOC);

    if (req->sg.flags & NVME_SG_DMA) {
        req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
                                   cb, req);
    } else {
        req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
    }
}
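
/*
 * Design note: when a transfer targets guest RAM the request carries a
 * QEMUSGList and the dma_blk_* helpers stream directly between the block
 * backend and guest memory; when it targets the CMB/PMR the request
 * carries a QEMUIOVector over host pointers and the plain blk_aio_*
 * variants are used instead.
 */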

static void nvme_update_cq_head(NvmeCQueue *cq)
{
    pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
                 sizeof(cq->head));
    trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
}

static void nvme_post_cqes(void *opaque)
{
    NvmeCQueue *cq = opaque;
    NvmeCtrl *n = cq->ctrl;
    NvmeRequest *req, *next;
    bool pending = cq->head != cq->tail;
    int ret;

    QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
        NvmeSQueue *sq;
        hwaddr addr;

        if (n->dbbuf_enabled) {
            nvme_update_cq_head(cq);
        }

        if (nvme_cq_full(cq)) {
            break;
        }

        sq = req->sq;
        req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
        req->cqe.sq_id = cpu_to_le16(sq->sqid);
        req->cqe.sq_head = cpu_to_le16(sq->head);
        addr = cq->dma_addr + cq->tail * n->cqe_size;
        ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
                            sizeof(req->cqe));
        if (ret) {
            trace_pci_nvme_err_addr_write(addr);
            trace_pci_nvme_err_cfs();
            stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
            break;
        }
        QTAILQ_REMOVE(&cq->req_list, req, entry);
        nvme_inc_cq_tail(cq);
        nvme_sg_unmap(&req->sg);
        QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
    }
    if (cq->tail != cq->head) {
        if (cq->irq_enabled && !pending) {
            n->cq_pending++;
        }

        nvme_irq_assert(n, cq);
    }
}

static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
    assert(cq->cqid == req->sq->cqid);
    trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
                                          le32_to_cpu(req->cqe.result),
                                          le32_to_cpu(req->cqe.dw1),
                                          req->status);

    if (req->status) {
        trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns),
                                      req->status, req->cmd.opcode);
    }

    QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
    QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);

    qemu_bh_schedule(cq->bh);
}

static void nvme_process_aers(void *opaque)
{
    NvmeCtrl *n = opaque;
    NvmeAsyncEvent *event, *next;

    trace_pci_nvme_process_aers(n->aer_queued);

    QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
        NvmeRequest *req;
        NvmeAerResult *result;

        /* can't post cqe if there is nothing to complete */
        if (!n->outstanding_aers) {
            trace_pci_nvme_no_outstanding_aers();
            break;
        }

        /* ignore if masked (cqe posted, but event not cleared) */
        if (n->aer_mask & (1 << event->result.event_type)) {
            trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
            continue;
        }

        QTAILQ_REMOVE(&n->aer_queue, event, entry);
        n->aer_queued--;

        n->aer_mask |= 1 << event->result.event_type;
        n->outstanding_aers--;

        req = n->aer_reqs[n->outstanding_aers];

        result = (NvmeAerResult *) &req->cqe.result;
        result->event_type = event->result.event_type;
        result->event_info = event->result.event_info;
        result->log_page = event->result.log_page;
        g_free(event);

        trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
                                    result->log_page);

        nvme_enqueue_req_completion(&n->admin_cq, req);
    }
}

static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
                               uint8_t event_info, uint8_t log_page)
{
    NvmeAsyncEvent *event;

    trace_pci_nvme_enqueue_event(event_type, event_info, log_page);

    if (n->aer_queued == n->params.aer_max_queued) {
        trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
        return;
    }

    event = g_new(NvmeAsyncEvent, 1);
    event->result = (NvmeAerResult) {
        .event_type = event_type,
        .event_info = event_info,
        .log_page = log_page,
    };

    QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
    n->aer_queued++;

    nvme_process_aers(n);
}

static void nvme_smart_event(NvmeCtrl *n, uint8_t event)
{
    uint8_t aer_info;

    /* do not post the event if it is not enabled in the async event config */
    if (!(NVME_AEC_SMART(n->features.async_config) & event)) {
        return;
    }

    switch (event) {
    case NVME_SMART_SPARE:
        aer_info = NVME_AER_INFO_SMART_SPARE_THRESH;
        break;
    case NVME_SMART_TEMPERATURE:
        aer_info = NVME_AER_INFO_SMART_TEMP_THRESH;
        break;
    case NVME_SMART_RELIABILITY:
    case NVME_SMART_MEDIA_READ_ONLY:
    case NVME_SMART_FAILED_VOLATILE_MEDIA:
    case NVME_SMART_PMR_UNRELIABLE:
        aer_info = NVME_AER_INFO_SMART_RELIABILITY;
        break;
    default:
        return;
    }

    nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO);
}

static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
{
    n->aer_mask &= ~(1 << event_type);
    if (!QTAILQ_EMPTY(&n->aer_queue)) {
        nvme_process_aers(n);
    }
}

static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
{
    uint8_t mdts = n->params.mdts;

    if (mdts && len > n->page_size << mdts) {
        trace_pci_nvme_err_mdts(len);
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    return NVME_SUCCESS;
}
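
/*
 * Example: with the default page size of 4096 bytes, mdts = 7 caps a single
 * transfer at 4096 << 7 = 512 KiB; mdts = 0 means no limit is advertised.
 */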

static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba,
                                         uint32_t nlb)
{
    uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);

    if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
        trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze);
        return NVME_LBA_RANGE | NVME_DNR;
    }

    return NVME_SUCCESS;
}

static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba,
                                 uint32_t nlb, int flags)
{
    BlockDriverState *bs = blk_bs(ns->blkconf.blk);

    int64_t pnum = 0, bytes = nvme_l2b(ns, nlb);
    int64_t offset = nvme_l2b(ns, slba);
    int ret;

    /*
     * `pnum` holds the number of bytes after offset that share the same
     * allocation status as the byte at offset. If `pnum` is different from
     * `bytes`, the allocation status of the next range must be checked; the
     * extent of the original range is bounded by `bytes`.
     */
    do {
        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            return ret;
        }

        trace_pci_nvme_block_status(offset, bytes, pnum, ret,
                                    !!(ret & BDRV_BLOCK_ZERO));

        if (!(ret & flags)) {
            return 1;
        }

        offset += pnum;
    } while (pnum != bytes);

    return 0;
}

static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
                                 uint32_t nlb)
{
    int ret;
    Error *err = NULL;

    ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA);
    if (ret) {
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        return NVME_DULB;
    }

    return NVME_SUCCESS;
}

static void nvme_aio_err(NvmeRequest *req, int ret)
{
    uint16_t status = NVME_SUCCESS;
    Error *local_err = NULL;

    switch (req->cmd.opcode) {
    case NVME_CMD_READ:
        status = NVME_UNRECOVERED_READ;
        break;
    case NVME_CMD_FLUSH:
    case NVME_CMD_WRITE:
    case NVME_CMD_WRITE_ZEROES:
    case NVME_CMD_ZONE_APPEND:
        status = NVME_WRITE_FAULT;
        break;
    default:
        status = NVME_INTERNAL_DEV_ERROR;
        break;
    }

    trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);

    error_setg_errno(&local_err, -ret, "aio failed");
    error_report_err(local_err);

    /*
     * Set the command status code to the first encountered error but allow
     * a subsequent Internal Device Error to trump it.
     */
    if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
        return;
    }

    req->status = status;
}

static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
{
    return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
                                    slba / ns->zone_size;
}
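
/*
 * Example: with a zone size of 128 MiB and 512-byte blocks, a zone spans
 * 262144 LBAs, a power of two, so zone_size_log2 = 18 and the zone index is
 * computed with a shift; a non-power-of-two zone size falls back to the
 * division.
 */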

static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
{
    uint32_t zone_idx = nvme_zone_idx(ns, slba);

    if (zone_idx >= ns->num_zones) {
        return NULL;
    }

    return &ns->zone_array[zone_idx];
}

static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
{
    uint64_t zslba = zone->d.zslba;

    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EMPTY:
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
    case NVME_ZONE_STATE_CLOSED:
        return NVME_SUCCESS;
    case NVME_ZONE_STATE_FULL:
        trace_pci_nvme_err_zone_is_full(zslba);
        return NVME_ZONE_FULL;
    case NVME_ZONE_STATE_OFFLINE:
        trace_pci_nvme_err_zone_is_offline(zslba);
        return NVME_ZONE_OFFLINE;
    case NVME_ZONE_STATE_READ_ONLY:
        trace_pci_nvme_err_zone_is_read_only(zslba);
        return NVME_ZONE_READ_ONLY;
    default:
        assert(false);
    }

    return NVME_INTERNAL_DEV_ERROR;
}

static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
                                      uint64_t slba, uint32_t nlb)
{
    uint64_t zcap = nvme_zone_wr_boundary(zone);
    uint16_t status;

    status = nvme_check_zone_state_for_write(zone);
    if (status) {
        return status;
    }

    if (zone->d.za & NVME_ZA_ZRWA_VALID) {
        uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;

        if (slba < zone->w_ptr || slba + nlb > ezrwa) {
            trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
            return NVME_ZONE_INVALID_WRITE;
        }
    } else {
        if (unlikely(slba != zone->w_ptr)) {
            trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
                                               zone->w_ptr);
            return NVME_ZONE_INVALID_WRITE;
        }
    }

    if (unlikely((slba + nlb) > zcap)) {
        trace_pci_nvme_err_zone_boundary(slba, nlb, zcap);
        return NVME_ZONE_BOUNDARY_ERROR;
    }

    return NVME_SUCCESS;
}

static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
{
    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EMPTY:
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
    case NVME_ZONE_STATE_FULL:
    case NVME_ZONE_STATE_CLOSED:
    case NVME_ZONE_STATE_READ_ONLY:
        return NVME_SUCCESS;
    case NVME_ZONE_STATE_OFFLINE:
        trace_pci_nvme_err_zone_is_offline(zone->d.zslba);
        return NVME_ZONE_OFFLINE;
    default:
        assert(false);
    }

    return NVME_INTERNAL_DEV_ERROR;
}

static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
                                     uint32_t nlb)
{
    NvmeZone *zone;
    uint64_t bndry, end;
    uint16_t status;

    zone = nvme_get_zone_by_slba(ns, slba);
    assert(zone);

    bndry = nvme_zone_rd_boundary(ns, zone);
    end = slba + nlb;

    status = nvme_check_zone_state_for_read(zone);
    if (status) {
        ;
    } else if (unlikely(end > bndry)) {
        if (!ns->params.cross_zone_read) {
            status = NVME_ZONE_BOUNDARY_ERROR;
        } else {
            /*
             * Read across zone boundary; check that all subsequent zones
             * touched by the read are in a readable state.
             */
            do {
                zone++;
                status = nvme_check_zone_state_for_read(zone);
                if (status) {
                    break;
                }
            } while (end > nvme_zone_rd_boundary(ns, zone));
        }
    }

    return status;
}

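/*
 * Zone resource management (ZRM) helpers. These implement the zone state
 * machine transitions (Empty, Implicitly/Explicitly Opened, Closed, Full,
 * Read Only, Offline) and keep the active/open resource counters and the
 * per-state queues consistent. The switch statements deliberately fall
 * through so that a transition from, e.g., an Opened state also runs the
 * Closed and Empty bookkeeping steps.
 */
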
static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
{
    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_FULL:
        return NVME_SUCCESS;

    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        nvme_aor_dec_open(ns);
        /* fallthrough */
    case NVME_ZONE_STATE_CLOSED:
        nvme_aor_dec_active(ns);

        if (zone->d.za & NVME_ZA_ZRWA_VALID) {
            zone->d.za &= ~NVME_ZA_ZRWA_VALID;
            if (ns->params.numzrwa) {
                ns->zns.numzrwa++;
            }
        }

        /* fallthrough */
    case NVME_ZONE_STATE_EMPTY:
        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
        return NVME_SUCCESS;

    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }
}

static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
{
    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        nvme_aor_dec_open(ns);
        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
        /* fallthrough */
    case NVME_ZONE_STATE_CLOSED:
        return NVME_SUCCESS;

    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }
}

static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
{
    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        nvme_aor_dec_open(ns);
        /* fallthrough */
    case NVME_ZONE_STATE_CLOSED:
        nvme_aor_dec_active(ns);

        if (zone->d.za & NVME_ZA_ZRWA_VALID) {
            if (ns->params.numzrwa) {
                ns->zns.numzrwa++;
            }
        }

        /* fallthrough */
    case NVME_ZONE_STATE_FULL:
        zone->w_ptr = zone->d.zslba;
        zone->d.wp = zone->w_ptr;
        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
        /* fallthrough */
    case NVME_ZONE_STATE_EMPTY:
        return NVME_SUCCESS;

    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }
}

static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
{
    NvmeZone *zone;

    if (ns->params.max_open_zones &&
        ns->nr_open_zones == ns->params.max_open_zones) {
        zone = QTAILQ_FIRST(&ns->imp_open_zones);
        if (zone) {
            /* automatically close the oldest implicitly open zone */
            QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
            nvme_zrm_close(ns, zone);
        }
    }
}

enum {
    NVME_ZRM_AUTO = 1 << 0,
    NVME_ZRM_ZRWA = 1 << 1,
};

static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
                                    NvmeZone *zone, int flags)
{
    int act = 0;
    uint16_t status;

    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EMPTY:
        act = 1;

        /* fallthrough */

    case NVME_ZONE_STATE_CLOSED:
        if (n->params.auto_transition_zones) {
            nvme_zrm_auto_transition_zone(ns);
        }
        status = nvme_zns_check_resources(ns, act, 1,
                                          (flags & NVME_ZRM_ZRWA) ? 1 : 0);
        if (status) {
            return status;
        }

        if (act) {
            nvme_aor_inc_active(ns);
        }

        nvme_aor_inc_open(ns);

        if (flags & NVME_ZRM_AUTO) {
            nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
            return NVME_SUCCESS;
        }

        /* fallthrough */

    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        if (flags & NVME_ZRM_AUTO) {
            return NVME_SUCCESS;
        }

        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);

        /* fallthrough */

    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        if (flags & NVME_ZRM_ZRWA) {
            ns->zns.numzrwa--;

            zone->d.za |= NVME_ZA_ZRWA_VALID;
        }

        return NVME_SUCCESS;

    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }
}

static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns,
                                     NvmeZone *zone)
{
    return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO);
}

static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
                                 uint32_t nlb)
{
    zone->d.wp += nlb;

    if (zone->d.wp == nvme_zone_wr_boundary(zone)) {
        nvme_zrm_finish(ns, zone);
    }
}

static void nvme_zoned_zrwa_implicit_flush(NvmeNamespace *ns, NvmeZone *zone,
                                           uint32_t nlbc)
{
    uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, ns->zns.zrwafg);

    nlbc = nzrwafgs * ns->zns.zrwafg;

    trace_pci_nvme_zoned_zrwa_implicit_flush(zone->d.zslba, nlbc);

    zone->w_ptr += nlbc;

    nvme_advance_zone_wp(ns, zone, nlbc);
}

static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeZone *zone;
    uint64_t slba;
    uint32_t nlb;

    slba = le64_to_cpu(rw->slba);
    nlb = le16_to_cpu(rw->nlb) + 1;
    zone = nvme_get_zone_by_slba(ns, slba);
    assert(zone);

    if (zone->d.za & NVME_ZA_ZRWA_VALID) {
        uint64_t ezrwa = zone->w_ptr + ns->zns.zrwas - 1;
        uint64_t elba = slba + nlb - 1;

        if (elba > ezrwa) {
            nvme_zoned_zrwa_implicit_flush(ns, zone, elba - ezrwa);
        }

        return;
    }

    nvme_advance_zone_wp(ns, zone, nlb);
}

static inline bool nvme_is_write(NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;

    return rw->opcode == NVME_CMD_WRITE ||
           rw->opcode == NVME_CMD_ZONE_APPEND ||
           rw->opcode == NVME_CMD_WRITE_ZEROES;
}

static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
{
    return qemu_get_aio_context();
}

static void nvme_misc_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;

    trace_pci_nvme_misc_cb(nvme_cid(req));

    if (ret) {
        nvme_aio_err(req, ret);
    }

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

void nvme_rw_complete_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    BlockAcctCookie *acct = &req->acct;
    BlockAcctStats *stats = blk_get_stats(blk);

    trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        block_acct_failed(stats, acct);
        nvme_aio_err(req, ret);
    } else {
        block_acct_done(stats, acct);
    }

    if (ns->params.zoned && nvme_is_write(req)) {
        nvme_finalize_zoned_write(ns, req);
    }

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

static void nvme_rw_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;
    NvmeNamespace *ns = req->ns;

    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    if (ns->lbaf.ms) {
        NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
        uint64_t slba = le64_to_cpu(rw->slba);
        uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
        uint64_t offset = nvme_moff(ns, slba);

        if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) {
            size_t mlen = nvme_m2b(ns, nlb);

            req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen,
                                               BDRV_REQ_MAY_UNMAP,
                                               nvme_rw_complete_cb, req);
            return;
        }

        if (nvme_ns_ext(ns) || req->cmd.mptr) {
            uint16_t status;

            nvme_sg_unmap(&req->sg);
            status = nvme_map_mdata(nvme_ctrl(req), nlb, req);
            if (status) {
                ret = -EFAULT;
                goto out;
            }

            if (req->cmd.opcode == NVME_CMD_READ) {
                return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req);
            }

            return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req);
        }
    }

out:
    nvme_rw_complete_cb(req, ret);
}
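
/*
 * Note on the flow above: for namespaces with metadata, a read/write is
 * issued in two phases. The data portion completes first and lands in
 * nvme_rw_cb, which then remaps the SGL/IOV onto the metadata region at
 * nvme_moff() and issues a second I/O that completes in
 * nvme_rw_complete_cb.
 */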

static void nvme_verify_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    BlockAcctCookie *acct = &req->acct;
    BlockAcctStats *stats = blk_get_stats(blk);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint64_t reftag = le32_to_cpu(rw->reftag);
    uint64_t cdw3 = le32_to_cpu(rw->cdw3);
    uint16_t status;

    reftag |= cdw3 << 32;

    trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);

    if (ret) {
        block_acct_failed(stats, acct);
        nvme_aio_err(req, ret);
        goto out;
    }

    block_acct_done(stats, acct);

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce,
                                       ctx->mdata.iov.size, slba);
        if (status) {
            req->status = status;
            goto out;
        }

        req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                     ctx->mdata.bounce, ctx->mdata.iov.size,
                                     prinfo, slba, apptag, appmask, &reftag);
    }

out:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

static void nvme_verify_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_verify_cb, ctx);
    return;

out:
    nvme_verify_cb(ctx, ret);
}

struct nvme_compare_ctx {
    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } mdata;
};

static void nvme_compare_mdata_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint64_t reftag = le32_to_cpu(rw->reftag);
    uint64_t cdw3 = le32_to_cpu(rw->cdw3);
    struct nvme_compare_ctx *ctx = req->opaque;
    g_autofree uint8_t *buf = NULL;
    BlockBackend *blk = ns->blkconf.blk;
    BlockAcctCookie *acct = &req->acct;
    BlockAcctStats *stats = blk_get_stats(blk);
    uint16_t status = NVME_SUCCESS;

    reftag |= cdw3 << 32;

    trace_pci_nvme_compare_mdata_cb(nvme_cid(req));

    if (ret) {
        block_acct_failed(stats, acct);
        nvme_aio_err(req, ret);
        goto out;
    }

    buf = g_malloc(ctx->mdata.iov.size);

    status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        uint64_t slba = le64_to_cpu(rw->slba);
        uint8_t *bufp;
        uint8_t *mbufp = ctx->mdata.bounce;
        uint8_t *end = mbufp + ctx->mdata.iov.size;
        int16_t pil = 0;

        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                                slba, apptag, appmask, &reftag);
        if (status) {
            req->status = status;
            goto out;
        }

        /*
         * When formatted with protection information, do not compare the DIF
         * tuple.
         */
        if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
            pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
        }

        for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms,
                                      mbufp += ns->lbaf.ms) {
            if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) {
                req->status = NVME_CMP_FAILURE;
                goto out;
            }
        }

        goto out;
    }

    if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) {
        req->status = NVME_CMP_FAILURE;
        goto out;
    }

    block_acct_done(stats, acct);

out:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

static void nvme_compare_data_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    BlockAcctCookie *acct = &req->acct;
    BlockAcctStats *stats = blk_get_stats(blk);

    struct nvme_compare_ctx *ctx = req->opaque;
    g_autofree uint8_t *buf = NULL;
    uint16_t status;

    trace_pci_nvme_compare_data_cb(nvme_cid(req));

    if (ret) {
        block_acct_failed(stats, acct);
        nvme_aio_err(req, ret);
        goto out;
    }

    buf = g_malloc(ctx->data.iov.size);

    status = nvme_bounce_data(n, buf, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) {
        req->status = NVME_CMP_FAILURE;
        goto out;
    }

    if (ns->lbaf.ms) {
        NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
        uint64_t slba = le64_to_cpu(rw->slba);
        uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
        size_t mlen = nvme_m2b(ns, nlb);
        uint64_t offset = nvme_moff(ns, slba);

        ctx->mdata.bounce = g_malloc(mlen);

        qemu_iovec_init(&ctx->mdata.iov, 1);
        qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

        req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                    nvme_compare_mdata_cb, req);
        return;
    }

    block_acct_done(stats, acct);

out:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);
    g_free(ctx);

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

typedef struct NvmeDSMAIOCB {
    BlockAIOCB common;
    BlockAIOCB *aiocb;
    NvmeRequest *req;
    int ret;

    NvmeDsmRange *range;
    unsigned int nr;
    unsigned int idx;
} NvmeDSMAIOCB;

static void nvme_dsm_cancel(BlockAIOCB *aiocb)
{
    NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common);

    /* break the nvme_dsm_cb loop */
    iocb->idx = iocb->nr;
    iocb->ret = -ECANCELED;

    if (iocb->aiocb) {
        blk_aio_cancel_async(iocb->aiocb);
        iocb->aiocb = NULL;
    } else {
        /*
         * We only get here if nvme_dsm_cancel() has already been called or
         * the command ran to completion.
         */
        assert(iocb->idx == iocb->nr);
    }
}

static const AIOCBInfo nvme_dsm_aiocb_info = {
    .aiocb_size = sizeof(NvmeDSMAIOCB),
    .cancel_async = nvme_dsm_cancel,
};

static void nvme_dsm_cb(void *opaque, int ret);

static void nvme_dsm_md_cb(void *opaque, int ret)
{
    NvmeDSMAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    NvmeDsmRange *range;
    uint64_t slba;
    uint32_t nlb;

    if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
        goto done;
    }

    range = &iocb->range[iocb->idx - 1];
    slba = le64_to_cpu(range->slba);
    nlb = le32_to_cpu(range->nlb);

    /*
     * Check that all the blocks were actually discarded (i.e. now read back
     * as zeroes); otherwise do not zero the metadata either.
     */
    ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO);
    if (ret) {
        if (ret < 0) {
            goto done;
        }

        nvme_dsm_cb(iocb, 0);
        return;
    }

    iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba),
                                        nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP,
                                        nvme_dsm_cb, iocb);
    return;

done:
    nvme_dsm_cb(iocb, ret);
}

static void nvme_dsm_cb(void *opaque, int ret)
{
    NvmeDSMAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeNamespace *ns = req->ns;
    NvmeDsmRange *range;
    uint64_t slba;
    uint32_t nlb;

    if (iocb->ret < 0) {
        goto done;
    } else if (ret < 0) {
        iocb->ret = ret;
        goto done;
    }

next:
    if (iocb->idx == iocb->nr) {
        goto done;
    }

    range = &iocb->range[iocb->idx++];
    slba = le64_to_cpu(range->slba);
    nlb = le32_to_cpu(range->nlb);

    trace_pci_nvme_dsm_deallocate(slba, nlb);

    if (nlb > n->dmrsl) {
        trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
        goto next;
    }

    if (nvme_check_bounds(ns, slba, nlb)) {
        trace_pci_nvme_err_invalid_lba_range(slba, nlb,
                                             ns->id_ns.nsze);
        goto next;
    }

    iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba),
                                   nvme_l2b(ns, nlb),
                                   nvme_dsm_md_cb, iocb);
    return;

done:
    iocb->aiocb = NULL;
    iocb->common.cb(iocb->common.opaque, iocb->ret);
    qemu_aio_unref(iocb);
}
2456
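/*
 * Dataset Management entry point. Only the Deallocate (AD) attribute is
 * acted upon; the range list is copied from the host and processed
 * asynchronously through the AIOCB machinery above.
 */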
static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
    uint32_t attr = le32_to_cpu(dsm->attributes);
    uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
    uint16_t status = NVME_SUCCESS;

    trace_pci_nvme_dsm(nr, attr);

    if (attr & NVME_DSMGMT_AD) {
        NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk,
                                         nvme_misc_cb, req);

        iocb->req = req;
        iocb->ret = 0;
        iocb->range = g_new(NvmeDsmRange, nr);
        iocb->nr = nr;
        iocb->idx = 0;

        status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr,
                          req);
        if (status) {
            /* do not leak the AIOCB and its range list on a transfer error */
            g_free(iocb->range);
            qemu_aio_unref(iocb);

            return status;
        }

        req->aiocb = &iocb->common;
        nvme_dsm_cb(iocb, 0);

        return NVME_NO_COMPLETE;
    }

    return status;
}

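/*
 * Verify reads the range into a bounce buffer and checks any end-to-end
 * protection information in the completion path (nvme_verify_mdata_in_cb()).
 * The transfer size is limited by the vsl device parameter rather than MDTS.
 */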
static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t len = nvme_l2b(ns, nlb);
    int64_t offset = nvme_l2b(ns, slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint32_t reftag = le32_to_cpu(rw->reftag);
    NvmeBounceContext *ctx = NULL;
    uint16_t status;

    trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb);

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        status = nvme_check_prinfo(ns, prinfo, slba, reftag);
        if (status) {
            return status;
        }

        if (prinfo & NVME_PRINFO_PRACT) {
            return NVME_INVALID_PROT_INFO | NVME_DNR;
        }
    }

    if (len > n->page_size << n->params.vsl) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    status = nvme_check_bounds(ns, slba, nlb);
    if (status) {
        return status;
    }

    if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
        status = nvme_check_dulbe(ns, slba, nlb);
        if (status) {
            return status;
        }
    }

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_READ);

    req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                nvme_verify_mdata_in_cb, ctx);
    return NVME_NO_COMPLETE;
}

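/*
 * Per-command state for Copy. The bounce buffer holds one source range of
 * data followed by its metadata; slba and reftag track the current write
 * position and the expected initial reference tag at the destination.
 */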
typedef struct NvmeCopyAIOCB {
    BlockAIOCB common;
    BlockAIOCB *aiocb;
    NvmeRequest *req;
    int ret;

    void *ranges;
    unsigned int format;
    int nr;
    int idx;

    uint8_t *bounce;
    QEMUIOVector iov;
    struct {
        BlockAcctCookie read;
        BlockAcctCookie write;
    } acct;

    uint64_t reftag;
    uint64_t slba;

    NvmeZone *zone;
} NvmeCopyAIOCB;

static void nvme_copy_cancel(BlockAIOCB *aiocb)
{
    NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common);

    iocb->ret = -ECANCELED;

    if (iocb->aiocb) {
        blk_aio_cancel_async(iocb->aiocb);
        iocb->aiocb = NULL;
    }
}

static const AIOCBInfo nvme_copy_aiocb_info = {
    .aiocb_size = sizeof(NvmeCopyAIOCB),
    .cancel_async = nvme_copy_cancel,
};

static void nvme_copy_done(NvmeCopyAIOCB *iocb)
{
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk);

    if (iocb->idx != iocb->nr) {
        req->cqe.result = cpu_to_le32(iocb->idx);
    }

    qemu_iovec_destroy(&iocb->iov);
    g_free(iocb->bounce);

    if (iocb->ret < 0) {
        block_acct_failed(stats, &iocb->acct.read);
        block_acct_failed(stats, &iocb->acct.write);
    } else {
        block_acct_done(stats, &iocb->acct.read);
        block_acct_done(stats, &iocb->acct.write);
    }

    iocb->common.cb(iocb->common.opaque, iocb->ret);
    qemu_aio_unref(iocb);
}

static void nvme_do_copy(NvmeCopyAIOCB *iocb);

static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
                                                 uint64_t *slba, uint32_t *nlb,
                                                 uint16_t *apptag,
                                                 uint16_t *appmask,
                                                 uint64_t *reftag)
{
    NvmeCopySourceRangeFormat0 *_ranges = ranges;

    if (slba) {
        *slba = le64_to_cpu(_ranges[idx].slba);
    }

    if (nlb) {
        *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
    }

    if (apptag) {
        *apptag = le16_to_cpu(_ranges[idx].apptag);
    }

    if (appmask) {
        *appmask = le16_to_cpu(_ranges[idx].appmask);
    }

    if (reftag) {
        *reftag = le32_to_cpu(_ranges[idx].reftag);
    }
}

static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
                                                 uint64_t *slba, uint32_t *nlb,
                                                 uint16_t *apptag,
                                                 uint16_t *appmask,
                                                 uint64_t *reftag)
{
    NvmeCopySourceRangeFormat1 *_ranges = ranges;

    if (slba) {
        *slba = le64_to_cpu(_ranges[idx].slba);
    }

    if (nlb) {
        *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
    }

    if (apptag) {
        *apptag = le16_to_cpu(_ranges[idx].apptag);
    }

    if (appmask) {
        *appmask = le16_to_cpu(_ranges[idx].appmask);
    }

    if (reftag) {
        *reftag = 0;

        /* the 48-bit reference tag is stored big-endian in sr[4..9] */
        *reftag |= (uint64_t)_ranges[idx].sr[4] << 40;
        *reftag |= (uint64_t)_ranges[idx].sr[5] << 32;
        *reftag |= (uint64_t)_ranges[idx].sr[6] << 24;
        *reftag |= (uint64_t)_ranges[idx].sr[7] << 16;
        *reftag |= (uint64_t)_ranges[idx].sr[8] << 8;
        *reftag |= (uint64_t)_ranges[idx].sr[9];
    }
}

static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
                                         uint64_t *slba, uint32_t *nlb,
                                         uint16_t *apptag, uint16_t *appmask,
                                         uint64_t *reftag)
{
    switch (format) {
    case NVME_COPY_FORMAT_0:
        nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag,
                                             appmask, reftag);
        break;

    case NVME_COPY_FORMAT_1:
        nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag,
                                             appmask, reftag);
        break;

    default:
        abort();
    }
}

static void nvme_copy_out_completed_cb(void *opaque, int ret)
{
    NvmeCopyAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    uint32_t nlb;

    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
                                 &nlb, NULL, NULL, NULL);

    if (ret < 0) {
        iocb->ret = ret;
        goto out;
    } else if (iocb->ret < 0) {
        goto out;
    }

    if (ns->params.zoned) {
        nvme_advance_zone_wp(ns, iocb->zone, nlb);
    }

    iocb->idx++;
    iocb->slba += nlb;
out:
    nvme_do_copy(iocb);
}

static void nvme_copy_out_cb(void *opaque, int ret)
{
    NvmeCopyAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    uint32_t nlb;
    size_t mlen;
    uint8_t *mbounce;

    if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
        goto out;
    }

    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
                                 &nlb, NULL, NULL, NULL);

    mlen = nvme_m2b(ns, nlb);
    mbounce = iocb->bounce + nvme_l2b(ns, nlb);

    qemu_iovec_reset(&iocb->iov);
    qemu_iovec_add(&iocb->iov, mbounce, mlen);

    iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba),
                                  &iocb->iov, 0, nvme_copy_out_completed_cb,
                                  iocb);

    return;

out:
    nvme_copy_out_completed_cb(iocb, ret);
}

static void nvme_copy_in_completed_cb(void *opaque, int ret)
{
    NvmeCopyAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    uint32_t nlb;
    uint64_t slba;
    uint16_t apptag, appmask;
    uint64_t reftag;
    size_t len;
    uint16_t status;

    if (ret < 0) {
        iocb->ret = ret;
        goto out;
    } else if (iocb->ret < 0) {
        goto out;
    }

    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
                                 &nlb, &apptag, &appmask, &reftag);
    len = nvme_l2b(ns, nlb);

    trace_pci_nvme_copy_out(iocb->slba, nlb);

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;

        uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
        uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);

        size_t mlen = nvme_m2b(ns, nlb);
        uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);

        status = nvme_dif_mangle_mdata(ns, mbounce, mlen, slba);
        if (status) {
            goto invalid;
        }
        status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor,
                                slba, apptag, appmask, &reftag);
        if (status) {
            goto invalid;
        }

        apptag = le16_to_cpu(copy->apptag);
        appmask = le16_to_cpu(copy->appmask);

        if (prinfow & NVME_PRINFO_PRACT) {
            status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag);
            if (status) {
                goto invalid;
            }

            nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen,
                                        apptag, &iocb->reftag);
        } else {
            status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen,
                                    prinfow, iocb->slba, apptag, appmask,
                                    &iocb->reftag);
            if (status) {
                goto invalid;
            }
        }
    }

    status = nvme_check_bounds(ns, iocb->slba, nlb);
    if (status) {
        goto invalid;
    }

    if (ns->params.zoned) {
        status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
        if (status) {
            goto invalid;
        }

        if (!(iocb->zone->d.za & NVME_ZA_ZRWA_VALID)) {
            iocb->zone->w_ptr += nlb;
        }
    }

    qemu_iovec_reset(&iocb->iov);
    qemu_iovec_add(&iocb->iov, iocb->bounce, len);

    iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba),
                                  &iocb->iov, 0, nvme_copy_out_cb, iocb);

    return;

invalid:
    req->status = status;
    iocb->ret = -1;
out:
    nvme_do_copy(iocb);
}

static void nvme_copy_in_cb(void *opaque, int ret)
{
    NvmeCopyAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    uint64_t slba;
    uint32_t nlb;

    if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
        goto out;
    }

    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
                                 &nlb, NULL, NULL, NULL);

    qemu_iovec_reset(&iocb->iov);
    qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb),
                   nvme_m2b(ns, nlb));

    iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba),
                                 &iocb->iov, 0, nvme_copy_in_completed_cb,
                                 iocb);
    return;

out:
    nvme_copy_in_completed_cb(iocb, ret);
}

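/*
 * Main loop of the copy state machine. Each source range passes through
 * nvme_copy_in_cb() (metadata read), nvme_copy_in_completed_cb() (protection
 * information checks and data write), nvme_copy_out_cb() (metadata write)
 * and nvme_copy_out_completed_cb(), which advances to the next range by
 * calling back into this function.
 */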
static void nvme_do_copy(NvmeCopyAIOCB *iocb)
{
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    uint64_t slba;
    uint32_t nlb;
    size_t len;
    uint16_t status;

    if (iocb->ret < 0) {
        goto done;
    }

    if (iocb->idx == iocb->nr) {
        goto done;
    }

    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
                                 &nlb, NULL, NULL, NULL);
    len = nvme_l2b(ns, nlb);

    trace_pci_nvme_copy_source_range(slba, nlb);

    if (nlb > le16_to_cpu(ns->id_ns.mssrl)) {
        status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
        goto invalid;
    }

    status = nvme_check_bounds(ns, slba, nlb);
    if (status) {
        goto invalid;
    }

    if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
        status = nvme_check_dulbe(ns, slba, nlb);
        if (status) {
            goto invalid;
        }
    }

    if (ns->params.zoned) {
        status = nvme_check_zone_read(ns, slba, nlb);
        if (status) {
            goto invalid;
        }
    }

    qemu_iovec_reset(&iocb->iov);
    qemu_iovec_add(&iocb->iov, iocb->bounce, len);

    iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
                                 &iocb->iov, 0, nvme_copy_in_cb, iocb);
    return;

invalid:
    req->status = status;
    iocb->ret = -1;
done:
    nvme_copy_done(iocb);
}

static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
    NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk,
                                      nvme_misc_cb, req);
    uint16_t nr = copy->nr + 1;
    uint8_t format = copy->control[0] & 0xf;
    uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
    uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
    size_t len = sizeof(NvmeCopySourceRangeFormat0);

    uint16_t status;

    trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);

    iocb->ranges = NULL;
    iocb->zone = NULL;

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
        ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
        status = NVME_INVALID_FIELD | NVME_DNR;
        goto invalid;
    }

    if (!(n->id_ctrl.ocfs & (1 << format))) {
        trace_pci_nvme_err_copy_invalid_format(format);
        status = NVME_INVALID_FIELD | NVME_DNR;
        goto invalid;
    }

    if (nr > ns->id_ns.msrc + 1) {
        status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
        goto invalid;
    }

    if ((ns->pif == 0x0 && format != 0x0) ||
        (ns->pif != 0x0 && format != 0x1)) {
        status = NVME_INVALID_FORMAT | NVME_DNR;
        goto invalid;
    }

    if (ns->pif) {
        len = sizeof(NvmeCopySourceRangeFormat1);
    }

    iocb->format = format;
    iocb->ranges = g_malloc_n(nr, len);
    status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req);
    if (status) {
        goto invalid;
    }

    iocb->slba = le64_to_cpu(copy->sdlba);

    if (ns->params.zoned) {
        iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba);
        if (!iocb->zone) {
            status = NVME_LBA_RANGE | NVME_DNR;
            goto invalid;
        }

        status = nvme_zrm_auto(n, ns, iocb->zone);
        if (status) {
            goto invalid;
        }
    }

    iocb->req = req;
    iocb->ret = 0;
    iocb->nr = nr;
    iocb->idx = 0;
    iocb->reftag = le32_to_cpu(copy->reftag);
    iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
    iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
                              ns->lbasz + ns->lbaf.ms);

    qemu_iovec_init(&iocb->iov, 1);

    block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0,
                     BLOCK_ACCT_READ);
    block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0,
                     BLOCK_ACCT_WRITE);

    req->aiocb = &iocb->common;
    nvme_do_copy(iocb);

    return NVME_NO_COMPLETE;

invalid:
    g_free(iocb->ranges);
    qemu_aio_unref(iocb);
    return status;
}

static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    size_t data_len = nvme_l2b(ns, nlb);
    size_t len = data_len;
    int64_t offset = nvme_l2b(ns, slba);
    struct nvme_compare_ctx *ctx = NULL;
    uint16_t status;

    trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    if (nvme_ns_ext(ns)) {
        len += nvme_m2b(ns, nlb);
    }

    status = nvme_check_mdts(n, len);
    if (status) {
        return status;
    }

    status = nvme_check_bounds(ns, slba, nlb);
    if (status) {
        return status;
    }

    if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
        status = nvme_check_dulbe(ns, slba, nlb);
        if (status) {
            return status;
        }
    }

    status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
    if (status) {
        return status;
    }

    ctx = g_new(struct nvme_compare_ctx, 1);
    ctx->data.bounce = g_malloc(data_len);

    req->opaque = ctx;

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);

    block_acct_start(blk_get_stats(blk), &req->acct, data_len,
                     BLOCK_ACCT_READ);
    req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
                                nvme_compare_data_cb, req);

    return NVME_NO_COMPLETE;
}

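/*
 * Flush state. For the broadcast NSID, nvme_do_flush() locates the next
 * attached namespace and nvme_flush_ns_cb() flushes one namespace per
 * iteration until none remain.
 */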
typedef struct NvmeFlushAIOCB {
    BlockAIOCB common;
    BlockAIOCB *aiocb;
    NvmeRequest *req;
    int ret;

    NvmeNamespace *ns;
    uint32_t nsid;
    bool broadcast;
} NvmeFlushAIOCB;

static void nvme_flush_cancel(BlockAIOCB *acb)
{
    NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common);

    iocb->ret = -ECANCELED;

    if (iocb->aiocb) {
        blk_aio_cancel_async(iocb->aiocb);
        iocb->aiocb = NULL;
    }
}

static const AIOCBInfo nvme_flush_aiocb_info = {
    .aiocb_size = sizeof(NvmeFlushAIOCB),
    .cancel_async = nvme_flush_cancel,
    .get_aio_context = nvme_get_aio_context,
};

static void nvme_do_flush(NvmeFlushAIOCB *iocb);

static void nvme_flush_ns_cb(void *opaque, int ret)
{
    NvmeFlushAIOCB *iocb = opaque;
    NvmeNamespace *ns = iocb->ns;

    if (ret < 0) {
        iocb->ret = ret;
        goto out;
    } else if (iocb->ret < 0) {
        goto out;
    }

    if (ns) {
        trace_pci_nvme_flush_ns(iocb->nsid);

        iocb->ns = NULL;
        iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb);
        return;
    }

out:
    nvme_do_flush(iocb);
}

static void nvme_do_flush(NvmeFlushAIOCB *iocb)
{
    NvmeRequest *req = iocb->req;
    NvmeCtrl *n = nvme_ctrl(req);
    int i;

    if (iocb->ret < 0) {
        goto done;
    }

    if (iocb->broadcast) {
        for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
            iocb->ns = nvme_ns(n, i);
            if (iocb->ns) {
                iocb->nsid = i;
                break;
            }
        }
    }

    if (!iocb->ns) {
        goto done;
    }

    nvme_flush_ns_cb(iocb, 0);
    return;

done:
    iocb->common.cb(iocb->common.opaque, iocb->ret);
    qemu_aio_unref(iocb);
}

static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeFlushAIOCB *iocb;
    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
    uint16_t status;

    iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req);

    iocb->req = req;
    iocb->ret = 0;
    iocb->ns = NULL;
    iocb->nsid = 0;
    iocb->broadcast = (nsid == NVME_NSID_BROADCAST);

    if (!iocb->broadcast) {
        if (!nvme_nsid_valid(n, nsid)) {
            status = NVME_INVALID_NSID | NVME_DNR;
            goto out;
        }

        iocb->ns = nvme_ns(n, nsid);
        if (!iocb->ns) {
            status = NVME_INVALID_FIELD | NVME_DNR;
            goto out;
        }

        iocb->nsid = nsid;
    }

    req->aiocb = &iocb->common;
    nvme_do_flush(iocb);

    return NVME_NO_COMPLETE;

out:
    qemu_aio_unref(iocb);

    return status;
}

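/*
 * Read path. For extended (interleaved metadata) namespaces the mapped
 * transfer includes the metadata, except when PRACT is set and the metadata
 * consists solely of the protection information tuple, in which case the PI
 * is stripped by the controller and not transferred to the host.
 */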
static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint64_t data_size = nvme_l2b(ns, nlb);
    uint64_t mapped_size = data_size;
    uint64_t data_offset;
    BlockBackend *blk = ns->blkconf.blk;
    uint16_t status;

    if (nvme_ns_ext(ns)) {
        mapped_size += nvme_m2b(ns, nlb);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
            bool pract = prinfo & NVME_PRINFO_PRACT;

            if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
                mapped_size = data_size;
            }
        }
    }

    trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba);

    status = nvme_check_mdts(n, mapped_size);
    if (status) {
        goto invalid;
    }

    status = nvme_check_bounds(ns, slba, nlb);
    if (status) {
        goto invalid;
    }

    if (ns->params.zoned) {
        status = nvme_check_zone_read(ns, slba, nlb);
        if (status) {
            trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status);
            goto invalid;
        }
    }

    if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
        status = nvme_check_dulbe(ns, slba, nlb);
        if (status) {
            goto invalid;
        }
    }

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        return nvme_dif_rw(n, req);
    }

    status = nvme_map_data(n, nlb, req);
    if (status) {
        goto invalid;
    }

    data_offset = nvme_l2b(ns, slba);

    block_acct_start(blk_get_stats(blk), &req->acct, data_size,
                     BLOCK_ACCT_READ);
    nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
    return NVME_NO_COMPLETE;

invalid:
    block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
    return status | NVME_DNR;
}

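/*
 * Common implementation of Write, Write Zeroes (wrz) and Zone Append
 * (append). For appends, the effective SLBA is the current zone write
 * pointer and is returned to the host through the completion entry.
 */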
static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
                              bool wrz)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
    uint16_t ctrl = le16_to_cpu(rw->control);
    uint8_t prinfo = NVME_RW_PRINFO(ctrl);
    uint64_t data_size = nvme_l2b(ns, nlb);
    uint64_t mapped_size = data_size;
    uint64_t data_offset;
    NvmeZone *zone;
    NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe;
    BlockBackend *blk = ns->blkconf.blk;
    uint16_t status;

    if (nvme_ns_ext(ns)) {
        mapped_size += nvme_m2b(ns, nlb);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
            bool pract = prinfo & NVME_PRINFO_PRACT;

            if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
                mapped_size -= nvme_m2b(ns, nlb);
            }
        }
    }

    trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
                         nvme_nsid(ns), nlb, mapped_size, slba);

    if (!wrz) {
        status = nvme_check_mdts(n, mapped_size);
        if (status) {
            goto invalid;
        }
    }

    status = nvme_check_bounds(ns, slba, nlb);
    if (status) {
        goto invalid;
    }

    if (ns->params.zoned) {
        zone = nvme_get_zone_by_slba(ns, slba);
        assert(zone);

        if (append) {
            bool piremap = !!(ctrl & NVME_RW_PIREMAP);

            if (unlikely(zone->d.za & NVME_ZA_ZRWA_VALID)) {
                return NVME_INVALID_ZONE_OP | NVME_DNR;
            }

            if (unlikely(slba != zone->d.zslba)) {
                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
                status = NVME_INVALID_FIELD;
                goto invalid;
            }

            if (n->params.zasl &&
                data_size > (uint64_t)n->page_size << n->params.zasl) {
                trace_pci_nvme_err_zasl(data_size);
                return NVME_INVALID_FIELD | NVME_DNR;
            }

            slba = zone->w_ptr;
            rw->slba = cpu_to_le64(slba);
            res->slba = cpu_to_le64(slba);

            switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
            case NVME_ID_NS_DPS_TYPE_1:
                if (!piremap) {
                    return NVME_INVALID_PROT_INFO | NVME_DNR;
                }

                /* fallthrough */

            case NVME_ID_NS_DPS_TYPE_2:
                if (piremap) {
                    uint32_t reftag = le32_to_cpu(rw->reftag);
                    rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba));
                }

                break;

            case NVME_ID_NS_DPS_TYPE_3:
                if (piremap) {
                    return NVME_INVALID_PROT_INFO | NVME_DNR;
                }

                break;
            }
        }

        status = nvme_check_zone_write(ns, zone, slba, nlb);
        if (status) {
            goto invalid;
        }

        status = nvme_zrm_auto(n, ns, zone);
        if (status) {
            goto invalid;
        }

        if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
            zone->w_ptr += nlb;
        }
    }

    data_offset = nvme_l2b(ns, slba);

    if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
        return nvme_dif_rw(n, req);
    }

    if (!wrz) {
        status = nvme_map_data(n, nlb, req);
        if (status) {
            goto invalid;
        }

        block_acct_start(blk_get_stats(blk), &req->acct, data_size,
                         BLOCK_ACCT_WRITE);
        nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
    } else {
        req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
                                           BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
                                           req);
    }

    return NVME_NO_COMPLETE;

invalid:
    block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
    return status | NVME_DNR;
}

static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
{
    return nvme_do_write(n, req, false, false);
}

static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
{
    return nvme_do_write(n, req, false, true);
}

static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
{
    return nvme_do_write(n, req, true, false);
}

static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
                                            uint64_t *slba, uint32_t *zone_idx)
{
    uint32_t dw10 = le32_to_cpu(c->cdw10);
    uint32_t dw11 = le32_to_cpu(c->cdw11);

    if (!ns->params.zoned) {
        trace_pci_nvme_err_invalid_opc(c->opcode);
        return NVME_INVALID_OPCODE | NVME_DNR;
    }

    *slba = ((uint64_t)dw11) << 32 | dw10;
    if (unlikely(*slba >= ns->id_ns.nsze)) {
        trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze);
        *slba = 0;
        return NVME_LBA_RANGE | NVME_DNR;
    }

    *zone_idx = nvme_zone_idx(ns, *slba);
    assert(*zone_idx < ns->num_zones);

    return NVME_SUCCESS;
}

typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState,
                                 NvmeRequest *);

enum NvmeZoneProcessingMask {
    NVME_PROC_CURRENT_ZONE    = 0,
    NVME_PROC_OPENED_ZONES    = 1 << 0,
    NVME_PROC_CLOSED_ZONES    = 1 << 1,
    NVME_PROC_READ_ONLY_ZONES = 1 << 2,
    NVME_PROC_FULL_ZONES      = 1 << 3,
};

static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
                               NvmeZoneState state, NvmeRequest *req)
{
    NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
    int flags = 0;

    if (cmd->zsflags & NVME_ZSFLAG_ZRWA_ALLOC) {
        uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);

        if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
            return NVME_INVALID_ZONE_OP | NVME_DNR;
        }

        if (zone->w_ptr % ns->zns.zrwafg) {
            return NVME_NOZRWA | NVME_DNR;
        }

        flags = NVME_ZRM_ZRWA;
    }

    return nvme_zrm_open_flags(nvme_ctrl(req), ns, zone, flags);
}

static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
                                NvmeZoneState state, NvmeRequest *req)
{
    return nvme_zrm_close(ns, zone);
}

static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
                                 NvmeZoneState state, NvmeRequest *req)
{
    return nvme_zrm_finish(ns, zone);
}

static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
                                  NvmeZoneState state, NvmeRequest *req)
{
    switch (state) {
    case NVME_ZONE_STATE_READ_ONLY:
        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE);
        /* fall through */
    case NVME_ZONE_STATE_OFFLINE:
        return NVME_SUCCESS;
    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }
}

static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone)
{
    uint16_t status;
    uint8_t state = nvme_get_zone_state(zone);

    if (state == NVME_ZONE_STATE_EMPTY) {
        status = nvme_aor_check(ns, 1, 0);
        if (status) {
            return status;
        }
        nvme_aor_inc_active(ns);
        zone->d.za |= NVME_ZA_ZD_EXT_VALID;
        nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
        return NVME_SUCCESS;
    }

    return NVME_ZONE_INVAL_TRANSITION;
}

static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
                                    enum NvmeZoneProcessingMask proc_mask,
                                    op_handler_t op_hndlr, NvmeRequest *req)
{
    uint16_t status = NVME_SUCCESS;
    NvmeZoneState zs = nvme_get_zone_state(zone);
    bool proc_zone;

    switch (zs) {
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        proc_zone = proc_mask & NVME_PROC_OPENED_ZONES;
        break;
    case NVME_ZONE_STATE_CLOSED:
        proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES;
        break;
    case NVME_ZONE_STATE_READ_ONLY:
        proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES;
        break;
    case NVME_ZONE_STATE_FULL:
        proc_zone = proc_mask & NVME_PROC_FULL_ZONES;
        break;
    default:
        proc_zone = false;
    }

    if (proc_zone) {
        status = op_hndlr(ns, zone, zs, req);
    }

    return status;
}

static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
                                enum NvmeZoneProcessingMask proc_mask,
                                op_handler_t op_hndlr, NvmeRequest *req)
{
    NvmeZone *next;
    uint16_t status = NVME_SUCCESS;
    int i;

    if (!proc_mask) {
        status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req);
    } else {
        if (proc_mask & NVME_PROC_CLOSED_ZONES) {
            QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
                status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
                                             req);
                if (status && status != NVME_NO_COMPLETE) {
                    goto out;
                }
            }
        }
        if (proc_mask & NVME_PROC_OPENED_ZONES) {
            QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
                status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
                                             req);
                if (status && status != NVME_NO_COMPLETE) {
                    goto out;
                }
            }

            QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
                status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
                                             req);
                if (status && status != NVME_NO_COMPLETE) {
                    goto out;
                }
            }
        }
        if (proc_mask & NVME_PROC_FULL_ZONES) {
            QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) {
                status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
                                             req);
                if (status && status != NVME_NO_COMPLETE) {
                    goto out;
                }
            }
        }

        if (proc_mask & NVME_PROC_READ_ONLY_ZONES) {
            for (i = 0; i < ns->num_zones; i++, zone++) {
                status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
                                             req);
                if (status && status != NVME_NO_COMPLETE) {
                    goto out;
                }
            }
        }
    }

out:
    return status;
}

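/*
 * Zone reset is asynchronous: for each affected zone, the data is zeroed in
 * nvme_zone_reset_cb() and the metadata in nvme_zone_reset_epilogue_cb()
 * before the zone state machine is reset.
 */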
typedef struct NvmeZoneResetAIOCB {
    BlockAIOCB common;
    BlockAIOCB *aiocb;
    NvmeRequest *req;
    int ret;

    bool all;
    int idx;
    NvmeZone *zone;
} NvmeZoneResetAIOCB;

static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
{
    NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;

    iocb->idx = ns->num_zones;

    iocb->ret = -ECANCELED;

    if (iocb->aiocb) {
        blk_aio_cancel_async(iocb->aiocb);
        iocb->aiocb = NULL;
    }
}

static const AIOCBInfo nvme_zone_reset_aiocb_info = {
    .aiocb_size = sizeof(NvmeZoneResetAIOCB),
    .cancel_async = nvme_zone_reset_cancel,
};

static void nvme_zone_reset_cb(void *opaque, int ret);

static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
{
    NvmeZoneResetAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;
    int64_t moff;
    int count;

    if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
        goto out;
    }

    moff = nvme_moff(ns, iocb->zone->d.zslba);
    count = nvme_m2b(ns, ns->zone_size);

    iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
                                        BDRV_REQ_MAY_UNMAP,
                                        nvme_zone_reset_cb, iocb);
    return;

out:
    nvme_zone_reset_cb(iocb, ret);
}

static void nvme_zone_reset_cb(void *opaque, int ret)
{
    NvmeZoneResetAIOCB *iocb = opaque;
    NvmeRequest *req = iocb->req;
    NvmeNamespace *ns = req->ns;

    if (iocb->ret < 0) {
        goto done;
    } else if (ret < 0) {
        iocb->ret = ret;
        goto done;
    }

    if (iocb->zone) {
        nvme_zrm_reset(ns, iocb->zone);

        if (!iocb->all) {
            goto done;
        }
    }

    while (iocb->idx < ns->num_zones) {
        NvmeZone *zone = &ns->zone_array[iocb->idx++];

        switch (nvme_get_zone_state(zone)) {
        case NVME_ZONE_STATE_EMPTY:
            if (!iocb->all) {
                goto done;
            }

            continue;

        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        case NVME_ZONE_STATE_CLOSED:
        case NVME_ZONE_STATE_FULL:
            iocb->zone = zone;
            break;

        default:
            continue;
        }

        trace_pci_nvme_zns_zone_reset(zone->d.zslba);

        iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
                                            nvme_l2b(ns, zone->d.zslba),
                                            nvme_l2b(ns, ns->zone_size),
                                            BDRV_REQ_MAY_UNMAP,
                                            nvme_zone_reset_epilogue_cb,
                                            iocb);
        return;
    }

done:
    iocb->aiocb = NULL;

    iocb->common.cb(iocb->common.opaque, iocb->ret);
    qemu_aio_unref(iocb);
}

static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone,
                                               uint64_t elba, NvmeRequest *req)
{
    NvmeNamespace *ns = req->ns;
    uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
    uint64_t wp = zone->d.wp;
    uint32_t nlb = elba - wp + 1;
    uint16_t status;

    if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
        return NVME_INVALID_ZONE_OP | NVME_DNR;
    }

    if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    if (elba < wp || elba > wp + ns->zns.zrwas) {
        return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR;
    }

    if (nlb % ns->zns.zrwafg) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    status = nvme_zrm_auto(n, ns, zone);
    if (status) {
        return status;
    }

    zone->w_ptr += nlb;

    nvme_advance_zone_wp(ns, zone, nlb);

    return NVME_SUCCESS;
}

static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    NvmeZone *zone;
    NvmeZoneResetAIOCB *iocb;
    uint8_t *zd_ext;
    uint64_t slba = 0;
    uint32_t zone_idx = 0;
    uint16_t status;
    uint8_t action = cmd->zsa;
    bool all;
    enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;

    all = cmd->zsflags & NVME_ZSFLAG_SELECT_ALL;

    req->status = NVME_SUCCESS;

    if (!all) {
        status = nvme_get_mgmt_zone_slba_idx(ns, &req->cmd, &slba, &zone_idx);
        if (status) {
            return status;
        }
    }

    zone = &ns->zone_array[zone_idx];
    if (slba != zone->d.zslba && action != NVME_ZONE_ACTION_ZRWA_FLUSH) {
        trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba);
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    switch (action) {

    case NVME_ZONE_ACTION_OPEN:
        if (all) {
            proc_mask = NVME_PROC_CLOSED_ZONES;
        }
        trace_pci_nvme_open_zone(slba, zone_idx, all);
        status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req);
        break;

    case NVME_ZONE_ACTION_CLOSE:
        if (all) {
            proc_mask = NVME_PROC_OPENED_ZONES;
        }
        trace_pci_nvme_close_zone(slba, zone_idx, all);
        status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req);
        break;

    case NVME_ZONE_ACTION_FINISH:
        if (all) {
            proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES;
        }
        trace_pci_nvme_finish_zone(slba, zone_idx, all);
        status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req);
        break;

    case NVME_ZONE_ACTION_RESET:
        trace_pci_nvme_reset_zone(slba, zone_idx, all);

        iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
                           nvme_misc_cb, req);

        iocb->req = req;
        iocb->ret = 0;
        iocb->all = all;
        iocb->idx = zone_idx;
        iocb->zone = NULL;

        req->aiocb = &iocb->common;
        nvme_zone_reset_cb(iocb, 0);

        return NVME_NO_COMPLETE;

    case NVME_ZONE_ACTION_OFFLINE:
        if (all) {
            proc_mask = NVME_PROC_READ_ONLY_ZONES;
        }
        trace_pci_nvme_offline_zone(slba, zone_idx, all);
        status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req);
        break;

    case NVME_ZONE_ACTION_SET_ZD_EXT:
        trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
        if (all || !ns->params.zd_extension_size) {
            return NVME_INVALID_FIELD | NVME_DNR;
        }
        zd_ext = nvme_get_zd_extension(ns, zone_idx);
        status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req);
        if (status) {
            trace_pci_nvme_err_zd_extension_map_error(zone_idx);
            return status;
        }

        status = nvme_set_zd_ext(ns, zone);
        if (status == NVME_SUCCESS) {
            trace_pci_nvme_zd_extension_set(zone_idx);
            return status;
        }
        break;

    case NVME_ZONE_ACTION_ZRWA_FLUSH:
        if (all) {
            return NVME_INVALID_FIELD | NVME_DNR;
        }

        return nvme_zone_mgmt_send_zrwa_flush(n, zone, slba, req);

    default:
        trace_pci_nvme_err_invalid_mgmt_action(action);
        status = NVME_INVALID_FIELD;
    }

    if (status == NVME_ZONE_INVAL_TRANSITION) {
        trace_pci_nvme_err_invalid_zone_state_transition(action, slba,
                                                         zone->d.za);
    }
    if (status) {
        status |= NVME_DNR;
    }

    return status;
}

static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl)
{
    NvmeZoneState zs = nvme_get_zone_state(zl);

    switch (zafs) {
    case NVME_ZONE_REPORT_ALL:
        return true;
    case NVME_ZONE_REPORT_EMPTY:
        return zs == NVME_ZONE_STATE_EMPTY;
    case NVME_ZONE_REPORT_IMPLICITLY_OPEN:
        return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN;
    case NVME_ZONE_REPORT_EXPLICITLY_OPEN:
        return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN;
    case NVME_ZONE_REPORT_CLOSED:
        return zs == NVME_ZONE_STATE_CLOSED;
    case NVME_ZONE_REPORT_FULL:
        return zs == NVME_ZONE_STATE_FULL;
    case NVME_ZONE_REPORT_READ_ONLY:
        return zs == NVME_ZONE_STATE_READ_ONLY;
    case NVME_ZONE_REPORT_OFFLINE:
        return zs == NVME_ZONE_STATE_OFFLINE;
    default:
        return false;
    }
}

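/*
 * Zone Management Receive (Report Zones). A first pass counts the zones
 * matching the requested filter for the report header; a second pass fills
 * in as many zone descriptors (and descriptor extensions, for extended
 * reports) as fit in the host buffer.
 */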
static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    /* cdw12 is the zero-based number of dwords to return; convert to bytes */
    uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2;
    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
    uint32_t zone_idx, zra, zrasf, partial;
    uint64_t max_zones, nr_zones = 0;
    uint16_t status;
    uint64_t slba;
    NvmeZoneDescr *z;
    NvmeZone *zone;
    NvmeZoneReportHeader *header;
    void *buf, *buf_p;
    size_t zone_entry_sz;
    int i;

    req->status = NVME_SUCCESS;

    status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx);
    if (status) {
        return status;
    }

    zra = dw13 & 0xff;
    if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    zrasf = (dw13 >> 8) & 0xff;
    if (zrasf > NVME_ZONE_REPORT_OFFLINE) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    if (data_size < sizeof(NvmeZoneReportHeader)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    status = nvme_check_mdts(n, data_size);
    if (status) {
        return status;
    }

    partial = (dw13 >> 16) & 0x01;

    zone_entry_sz = sizeof(NvmeZoneDescr);
    if (zra == NVME_ZONE_REPORT_EXTENDED) {
        zone_entry_sz += ns->params.zd_extension_size;
    }

    max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz;
    buf = g_malloc0(data_size);

    zone = &ns->zone_array[zone_idx];
    for (i = zone_idx; i < ns->num_zones; i++) {
        if (partial && nr_zones >= max_zones) {
            break;
        }
        if (nvme_zone_matches_filter(zrasf, zone++)) {
            nr_zones++;
        }
    }
    header = (NvmeZoneReportHeader *)buf;
    header->nr_zones = cpu_to_le64(nr_zones);

    buf_p = buf + sizeof(NvmeZoneReportHeader);
    for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) {
        zone = &ns->zone_array[zone_idx];
        if (nvme_zone_matches_filter(zrasf, zone)) {
            z = (NvmeZoneDescr *)buf_p;
            buf_p += sizeof(NvmeZoneDescr);

            z->zt = zone->d.zt;
            z->zs = zone->d.zs;
            z->zcap = cpu_to_le64(zone->d.zcap);
            z->zslba = cpu_to_le64(zone->d.zslba);
            z->za = zone->d.za;

            if (nvme_wp_is_valid(zone)) {
                z->wp = cpu_to_le64(zone->d.wp);
            } else {
                z->wp = cpu_to_le64(~0ULL);
            }

            if (zra == NVME_ZONE_REPORT_EXTENDED) {
                if (zone->d.za & NVME_ZA_ZD_EXT_VALID) {
                    memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx),
                           ns->params.zd_extension_size);
                }
                buf_p += ns->params.zd_extension_size;
            }

            max_zones--;
        }
    }

    status = nvme_c2h(n, (uint8_t *)buf, data_size, req);

    g_free(buf);

    return status;
}

static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeNamespace *ns;
    uint32_t nsid = le32_to_cpu(req->cmd.nsid);

    trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
                          req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));

    if (!nvme_nsid_valid(n, nsid)) {
        return NVME_INVALID_NSID | NVME_DNR;
    }

    /*
     * Flush is handled before the namespace is resolved: with the broadcast
     * NSID (FFFFFFFFh) the opcode cannot be associated with a single I/O
     * command set, so the command is interpreted as a flush of all attached
     * namespaces regardless of command set.
     */
    if (req->cmd.opcode == NVME_CMD_FLUSH) {
        return nvme_flush(n, req);
    }

    ns = nvme_ns(n, nsid);
    if (unlikely(!ns)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
        return NVME_INVALID_OPCODE | NVME_DNR;
    }

    if (ns->status) {
        return ns->status;
    }

    if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
        return NVME_INVALID_FIELD;
    }

    req->ns = ns;

    switch (req->cmd.opcode) {
    case NVME_CMD_WRITE_ZEROES:
        return nvme_write_zeroes(n, req);
    case NVME_CMD_ZONE_APPEND:
        return nvme_zone_append(n, req);
    case NVME_CMD_WRITE:
        return nvme_write(n, req);
    case NVME_CMD_READ:
        return nvme_read(n, req);
    case NVME_CMD_COMPARE:
        return nvme_compare(n, req);
    case NVME_CMD_DSM:
        return nvme_dsm(n, req);
    case NVME_CMD_VERIFY:
        return nvme_verify(n, req);
    case NVME_CMD_COPY:
        return nvme_copy(n, req);
    case NVME_CMD_ZONE_MGMT_SEND:
        return nvme_zone_mgmt_send(n, req);
    case NVME_CMD_ZONE_MGMT_RECV:
        return nvme_zone_mgmt_recv(n, req);
    default:
        assert(false);
    }

    return NVME_INVALID_OPCODE | NVME_DNR;
}

static void nvme_cq_notifier(EventNotifier *e)
{
    NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
    NvmeCtrl *n = cq->ctrl;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    nvme_update_cq_head(cq);

    if (cq->tail == cq->head) {
        if (cq->irq_enabled) {
            n->cq_pending--;
        }

        nvme_irq_deassert(n, cq);
    }

    qemu_bh_schedule(cq->bh);
}

static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
{
    NvmeCtrl *n = cq->ctrl;
    uint16_t offset = (cq->cqid << 3) + (1 << 2);
    int ret;

    ret = event_notifier_init(&cq->notifier, 0);
    if (ret < 0) {
        return ret;
    }

    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
    memory_region_add_eventfd(&n->iomem,
                              0x1000 + offset, 4, false, 0, &cq->notifier);

    return 0;
}

static void nvme_sq_notifier(EventNotifier *e)
{
    NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    nvme_process_sq(sq);
}

static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
{
    NvmeCtrl *n = sq->ctrl;
    uint16_t offset = sq->sqid << 3;
    int ret;

    ret = event_notifier_init(&sq->notifier, 0);
    if (ret < 0) {
        return ret;
    }

    event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
    memory_region_add_eventfd(&n->iomem,
                              0x1000 + offset, 4, false, 0, &sq->notifier);

    return 0;
}

static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
{
    uint16_t offset = sq->sqid << 3;

    n->sq[sq->sqid] = NULL;
    qemu_bh_delete(sq->bh);
    if (sq->ioeventfd_enabled) {
        memory_region_del_eventfd(&n->iomem,
                                  0x1000 + offset, 4, false, 0, &sq->notifier);
        event_notifier_set_handler(&sq->notifier, NULL);
        event_notifier_cleanup(&sq->notifier);
    }
    g_free(sq->io_req);
    if (sq->sqid) {
        g_free(sq);
    }
}

static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
    NvmeRequest *r, *next;
    NvmeSQueue *sq;
    NvmeCQueue *cq;
    uint16_t qid = le16_to_cpu(c->qid);

    if (unlikely(!qid || nvme_check_sqid(n, qid))) {
        trace_pci_nvme_err_invalid_del_sq(qid);
        return NVME_INVALID_QID | NVME_DNR;
    }

    trace_pci_nvme_del_sq(qid);

    sq = n->sq[qid];
    while (!QTAILQ_EMPTY(&sq->out_req_list)) {
        r = QTAILQ_FIRST(&sq->out_req_list);
        assert(r->aiocb);
        blk_aio_cancel(r->aiocb);
    }

    assert(QTAILQ_EMPTY(&sq->out_req_list));

    if (!nvme_check_cqid(n, sq->cqid)) {
        cq = n->cq[sq->cqid];
        QTAILQ_REMOVE(&cq->sq_list, sq, entry);

        nvme_post_cqes(cq);
        QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
            if (r->sq == sq) {
                QTAILQ_REMOVE(&cq->req_list, r, entry);
                QTAILQ_INSERT_TAIL(&sq->req_list, r, entry);
            }
        }
    }

    nvme_free_sq(sq, n);
    return NVME_SUCCESS;
}

static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
                         uint16_t sqid, uint16_t cqid, uint16_t size)
{
    int i;
    NvmeCQueue *cq;

    sq->ctrl = n;
    sq->dma_addr = dma_addr;
    sq->sqid = sqid;
    sq->size = size;
    sq->cqid = cqid;
    sq->head = sq->tail = 0;
    sq->io_req = g_new0(NvmeRequest, sq->size);

    QTAILQ_INIT(&sq->req_list);
    QTAILQ_INIT(&sq->out_req_list);
    for (i = 0; i < sq->size; i++) {
        sq->io_req[i].sq = sq;
        QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
    }

    sq->bh = qemu_bh_new(nvme_process_sq, sq);

    if (n->dbbuf_enabled) {
        sq->db_addr = n->dbbuf_dbs + (sqid << 3);
        sq->ei_addr = n->dbbuf_eis + (sqid << 3);

        if (n->params.ioeventfd && sq->sqid != 0) {
            if (!nvme_init_sq_ioeventfd(sq)) {
                sq->ioeventfd_enabled = true;
            }
        }
    }

    assert(n->cq[cqid]);
    cq = n->cq[cqid];
    QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
    n->sq[sqid] = sq;
}

static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeSQueue *sq;
    NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd;

    uint16_t cqid = le16_to_cpu(c->cqid);
    uint16_t sqid = le16_to_cpu(c->sqid);
    uint16_t qsize = le16_to_cpu(c->qsize);
    uint16_t qflags = le16_to_cpu(c->sq_flags);
    uint64_t prp1 = le64_to_cpu(c->prp1);

    trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);

    if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
        trace_pci_nvme_err_invalid_create_sq_cqid(cqid);
        return NVME_INVALID_CQID | NVME_DNR;
    }
    if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) {
        trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
        return NVME_INVALID_QID | NVME_DNR;
    }
    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
        trace_pci_nvme_err_invalid_create_sq_size(qsize);
        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
    }
    if (unlikely(prp1 & (n->page_size - 1))) {
        trace_pci_nvme_err_invalid_create_sq_addr(prp1);
        return NVME_INVALID_PRP_OFFSET | NVME_DNR;
    }
    if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
        trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    sq = g_malloc0(sizeof(*sq));
    nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
    return NVME_SUCCESS;
}

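/*
 * Accumulated block-layer accounting, used to fill in the data units and
 * host command counts of the SMART / Health Information log page.
 */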
struct nvme_stats {
    uint64_t units_read;
    uint64_t units_written;
    uint64_t read_commands;
    uint64_t write_commands;
};

static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
{
    BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);

    stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
    stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
    stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
    stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
}

static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
                                uint64_t off, NvmeRequest *req)
{
    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
    struct nvme_stats stats = { 0 };
    NvmeSmartLog smart = { 0 };
    uint32_t trans_len;
    NvmeNamespace *ns;
    time_t current_ms;

    if (off >= sizeof(smart)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    if (nsid != 0xffffffff) {
        ns = nvme_ns(n, nsid);
        if (!ns) {
            return NVME_INVALID_NSID | NVME_DNR;
        }
        nvme_set_blk_stats(ns, &stats);
    } else {
        int i;

        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
            ns = nvme_ns(n, i);
            if (!ns) {
                continue;
            }
            nvme_set_blk_stats(ns, &stats);
        }
    }

    trans_len = MIN(sizeof(smart) - off, buf_len);
    smart.critical_warning = n->smart_critical_warning;

    smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
                                                        1000));
    smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
                                                           1000));
    smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
    smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);

    smart.temperature = cpu_to_le16(n->temperature);

    if ((n->temperature >= n->features.temp_thresh_hi) ||
        (n->temperature <= n->features.temp_thresh_low)) {
        smart.critical_warning |= NVME_SMART_TEMPERATURE;
    }

    current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
    smart.power_on_hours[0] =
        cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);

    if (!rae) {
        nvme_clear_events(n, NVME_AER_TYPE_SMART);
    }

    return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req);
}

static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
                                 NvmeRequest *req)
{
    uint32_t trans_len;
    NvmeFwSlotInfoLog fw_log = {
        .afi = 0x1,
    };

    if (off >= sizeof(fw_log)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
    trans_len = MIN(sizeof(fw_log) - off, buf_len);

    return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req);
}

static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
                                uint64_t off, NvmeRequest *req)
{
    uint32_t trans_len;
    NvmeErrorLog errlog;

    if (off >= sizeof(errlog)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    if (!rae) {
        nvme_clear_events(n, NVME_AER_TYPE_ERROR);
    }

    memset(&errlog, 0x0, sizeof(errlog));
    trans_len = MIN(sizeof(errlog) - off, buf_len);

    return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req);
}

static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
                                    uint64_t off, NvmeRequest *req)
{
    uint32_t nslist[1024];
    uint32_t trans_len;
    int i = 0;
    uint32_t nsid;

    if (off >= sizeof(nslist)) {
        trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist));
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    memset(nslist, 0x0, sizeof(nslist));
    trans_len = MIN(sizeof(nslist) - off, buf_len);

    while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
            NVME_CHANGED_NSID_SIZE) {
        /*
         * If more than 1024 namespaces have changed, the first entry in the
         * log page is set to FFFFFFFFh and the remainder zeroed, as required
         * by the specification.
         */
        if (i == ARRAY_SIZE(nslist)) {
            memset(nslist, 0x0, sizeof(nslist));
            nslist[0] = 0xffffffff;
            break;
        }

        nslist[i++] = nsid;
        clear_bit(nsid, n->changed_nsids);
    }

    /*
     * Remove all remaining changed-namespace bits in case the list
     * overflowed and was truncated above.
     */
    if (nslist[0] == 0xffffffff) {
        bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE);
    }

    if (!rae) {
        nvme_clear_events(n, NVME_AER_TYPE_NOTICE);
    }

    return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req);
}

static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
                                 uint64_t off, NvmeRequest *req)
{
    NvmeEffectsLog log = {};
    const uint32_t *src_iocs = NULL;
    uint32_t trans_len;

    if (off >= sizeof(log)) {
        trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log));
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
    case NVME_CC_CSS_NVM:
        src_iocs = nvme_cse_iocs_nvm;
        /* fall through */
    case NVME_CC_CSS_ADMIN_ONLY:
        break;
    case NVME_CC_CSS_CSI:
        switch (csi) {
        case NVME_CSI_NVM:
            src_iocs = nvme_cse_iocs_nvm;
            break;
        case NVME_CSI_ZONED:
            src_iocs = nvme_cse_iocs_zoned;
            break;
        }
    }

    memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs));

    if (src_iocs) {
        memcpy(log.iocs, src_iocs, sizeof(log.iocs));
    }

    trans_len = MIN(sizeof(log) - off, buf_len);

    return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
}

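/*
 * Get Log Page. The transfer length and offset are assembled from the
 * NUMDL/NUMDU and LPOL/LPOU command fields; the offset must be dword
 * aligned.
 */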
static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeCmd *cmd = &req->cmd;

    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
    uint8_t lid = dw10 & 0xff;
    uint8_t lsp = (dw10 >> 8) & 0xf;
    uint8_t rae = (dw10 >> 15) & 0x1;
    uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24;
    uint32_t numdl, numdu;
    uint64_t off, lpol, lpou;
    size_t len;
    uint16_t status;

    numdl = (dw10 >> 16);
    numdu = (dw11 & 0xffff);
    lpol = dw12;
    lpou = dw13;

    len = (((numdu << 16) | numdl) + 1) << 2;
    off = (lpou << 32ULL) | lpol;

    if (off & 0x3) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);

    status = nvme_check_mdts(n, len);
    if (status) {
        return status;
    }

    switch (lid) {
    case NVME_LOG_ERROR_INFO:
        return nvme_error_info(n, rae, len, off, req);
    case NVME_LOG_SMART_INFO:
        return nvme_smart_info(n, rae, len, off, req);
    case NVME_LOG_FW_SLOT_INFO:
        return nvme_fw_log_info(n, len, off, req);
    case NVME_LOG_CHANGED_NSLIST:
        return nvme_changed_nslist(n, rae, len, off, req);
    case NVME_LOG_CMD_EFFECTS:
        return nvme_cmd_effects(n, csi, len, off, req);
    default:
        trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
        return NVME_INVALID_FIELD | NVME_DNR;
    }
}

static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
{
    uint16_t offset = (cq->cqid << 3) + (1 << 2);

    n->cq[cq->cqid] = NULL;
    qemu_bh_delete(cq->bh);
    if (cq->ioeventfd_enabled) {
        memory_region_del_eventfd(&n->iomem,
                                  0x1000 + offset, 4, false, 0, &cq->notifier);
        event_notifier_set_handler(&cq->notifier, NULL);
        event_notifier_cleanup(&cq->notifier);
    }
    if (msix_enabled(&n->parent_obj)) {
        msix_vector_unuse(&n->parent_obj, cq->vector);
    }
    if (cq->cqid) {
        g_free(cq);
    }
}

static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
    NvmeCQueue *cq;
    uint16_t qid = le16_to_cpu(c->qid);

    if (unlikely(!qid || nvme_check_cqid(n, qid))) {
        trace_pci_nvme_err_invalid_del_cq_cqid(qid);
        return NVME_INVALID_CQID | NVME_DNR;
    }

    cq = n->cq[qid];
    if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
        trace_pci_nvme_err_invalid_del_cq_notempty(qid);
        return NVME_INVALID_QUEUE_DEL;
    }

    if (cq->irq_enabled && cq->tail != cq->head) {
        n->cq_pending--;
    }

    nvme_irq_deassert(n, cq);
    trace_pci_nvme_del_cq(qid);
    nvme_free_cq(cq, n);
    return NVME_SUCCESS;
}

static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
                         uint16_t cqid, uint16_t vector, uint16_t size,
                         uint16_t irq_enabled)
{
    if (msix_enabled(&n->parent_obj)) {
        msix_vector_use(&n->parent_obj, vector);
    }
    cq->ctrl = n;
    cq->cqid = cqid;
    cq->size = size;
    cq->dma_addr = dma_addr;
    cq->phase = 1;
    cq->irq_enabled = irq_enabled;
    cq->vector = vector;
    cq->head = cq->tail = 0;
    QTAILQ_INIT(&cq->req_list);
    QTAILQ_INIT(&cq->sq_list);
    if (n->dbbuf_enabled) {
        cq->db_addr = n->dbbuf_dbs + (cqid << 3) + (1 << 2);
        cq->ei_addr = n->dbbuf_eis + (cqid << 3) + (1 << 2);

        if (n->params.ioeventfd && cqid != 0) {
            if (!nvme_init_cq_ioeventfd(cq)) {
                cq->ioeventfd_enabled = true;
            }
        }
    }
    n->cq[cqid] = cq;
    cq->bh = qemu_bh_new(nvme_post_cqes, cq);
}

static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeCQueue *cq;
    NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd;
    uint16_t cqid = le16_to_cpu(c->cqid);
    uint16_t vector = le16_to_cpu(c->irq_vector);
    uint16_t qsize = le16_to_cpu(c->qsize);
    uint16_t qflags = le16_to_cpu(c->cq_flags);
    uint64_t prp1 = le64_to_cpu(c->prp1);

    trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
                             NVME_CQ_FLAGS_IEN(qflags) != 0);

    if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
        trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
        return NVME_INVALID_QID | NVME_DNR;
    }
    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
        trace_pci_nvme_err_invalid_create_cq_size(qsize);
        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
    }
    if (unlikely(prp1 & (n->page_size - 1))) {
        trace_pci_nvme_err_invalid_create_cq_addr(prp1);
        return NVME_INVALID_PRP_OFFSET | NVME_DNR;
    }
    if (unlikely(!msix_enabled(&n->parent_obj) && vector)) {
        trace_pci_nvme_err_invalid_create_cq_vector(vector);
        return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
    }
    if (unlikely(vector >= n->conf_msix_qsize)) {
        trace_pci_nvme_err_invalid_create_cq_vector(vector);
        return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
    }
    if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
        trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    cq = g_malloc0(sizeof(*cq));
    nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
4734 NVME_CQ_FLAGS_IEN(qflags));
4735
    /*
     * It is only required to set qs_created when creating a completion
     * queue; creating a submission queue without a matching completion
     * queue will fail.
     */
4741 n->qs_created = true;
4742 return NVME_SUCCESS;
4743}
4744
4745static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
4746{
4747 uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
4748
4749 return nvme_c2h(n, id, sizeof(id), req);
4750}
4751
4752static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
4753{
4754 trace_pci_nvme_identify_ctrl();
4755
4756 return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req);
4757}
4758
4759static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
4760{
4761 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4762 uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
4763 NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id;
4764
4765 trace_pci_nvme_identify_ctrl_csi(c->csi);
4766
4767 switch (c->csi) {
4768 case NVME_CSI_NVM:
4769 id_nvm->vsl = n->params.vsl;
4770 id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
4771 break;
4772
4773 case NVME_CSI_ZONED:
4774 ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl;
4775 break;
4776
4777 default:
4778 return NVME_INVALID_FIELD | NVME_DNR;
4779 }
4780
4781 return nvme_c2h(n, id, sizeof(id), req);
4782}
4783
4784static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
4785{
4786 NvmeNamespace *ns;
4787 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4788 uint32_t nsid = le32_to_cpu(c->nsid);
4789
4790 trace_pci_nvme_identify_ns(nsid);
4791
4792 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
4793 return NVME_INVALID_NSID | NVME_DNR;
4794 }
4795
4796 ns = nvme_ns(n, nsid);
4797 if (unlikely(!ns)) {
4798 if (!active) {
4799 ns = nvme_subsys_ns(n->subsys, nsid);
4800 if (!ns) {
4801 return nvme_rpt_empty_id_struct(n, req);
4802 }
4803 } else {
4804 return nvme_rpt_empty_id_struct(n, req);
4805 }
4806 }
4807
4808 if (active || ns->csi == NVME_CSI_NVM) {
4809 return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
4810 }
4811
4812 return NVME_INVALID_CMD_SET | NVME_DNR;
4813}
4814
4815static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
4816 bool attached)
4817{
4818 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4819 uint32_t nsid = le32_to_cpu(c->nsid);
4820 uint16_t min_id = le16_to_cpu(c->ctrlid);
4821 uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
4822 uint16_t *ids = &list[1];
4823 NvmeNamespace *ns;
4824 NvmeCtrl *ctrl;
4825 int cntlid, nr_ids = 0;
4826
4827 trace_pci_nvme_identify_ctrl_list(c->cns, min_id);
4828
4829 if (!n->subsys) {
4830 return NVME_INVALID_FIELD | NVME_DNR;
4831 }
4832
4833 if (attached) {
4834 if (nsid == NVME_NSID_BROADCAST) {
4835 return NVME_INVALID_FIELD | NVME_DNR;
4836 }
4837
4838 ns = nvme_subsys_ns(n->subsys, nsid);
4839 if (!ns) {
4840 return NVME_INVALID_FIELD | NVME_DNR;
4841 }
4842 }
4843
4844 for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
4845 ctrl = nvme_subsys_ctrl(n->subsys, cntlid);
4846 if (!ctrl) {
4847 continue;
4848 }
4849
4850 if (attached && !nvme_ns(ctrl, nsid)) {
4851 continue;
4852 }
4853
4854 ids[nr_ids++] = cntlid;
4855 }
4856
4857 list[0] = nr_ids;
4858
4859 return nvme_c2h(n, (uint8_t *)list, sizeof(list), req);
4860}
4861
4862static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
4863{
4864 trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid));
4865
4866 return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap,
4867 sizeof(NvmePriCtrlCap), req);
4868}
4869
4870static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
4871{
4872 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4873 uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
4874 uint16_t min_id = le16_to_cpu(c->ctrlid);
4875 uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl;
4876 NvmeSecCtrlList list = {0};
4877 uint8_t i;
4878
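    /*
     * The secondary controller list is returned as a window into the
     * controller's static table: find the first entry with an SCID greater
     * than or equal to the CNTLID given in the command and copy every entry
     * from there to the end, so the host can page through the full set by
     * re-issuing the command with a higher CNTLID.
     */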
4879 for (i = 0; i < num_sec_ctrl; i++) {
4880 if (n->sec_ctrl_list.sec[i].scid >= min_id) {
4881 list.numcntl = num_sec_ctrl - i;
4882 memcpy(&list.sec, n->sec_ctrl_list.sec + i,
4883 list.numcntl * sizeof(NvmeSecCtrlEntry));
4884 break;
4885 }
4886 }
4887
4888 trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl);
4889
4890 return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
4891}
4892
4893static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
4894 bool active)
4895{
4896 NvmeNamespace *ns;
4897 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4898 uint32_t nsid = le32_to_cpu(c->nsid);
4899
4900 trace_pci_nvme_identify_ns_csi(nsid, c->csi);
4901
4902 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
4903 return NVME_INVALID_NSID | NVME_DNR;
4904 }
4905
4906 ns = nvme_ns(n, nsid);
4907 if (unlikely(!ns)) {
4908 if (!active) {
4909 ns = nvme_subsys_ns(n->subsys, nsid);
4910 if (!ns) {
4911 return nvme_rpt_empty_id_struct(n, req);
4912 }
4913 } else {
4914 return nvme_rpt_empty_id_struct(n, req);
4915 }
4916 }
4917
4918 if (c->csi == NVME_CSI_NVM) {
4919 return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm),
4920 req);
4921 } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
4922 return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
4923 req);
4924 }
4925
4926 return NVME_INVALID_FIELD | NVME_DNR;
4927}
4928
4929static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
4930 bool active)
4931{
4932 NvmeNamespace *ns;
4933 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4934 uint32_t min_nsid = le32_to_cpu(c->nsid);
4935 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
4936 static const int data_len = sizeof(list);
4937 uint32_t *list_ptr = (uint32_t *)list;
4938 int i, j = 0;
4939
4940 trace_pci_nvme_identify_nslist(min_nsid);
4941
    /*
     * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFEh are invalid values
     * for the Namespace Identifier here: the Active Namespace ID list must
     * return namespaces with identifiers strictly greater than the given
     * NSID, and no valid namespace identifier is greater than FFFFFFFEh,
     * so nothing could ever be returned for these values.
     */
4948 if (min_nsid >= NVME_NSID_BROADCAST - 1) {
4949 return NVME_INVALID_NSID | NVME_DNR;
4950 }
4951
4952 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
4953 ns = nvme_ns(n, i);
4954 if (!ns) {
4955 if (!active) {
4956 ns = nvme_subsys_ns(n->subsys, i);
4957 if (!ns) {
4958 continue;
4959 }
4960 } else {
4961 continue;
4962 }
4963 }
4964 if (ns->params.nsid <= min_nsid) {
4965 continue;
4966 }
4967 list_ptr[j++] = cpu_to_le32(ns->params.nsid);
4968 if (j == data_len / sizeof(uint32_t)) {
4969 break;
4970 }
4971 }
4972
4973 return nvme_c2h(n, list, data_len, req);
4974}
4975
4976static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
4977 bool active)
4978{
4979 NvmeNamespace *ns;
4980 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4981 uint32_t min_nsid = le32_to_cpu(c->nsid);
4982 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
4983 static const int data_len = sizeof(list);
4984 uint32_t *list_ptr = (uint32_t *)list;
4985 int i, j = 0;
4986
4987 trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi);
4988
    /*
     * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFEh are invalid.
     */
4992 if (min_nsid >= NVME_NSID_BROADCAST - 1) {
4993 return NVME_INVALID_NSID | NVME_DNR;
4994 }
4995
4996 if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) {
4997 return NVME_INVALID_FIELD | NVME_DNR;
4998 }
4999
5000 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5001 ns = nvme_ns(n, i);
5002 if (!ns) {
5003 if (!active) {
5004 ns = nvme_subsys_ns(n->subsys, i);
5005 if (!ns) {
5006 continue;
5007 }
5008 } else {
5009 continue;
5010 }
5011 }
5012 if (ns->params.nsid <= min_nsid || c->csi != ns->csi) {
5013 continue;
5014 }
5015 list_ptr[j++] = cpu_to_le32(ns->params.nsid);
5016 if (j == data_len / sizeof(uint32_t)) {
5017 break;
5018 }
5019 }
5020
5021 return nvme_c2h(n, list, data_len, req);
5022}
5023
5024static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
5025{
5026 NvmeNamespace *ns;
5027 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5028 uint32_t nsid = le32_to_cpu(c->nsid);
5029 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5030 uint8_t *pos = list;
5031 struct {
5032 NvmeIdNsDescr hdr;
5033 uint8_t v[NVME_NIDL_UUID];
5034 } QEMU_PACKED uuid = {};
5035 struct {
5036 NvmeIdNsDescr hdr;
5037 uint64_t v;
5038 } QEMU_PACKED eui64 = {};
5039 struct {
5040 NvmeIdNsDescr hdr;
5041 uint8_t v;
5042 } QEMU_PACKED csi = {};
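    /*
     * Each entry in the Namespace Identification Descriptor list is a
     * 4-byte header (NIDT, NIDL and two reserved bytes) immediately
     * followed by NIDL bytes of identifier; the packed structs above mirror
     * that wire format, e.g. the UUID descriptor occupies 4 + 16 bytes.
     */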
5043
5044 trace_pci_nvme_identify_ns_descr_list(nsid);
5045
5046 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
5047 return NVME_INVALID_NSID | NVME_DNR;
5048 }
5049
5050 ns = nvme_ns(n, nsid);
5051 if (unlikely(!ns)) {
5052 return NVME_INVALID_FIELD | NVME_DNR;
5053 }
5054
5055 if (!qemu_uuid_is_null(&ns->params.uuid)) {
5056 uuid.hdr.nidt = NVME_NIDT_UUID;
5057 uuid.hdr.nidl = NVME_NIDL_UUID;
5058 memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
5059 memcpy(pos, &uuid, sizeof(uuid));
5060 pos += sizeof(uuid);
5061 }
5062
5063 if (ns->params.eui64) {
5064 eui64.hdr.nidt = NVME_NIDT_EUI64;
5065 eui64.hdr.nidl = NVME_NIDL_EUI64;
5066 eui64.v = cpu_to_be64(ns->params.eui64);
5067 memcpy(pos, &eui64, sizeof(eui64));
5068 pos += sizeof(eui64);
5069 }
5070
5071 csi.hdr.nidt = NVME_NIDT_CSI;
5072 csi.hdr.nidl = NVME_NIDL_CSI;
5073 csi.v = ns->csi;
5074 memcpy(pos, &csi, sizeof(csi));
5075 pos += sizeof(csi);
5076
5077 return nvme_c2h(n, list, sizeof(list), req);
5078}
5079
5080static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
5081{
5082 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5083 static const int data_len = sizeof(list);
5084
5085 trace_pci_nvme_identify_cmd_set();
5086
5087 NVME_SET_CSI(*list, NVME_CSI_NVM);
5088 NVME_SET_CSI(*list, NVME_CSI_ZONED);
5089
5090 return nvme_c2h(n, list, data_len, req);
5091}
5092
5093static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
5094{
5095 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5096
5097 trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid),
5098 c->csi);
5099
5100 switch (c->cns) {
5101 case NVME_ID_CNS_NS:
5102 return nvme_identify_ns(n, req, true);
5103 case NVME_ID_CNS_NS_PRESENT:
5104 return nvme_identify_ns(n, req, false);
5105 case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
5106 return nvme_identify_ctrl_list(n, req, true);
5107 case NVME_ID_CNS_CTRL_LIST:
5108 return nvme_identify_ctrl_list(n, req, false);
5109 case NVME_ID_CNS_PRIMARY_CTRL_CAP:
5110 return nvme_identify_pri_ctrl_cap(n, req);
5111 case NVME_ID_CNS_SECONDARY_CTRL_LIST:
5112 return nvme_identify_sec_ctrl_list(n, req);
5113 case NVME_ID_CNS_CS_NS:
5114 return nvme_identify_ns_csi(n, req, true);
5115 case NVME_ID_CNS_CS_NS_PRESENT:
5116 return nvme_identify_ns_csi(n, req, false);
5117 case NVME_ID_CNS_CTRL:
5118 return nvme_identify_ctrl(n, req);
5119 case NVME_ID_CNS_CS_CTRL:
5120 return nvme_identify_ctrl_csi(n, req);
5121 case NVME_ID_CNS_NS_ACTIVE_LIST:
5122 return nvme_identify_nslist(n, req, true);
5123 case NVME_ID_CNS_NS_PRESENT_LIST:
5124 return nvme_identify_nslist(n, req, false);
5125 case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
5126 return nvme_identify_nslist_csi(n, req, true);
5127 case NVME_ID_CNS_CS_NS_PRESENT_LIST:
5128 return nvme_identify_nslist_csi(n, req, false);
5129 case NVME_ID_CNS_NS_DESCR_LIST:
5130 return nvme_identify_ns_descr_list(n, req);
5131 case NVME_ID_CNS_IO_COMMAND_SET:
5132 return nvme_identify_cmd_set(n, req);
5133 default:
5134 trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
5135 return NVME_INVALID_FIELD | NVME_DNR;
5136 }
5137}
5138
5139static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
5140{
5141 uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
5142
5143 req->cqe.result = 1;
5144 if (nvme_check_sqid(n, sqid)) {
5145 return NVME_INVALID_FIELD | NVME_DNR;
5146 }
5147
5148 return NVME_SUCCESS;
5149}
5150
5151static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
5152{
5153 trace_pci_nvme_setfeat_timestamp(ts);
5154
5155 n->host_timestamp = le64_to_cpu(ts);
5156 n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
5157}
5158
5159static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
5160{
5161 uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
5162 uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms;
5163
5164 union nvme_timestamp {
5165 struct {
5166 uint64_t timestamp:48;
5167 uint64_t sync:1;
5168 uint64_t origin:3;
5169 uint64_t rsvd1:12;
5170 };
5171 uint64_t all;
5172 };
5173
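    /*
     * The Timestamp field is 48 bits wide, so assigning through the
     * bitfield below truncates modulo 2^48; a timestamp that would overflow
     * simply rolls over, matching the spec-defined behavior.
     */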
5174 union nvme_timestamp ts;
5175 ts.all = 0;
5176 ts.timestamp = n->host_timestamp + elapsed_time;
5177
    /* If the host timestamp is non-zero, set the timestamp origin */
5179 ts.origin = n->host_timestamp ? 0x01 : 0x00;
5180
5181 trace_pci_nvme_getfeat_timestamp(ts.all);
5182
5183 return cpu_to_le64(ts.all);
5184}
5185
5186static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
5187{
5188 uint64_t timestamp = nvme_get_timestamp(n);
5189
    return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
5191}
5192
5193static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
5194{
5195 NvmeCmd *cmd = &req->cmd;
5196 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
5197 uint32_t dw11 = le32_to_cpu(cmd->cdw11);
5198 uint32_t nsid = le32_to_cpu(cmd->nsid);
5199 uint32_t result;
5200 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
5201 NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
5202 uint16_t iv;
5203 NvmeNamespace *ns;
5204 int i;
5205
5206 static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
5207 [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
5208 };
5209
5210 trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11);
5211
5212 if (!nvme_feature_support[fid]) {
5213 return NVME_INVALID_FIELD | NVME_DNR;
5214 }
5215
5216 if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
5217 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
            /*
             * The Reservation Notification Mask and Reservation Persistence
             * features require a status code of Invalid Field in Command
             * when the NSID is FFFFFFFFh. Since the device does not support
             * those features, Invalid Namespace or Format can be returned
             * for the broadcast NSID regardless of the feature identifier.
             */
5225 return NVME_INVALID_NSID | NVME_DNR;
5226 }
5227
5228 if (!nvme_ns(n, nsid)) {
5229 return NVME_INVALID_FIELD | NVME_DNR;
5230 }
5231 }
5232
5233 switch (sel) {
5234 case NVME_GETFEAT_SELECT_CURRENT:
5235 break;
5236 case NVME_GETFEAT_SELECT_SAVED:
        /* no features are saveable by the controller; fall through */
5238 case NVME_GETFEAT_SELECT_DEFAULT:
5239 goto defaults;
5240 case NVME_GETFEAT_SELECT_CAP:
5241 result = nvme_feature_cap[fid];
5242 goto out;
5243 }
5244
5245 switch (fid) {
5246 case NVME_TEMPERATURE_THRESHOLD:
5247 result = 0;
        /*
         * The controller only implements the Composite Temperature sensor,
         * so return 0 for all other sensors. The sensor is selected by the
         * TMPSEL field of CDW11 and the threshold type (over/under) by
         * THSEL.
         */
5253 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5254 goto out;
5255 }
5256
5257 switch (NVME_TEMP_THSEL(dw11)) {
5258 case NVME_TEMP_THSEL_OVER:
5259 result = n->features.temp_thresh_hi;
5260 goto out;
5261 case NVME_TEMP_THSEL_UNDER:
5262 result = n->features.temp_thresh_low;
5263 goto out;
5264 }
5265
5266 return NVME_INVALID_FIELD | NVME_DNR;
5267 case NVME_ERROR_RECOVERY:
5268 if (!nvme_nsid_valid(n, nsid)) {
5269 return NVME_INVALID_NSID | NVME_DNR;
5270 }
5271
5272 ns = nvme_ns(n, nsid);
5273 if (unlikely(!ns)) {
5274 return NVME_INVALID_FIELD | NVME_DNR;
5275 }
5276
5277 result = ns->features.err_rec;
5278 goto out;
5279 case NVME_VOLATILE_WRITE_CACHE:
5280 result = 0;
5281 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5282 ns = nvme_ns(n, i);
5283 if (!ns) {
5284 continue;
5285 }
5286
5287 result = blk_enable_write_cache(ns->blkconf.blk);
5288 if (result) {
5289 break;
5290 }
5291 }
5292 trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
5293 goto out;
5294 case NVME_ASYNCHRONOUS_EVENT_CONF:
5295 result = n->features.async_config;
5296 goto out;
5297 case NVME_TIMESTAMP:
5298 return nvme_get_feature_timestamp(n, req);
5299 case NVME_HOST_BEHAVIOR_SUPPORT:
5300 return nvme_c2h(n, (uint8_t *)&n->features.hbs,
5301 sizeof(n->features.hbs), req);
5302 default:
5303 break;
5304 }
5305
5306defaults:
5307 switch (fid) {
5308 case NVME_TEMPERATURE_THRESHOLD:
5309 result = 0;
5310
5311 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5312 break;
5313 }
5314
5315 if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) {
5316 result = NVME_TEMPERATURE_WARNING;
5317 }
5318
5319 break;
5320 case NVME_NUMBER_OF_QUEUES:
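        /*
         * Queue counts are reported 0's based: with conf_ioqpairs = 4, the
         * returned value is 0x00030003, i.e. NSQA = NCQA = 3, meaning four
         * I/O submission and completion queues may be created.
         */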
5321 result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16);
5322 trace_pci_nvme_getfeat_numq(result);
5323 break;
5324 case NVME_INTERRUPT_VECTOR_CONF:
5325 iv = dw11 & 0xffff;
5326 if (iv >= n->conf_ioqpairs + 1) {
5327 return NVME_INVALID_FIELD | NVME_DNR;
5328 }
5329
5330 result = iv;
5331 if (iv == n->admin_cq.vector) {
5332 result |= NVME_INTVC_NOCOALESCING;
5333 }
5334 break;
5335 default:
5336 result = nvme_feature_default[fid];
5337 break;
5338 }
5339
5340out:
5341 req->cqe.result = cpu_to_le32(result);
5342 return NVME_SUCCESS;
5343}
5344
5345static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
5346{
5347 uint16_t ret;
5348 uint64_t timestamp;
5349
    ret = nvme_h2c(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
5351 if (ret) {
5352 return ret;
5353 }
5354
5355 nvme_set_timestamp(n, timestamp);
5356
5357 return NVME_SUCCESS;
5358}
5359
5360static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
5361{
5362 NvmeNamespace *ns = NULL;
5363
5364 NvmeCmd *cmd = &req->cmd;
5365 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
5366 uint32_t dw11 = le32_to_cpu(cmd->cdw11);
5367 uint32_t nsid = le32_to_cpu(cmd->nsid);
5368 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
5369 uint8_t save = NVME_SETFEAT_SAVE(dw10);
5370 uint16_t status;
5371 int i;
5372
5373 trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
5374
5375 if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) {
5376 return NVME_FID_NOT_SAVEABLE | NVME_DNR;
5377 }
5378
5379 if (!nvme_feature_support[fid]) {
5380 return NVME_INVALID_FIELD | NVME_DNR;
5381 }
5382
5383 if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
5384 if (nsid != NVME_NSID_BROADCAST) {
5385 if (!nvme_nsid_valid(n, nsid)) {
5386 return NVME_INVALID_NSID | NVME_DNR;
5387 }
5388
5389 ns = nvme_ns(n, nsid);
5390 if (unlikely(!ns)) {
5391 return NVME_INVALID_FIELD | NVME_DNR;
5392 }
5393 }
5394 } else if (nsid && nsid != NVME_NSID_BROADCAST) {
5395 if (!nvme_nsid_valid(n, nsid)) {
5396 return NVME_INVALID_NSID | NVME_DNR;
5397 }
5398
5399 return NVME_FEAT_NOT_NS_SPEC | NVME_DNR;
5400 }
5401
5402 if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) {
5403 return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
5404 }
5405
5406 switch (fid) {
5407 case NVME_TEMPERATURE_THRESHOLD:
5408 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5409 break;
5410 }
5411
5412 switch (NVME_TEMP_THSEL(dw11)) {
5413 case NVME_TEMP_THSEL_OVER:
5414 n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
5415 break;
5416 case NVME_TEMP_THSEL_UNDER:
5417 n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
5418 break;
5419 default:
5420 return NVME_INVALID_FIELD | NVME_DNR;
5421 }
5422
5423 if ((n->temperature >= n->features.temp_thresh_hi) ||
5424 (n->temperature <= n->features.temp_thresh_low)) {
5425 nvme_smart_event(n, NVME_SMART_TEMPERATURE);
5426 }
5427
5428 break;
5429 case NVME_ERROR_RECOVERY:
5430 if (nsid == NVME_NSID_BROADCAST) {
5431 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5432 ns = nvme_ns(n, i);
5433
5434 if (!ns) {
5435 continue;
5436 }
5437
5438 if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
5439 ns->features.err_rec = dw11;
5440 }
5441 }
5442
5443 break;
5444 }
5445
5446 assert(ns);
5447 if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
5448 ns->features.err_rec = dw11;
5449 }
5450 break;
5451 case NVME_VOLATILE_WRITE_CACHE:
5452 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5453 ns = nvme_ns(n, i);
5454 if (!ns) {
5455 continue;
5456 }
5457
5458 if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) {
5459 blk_flush(ns->blkconf.blk);
5460 }
5461
5462 blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1);
5463 }
5464
5465 break;
5466
5467 case NVME_NUMBER_OF_QUEUES:
5468 if (n->qs_created) {
5469 return NVME_CMD_SEQ_ERROR | NVME_DNR;
5470 }
5471
        /*
         * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for
         * NCQR or NSQR.
         */
5476 if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
5477 return NVME_INVALID_FIELD | NVME_DNR;
5478 }
5479
5480 trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1,
5481 ((dw11 >> 16) & 0xffff) + 1,
5482 n->conf_ioqpairs,
5483 n->conf_ioqpairs);
5484 req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) |
5485 ((n->conf_ioqpairs - 1) << 16));
5486 break;
5487 case NVME_ASYNCHRONOUS_EVENT_CONF:
5488 n->features.async_config = dw11;
5489 break;
5490 case NVME_TIMESTAMP:
5491 return nvme_set_feature_timestamp(n, req);
5492 case NVME_HOST_BEHAVIOR_SUPPORT:
5493 status = nvme_h2c(n, (uint8_t *)&n->features.hbs,
5494 sizeof(n->features.hbs), req);
5495 if (status) {
5496 return status;
5497 }
5498
5499 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5500 ns = nvme_ns(n, i);
5501
5502 if (!ns) {
5503 continue;
5504 }
5505
5506 ns->id_ns.nlbaf = ns->nlbaf - 1;
5507 if (!n->features.hbs.lbafee) {
5508 ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
5509 }
5510 }
5511
5512 return status;
5513 case NVME_COMMAND_SET_PROFILE:
5514 if (dw11 & 0x1ff) {
5515 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
5516 return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
5517 }
5518 break;
5519 default:
5520 return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
5521 }
5522 return NVME_SUCCESS;
5523}
5524
5525static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
5526{
5527 trace_pci_nvme_aer(nvme_cid(req));
5528
5529 if (n->outstanding_aers > n->params.aerl) {
5530 trace_pci_nvme_aer_aerl_exceeded();
5531 return NVME_AER_LIMIT_EXCEEDED;
5532 }
5533
5534 n->aer_reqs[n->outstanding_aers] = req;
5535 n->outstanding_aers++;
5536
5537 if (!QTAILQ_EMPTY(&n->aer_queue)) {
5538 nvme_process_aers(n);
5539 }
5540
5541 return NVME_NO_COMPLETE;
5542}
5543
5544static void nvme_update_dmrsl(NvmeCtrl *n)
5545{
5546 int nsid;
5547
5548 for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
5549 NvmeNamespace *ns = nvme_ns(n, nsid);
5550 if (!ns) {
5551 continue;
5552 }
5553
5554 n->dmrsl = MIN_NON_ZERO(n->dmrsl,
5555 BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
5556 }
5557}
5558
5559static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
5560{
5561 uint32_t cc = ldl_le_p(&n->bar.cc);
5562
5563 ns->iocs = nvme_cse_iocs_none;
5564 switch (ns->csi) {
5565 case NVME_CSI_NVM:
5566 if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
5567 ns->iocs = nvme_cse_iocs_nvm;
5568 }
5569 break;
5570 case NVME_CSI_ZONED:
5571 if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
5572 ns->iocs = nvme_cse_iocs_zoned;
5573 } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
5574 ns->iocs = nvme_cse_iocs_nvm;
5575 }
5576 break;
5577 }
5578}
5579
5580static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
5581{
5582 NvmeNamespace *ns;
5583 NvmeCtrl *ctrl;
5584 uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
5585 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
5586 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5587 uint8_t sel = dw10 & 0xf;
5588 uint16_t *nr_ids = &list[0];
5589 uint16_t *ids = &list[1];
5590 uint16_t ret;
5591 int i;
5592
5593 trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
5594
5595 if (!nvme_nsid_valid(n, nsid)) {
5596 return NVME_INVALID_NSID | NVME_DNR;
5597 }
5598
5599 ns = nvme_subsys_ns(n->subsys, nsid);
5600 if (!ns) {
5601 return NVME_INVALID_FIELD | NVME_DNR;
5602 }
5603
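    /*
     * The host supplies a Controller List: a fixed 4 KiB structure whose
     * first 16-bit entry is the number of identifiers, followed by the
     * controller identifiers themselves in ascending order; nr_ids and ids
     * alias those two parts of the local buffer.
     */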
5604 ret = nvme_h2c(n, (uint8_t *)list, 4096, req);
5605 if (ret) {
5606 return ret;
5607 }
5608
5609 if (!*nr_ids) {
5610 return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
5611 }
5612
5613 *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1);
5614 for (i = 0; i < *nr_ids; i++) {
5615 ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
5616 if (!ctrl) {
5617 return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
5618 }
5619
5620 switch (sel) {
5621 case NVME_NS_ATTACHMENT_ATTACH:
5622 if (nvme_ns(ctrl, nsid)) {
5623 return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
5624 }
5625
5626 if (ns->attached && !ns->params.shared) {
5627 return NVME_NS_PRIVATE | NVME_DNR;
5628 }
5629
5630 nvme_attach_ns(ctrl, ns);
5631 nvme_select_iocs_ns(ctrl, ns);
5632
5633 break;
5634
5635 case NVME_NS_ATTACHMENT_DETACH:
5636 if (!nvme_ns(ctrl, nsid)) {
5637 return NVME_NS_NOT_ATTACHED | NVME_DNR;
5638 }
5639
5640 ctrl->namespaces[nsid] = NULL;
5641 ns->attached--;
5642
5643 nvme_update_dmrsl(ctrl);
5644
5645 break;
5646
5647 default:
5648 return NVME_INVALID_FIELD | NVME_DNR;
5649 }
5650
        /*
         * Add the namespace id to the changed namespace id list for event
         * clearing via the Get Log Page command.
         */
5655 if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
5656 nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
5657 NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
5658 NVME_LOG_CHANGED_NSLIST);
5659 }
5660 }
5661
5662 return NVME_SUCCESS;
5663}
5664
5665typedef struct NvmeFormatAIOCB {
5666 BlockAIOCB common;
5667 BlockAIOCB *aiocb;
5668 NvmeRequest *req;
5669 int ret;
5670
5671 NvmeNamespace *ns;
5672 uint32_t nsid;
5673 bool broadcast;
5674 int64_t offset;
5675
5676 uint8_t lbaf;
5677 uint8_t mset;
5678 uint8_t pi;
5679 uint8_t pil;
5680} NvmeFormatAIOCB;
5681
5682static void nvme_format_cancel(BlockAIOCB *aiocb)
5683{
5684 NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common);
5685
5686 iocb->ret = -ECANCELED;
5687
5688 if (iocb->aiocb) {
5689 blk_aio_cancel_async(iocb->aiocb);
5690 iocb->aiocb = NULL;
5691 }
5692}
5693
5694static const AIOCBInfo nvme_format_aiocb_info = {
5695 .aiocb_size = sizeof(NvmeFormatAIOCB),
5696 .cancel_async = nvme_format_cancel,
5697 .get_aio_context = nvme_get_aio_context,
5698};
5699
5700static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
5701 uint8_t pi, uint8_t pil)
5702{
5703 uint8_t lbafl = lbaf & 0xf;
5704 uint8_t lbafu = lbaf >> 4;
5705
5706 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
5707
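    /*
     * DPS packs the protection information settings (PIL in bit 3, the PI
     * type in bits 2:0), and FLBAS splits the LBA format index across bits
     * 6:5 (upper bits) and 3:0 (lower bits), with MSET in bit 4. For
     * example, lbaf = 0x12 with mset = 1 gives
     * flbas = (1 << 5) | (1 << 4) | 2 = 0x32.
     */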
5708 ns->id_ns.dps = (pil << 3) | pi;
5709 ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
5710
5711 nvme_ns_init_format(ns);
5712}
5713
5714static void nvme_do_format(NvmeFormatAIOCB *iocb);
5715
5716static void nvme_format_ns_cb(void *opaque, int ret)
5717{
5718 NvmeFormatAIOCB *iocb = opaque;
5719 NvmeNamespace *ns = iocb->ns;
5720 int bytes;
5721
5722 if (iocb->ret < 0) {
5723 goto done;
5724 } else if (ret < 0) {
5725 iocb->ret = ret;
5726 goto done;
5727 }
5728
5729 assert(ns);
5730
5731 if (iocb->offset < ns->size) {
5732 bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset);
5733
5734 iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset,
5735 bytes, BDRV_REQ_MAY_UNMAP,
5736 nvme_format_ns_cb, iocb);
5737
5738 iocb->offset += bytes;
5739 return;
5740 }
5741
5742 nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
5743 ns->status = 0x0;
5744 iocb->ns = NULL;
5745 iocb->offset = 0;
5746
5747done:
5748 nvme_do_format(iocb);
5749}
5750
5751static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi)
5752{
5753 if (ns->params.zoned) {
5754 return NVME_INVALID_FORMAT | NVME_DNR;
5755 }
5756
5757 if (lbaf > ns->id_ns.nlbaf) {
5758 return NVME_INVALID_FORMAT | NVME_DNR;
5759 }
5760
5761 if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
5762 return NVME_INVALID_FORMAT | NVME_DNR;
5763 }
5764
5765 if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
5766 return NVME_INVALID_FIELD | NVME_DNR;
5767 }
5768
5769 return NVME_SUCCESS;
5770}
5771
5772static void nvme_do_format(NvmeFormatAIOCB *iocb)
5773{
5774 NvmeRequest *req = iocb->req;
5775 NvmeCtrl *n = nvme_ctrl(req);
5776 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5777 uint8_t lbaf = dw10 & 0xf;
5778 uint8_t pi = (dw10 >> 5) & 0x7;
5779 uint16_t status;
5780 int i;
5781
5782 if (iocb->ret < 0) {
5783 goto done;
5784 }
5785
5786 if (iocb->broadcast) {
5787 for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
5788 iocb->ns = nvme_ns(n, i);
5789 if (iocb->ns) {
5790 iocb->nsid = i;
5791 break;
5792 }
5793 }
5794 }
5795
5796 if (!iocb->ns) {
5797 goto done;
5798 }
5799
5800 status = nvme_format_check(iocb->ns, lbaf, pi);
5801 if (status) {
5802 req->status = status;
5803 goto done;
5804 }
5805
5806 iocb->ns->status = NVME_FORMAT_IN_PROGRESS;
5807 nvme_format_ns_cb(iocb, 0);
5808 return;
5809
5810done:
5811 iocb->common.cb(iocb->common.opaque, iocb->ret);
5812 qemu_aio_unref(iocb);
5813}
5814
5815static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
5816{
5817 NvmeFormatAIOCB *iocb;
5818 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
5819 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5820 uint8_t lbaf = dw10 & 0xf;
5821 uint8_t mset = (dw10 >> 4) & 0x1;
5822 uint8_t pi = (dw10 >> 5) & 0x7;
5823 uint8_t pil = (dw10 >> 8) & 0x1;
5824 uint8_t lbafu = (dw10 >> 12) & 0x3;
5825 uint16_t status;
5826
5827 iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
5828
5829 iocb->req = req;
5830 iocb->ret = 0;
5831 iocb->ns = NULL;
5832 iocb->nsid = 0;
5833 iocb->lbaf = lbaf;
5834 iocb->mset = mset;
5835 iocb->pi = pi;
5836 iocb->pil = pil;
5837 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
5838 iocb->offset = 0;
5839
5840 if (n->features.hbs.lbafee) {
5841 iocb->lbaf |= lbafu << 4;
5842 }
5843
5844 if (!iocb->broadcast) {
5845 if (!nvme_nsid_valid(n, nsid)) {
5846 status = NVME_INVALID_NSID | NVME_DNR;
5847 goto out;
5848 }
5849
5850 iocb->ns = nvme_ns(n, nsid);
5851 if (!iocb->ns) {
5852 status = NVME_INVALID_FIELD | NVME_DNR;
5853 goto out;
5854 }
5855 }
5856
5857 req->aiocb = &iocb->common;
5858 nvme_do_format(iocb);
5859
5860 return NVME_NO_COMPLETE;
5861
5862out:
5863 qemu_aio_unref(iocb);
5864
5865 return status;
5866}
5867
5868static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total,
5869 int *num_prim, int *num_sec)
5870{
5871 *num_total = le32_to_cpu(rt ?
5872 n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt);
5873 *num_prim = le16_to_cpu(rt ?
5874 n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap);
5875 *num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa);
5876}
5877
5878static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req,
5879 uint16_t cntlid, uint8_t rt,
5880 int nr)
5881{
5882 int num_total, num_prim, num_sec;
5883
5884 if (cntlid != n->cntlid) {
5885 return NVME_INVALID_CTRL_ID | NVME_DNR;
5886 }
5887
5888 nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
5889
5890 if (nr > num_total) {
5891 return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
5892 }
5893
5894 if (nr > num_total - num_sec) {
5895 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
5896 }
5897
5898 if (rt) {
5899 n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr);
5900 } else {
5901 n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr);
5902 }
5903
5904 req->cqe.result = cpu_to_le32(nr);
5905 return req->status;
5906}
5907
5908static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl,
5909 uint8_t rt, int nr)
5910{
5911 int prev_nr, prev_total;
5912
5913 if (rt) {
5914 prev_nr = le16_to_cpu(sctrl->nvi);
5915 prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa);
5916 sctrl->nvi = cpu_to_le16(nr);
5917 n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr);
5918 } else {
5919 prev_nr = le16_to_cpu(sctrl->nvq);
5920 prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa);
5921 sctrl->nvq = cpu_to_le16(nr);
5922 n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr);
5923 }
5924}
5925
5926static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req,
5927 uint16_t cntlid, uint8_t rt, int nr)
5928{
5929 int num_total, num_prim, num_sec, num_free, diff, limit;
5930 NvmeSecCtrlEntry *sctrl;
5931
5932 sctrl = nvme_sctrl_for_cntlid(n, cntlid);
5933 if (!sctrl) {
5934 return NVME_INVALID_CTRL_ID | NVME_DNR;
5935 }
5936
5937 if (sctrl->scs) {
5938 return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
5939 }
5940
5941 limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm);
5942 if (nr > limit) {
5943 return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
5944 }
5945
5946 nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
5947 num_free = num_total - num_prim - num_sec;
5948 diff = nr - le16_to_cpu(rt ? sctrl->nvi : sctrl->nvq);
5949
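    /*
     * num_free is the part of the flexible resource pool assigned neither
     * to the primary nor to any secondary controller, and diff is how many
     * additional resources this request needs. For example, with 16 total
     * flexible queues, 4 assigned to the primary and 6 to secondaries,
     * growing a secondary from 2 to 10 queues (diff = 8 > num_free = 6) is
     * rejected.
     */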
5950 if (diff > num_free) {
5951 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
5952 }
5953
5954 nvme_update_virt_res(n, sctrl, rt, nr);
5955 req->cqe.result = cpu_to_le32(nr);
5956
5957 return req->status;
5958}
5959
5960static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online)
5961{
5962 NvmeCtrl *sn = NULL;
5963 NvmeSecCtrlEntry *sctrl;
5964 int vf_index;
5965
5966 sctrl = nvme_sctrl_for_cntlid(n, cntlid);
5967 if (!sctrl) {
5968 return NVME_INVALID_CTRL_ID | NVME_DNR;
5969 }
5970
5971 if (!pci_is_vf(&n->parent_obj)) {
5972 vf_index = le16_to_cpu(sctrl->vfn) - 1;
5973 sn = NVME(pcie_sriov_get_vf_at_index(&n->parent_obj, vf_index));
5974 }
5975
5976 if (online) {
5977 if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) {
5978 return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
5979 }
5980
5981 if (!sctrl->scs) {
5982 sctrl->scs = 0x1;
5983 nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
5984 }
5985 } else {
5986 nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_INTERRUPT, 0);
5987 nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_QUEUE, 0);
5988
5989 if (sctrl->scs) {
5990 sctrl->scs = 0x0;
5991 if (sn) {
5992 nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
5993 }
5994 }
5995 }
5996
5997 return NVME_SUCCESS;
5998}
5999
6000static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req)
6001{
6002 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
6003 uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
6004 uint8_t act = dw10 & 0xf;
6005 uint8_t rt = (dw10 >> 8) & 0x7;
6006 uint16_t cntlid = (dw10 >> 16) & 0xffff;
6007 int nr = dw11 & 0xffff;
6008
6009 trace_pci_nvme_virt_mngmt(nvme_cid(req), act, cntlid, rt ? "VI" : "VQ", nr);
6010
6011 if (rt != NVME_VIRT_RES_QUEUE && rt != NVME_VIRT_RES_INTERRUPT) {
6012 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
6013 }
6014
6015 switch (act) {
6016 case NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN:
6017 return nvme_assign_virt_res_to_sec(n, req, cntlid, rt, nr);
6018 case NVME_VIRT_MNGMT_ACTION_PRM_ALLOC:
6019 return nvme_assign_virt_res_to_prim(n, req, cntlid, rt, nr);
6020 case NVME_VIRT_MNGMT_ACTION_SEC_ONLINE:
6021 return nvme_virt_set_state(n, cntlid, true);
6022 case NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE:
6023 return nvme_virt_set_state(n, cntlid, false);
6024 default:
6025 return NVME_INVALID_FIELD | NVME_DNR;
6026 }
6027}
6028
6029static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
6030{
6031 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
6032 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
6033 int i;
6034
    /* Addresses should be page aligned */
6036 if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
6037 return NVME_INVALID_FIELD | NVME_DNR;
6038 }
6039
    /* Save shadow buffer base addresses for use during queue creation */
6041 n->dbbuf_dbs = dbs_addr;
6042 n->dbbuf_eis = eis_addr;
6043 n->dbbuf_enabled = true;
6044
6045 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6046 NvmeSQueue *sq = n->sq[i];
6047 NvmeCQueue *cq = n->cq[i];
6048
6049 if (sq) {
            /*
             * CAP.DSTRD is 0, so the offset of the ith sq db_addr is
             * (i << 3). nvme_process_db() hard-codes this layout when
             * computing doorbell offsets, so be consistent with it here.
             */
6055 sq->db_addr = dbs_addr + (i << 3);
6056 sq->ei_addr = eis_addr + (i << 3);
6057 pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
6058 sizeof(sq->tail));
6059
6060 if (n->params.ioeventfd && sq->sqid != 0) {
6061 if (!nvme_init_sq_ioeventfd(sq)) {
6062 sq->ioeventfd_enabled = true;
6063 }
6064 }
6065 }
6066
6067 if (cq) {
            /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i << 3) + (1 << 2) */
6069 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
6070 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
6071 pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
6072 sizeof(cq->head));
6073
6074 if (n->params.ioeventfd && cq->cqid != 0) {
6075 if (!nvme_init_cq_ioeventfd(cq)) {
6076 cq->ioeventfd_enabled = true;
6077 }
6078 }
6079 }
6080 }
6081
6082 trace_pci_nvme_dbbuf_config(dbs_addr, eis_addr);
6083
6084 return NVME_SUCCESS;
6085}
6086
6087static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
6088{
6089 trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
6090 nvme_adm_opc_str(req->cmd.opcode));
6091
6092 if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
6093 trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
6094 return NVME_INVALID_OPCODE | NVME_DNR;
6095 }
6096
    /* SGLs shall not be used for Admin commands in NVMe over PCIe */
6098 if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) {
6099 return NVME_INVALID_FIELD | NVME_DNR;
6100 }
6101
6102 if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
6103 return NVME_INVALID_FIELD;
6104 }
6105
6106 switch (req->cmd.opcode) {
6107 case NVME_ADM_CMD_DELETE_SQ:
6108 return nvme_del_sq(n, req);
6109 case NVME_ADM_CMD_CREATE_SQ:
6110 return nvme_create_sq(n, req);
6111 case NVME_ADM_CMD_GET_LOG_PAGE:
6112 return nvme_get_log(n, req);
6113 case NVME_ADM_CMD_DELETE_CQ:
6114 return nvme_del_cq(n, req);
6115 case NVME_ADM_CMD_CREATE_CQ:
6116 return nvme_create_cq(n, req);
6117 case NVME_ADM_CMD_IDENTIFY:
6118 return nvme_identify(n, req);
6119 case NVME_ADM_CMD_ABORT:
6120 return nvme_abort(n, req);
6121 case NVME_ADM_CMD_SET_FEATURES:
6122 return nvme_set_feature(n, req);
6123 case NVME_ADM_CMD_GET_FEATURES:
6124 return nvme_get_feature(n, req);
6125 case NVME_ADM_CMD_ASYNC_EV_REQ:
6126 return nvme_aer(n, req);
6127 case NVME_ADM_CMD_NS_ATTACHMENT:
6128 return nvme_ns_attachment(n, req);
6129 case NVME_ADM_CMD_VIRT_MNGMT:
6130 return nvme_virt_mngmt(n, req);
6131 case NVME_ADM_CMD_DBBUF_CONFIG:
6132 return nvme_dbbuf_config(n, req);
6133 case NVME_ADM_CMD_FORMAT_NVM:
6134 return nvme_format(n, req);
6135 default:
6136 assert(false);
6137 }
6138
6139 return NVME_INVALID_OPCODE | NVME_DNR;
6140}
6141
6142static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
6143{
6144 pci_dma_write(&sq->ctrl->parent_obj, sq->ei_addr, &sq->tail,
6145 sizeof(sq->tail));
6146 trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
6147}
6148
6149static void nvme_update_sq_tail(NvmeSQueue *sq)
6150{
6151 pci_dma_read(&sq->ctrl->parent_obj, sq->db_addr, &sq->tail,
6152 sizeof(sq->tail));
6153 trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
6154}
6155
6156static void nvme_process_sq(void *opaque)
6157{
6158 NvmeSQueue *sq = opaque;
6159 NvmeCtrl *n = sq->ctrl;
6160 NvmeCQueue *cq = n->cq[sq->cqid];
6161
6162 uint16_t status;
6163 hwaddr addr;
6164 NvmeCmd cmd;
6165 NvmeRequest *req;
6166
6167 if (n->dbbuf_enabled) {
6168 nvme_update_sq_tail(sq);
6169 }
6170
6171 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
6172 addr = sq->dma_addr + sq->head * n->sqe_size;
6173 if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
6174 trace_pci_nvme_err_addr_read(addr);
6175 trace_pci_nvme_err_cfs();
6176 stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
6177 break;
6178 }
6179 nvme_inc_sq_head(sq);
6180
6181 req = QTAILQ_FIRST(&sq->req_list);
6182 QTAILQ_REMOVE(&sq->req_list, req, entry);
6183 QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
6184 nvme_req_clear(req);
6185 req->cqe.cid = cmd.cid;
6186 memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
6187
6188 status = sq->sqid ? nvme_io_cmd(n, req) :
6189 nvme_admin_cmd(n, req);
6190 if (status != NVME_NO_COMPLETE) {
6191 req->status = status;
6192 nvme_enqueue_req_completion(cq, req);
6193 }
6194
6195 if (n->dbbuf_enabled) {
6196 nvme_update_sq_eventidx(sq);
6197 nvme_update_sq_tail(sq);
6198 }
6199 }
6200}
6201
6202static void nvme_update_msixcap_ts(PCIDevice *pci_dev, uint32_t table_size)
6203{
6204 uint8_t *config;
6205
6206 if (!msix_present(pci_dev)) {
6207 return;
6208 }
6209
6210 assert(table_size > 0 && table_size <= pci_dev->msix_entries_nr);
6211
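    /*
     * The MSI-X Table Size field in the Message Control register is 0's
     * based, so a device exposing 8 vectors stores 7; only the QSIZE bits
     * are updated here.
     */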
6212 config = pci_dev->config + pci_dev->msix_cap;
6213 pci_set_word_by_mask(config + PCI_MSIX_FLAGS, PCI_MSIX_FLAGS_QSIZE,
6214 table_size - 1);
6215}
6216
6217static void nvme_activate_virt_res(NvmeCtrl *n)
6218{
6219 PCIDevice *pci_dev = &n->parent_obj;
6220 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
6221 NvmeSecCtrlEntry *sctrl;
6222
    /* -1 to account for the admin queue */
6224 if (pci_is_vf(pci_dev)) {
6225 sctrl = nvme_sctrl(n);
6226 cap->vqprt = sctrl->nvq;
6227 cap->viprt = sctrl->nvi;
6228 n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
6229 n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
6230 } else {
6231 cap->vqrfap = n->next_pri_ctrl_cap.vqrfap;
6232 cap->virfap = n->next_pri_ctrl_cap.virfap;
6233 n->conf_ioqpairs = le16_to_cpu(cap->vqprt) +
6234 le16_to_cpu(cap->vqrfap) - 1;
6235 n->conf_msix_qsize = le16_to_cpu(cap->viprt) +
6236 le16_to_cpu(cap->virfap);
6237 }
6238}
6239
6240static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
6241{
6242 PCIDevice *pci_dev = &n->parent_obj;
6243 NvmeSecCtrlEntry *sctrl;
6244 NvmeNamespace *ns;
6245 int i;
6246
6247 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6248 ns = nvme_ns(n, i);
6249 if (!ns) {
6250 continue;
6251 }
6252
6253 nvme_ns_drain(ns);
6254 }
6255
6256 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6257 if (n->sq[i] != NULL) {
6258 nvme_free_sq(n->sq[i], n);
6259 }
6260 }
6261 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6262 if (n->cq[i] != NULL) {
6263 nvme_free_cq(n->cq[i], n);
6264 }
6265 }
6266
6267 while (!QTAILQ_EMPTY(&n->aer_queue)) {
6268 NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
6269 QTAILQ_REMOVE(&n->aer_queue, event, entry);
6270 g_free(event);
6271 }
6272
6273 if (n->params.sriov_max_vfs) {
6274 if (!pci_is_vf(pci_dev)) {
6275 for (i = 0; i < n->sec_ctrl_list.numcntl; i++) {
6276 sctrl = &n->sec_ctrl_list.sec[i];
6277 nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
6278 }
6279
6280 if (rst != NVME_RESET_CONTROLLER) {
6281 pcie_sriov_pf_disable_vfs(pci_dev);
6282 }
6283 }
6284
6285 if (rst != NVME_RESET_CONTROLLER) {
6286 nvme_activate_virt_res(n);
6287 }
6288 }
6289
6290 n->aer_queued = 0;
6291 n->aer_mask = 0;
6292 n->outstanding_aers = 0;
6293 n->qs_created = false;
6294
6295 nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
6296
6297 if (pci_is_vf(pci_dev)) {
6298 sctrl = nvme_sctrl(n);
6299
6300 stl_le_p(&n->bar.csts, sctrl->scs ? 0 : NVME_CSTS_FAILED);
6301 } else {
6302 stl_le_p(&n->bar.csts, 0);
6303 }
6304
6305 stl_le_p(&n->bar.intms, 0);
6306 stl_le_p(&n->bar.intmc, 0);
6307 stl_le_p(&n->bar.cc, 0);
6308
6309 n->dbbuf_dbs = 0;
6310 n->dbbuf_eis = 0;
6311 n->dbbuf_enabled = false;
6312}
6313
6314static void nvme_ctrl_shutdown(NvmeCtrl *n)
6315{
6316 NvmeNamespace *ns;
6317 int i;
6318
6319 if (n->pmr.dev) {
6320 memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
6321 }
6322
6323 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6324 ns = nvme_ns(n, i);
6325 if (!ns) {
6326 continue;
6327 }
6328
6329 nvme_ns_shutdown(ns);
6330 }
6331}
6332
6333static void nvme_select_iocs(NvmeCtrl *n)
6334{
6335 NvmeNamespace *ns;
6336 int i;
6337
6338 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6339 ns = nvme_ns(n, i);
6340 if (!ns) {
6341 continue;
6342 }
6343
6344 nvme_select_iocs_ns(n, ns);
6345 }
6346}
6347
6348static int nvme_start_ctrl(NvmeCtrl *n)
6349{
6350 uint64_t cap = ldq_le_p(&n->bar.cap);
6351 uint32_t cc = ldl_le_p(&n->bar.cc);
6352 uint32_t aqa = ldl_le_p(&n->bar.aqa);
6353 uint64_t asq = ldq_le_p(&n->bar.asq);
6354 uint64_t acq = ldq_le_p(&n->bar.acq);
6355 uint32_t page_bits = NVME_CC_MPS(cc) + 12;
6356 uint32_t page_size = 1 << page_bits;
6357 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
6358
6359 if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
6360 trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi),
6361 le16_to_cpu(sctrl->nvq),
6362 sctrl->scs ? "ONLINE" :
6363 "OFFLINE");
6364 return -1;
6365 }
6366 if (unlikely(n->cq[0])) {
6367 trace_pci_nvme_err_startfail_cq();
6368 return -1;
6369 }
6370 if (unlikely(n->sq[0])) {
6371 trace_pci_nvme_err_startfail_sq();
6372 return -1;
6373 }
6374 if (unlikely(asq & (page_size - 1))) {
6375 trace_pci_nvme_err_startfail_asq_misaligned(asq);
6376 return -1;
6377 }
6378 if (unlikely(acq & (page_size - 1))) {
6379 trace_pci_nvme_err_startfail_acq_misaligned(acq);
6380 return -1;
6381 }
6382 if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) {
6383 trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc));
6384 return -1;
6385 }
6386 if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) {
6387 trace_pci_nvme_err_startfail_page_too_small(
6388 NVME_CC_MPS(cc),
6389 NVME_CAP_MPSMIN(cap));
6390 return -1;
6391 }
6392 if (unlikely(NVME_CC_MPS(cc) >
6393 NVME_CAP_MPSMAX(cap))) {
6394 trace_pci_nvme_err_startfail_page_too_large(
6395 NVME_CC_MPS(cc),
6396 NVME_CAP_MPSMAX(cap));
6397 return -1;
6398 }
6399 if (unlikely(NVME_CC_IOCQES(cc) <
6400 NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
6401 trace_pci_nvme_err_startfail_cqent_too_small(
6402 NVME_CC_IOCQES(cc),
            NVME_CTRL_CQES_MIN(n->id_ctrl.cqes));
6404 return -1;
6405 }
6406 if (unlikely(NVME_CC_IOCQES(cc) >
6407 NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
6408 trace_pci_nvme_err_startfail_cqent_too_large(
6409 NVME_CC_IOCQES(cc),
            NVME_CTRL_CQES_MAX(n->id_ctrl.cqes));
6411 return -1;
6412 }
6413 if (unlikely(NVME_CC_IOSQES(cc) <
6414 NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
6415 trace_pci_nvme_err_startfail_sqent_too_small(
6416 NVME_CC_IOSQES(cc),
            NVME_CTRL_SQES_MIN(n->id_ctrl.sqes));
6418 return -1;
6419 }
6420 if (unlikely(NVME_CC_IOSQES(cc) >
6421 NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
6422 trace_pci_nvme_err_startfail_sqent_too_large(
6423 NVME_CC_IOSQES(cc),
            NVME_CTRL_SQES_MAX(n->id_ctrl.sqes));
6425 return -1;
6426 }
6427 if (unlikely(!NVME_AQA_ASQS(aqa))) {
6428 trace_pci_nvme_err_startfail_asqent_sz_zero();
6429 return -1;
6430 }
6431 if (unlikely(!NVME_AQA_ACQS(aqa))) {
6432 trace_pci_nvme_err_startfail_acqent_sz_zero();
6433 return -1;
6434 }
6435
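    /*
     * CC.MPS is a power-of-two exponent relative to 4 KiB: MPS = 0 selects
     * 4 KiB pages and MPS = 4 selects 64 KiB pages. With 4 KiB pages, a
     * PRP list page holds 4096 / 8 = 512 64-bit entries.
     */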
6436 n->page_bits = page_bits;
6437 n->page_size = page_size;
6438 n->max_prp_ents = n->page_size / sizeof(uint64_t);
6439 n->cqe_size = 1 << NVME_CC_IOCQES(cc);
6440 n->sqe_size = 1 << NVME_CC_IOSQES(cc);
6441 nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
6442 nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
6443
6444 nvme_set_timestamp(n, 0ULL);
6445
6446 nvme_select_iocs(n);
6447
6448 return 0;
6449}
6450
6451static void nvme_cmb_enable_regs(NvmeCtrl *n)
6452{
6453 uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc);
6454 uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz);
6455
6456 NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
6457 NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
6458 NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
6459 stl_le_p(&n->bar.cmbloc, cmbloc);
6460
6461 NVME_CMBSZ_SET_SQS(cmbsz, 1);
6462 NVME_CMBSZ_SET_CQS(cmbsz, 0);
6463 NVME_CMBSZ_SET_LISTS(cmbsz, 1);
6464 NVME_CMBSZ_SET_RDS(cmbsz, 1);
6465 NVME_CMBSZ_SET_WDS(cmbsz, 1);
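    /* SZU = 2 selects 1 MiB units, so SZ is the CMB size in MiB */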
6466 NVME_CMBSZ_SET_SZU(cmbsz, 2);
6467 NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
6468 stl_le_p(&n->bar.cmbsz, cmbsz);
6469}
6470
6471static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
6472 unsigned size)
6473{
6474 uint64_t cap = ldq_le_p(&n->bar.cap);
6475 uint32_t cc = ldl_le_p(&n->bar.cc);
6476 uint32_t intms = ldl_le_p(&n->bar.intms);
6477 uint32_t csts = ldl_le_p(&n->bar.csts);
6478 uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts);
6479
6480 if (unlikely(offset & (sizeof(uint32_t) - 1))) {
6481 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
6482 "MMIO write not 32-bit aligned,"
6483 " offset=0x%"PRIx64"", offset);
        /* should be ignored, fall through for now */
6485 }
6486
6487 if (unlikely(size < sizeof(uint32_t))) {
6488 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
6489 "MMIO write smaller than 32-bits,"
6490 " offset=0x%"PRIx64", size=%u",
6491 offset, size);
        /* should be ignored, fall through for now */
6493 }
6494
6495 switch (offset) {
6496 case NVME_REG_INTMS:
6497 if (unlikely(msix_enabled(&(n->parent_obj)))) {
6498 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
6499 "undefined access to interrupt mask set"
6500 " when MSI-X is enabled");
            /* should be ignored, fall through for now */
6502 }
6503 intms |= data;
6504 stl_le_p(&n->bar.intms, intms);
6505 n->bar.intmc = n->bar.intms;
6506 trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
6507 nvme_irq_check(n);
6508 break;
6509 case NVME_REG_INTMC:
6510 if (unlikely(msix_enabled(&(n->parent_obj)))) {
6511 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
6512 "undefined access to interrupt mask clr"
6513 " when MSI-X is enabled");
            /* should be ignored, fall through for now */
6515 }
6516 intms &= ~data;
6517 stl_le_p(&n->bar.intms, intms);
6518 n->bar.intmc = n->bar.intms;
6519 trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
6520 nvme_irq_check(n);
6521 break;
6522 case NVME_REG_CC:
6523 stl_le_p(&n->bar.cc, data);
6524
6525 trace_pci_nvme_mmio_cfg(data & 0xffffffff);
6526
6527 if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
6528 trace_pci_nvme_mmio_shutdown_set();
6529 nvme_ctrl_shutdown(n);
6530 csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
6531 csts |= NVME_CSTS_SHST_COMPLETE;
6532 } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
6533 trace_pci_nvme_mmio_shutdown_cleared();
6534 csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
6535 }
6536
6537 if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
6538 if (unlikely(nvme_start_ctrl(n))) {
6539 trace_pci_nvme_err_startfail();
6540 csts = NVME_CSTS_FAILED;
6541 } else {
6542 trace_pci_nvme_mmio_start_success();
6543 csts = NVME_CSTS_READY;
6544 }
6545 } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
6546 trace_pci_nvme_mmio_stopped();
6547 nvme_ctrl_reset(n, NVME_RESET_CONTROLLER);
6548
6549 break;
6550 }
6551
6552 stl_le_p(&n->bar.csts, csts);
6553
6554 break;
6555 case NVME_REG_CSTS:
6556 if (data & (1 << 4)) {
6557 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
6558 "attempted to W1C CSTS.NSSRO"
6559 " but CAP.NSSRS is zero (not supported)");
6560 } else if (data != 0) {
6561 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts,
6562 "attempted to set a read only bit"
6563 " of controller status");
6564 }
6565 break;
6566 case NVME_REG_NSSR:
6567 if (data == 0x4e564d65) {
6568 trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
6569 } else {
            /* The spec says that writes of other values have no effect */
6571 return;
6572 }
6573 break;
6574 case NVME_REG_AQA:
6575 stl_le_p(&n->bar.aqa, data);
6576 trace_pci_nvme_mmio_aqattr(data & 0xffffffff);
6577 break;
6578 case NVME_REG_ASQ:
6579 stn_le_p(&n->bar.asq, size, data);
6580 trace_pci_nvme_mmio_asqaddr(data);
6581 break;
6582 case NVME_REG_ASQ + 4:
6583 stl_le_p((uint8_t *)&n->bar.asq + 4, data);
6584 trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq));
6585 break;
6586 case NVME_REG_ACQ:
6587 trace_pci_nvme_mmio_acqaddr(data);
6588 stn_le_p(&n->bar.acq, size, data);
6589 break;
6590 case NVME_REG_ACQ + 4:
6591 stl_le_p((uint8_t *)&n->bar.acq + 4, data);
6592 trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq));
6593 break;
6594 case NVME_REG_CMBLOC:
6595 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved,
6596 "invalid write to reserved CMBLOC"
6597 " when CMBSZ is zero, ignored");
6598 return;
6599 case NVME_REG_CMBSZ:
6600 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
6601 "invalid write to read only CMBSZ, ignored");
6602 return;
6603 case NVME_REG_CMBMSC:
6604 if (!NVME_CAP_CMBS(cap)) {
6605 return;
6606 }
6607
6608 stn_le_p(&n->bar.cmbmsc, size, data);
6609 n->cmb.cmse = false;
6610
6611 if (NVME_CMBMSC_CRE(data)) {
6612 nvme_cmb_enable_regs(n);
6613
6614 if (NVME_CMBMSC_CMSE(data)) {
6615 uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc);
6616 hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT;
6617 if (cba + int128_get64(n->cmb.mem.size) < cba) {
6618 uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts);
6619 NVME_CMBSTS_SET_CBAI(cmbsts, 1);
6620 stl_le_p(&n->bar.cmbsts, cmbsts);
6621 return;
6622 }
6623
6624 n->cmb.cba = cba;
6625 n->cmb.cmse = true;
6626 }
6627 } else {
6628 n->bar.cmbsz = 0;
6629 n->bar.cmbloc = 0;
6630 }
6631
6632 return;
6633 case NVME_REG_CMBMSC + 4:
6634 stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data);
6635 return;
6636
6637 case NVME_REG_PMRCAP:
6638 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly,
6639 "invalid write to PMRCAP register, ignored");
6640 return;
6641 case NVME_REG_PMRCTL:
6642 if (!NVME_CAP_PMRS(cap)) {
6643 return;
6644 }
6645
6646 stl_le_p(&n->bar.pmrctl, data);
6647 if (NVME_PMRCTL_EN(data)) {
6648 memory_region_set_enabled(&n->pmr.dev->mr, true);
6649 pmrsts = 0;
6650 } else {
6651 memory_region_set_enabled(&n->pmr.dev->mr, false);
6652 NVME_PMRSTS_SET_NRDY(pmrsts, 1);
6653 n->pmr.cmse = false;
6654 }
6655 stl_le_p(&n->bar.pmrsts, pmrsts);
6656 return;
6657 case NVME_REG_PMRSTS:
6658 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly,
6659 "invalid write to PMRSTS register, ignored");
6660 return;
6661 case NVME_REG_PMREBS:
6662 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly,
6663 "invalid write to PMREBS register, ignored");
6664 return;
6665 case NVME_REG_PMRSWTP:
6666 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly,
6667 "invalid write to PMRSWTP register, ignored");
6668 return;
6669 case NVME_REG_PMRMSCL:
6670 if (!NVME_CAP_PMRS(cap)) {
6671 return;
6672 }
6673
6674 stl_le_p(&n->bar.pmrmscl, data);
6675 n->pmr.cmse = false;
6676
6677 if (NVME_PMRMSCL_CMSE(data)) {
6678 uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu);
6679 hwaddr cba = pmrmscu << 32 |
6680 (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT);
6681 if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
6682 NVME_PMRSTS_SET_CBAI(pmrsts, 1);
6683 stl_le_p(&n->bar.pmrsts, pmrsts);
6684 return;
6685 }
6686
6687 n->pmr.cmse = true;
6688 n->pmr.cba = cba;
6689 }
6690
6691 return;
6692 case NVME_REG_PMRMSCU:
6693 if (!NVME_CAP_PMRS(cap)) {
6694 return;
6695 }
6696
6697 stl_le_p(&n->bar.pmrmscu, data);
6698 return;
6699 default:
6700 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
6701 "invalid MMIO write,"
6702 " offset=0x%"PRIx64", data=%"PRIx64"",
6703 offset, data);
6704 break;
6705 }
6706}
6707
6708static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
6709{
6710 NvmeCtrl *n = (NvmeCtrl *)opaque;
6711 uint8_t *ptr = (uint8_t *)&n->bar;
6712
6713 trace_pci_nvme_mmio_read(addr, size);
6714
6715 if (unlikely(addr & (sizeof(uint32_t) - 1))) {
6716 NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32,
6717 "MMIO read not 32-bit aligned,"
6718 " offset=0x%"PRIx64"", addr);
6719
6720 } else if (unlikely(size < sizeof(uint32_t))) {
6721 NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall,
6722 "MMIO read smaller than 32-bits,"
6723 " offset=0x%"PRIx64"", addr);
6724
6725 }
6726
6727 if (addr > sizeof(n->bar) - size) {
6728 NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs,
6729 "MMIO read beyond last register,"
6730 " offset=0x%"PRIx64", returning 0", addr);
6731
6732 return 0;
6733 }
6734
6735 if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
6736 addr != NVME_REG_CSTS) {
6737 trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
6738 return 0;
6739 }
6740
    /*
     * When PMRWBM bit 1 is set, a read of PMRSTS must ensure that prior
     * writes to the persistent memory region have reached persistent
     * media, hence the msync below.
     */
6746 if (addr == NVME_REG_PMRSTS &&
6747 (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) {
6748 memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
6749 }
6750
6751 return ldn_le_p(ptr + addr, size);
6752}
6753
6754static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
6755{
6756 uint32_t qid;
6757
6758 if (unlikely(addr & ((1 << 2) - 1))) {
6759 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
6760 "doorbell write not 32-bit aligned,"
6761 " offset=0x%"PRIx64", ignoring", addr);
6762 return;
6763 }
6764
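    /*
     * Doorbell layout implied by the arithmetic below: with the 4-byte
     * doorbell stride, SQ y's tail doorbell lives at 0x1000 + (2 * y) * 4
     * and CQ y's head doorbell at 0x1000 + (2 * y + 1) * 4. Bit 2 of the
     * offset thus selects CQ vs SQ and ">> 3" recovers the queue id; e.g.
     * a write at offset 0x100c targets CQ 1's head pointer.
     */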
6765 if (((addr - 0x1000) >> 2) & 1) {
        /* Completion queue doorbell write */

6768 uint16_t new_head = val & 0xffff;
6769 int start_sqs;
6770 NvmeCQueue *cq;
6771
6772 qid = (addr - (0x1000 + (1 << 2))) >> 3;
6773 if (unlikely(nvme_check_cqid(n, qid))) {
6774 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq,
6775 "completion queue doorbell write"
6776 " for nonexistent queue,"
6777 " sqid=%"PRIu32", ignoring", qid);
6778
            /*
             * NVM Express v1.3d, Section 4.1 states: "If host software
             * writes an invalid value to the Submission Queue Tail Doorbell
             * or Completion Queue Head Doorbell register and an Asynchronous
             * Event Request command is outstanding, then an asynchronous
             * event is posted to the Admin Completion Queue with a status
             * code of Invalid Doorbell Write Value."
             *
             * The event is therefore only enqueued below when at least one
             * Asynchronous Event Request is outstanding; otherwise the
             * invalid doorbell write is silently ignored.
             */

6792 if (n->outstanding_aers) {
6793 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6794 NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
6795 NVME_LOG_ERROR_INFO);
6796 }
6797
6798 return;
6799 }
6800
6801 cq = n->cq[qid];
6802 if (unlikely(new_head >= cq->size)) {
6803 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead,
6804 "completion queue doorbell write value"
6805 " beyond queue size, sqid=%"PRIu32","
6806 " new_head=%"PRIu16", ignoring",
6807 qid, new_head);
6808
6809 if (n->outstanding_aers) {
6810 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6811 NVME_AER_INFO_ERR_INVALID_DB_VALUE,
6812 NVME_LOG_ERROR_INFO);
6813 }
6814
6815 return;
6816 }
6817
6818 trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head);
6819
6820 start_sqs = nvme_cq_full(cq) ? 1 : 0;
6821 cq->head = new_head;
6822 if (!qid && n->dbbuf_enabled) {
6823 pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
6824 sizeof(cq->head));
6825 }
6826 if (start_sqs) {
6827 NvmeSQueue *sq;
6828 QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
6829 qemu_bh_schedule(sq->bh);
6830 }
6831 qemu_bh_schedule(cq->bh);
6832 }
6833
6834 if (cq->tail == cq->head) {
6835 if (cq->irq_enabled) {
6836 n->cq_pending--;
6837 }
6838
6839 nvme_irq_deassert(n, cq);
6840 }
6841 } else {
        /* Submission queue doorbell write */

6844 uint16_t new_tail = val & 0xffff;
6845 NvmeSQueue *sq;
6846
6847 qid = (addr - 0x1000) >> 3;
6848 if (unlikely(nvme_check_sqid(n, qid))) {
6849 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq,
6850 "submission queue doorbell write"
6851 " for nonexistent queue,"
6852 " sqid=%"PRIu32", ignoring", qid);
6853
6854 if (n->outstanding_aers) {
6855 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6856 NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
6857 NVME_LOG_ERROR_INFO);
6858 }
6859
6860 return;
6861 }
6862
6863 sq = n->sq[qid];
6864 if (unlikely(new_tail >= sq->size)) {
6865 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail,
6866 "submission queue doorbell write value"
6867 " beyond queue size, sqid=%"PRIu32","
6868 " new_tail=%"PRIu16", ignoring",
6869 qid, new_tail);
6870
6871 if (n->outstanding_aers) {
6872 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6873 NVME_AER_INFO_ERR_INVALID_DB_VALUE,
6874 NVME_LOG_ERROR_INFO);
6875 }
6876
6877 return;
6878 }
6879
6880 trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);
6881
6882 sq->tail = new_tail;
6883 if (!qid && n->dbbuf_enabled) {
            /*
             * The spec states "the host shall also update the controller's
             * corresponding doorbell property to match the value of that
             * entry in the Shadow Doorbell buffer."
             *
             * Since this context is currently a VM trap, we can safely
             * enforce the requirement from the device side in case the host
             * is misbehaving.
             *
             * Note that various drivers, including ones that run on Linux,
             * do not keep the admin queue's shadow doorbell up to date, so
             * the admin tail cannot be read back from it reliably.
             */
6897 pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
6898 sizeof(sq->tail));
6899 }
6900
6901 qemu_bh_schedule(sq->bh);
6902 }
6903}
6904
6905static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
6906 unsigned size)
6907{
6908 NvmeCtrl *n = (NvmeCtrl *)opaque;
6909
6910 trace_pci_nvme_mmio_write(addr, data, size);
6911
6912 if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
6913 addr != NVME_REG_CSTS) {
6914 trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
6915 return;
6916 }
6917
6918 if (addr < sizeof(n->bar)) {
6919 nvme_write_bar(n, addr, data, size);
6920 } else {
6921 nvme_process_db(n, addr, data);
6922 }
6923}
6924
6925static const MemoryRegionOps nvme_mmio_ops = {
6926 .read = nvme_mmio_read,
6927 .write = nvme_mmio_write,
6928 .endianness = DEVICE_LITTLE_ENDIAN,
6929 .impl = {
6930 .min_access_size = 2,
6931 .max_access_size = 8,
6932 },
6933};
6934
6935static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
6936 unsigned size)
6937{
6938 NvmeCtrl *n = (NvmeCtrl *)opaque;
6939 stn_le_p(&n->cmb.buf[addr], size, data);
6940}
6941
6942static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size)
6943{
6944 NvmeCtrl *n = (NvmeCtrl *)opaque;
6945 return ldn_le_p(&n->cmb.buf[addr], size);
6946}
6947
6948static const MemoryRegionOps nvme_cmb_ops = {
6949 .read = nvme_cmb_read,
6950 .write = nvme_cmb_write,
6951 .endianness = DEVICE_LITTLE_ENDIAN,
6952 .impl = {
6953 .min_access_size = 1,
6954 .max_access_size = 8,
6955 },
6956};
6957
6958static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
6959{
6960 NvmeParams *params = &n->params;
6961
6962 if (params->num_queues) {
6963 warn_report("num_queues is deprecated; please use max_ioqpairs "
6964 "instead");
6965
6966 params->max_ioqpairs = params->num_queues - 1;
6967 }
6968
6969 if (n->namespace.blkconf.blk && n->subsys) {
6970 error_setg(errp, "subsystem support is unavailable with legacy "
6971 "namespace ('drive' property)");
6972 return;
6973 }
6974
6975 if (params->max_ioqpairs < 1 ||
6976 params->max_ioqpairs > NVME_MAX_IOQPAIRS) {
6977 error_setg(errp, "max_ioqpairs must be between 1 and %d",
6978 NVME_MAX_IOQPAIRS);
6979 return;
6980 }
6981
6982 if (params->msix_qsize < 1 ||
6983 params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) {
6984 error_setg(errp, "msix_qsize must be between 1 and %d",
6985 PCI_MSIX_FLAGS_QSIZE + 1);
6986 return;
6987 }
6988
6989 if (!params->serial) {
6990 error_setg(errp, "serial property not set");
6991 return;
6992 }
6993
6994 if (n->pmr.dev) {
6995 if (host_memory_backend_is_mapped(n->pmr.dev)) {
6996 error_setg(errp, "can't use already busy memdev: %s",
6997 object_get_canonical_path_component(OBJECT(n->pmr.dev)));
6998 return;
6999 }
7000
7001 if (!is_power_of_2(n->pmr.dev->size)) {
            error_setg(errp, "pmr backend size needs to be a power of 2");
7003 return;
7004 }
7005
7006 host_memory_backend_set_mapped(n->pmr.dev, true);
7007 }
7008
7009 if (n->params.zasl > n->params.mdts) {
7010 error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less "
7011 "than or equal to mdts (Maximum Data Transfer Size)");
7012 return;
7013 }
7014
7015 if (!n->params.vsl) {
7016 error_setg(errp, "vsl must be non-zero");
7017 return;
7018 }
7019
7020 if (params->sriov_max_vfs) {
7021 if (!n->subsys) {
7022 error_setg(errp, "subsystem is required for the use of SR-IOV");
7023 return;
7024 }
7025
7026 if (params->sriov_max_vfs > NVME_MAX_VFS) {
7027 error_setg(errp, "sriov_max_vfs must be between 0 and %d",
7028 NVME_MAX_VFS);
7029 return;
7030 }
7031
7032 if (params->cmb_size_mb) {
7033 error_setg(errp, "CMB is not supported with SR-IOV");
7034 return;
7035 }
7036
7037 if (n->pmr.dev) {
7038 error_setg(errp, "PMR is not supported with SR-IOV");
7039 return;
7040 }
7041
7042 if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
7043 error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
7044 " must be set for the use of SR-IOV");
7045 return;
7046 }
7047
7048 if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
7049 error_setg(errp, "sriov_vq_flexible must be greater than or equal"
7050 " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
7051 return;
7052 }
7053
7054 if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
7055 error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
7056 " greater than or equal to 2");
7057 return;
7058 }
7059
7060 if (params->sriov_vi_flexible < params->sriov_max_vfs) {
7061 error_setg(errp, "sriov_vi_flexible must be greater than or equal"
7062 " to %d (sriov_max_vfs)", params->sriov_max_vfs);
7063 return;
7064 }
7065
7066 if (params->msix_qsize < params->sriov_vi_flexible + 1) {
7067 error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
7068 " greater than or equal to 1");
7069 return;
7070 }
7071
7072 if (params->sriov_max_vi_per_vf &&
7073 (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
7074 error_setg(errp, "sriov_max_vi_per_vf must meet:"
7075 " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
7076 " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
7077 return;
7078 }
7079
7080 if (params->sriov_max_vq_per_vf &&
7081 (params->sriov_max_vq_per_vf < 2 ||
7082 (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
7083 error_setg(errp, "sriov_max_vq_per_vf must meet:"
7084 " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
7085 " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
7086 return;
7087 }
7088 }
7089}
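/*
 * Example of a parameter set that satisfies the SR-IOV constraints checked
 * above (a hypothetical command line, not the only valid one): with
 * sriov_max_vfs=2, sriov_vq_flexible must be at least 4, max_ioqpairs at
 * least sriov_vq_flexible + 2, sriov_vi_flexible at least 2 and msix_qsize
 * at least sriov_vi_flexible + 1, e.g.:
 *
 *   -device nvme,serial=deadbeef,subsys=nvme-subsys0,sriov_max_vfs=2, \
 *       sriov_vq_flexible=4,sriov_vi_flexible=2,max_ioqpairs=6,msix_qsize=3
 */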
7090
7091static void nvme_init_state(NvmeCtrl *n)
7092{
7093 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
7094 NvmeSecCtrlList *list = &n->sec_ctrl_list;
7095 NvmeSecCtrlEntry *sctrl;
7096 uint8_t max_vfs;
7097 int i;
7098
7099 if (pci_is_vf(&n->parent_obj)) {
7100 sctrl = nvme_sctrl(n);
7101 max_vfs = 0;
7102 n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
7103 n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
7104 } else {
7105 max_vfs = n->params.sriov_max_vfs;
7106 n->conf_ioqpairs = n->params.max_ioqpairs;
7107 n->conf_msix_qsize = n->params.msix_qsize;
7108 }
7109
7110 n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
7111 n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
7112 n->temperature = NVME_TEMPERATURE;
7113 n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
7114 n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
7115 n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
7116 QTAILQ_INIT(&n->aer_queue);
7117
7118 list->numcntl = cpu_to_le16(max_vfs);
7119 for (i = 0; i < max_vfs; i++) {
7120 sctrl = &list->sec[i];
7121 sctrl->pcid = cpu_to_le16(n->cntlid);
7122 sctrl->vfn = cpu_to_le16(i + 1);
7123 }
7124
7125 cap->cntlid = cpu_to_le16(n->cntlid);
7126 cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
7127
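    /*
     * Sketch of the split computed below (hypothetical numbers): for a PF
     * with max_ioqpairs=6 and sriov_vq_flexible=4, VQPRT (private queues,
     * including the admin queue) is 1 + 6 - 4 = 3 and VQFRT (flexible
     * resources assignable to VFs) is 4; with two VFs and no
     * sriov_max_vq_per_vf, VQFRSM defaults to 4 / 2 = 2 queues per VF.
     */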
7128 if (pci_is_vf(&n->parent_obj)) {
7129 cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
7130 } else {
7131 cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
7132 n->params.sriov_vq_flexible);
7133 cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
7134 cap->vqrfap = cap->vqfrt;
7135 cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
7136 cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
7137 cpu_to_le16(n->params.sriov_max_vq_per_vf) :
7138 cap->vqfrt / MAX(max_vfs, 1);
7139 }
7140
7141 if (pci_is_vf(&n->parent_obj)) {
7142 cap->viprt = cpu_to_le16(n->conf_msix_qsize);
7143 } else {
7144 cap->viprt = cpu_to_le16(n->params.msix_qsize -
7145 n->params.sriov_vi_flexible);
7146 cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
7147 cap->virfap = cap->vifrt;
7148 cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
7149 cap->vifrsm = n->params.sriov_max_vi_per_vf ?
7150 cpu_to_le16(n->params.sriov_max_vi_per_vf) :
7151 cap->vifrt / MAX(max_vfs, 1);
7152 }
7153}
7154
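/*
 * A CMB is allocated when cmb_size_mb is nonzero; e.g. (hypothetical
 * command line) "-device nvme,serial=deadbeef,cmb_size_mb=64" exposes a
 * 64 MiB controller memory buffer in the BAR selected by NVME_CMB_BIR.
 */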
7155static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
7156{
7157 uint64_t cmb_size = n->params.cmb_size_mb * MiB;
7158 uint64_t cap = ldq_le_p(&n->bar.cap);
7159
7160 n->cmb.buf = g_malloc0(cmb_size);
7161 memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n,
7162 "nvme-cmb", cmb_size);
7163 pci_register_bar(pci_dev, NVME_CMB_BIR,
7164 PCI_BASE_ADDRESS_SPACE_MEMORY |
7165 PCI_BASE_ADDRESS_MEM_TYPE_64 |
7166 PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem);
7167
7168 NVME_CAP_SET_CMBS(cap, 1);
7169 stq_le_p(&n->bar.cap, cap);
7170
7171 if (n->params.legacy_cmb) {
7172 nvme_cmb_enable_regs(n);
7173 n->cmb.cmse = true;
7174 }
7175}
7176
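/*
 * The PMR is backed by the memory backend given via the pmrdev link
 * property; nvme_check_constraints() requires its size to be a power of
 * two. A hypothetical invocation:
 *
 *   -object memory-backend-file,id=pmr0,share=on,mem-path=pmr.img,size=16M \
 *   -device nvme,serial=deadbeef,pmrdev=pmr0
 */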
7177static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
7178{
7179 uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap);
7180
7181 NVME_PMRCAP_SET_RDS(pmrcap, 1);
7182 NVME_PMRCAP_SET_WDS(pmrcap, 1);
7183 NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR);
7184
7185 NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02);
7186 NVME_PMRCAP_SET_CMSS(pmrcap, 1);
7187 stl_le_p(&n->bar.pmrcap, pmrcap);
7188
7189 pci_register_bar(pci_dev, NVME_PMR_BIR,
7190 PCI_BASE_ADDRESS_SPACE_MEMORY |
7191 PCI_BASE_ADDRESS_MEM_TYPE_64 |
7192 PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr);
7193
7194 memory_region_set_enabled(&n->pmr.dev->mr, false);
7195}
7196
7197static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
7198 unsigned *msix_table_offset,
7199 unsigned *msix_pba_offset)
7200{
7201 uint64_t bar_size, msix_table_size, msix_pba_size;
7202
7203 bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
7204 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
7205
7206 if (msix_table_offset) {
7207 *msix_table_offset = bar_size;
7208 }
7209
7210 msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs;
7211 bar_size += msix_table_size;
7212 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
7213
7214 if (msix_pba_offset) {
7215 *msix_pba_offset = bar_size;
7216 }
7217
7218 msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
7219 bar_size += msix_pba_size;
7220
7221 bar_size = pow2ceil(bar_size);
7222 return bar_size;
7223}
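/*
 * Worked example for nvme_bar_size() above, assuming the register page
 * (sizeof(NvmeBar)) is 4 KiB: with the default max_ioqpairs=64 and
 * msix_qsize=65, total_queues is 65, so registers plus doorbells take
 * 4096 + 2 * 65 * NVME_DB_SIZE = 4616 bytes, rounded up to 8 KiB; the
 * MSI-X table (65 * PCI_MSIX_ENTRY_SIZE = 1040 bytes) pushes this to
 * 12 KiB, the PBA adds 16 bytes, and pow2ceil() yields a 16 KiB BAR.
 */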
7224
7225static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
7226{
7227 uint16_t vf_dev_id = n->params.use_intel_id ?
7228 PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
7229 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
7230 uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
7231 le16_to_cpu(cap->vifrsm),
7232 NULL, NULL);
7233
7234 pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
7235 n->params.sriov_max_vfs, n->params.sriov_max_vfs,
7236 NVME_VF_OFFSET, NVME_VF_STRIDE);
7237
7238 pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
7239 PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
7240}
7241
7242static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
7243{
7244 Error *err = NULL;
7245 int ret;
7246
7247 ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
7248 PCI_PM_SIZEOF, &err);
7249 if (err) {
7250 error_report_err(err);
7251 return ret;
7252 }
7253
7254 pci_set_word(pci_dev->config + offset + PCI_PM_PMC,
7255 PCI_PM_CAP_VER_1_2);
7256 pci_set_word(pci_dev->config + offset + PCI_PM_CTRL,
7257 PCI_PM_CTRL_NO_SOFT_RESET);
7258 pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL,
7259 PCI_PM_CTRL_STATE_MASK);
7260
7261 return 0;
7262}
7263
7264static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
7265{
7266 uint8_t *pci_conf = pci_dev->config;
7267 uint64_t bar_size;
7268 unsigned msix_table_offset, msix_pba_offset;
7269 int ret;
7270
7271 Error *err = NULL;
7272
7273 pci_conf[PCI_INTERRUPT_PIN] = 1;
7274 pci_config_set_prog_interface(pci_conf, 0x2);
7275
7276 if (n->params.use_intel_id) {
7277 pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
7278 pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME);
7279 } else {
7280 pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
7281 pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
7282 }
7283
7284 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
7285 nvme_add_pm_capability(pci_dev, 0x60);
7286 pcie_endpoint_cap_init(pci_dev, 0x80);
7287 pcie_cap_flr_init(pci_dev);
7288 if (n->params.sriov_max_vfs) {
7289 pcie_ari_init(pci_dev, 0x100, 1);
7290 }
7291
    /* add one to max_ioqpairs to account for the admin queue pair */
7293 bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
7294 &msix_table_offset, &msix_pba_offset);
7295
7296 memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
7297 memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
7298 msix_table_offset);
7299 memory_region_add_subregion(&n->bar0, 0, &n->iomem);
7300
7301 if (pci_is_vf(pci_dev)) {
7302 pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
7303 } else {
7304 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
7305 PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
7306 }
7307 ret = msix_init(pci_dev, n->params.msix_qsize,
7308 &n->bar0, 0, msix_table_offset,
7309 &n->bar0, 0, msix_pba_offset, 0, &err);
7310 if (ret < 0) {
7311 if (ret == -ENOTSUP) {
7312 warn_report_err(err);
7313 } else {
7314 error_propagate(errp, err);
7315 return ret;
7316 }
7317 }
7318
7319 nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
7320
7321 if (n->params.cmb_size_mb) {
7322 nvme_init_cmb(n, pci_dev);
7323 }
7324
7325 if (n->pmr.dev) {
7326 nvme_init_pmr(n, pci_dev);
7327 }
7328
7329 if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
7330 nvme_init_sriov(n, pci_dev, 0x120);
7331 }
7332
7333 return 0;
7334}
7335
7336static void nvme_init_subnqn(NvmeCtrl *n)
7337{
7338 NvmeSubsystem *subsys = n->subsys;
7339 NvmeIdCtrl *id = &n->id_ctrl;
7340
7341 if (!subsys) {
7342 snprintf((char *)id->subnqn, sizeof(id->subnqn),
7343 "nqn.2019-08.org.qemu:%s", n->params.serial);
7344 } else {
        pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char *)subsys->subnqn);
7346 }
7347}
7348
7349static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
7350{
7351 NvmeIdCtrl *id = &n->id_ctrl;
7352 uint8_t *pci_conf = pci_dev->config;
7353 uint64_t cap = ldq_le_p(&n->bar.cap);
7354 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
7355
7356 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
7357 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
7358 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
7359 strpadcpy((char *)id->fr, sizeof(id->fr), QEMU_VERSION, ' ');
7360 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
7361
7362 id->cntlid = cpu_to_le16(n->cntlid);
7363
7364 id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
7365 id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
7366
7367 id->rab = 6;
7368
7369 if (n->params.use_intel_id) {
7370 id->ieee[0] = 0xb3;
7371 id->ieee[1] = 0x02;
7372 id->ieee[2] = 0x00;
7373 } else {
7374 id->ieee[0] = 0x00;
7375 id->ieee[1] = 0x54;
7376 id->ieee[2] = 0x52;
7377 }
7378
7379 id->mdts = n->params.mdts;
7380 id->ver = cpu_to_le32(NVME_SPEC_VER);
7381 id->oacs =
7382 cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF);
7383 id->cntrltype = 0x1;
7384
    /*
     * Because the controller always completes the Abort command
     * immediately, there can never be more than one concurrently executing
     * Abort command, so this value is never used for anything. Note that
     * there can easily be many Abort commands in the queues, but they are
     * not considered "executing" until processed by nvme_abort.
     *
     * The specification recommends an Abort Command Limit of 3 (four
     * concurrently executing commands), which is what is reported here.
     */

7396 id->acl = 3;
7397 id->aerl = n->params.aerl;
7398 id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
7399 id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED;
7400
    /* recommended default value (~70 C) */
7402 id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
7403 id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
7404
7405 id->sqes = (0x6 << 4) | 0x6;
7406 id->cqes = (0x4 << 4) | 0x4;
7407 id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
7408 id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
7409 NVME_ONCS_FEATURES | NVME_ONCS_DSM |
7410 NVME_ONCS_COMPARE | NVME_ONCS_COPY);
7411
    /*
     * NOTE: If this device ever supports a command set that does NOT use
     * 0x0 as a Flush-equivalent operation, support for the broadcast NSID
     * in Flush should probably be re-evaluated.
     *
     * NVME_VWC_NSID_BROADCAST_SUPPORT advertises Flush with the broadcast
     * NSID (0xffffffff); NVME_VWC_PRESENT reports a volatile write cache.
     */
7419 id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
7420
7421 id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
7422 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
7423
7424 nvme_init_subnqn(n);
7425
7426 id->psd[0].mp = cpu_to_le16(0x9c4);
7427 id->psd[0].enlat = cpu_to_le32(0x10);
7428 id->psd[0].exlat = cpu_to_le32(0x4);
7429
7430 if (n->subsys) {
7431 id->cmic |= NVME_CMIC_MULTI_CTRL;
7432 }
7433
7434 NVME_CAP_SET_MQES(cap, 0x7ff);
7435 NVME_CAP_SET_CQR(cap, 1);
7436 NVME_CAP_SET_TO(cap, 0xf);
7437 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
7438 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP);
7439 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY);
7440 NVME_CAP_SET_MPSMAX(cap, 4);
7441 NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
7442 NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
7443 stq_le_p(&n->bar.cap, cap);
7444
7445 stl_le_p(&n->bar.vs, NVME_SPEC_VER);
7446 n->bar.intmc = n->bar.intms = 0;
7447
7448 if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
7449 stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
7450 }
7451}
7452
7453static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
7454{
7455 int cntlid;
7456
7457 if (!n->subsys) {
7458 return 0;
7459 }
7460
7461 cntlid = nvme_subsys_register_ctrl(n, errp);
7462 if (cntlid < 0) {
7463 return -1;
7464 }
7465
7466 n->cntlid = cntlid;
7467
7468 return 0;
7469}
7470
7471void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
7472{
7473 uint32_t nsid = ns->params.nsid;
7474 assert(nsid && nsid <= NVME_MAX_NAMESPACES);
7475
7476 n->namespaces[nsid] = ns;
7477 ns->attached++;
7478
7479 n->dmrsl = MIN_NON_ZERO(n->dmrsl,
7480 BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
7481}
7482
7483static void nvme_realize(PCIDevice *pci_dev, Error **errp)
7484{
7485 NvmeCtrl *n = NVME(pci_dev);
7486 NvmeNamespace *ns;
7487 Error *local_err = NULL;
7488 NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
7489
7490 if (pci_is_vf(pci_dev)) {
        /*
         * VFs derive settings from the parent. The PF's lifespan exceeds
         * that of the VFs, so it is safe to share params.serial.
         */
7495 memcpy(&n->params, &pn->params, sizeof(NvmeParams));
7496 n->subsys = pn->subsys;
7497 }
7498
7499 nvme_check_constraints(n, &local_err);
7500 if (local_err) {
7501 error_propagate(errp, local_err);
7502 return;
7503 }
7504
7505 qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS,
7506 &pci_dev->qdev, n->parent_obj.qdev.id);
7507
    if (nvme_init_subsys(n, errp)) {
        return;
    }
7512 nvme_init_state(n);
7513 if (nvme_init_pci(n, pci_dev, errp)) {
7514 return;
7515 }
7516 nvme_init_ctrl(n, pci_dev);
7517
    /* set up a namespace if the controller drive property was given */
7519 if (n->namespace.blkconf.blk) {
7520 ns = &n->namespace;
7521 ns->params.nsid = 1;
7522
7523 if (nvme_ns_setup(ns, errp)) {
7524 return;
7525 }
7526
7527 nvme_attach_ns(n, ns);
7528 }
7529}
7530
7531static void nvme_exit(PCIDevice *pci_dev)
7532{
7533 NvmeCtrl *n = NVME(pci_dev);
7534 NvmeNamespace *ns;
7535 int i;
7536
7537 nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
7538
7539 if (n->subsys) {
7540 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
7541 ns = nvme_ns(n, i);
7542 if (ns) {
7543 ns->attached--;
7544 }
7545 }
7546
7547 nvme_subsys_unregister_ctrl(n->subsys, n);
7548 }
7549
7550 g_free(n->cq);
7551 g_free(n->sq);
7552 g_free(n->aer_reqs);
7553
7554 if (n->params.cmb_size_mb) {
7555 g_free(n->cmb.buf);
7556 }
7557
7558 if (n->pmr.dev) {
7559 host_memory_backend_set_mapped(n->pmr.dev, false);
7560 }
7561
7562 if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
7563 pcie_sriov_pf_exit(pci_dev);
7564 }
7565
7566 msix_uninit(pci_dev, &n->bar0, &n->bar0);
7567 memory_region_del_subregion(&n->bar0, &n->iomem);
7568}
7569
7570static Property nvme_props[] = {
7571 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
7572 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
7573 HostMemoryBackend *),
7574 DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
7575 NvmeSubsystem *),
7576 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
7577 DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
7578 DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
7579 DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
7580 DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
7581 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
7582 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
7583 DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
7584 DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7),
7585 DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
7586 DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
7587 DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
7588 DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
7589 DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
7590 params.auto_transition_zones, true),
7591 DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
7592 DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
7593 params.sriov_vq_flexible, 0),
7594 DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
7595 params.sriov_vi_flexible, 0),
7596 DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
7597 params.sriov_max_vi_per_vf, 0),
7598 DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
7599 params.sriov_max_vq_per_vf, 0),
7600 DEFINE_PROP_END_OF_LIST(),
7601};
7602
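/*
 * smart_critical_warning is exposed as a read/write QOM property (see
 * nvme_instance_init), so SMART warnings can be injected at runtime, e.g.
 * via QMP (hypothetical device id and value):
 *
 *   { "execute": "qom-set", "arguments": {
 *     "path": "/machine/peripheral/nvme0",
 *     "property": "smart_critical_warning", "value": 16 } }
 */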
7603static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
7604 void *opaque, Error **errp)
7605{
7606 NvmeCtrl *n = NVME(obj);
7607 uint8_t value = n->smart_critical_warning;
7608
7609 visit_type_uint8(v, name, &value, errp);
7610}
7611
7612static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
7613 void *opaque, Error **errp)
7614{
7615 NvmeCtrl *n = NVME(obj);
7616 uint8_t value, old_value, cap = 0, index, event;
7617
7618 if (!visit_type_uint8(v, name, &value, errp)) {
7619 return;
7620 }
7621
7622 cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY
7623 | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA;
7624 if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) {
7625 cap |= NVME_SMART_PMR_UNRELIABLE;
7626 }
7627
7628 if ((value & cap) != value) {
7629 error_setg(errp, "unsupported smart critical warning bits: 0x%x",
7630 value & ~cap);
7631 return;
7632 }
7633
7634 old_value = n->smart_critical_warning;
7635 n->smart_critical_warning = value;
7636
    /* only inject new bits of smart critical warning */
7638 for (index = 0; index < NVME_SMART_WARN_MAX; index++) {
7639 event = 1 << index;
        if (value & ~old_value & event) {
            nvme_smart_event(n, event);
        }
7642 }
7643}
7644
7645static void nvme_pci_reset(DeviceState *qdev)
7646{
7647 PCIDevice *pci_dev = PCI_DEVICE(qdev);
7648 NvmeCtrl *n = NVME(pci_dev);
7649
7650 trace_pci_nvme_pci_reset();
7651 nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
7652}
7653
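/*
 * Config space writes to the SR-IOV capability are intercepted before they
 * take effect: clearing VF Enable (PCI_SRIOV_CTRL_VFE) offlines every
 * secondary controller so that their state is consistent by the time the
 * VFs disappear from config space.
 */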
7654static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
7655 uint32_t val, int len)
7656{
7657 NvmeCtrl *n = NVME(dev);
7658 NvmeSecCtrlEntry *sctrl;
7659 uint16_t sriov_cap = dev->exp.sriov_cap;
7660 uint32_t off = address - sriov_cap;
7661 int i, num_vfs;
7662
7663 if (!sriov_cap) {
7664 return;
7665 }
7666
7667 if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
7668 if (!(val & PCI_SRIOV_CTRL_VFE)) {
7669 num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
7670 for (i = 0; i < num_vfs; i++) {
7671 sctrl = &n->sec_ctrl_list.sec[i];
7672 nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
7673 }
7674 }
7675 }
7676}
7677
7678static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
7679 uint32_t val, int len)
7680{
7681 nvme_sriov_pre_write_ctrl(dev, address, val, len);
7682 pci_default_write_config(dev, address, val, len);
7683 pcie_cap_flr_write_config(dev, address, val, len);
7684}
7685
7686static const VMStateDescription nvme_vmstate = {
7687 .name = "nvme",
7688 .unmigratable = 1,
7689};
7690
7691static void nvme_class_init(ObjectClass *oc, void *data)
7692{
7693 DeviceClass *dc = DEVICE_CLASS(oc);
7694 PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
7695
7696 pc->realize = nvme_realize;
7697 pc->config_write = nvme_pci_write_config;
7698 pc->exit = nvme_exit;
7699 pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
7700 pc->revision = 2;
7701
7702 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
7703 dc->desc = "Non-Volatile Memory Express";
7704 device_class_set_props(dc, nvme_props);
7705 dc->vmsd = &nvme_vmstate;
7706 dc->reset = nvme_pci_reset;
7707}
7708
7709static void nvme_instance_init(Object *obj)
7710{
7711 NvmeCtrl *n = NVME(obj);
7712
7713 device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex,
7714 "bootindex", "/namespace@1,0",
7715 DEVICE(obj));
7716
7717 object_property_add(obj, "smart_critical_warning", "uint8",
7718 nvme_get_smart_warning,
7719 nvme_set_smart_warning, NULL, NULL);
7720}
7721
7722static const TypeInfo nvme_info = {
7723 .name = TYPE_NVME,
7724 .parent = TYPE_PCI_DEVICE,
7725 .instance_size = sizeof(NvmeCtrl),
7726 .instance_init = nvme_instance_init,
7727 .class_init = nvme_class_init,
7728 .interfaces = (InterfaceInfo[]) {
7729 { INTERFACE_PCIE_DEVICE },
7730 { }
7731 },
7732};
7733
7734static const TypeInfo nvme_bus_info = {
7735 .name = TYPE_NVME_BUS,
7736 .parent = TYPE_BUS,
7737 .instance_size = sizeof(NvmeBus),
7738};
7739
7740static void nvme_register_types(void)
7741{
7742 type_register_static(&nvme_info);
7743 type_register_static(&nvme_bus_info);
7744}
7745
7746type_init(nvme_register_types)
7747