1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186#include "qemu/osdep.h"
187#include "qemu/cutils.h"
188#include "qemu/error-report.h"
189#include "qemu/log.h"
190#include "qemu/units.h"
191#include "qemu/range.h"
192#include "qapi/error.h"
193#include "qapi/visitor.h"
194#include "sysemu/sysemu.h"
195#include "sysemu/block-backend.h"
196#include "sysemu/hostmem.h"
197#include "hw/pci/msix.h"
198#include "hw/pci/pcie_sriov.h"
199#include "migration/vmstate.h"
200
201#include "nvme.h"
202#include "dif.h"
203#include "trace.h"
204
205#define NVME_MAX_IOQPAIRS 0xffff
206#define NVME_DB_SIZE 4
207#define NVME_SPEC_VER 0x00010400
208#define NVME_CMB_BIR 2
209#define NVME_PMR_BIR 4
210#define NVME_TEMPERATURE 0x143
211#define NVME_TEMPERATURE_WARNING 0x157
212#define NVME_TEMPERATURE_CRITICAL 0x175
213#define NVME_NUM_FW_SLOTS 1
214#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
215#define NVME_MAX_VFS 127
216#define NVME_VF_RES_GRANULARITY 1
217#define NVME_VF_OFFSET 0x1
218#define NVME_VF_STRIDE 1
219
220#define NVME_GUEST_ERR(trace, fmt, ...) \
221 do { \
222 (trace_##trace)(__VA_ARGS__); \
223 qemu_log_mask(LOG_GUEST_ERROR, #trace \
224 " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
225 } while (0)
226
227static const bool nvme_feature_support[NVME_FID_MAX] = {
228 [NVME_ARBITRATION] = true,
229 [NVME_POWER_MANAGEMENT] = true,
230 [NVME_TEMPERATURE_THRESHOLD] = true,
231 [NVME_ERROR_RECOVERY] = true,
232 [NVME_VOLATILE_WRITE_CACHE] = true,
233 [NVME_NUMBER_OF_QUEUES] = true,
234 [NVME_INTERRUPT_COALESCING] = true,
235 [NVME_INTERRUPT_VECTOR_CONF] = true,
236 [NVME_WRITE_ATOMICITY] = true,
237 [NVME_ASYNCHRONOUS_EVENT_CONF] = true,
238 [NVME_TIMESTAMP] = true,
239 [NVME_HOST_BEHAVIOR_SUPPORT] = true,
240 [NVME_COMMAND_SET_PROFILE] = true,
241};
242
243static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
244 [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE,
245 [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
246 [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
247 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
248 [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
249 [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
250 [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
251 [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE,
252};
253
254static const uint32_t nvme_cse_acs[256] = {
255 [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP,
256 [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP,
257 [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
258 [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP,
259 [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP,
260 [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP,
261 [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP,
262 [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
263 [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
264 [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
265 [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
266 [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
267 [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
268 [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
269};
270
271static const uint32_t nvme_cse_iocs_none[256];
272
273static const uint32_t nvme_cse_iocs_nvm[256] = {
274 [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
275 [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
276 [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
277 [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
278 [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
279 [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
280 [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
281 [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
282};
283
284static const uint32_t nvme_cse_iocs_zoned[256] = {
285 [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
286 [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
287 [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
288 [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
289 [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
290 [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
291 [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
292 [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
293 [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
294 [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
295 [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
296};
297
298static void nvme_process_sq(void *opaque);
299static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
300
301static uint16_t nvme_sqid(NvmeRequest *req)
302{
303 return le16_to_cpu(req->sq->sqid);
304}
305
306static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
307 NvmeZoneState state)
308{
309 if (QTAILQ_IN_USE(zone, entry)) {
310 switch (nvme_get_zone_state(zone)) {
311 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
312 QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
313 break;
314 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
315 QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
316 break;
317 case NVME_ZONE_STATE_CLOSED:
318 QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
319 break;
320 case NVME_ZONE_STATE_FULL:
321 QTAILQ_REMOVE(&ns->full_zones, zone, entry);
322 default:
323 ;
324 }
325 }
326
327 nvme_set_zone_state(zone, state);
328
329 switch (state) {
330 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
331 QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry);
332 break;
333 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
334 QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry);
335 break;
336 case NVME_ZONE_STATE_CLOSED:
337 QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry);
338 break;
339 case NVME_ZONE_STATE_FULL:
340 QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry);
341 case NVME_ZONE_STATE_READ_ONLY:
342 break;
343 default:
344 zone->d.za = 0;
345 }
346}
347
348static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
349 uint32_t opn, uint32_t zrwa)
350{
351 if (ns->params.max_active_zones != 0 &&
352 ns->nr_active_zones + act > ns->params.max_active_zones) {
353 trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
354 return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
355 }
356
357 if (ns->params.max_open_zones != 0 &&
358 ns->nr_open_zones + opn > ns->params.max_open_zones) {
359 trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
360 return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
361 }
362
363 if (zrwa > ns->zns.numzrwa) {
364 return NVME_NOZRWA | NVME_DNR;
365 }
366
367 return NVME_SUCCESS;
368}
369
370
371
372
373
374static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
375{
376 return nvme_zns_check_resources(ns, act, opn, 0);
377}
378
379static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
380{
381 hwaddr hi, lo;
382
383 if (!n->cmb.cmse) {
384 return false;
385 }
386
387 lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;
388 hi = lo + int128_get64(n->cmb.mem.size);
389
390 return addr >= lo && addr < hi;
391}
392
393static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
394{
395 hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;
396 return &n->cmb.buf[addr - base];
397}
398
399static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr)
400{
401 hwaddr hi;
402
403 if (!n->pmr.cmse) {
404 return false;
405 }
406
407 hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size);
408
409 return addr >= n->pmr.cba && addr < hi;
410}
411
412static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr)
413{
414 return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba);
415}
416
417static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
418{
419 hwaddr hi, lo;
420
421
422
423
424
425
426
427
428
429 lo = n->bar0.addr;
430 hi = lo + int128_get64(n->bar0.size);
431
432 return addr >= lo && addr < hi;
433}
434
435static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
436{
437 hwaddr hi = addr + size - 1;
438 if (hi < addr) {
439 return 1;
440 }
441
442 if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
443 memcpy(buf, nvme_addr_to_cmb(n, addr), size);
444 return 0;
445 }
446
447 if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
448 memcpy(buf, nvme_addr_to_pmr(n, addr), size);
449 return 0;
450 }
451
452 return pci_dma_read(&n->parent_obj, addr, buf, size);
453}
454
455static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
456{
457 hwaddr hi = addr + size - 1;
458 if (hi < addr) {
459 return 1;
460 }
461
462 if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
463 memcpy(nvme_addr_to_cmb(n, addr), buf, size);
464 return 0;
465 }
466
467 if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
468 memcpy(nvme_addr_to_pmr(n, addr), buf, size);
469 return 0;
470 }
471
472 return pci_dma_write(&n->parent_obj, addr, buf, size);
473}
474
475static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
476{
477 return nsid &&
478 (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES);
479}
480
481static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
482{
483 return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
484}
485
486static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
487{
488 return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
489}
490
491static void nvme_inc_cq_tail(NvmeCQueue *cq)
492{
493 cq->tail++;
494 if (cq->tail >= cq->size) {
495 cq->tail = 0;
496 cq->phase = !cq->phase;
497 }
498}
499
500static void nvme_inc_sq_head(NvmeSQueue *sq)
501{
502 sq->head = (sq->head + 1) % sq->size;
503}
504
505static uint8_t nvme_cq_full(NvmeCQueue *cq)
506{
507 return (cq->tail + 1) % cq->size == cq->head;
508}
509
510static uint8_t nvme_sq_empty(NvmeSQueue *sq)
511{
512 return sq->head == sq->tail;
513}
514
515static void nvme_irq_check(NvmeCtrl *n)
516{
517 uint32_t intms = ldl_le_p(&n->bar.intms);
518
519 if (msix_enabled(&(n->parent_obj))) {
520 return;
521 }
522 if (~intms & n->irq_status) {
523 pci_irq_assert(&n->parent_obj);
524 } else {
525 pci_irq_deassert(&n->parent_obj);
526 }
527}
528
529static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
530{
531 if (cq->irq_enabled) {
532 if (msix_enabled(&(n->parent_obj))) {
533 trace_pci_nvme_irq_msix(cq->vector);
534 msix_notify(&(n->parent_obj), cq->vector);
535 } else {
536 trace_pci_nvme_irq_pin();
537 assert(cq->vector < 32);
538 n->irq_status |= 1 << cq->vector;
539 nvme_irq_check(n);
540 }
541 } else {
542 trace_pci_nvme_irq_masked();
543 }
544}
545
546static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
547{
548 if (cq->irq_enabled) {
549 if (msix_enabled(&(n->parent_obj))) {
550 return;
551 } else {
552 assert(cq->vector < 32);
553 if (!n->cq_pending) {
554 n->irq_status &= ~(1 << cq->vector);
555 }
556 nvme_irq_check(n);
557 }
558 }
559}
560
561static void nvme_req_clear(NvmeRequest *req)
562{
563 req->ns = NULL;
564 req->opaque = NULL;
565 req->aiocb = NULL;
566 memset(&req->cqe, 0x0, sizeof(req->cqe));
567 req->status = NVME_SUCCESS;
568}
569
570static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
571{
572 if (dma) {
573 pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
574 sg->flags = NVME_SG_DMA;
575 } else {
576 qemu_iovec_init(&sg->iov, 0);
577 }
578
579 sg->flags |= NVME_SG_ALLOC;
580}
581
582static inline void nvme_sg_unmap(NvmeSg *sg)
583{
584 if (!(sg->flags & NVME_SG_ALLOC)) {
585 return;
586 }
587
588 if (sg->flags & NVME_SG_DMA) {
589 qemu_sglist_destroy(&sg->qsg);
590 } else {
591 qemu_iovec_destroy(&sg->iov);
592 }
593
594 memset(sg, 0x0, sizeof(*sg));
595}
596
597
598
599
600
601
602static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data,
603 NvmeSg *mdata)
604{
605 NvmeSg *dst = data;
606 uint32_t trans_len, count = ns->lbasz;
607 uint64_t offset = 0;
608 bool dma = sg->flags & NVME_SG_DMA;
609 size_t sge_len;
610 size_t sg_len = dma ? sg->qsg.size : sg->iov.size;
611 int sg_idx = 0;
612
613 assert(sg->flags & NVME_SG_ALLOC);
614
615 while (sg_len) {
616 sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
617
618 trans_len = MIN(sg_len, count);
619 trans_len = MIN(trans_len, sge_len - offset);
620
621 if (dst) {
622 if (dma) {
623 qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset,
624 trans_len);
625 } else {
626 qemu_iovec_add(&dst->iov,
627 sg->iov.iov[sg_idx].iov_base + offset,
628 trans_len);
629 }
630 }
631
632 sg_len -= trans_len;
633 count -= trans_len;
634 offset += trans_len;
635
636 if (count == 0) {
637 dst = (dst == data) ? mdata : data;
638 count = (dst == data) ? ns->lbasz : ns->lbaf.ms;
639 }
640
641 if (sge_len == offset) {
642 offset = 0;
643 sg_idx++;
644 }
645 }
646}
647
648static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
649 size_t len)
650{
651 if (!len) {
652 return NVME_SUCCESS;
653 }
654
655 trace_pci_nvme_map_addr_cmb(addr, len);
656
657 if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
658 return NVME_DATA_TRAS_ERROR;
659 }
660
661 qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
662
663 return NVME_SUCCESS;
664}
665
666static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
667 size_t len)
668{
669 if (!len) {
670 return NVME_SUCCESS;
671 }
672
673 if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) {
674 return NVME_DATA_TRAS_ERROR;
675 }
676
677 qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len);
678
679 return NVME_SUCCESS;
680}
681
682static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
683{
684 bool cmb = false, pmr = false;
685
686 if (!len) {
687 return NVME_SUCCESS;
688 }
689
690 trace_pci_nvme_map_addr(addr, len);
691
692 if (nvme_addr_is_iomem(n, addr)) {
693 return NVME_DATA_TRAS_ERROR;
694 }
695
696 if (nvme_addr_is_cmb(n, addr)) {
697 cmb = true;
698 } else if (nvme_addr_is_pmr(n, addr)) {
699 pmr = true;
700 }
701
702 if (cmb || pmr) {
703 if (sg->flags & NVME_SG_DMA) {
704 return NVME_INVALID_USE_OF_CMB | NVME_DNR;
705 }
706
707 if (sg->iov.niov + 1 > IOV_MAX) {
708 goto max_mappings_exceeded;
709 }
710
711 if (cmb) {
712 return nvme_map_addr_cmb(n, &sg->iov, addr, len);
713 } else {
714 return nvme_map_addr_pmr(n, &sg->iov, addr, len);
715 }
716 }
717
718 if (!(sg->flags & NVME_SG_DMA)) {
719 return NVME_INVALID_USE_OF_CMB | NVME_DNR;
720 }
721
722 if (sg->qsg.nsg + 1 > IOV_MAX) {
723 goto max_mappings_exceeded;
724 }
725
726 qemu_sglist_add(&sg->qsg, addr, len);
727
728 return NVME_SUCCESS;
729
730max_mappings_exceeded:
731 NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings,
732 "number of mappings exceed 1024");
733 return NVME_INTERNAL_DEV_ERROR | NVME_DNR;
734}
735
736static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
737{
738 return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
739}
740
741static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
742 uint64_t prp2, uint32_t len)
743{
744 hwaddr trans_len = n->page_size - (prp1 % n->page_size);
745 trans_len = MIN(len, trans_len);
746 int num_prps = (len >> n->page_bits) + 1;
747 uint16_t status;
748 int ret;
749
750 trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
751
752 nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1));
753
754 status = nvme_map_addr(n, sg, prp1, trans_len);
755 if (status) {
756 goto unmap;
757 }
758
759 len -= trans_len;
760 if (len) {
761 if (len > n->page_size) {
762 uint64_t prp_list[n->max_prp_ents];
763 uint32_t nents, prp_trans;
764 int i = 0;
765
766
767
768
769
770
771 nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3;
772 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
773 ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
774 if (ret) {
775 trace_pci_nvme_err_addr_read(prp2);
776 status = NVME_DATA_TRAS_ERROR;
777 goto unmap;
778 }
779 while (len != 0) {
780 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
781
782 if (i == nents - 1 && len > n->page_size) {
783 if (unlikely(prp_ent & (n->page_size - 1))) {
784 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
785 status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
786 goto unmap;
787 }
788
789 i = 0;
790 nents = (len + n->page_size - 1) >> n->page_bits;
791 nents = MIN(nents, n->max_prp_ents);
792 prp_trans = nents * sizeof(uint64_t);
793 ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
794 prp_trans);
795 if (ret) {
796 trace_pci_nvme_err_addr_read(prp_ent);
797 status = NVME_DATA_TRAS_ERROR;
798 goto unmap;
799 }
800 prp_ent = le64_to_cpu(prp_list[i]);
801 }
802
803 if (unlikely(prp_ent & (n->page_size - 1))) {
804 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
805 status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
806 goto unmap;
807 }
808
809 trans_len = MIN(len, n->page_size);
810 status = nvme_map_addr(n, sg, prp_ent, trans_len);
811 if (status) {
812 goto unmap;
813 }
814
815 len -= trans_len;
816 i++;
817 }
818 } else {
819 if (unlikely(prp2 & (n->page_size - 1))) {
820 trace_pci_nvme_err_invalid_prp2_align(prp2);
821 status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
822 goto unmap;
823 }
824 status = nvme_map_addr(n, sg, prp2, len);
825 if (status) {
826 goto unmap;
827 }
828 }
829 }
830
831 return NVME_SUCCESS;
832
833unmap:
834 nvme_sg_unmap(sg);
835 return status;
836}
837
838
839
840
841
842static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
843 NvmeSglDescriptor *segment, uint64_t nsgld,
844 size_t *len, NvmeCmd *cmd)
845{
846 dma_addr_t addr, trans_len;
847 uint32_t dlen;
848 uint16_t status;
849
850 for (int i = 0; i < nsgld; i++) {
851 uint8_t type = NVME_SGL_TYPE(segment[i].type);
852
853 switch (type) {
854 case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
855 break;
856 case NVME_SGL_DESCR_TYPE_SEGMENT:
857 case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
858 return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
859 default:
860 return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
861 }
862
863 dlen = le32_to_cpu(segment[i].len);
864
865 if (!dlen) {
866 continue;
867 }
868
869 if (*len == 0) {
870
871
872
873
874
875 uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls);
876 if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
877 break;
878 }
879
880 trace_pci_nvme_err_invalid_sgl_excess_length(dlen);
881 return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
882 }
883
884 trans_len = MIN(*len, dlen);
885
886 addr = le64_to_cpu(segment[i].addr);
887
888 if (UINT64_MAX - addr < dlen) {
889 return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
890 }
891
892 status = nvme_map_addr(n, sg, addr, trans_len);
893 if (status) {
894 return status;
895 }
896
897 *len -= trans_len;
898 }
899
900 return NVME_SUCCESS;
901}
902
903static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
904 size_t len, NvmeCmd *cmd)
905{
906
907
908
909
910
911
912
913 const int SEG_CHUNK_SIZE = 256;
914
915 NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
916 uint64_t nsgld;
917 uint32_t seg_len;
918 uint16_t status;
919 hwaddr addr;
920 int ret;
921
922 sgld = &sgl;
923 addr = le64_to_cpu(sgl.addr);
924
925 trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len);
926
927 nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));
928
929
930
931
932
933 if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
934 status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd);
935 if (status) {
936 goto unmap;
937 }
938
939 goto out;
940 }
941
942 for (;;) {
943 switch (NVME_SGL_TYPE(sgld->type)) {
944 case NVME_SGL_DESCR_TYPE_SEGMENT:
945 case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
946 break;
947 default:
948 return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
949 }
950
951 seg_len = le32_to_cpu(sgld->len);
952
953
954 if (!seg_len || seg_len & 0xf) {
955 return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
956 }
957
958 if (UINT64_MAX - addr < seg_len) {
959 return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
960 }
961
962 nsgld = seg_len / sizeof(NvmeSglDescriptor);
963
964 while (nsgld > SEG_CHUNK_SIZE) {
965 if (nvme_addr_read(n, addr, segment, sizeof(segment))) {
966 trace_pci_nvme_err_addr_read(addr);
967 status = NVME_DATA_TRAS_ERROR;
968 goto unmap;
969 }
970
971 status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
972 &len, cmd);
973 if (status) {
974 goto unmap;
975 }
976
977 nsgld -= SEG_CHUNK_SIZE;
978 addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor);
979 }
980
981 ret = nvme_addr_read(n, addr, segment, nsgld *
982 sizeof(NvmeSglDescriptor));
983 if (ret) {
984 trace_pci_nvme_err_addr_read(addr);
985 status = NVME_DATA_TRAS_ERROR;
986 goto unmap;
987 }
988
989 last_sgld = &segment[nsgld - 1];
990
991
992
993
994 if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
995 status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
996 if (status) {
997 goto unmap;
998 }
999
1000 goto out;
1001 }
1002
1003
1004
1005
1006
1007 if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
1008 status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
1009 goto unmap;
1010 }
1011
1012 sgld = last_sgld;
1013 addr = le64_to_cpu(sgld->addr);
1014
1015
1016
1017
1018
1019 status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd);
1020 if (status) {
1021 goto unmap;
1022 }
1023 }
1024
1025out:
1026
1027 if (len) {
1028 status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
1029 goto unmap;
1030 }
1031
1032 return NVME_SUCCESS;
1033
1034unmap:
1035 nvme_sg_unmap(sg);
1036 return status;
1037}
1038
1039uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
1040 NvmeCmd *cmd)
1041{
1042 uint64_t prp1, prp2;
1043
1044 switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
1045 case NVME_PSDT_PRP:
1046 prp1 = le64_to_cpu(cmd->dptr.prp1);
1047 prp2 = le64_to_cpu(cmd->dptr.prp2);
1048
1049 return nvme_map_prp(n, sg, prp1, prp2, len);
1050 case NVME_PSDT_SGL_MPTR_CONTIGUOUS:
1051 case NVME_PSDT_SGL_MPTR_SGL:
1052 return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd);
1053 default:
1054 return NVME_INVALID_FIELD;
1055 }
1056}
1057
1058static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
1059 NvmeCmd *cmd)
1060{
1061 int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags);
1062 hwaddr mptr = le64_to_cpu(cmd->mptr);
1063 uint16_t status;
1064
1065 if (psdt == NVME_PSDT_SGL_MPTR_SGL) {
1066 NvmeSglDescriptor sgl;
1067
1068 if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) {
1069 return NVME_DATA_TRAS_ERROR;
1070 }
1071
1072 status = nvme_map_sgl(n, sg, sgl, len, cmd);
1073 if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) {
1074 status = NVME_MD_SGL_LEN_INVALID | NVME_DNR;
1075 }
1076
1077 return status;
1078 }
1079
1080 nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr));
1081 status = nvme_map_addr(n, sg, mptr, len);
1082 if (status) {
1083 nvme_sg_unmap(sg);
1084 }
1085
1086 return status;
1087}
1088
1089static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
1090{
1091 NvmeNamespace *ns = req->ns;
1092 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
1093 bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
1094 bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
1095 size_t len = nvme_l2b(ns, nlb);
1096 uint16_t status;
1097
1098 if (nvme_ns_ext(ns) &&
1099 !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
1100 NvmeSg sg;
1101
1102 len += nvme_m2b(ns, nlb);
1103
1104 status = nvme_map_dptr(n, &sg, len, &req->cmd);
1105 if (status) {
1106 return status;
1107 }
1108
1109 nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
1110 nvme_sg_split(&sg, ns, &req->sg, NULL);
1111 nvme_sg_unmap(&sg);
1112
1113 return NVME_SUCCESS;
1114 }
1115
1116 return nvme_map_dptr(n, &req->sg, len, &req->cmd);
1117}
1118
1119static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
1120{
1121 NvmeNamespace *ns = req->ns;
1122 size_t len = nvme_m2b(ns, nlb);
1123 uint16_t status;
1124
1125 if (nvme_ns_ext(ns)) {
1126 NvmeSg sg;
1127
1128 len += nvme_l2b(ns, nlb);
1129
1130 status = nvme_map_dptr(n, &sg, len, &req->cmd);
1131 if (status) {
1132 return status;
1133 }
1134
1135 nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
1136 nvme_sg_split(&sg, ns, NULL, &req->sg);
1137 nvme_sg_unmap(&sg);
1138
1139 return NVME_SUCCESS;
1140 }
1141
1142 return nvme_map_mptr(n, &req->sg, len, &req->cmd);
1143}
1144
1145static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr,
1146 uint32_t len, uint32_t bytes,
1147 int32_t skip_bytes, int64_t offset,
1148 NvmeTxDirection dir)
1149{
1150 hwaddr addr;
1151 uint32_t trans_len, count = bytes;
1152 bool dma = sg->flags & NVME_SG_DMA;
1153 int64_t sge_len;
1154 int sg_idx = 0;
1155 int ret;
1156
1157 assert(sg->flags & NVME_SG_ALLOC);
1158
1159 while (len) {
1160 sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
1161
1162 if (sge_len - offset < 0) {
1163 offset -= sge_len;
1164 sg_idx++;
1165 continue;
1166 }
1167
1168 if (sge_len == offset) {
1169 offset = 0;
1170 sg_idx++;
1171 continue;
1172 }
1173
1174 trans_len = MIN(len, count);
1175 trans_len = MIN(trans_len, sge_len - offset);
1176
1177 if (dma) {
1178 addr = sg->qsg.sg[sg_idx].base + offset;
1179 } else {
1180 addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset;
1181 }
1182
1183 if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
1184 ret = nvme_addr_read(n, addr, ptr, trans_len);
1185 } else {
1186 ret = nvme_addr_write(n, addr, ptr, trans_len);
1187 }
1188
1189 if (ret) {
1190 return NVME_DATA_TRAS_ERROR;
1191 }
1192
1193 ptr += trans_len;
1194 len -= trans_len;
1195 count -= trans_len;
1196 offset += trans_len;
1197
1198 if (count == 0) {
1199 count = bytes;
1200 offset += skip_bytes;
1201 }
1202 }
1203
1204 return NVME_SUCCESS;
1205}
1206
1207static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, void *ptr, uint32_t len,
1208 NvmeTxDirection dir)
1209{
1210 assert(sg->flags & NVME_SG_ALLOC);
1211
1212 if (sg->flags & NVME_SG_DMA) {
1213 const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
1214 dma_addr_t residual;
1215
1216 if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
1217 dma_buf_write(ptr, len, &residual, &sg->qsg, attrs);
1218 } else {
1219 dma_buf_read(ptr, len, &residual, &sg->qsg, attrs);
1220 }
1221
1222 if (unlikely(residual)) {
1223 trace_pci_nvme_err_invalid_dma();
1224 return NVME_INVALID_FIELD | NVME_DNR;
1225 }
1226 } else {
1227 size_t bytes;
1228
1229 if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
1230 bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len);
1231 } else {
1232 bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len);
1233 }
1234
1235 if (unlikely(bytes != len)) {
1236 trace_pci_nvme_err_invalid_dma();
1237 return NVME_INVALID_FIELD | NVME_DNR;
1238 }
1239 }
1240
1241 return NVME_SUCCESS;
1242}
1243
1244static inline uint16_t nvme_c2h(NvmeCtrl *n, void *ptr, uint32_t len,
1245 NvmeRequest *req)
1246{
1247 uint16_t status;
1248
1249 status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
1250 if (status) {
1251 return status;
1252 }
1253
1254 return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE);
1255}
1256
1257static inline uint16_t nvme_h2c(NvmeCtrl *n, void *ptr, uint32_t len,
1258 NvmeRequest *req)
1259{
1260 uint16_t status;
1261
1262 status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
1263 if (status) {
1264 return status;
1265 }
1266
1267 return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
1268}
1269
1270uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
1271 NvmeTxDirection dir, NvmeRequest *req)
1272{
1273 NvmeNamespace *ns = req->ns;
1274 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
1275 bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
1276 bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
1277
1278 if (nvme_ns_ext(ns) &&
1279 !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
1280 return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
1281 ns->lbaf.ms, 0, dir);
1282 }
1283
1284 return nvme_tx(n, &req->sg, ptr, len, dir);
1285}
1286
1287uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
1288 NvmeTxDirection dir, NvmeRequest *req)
1289{
1290 NvmeNamespace *ns = req->ns;
1291 uint16_t status;
1292
1293 if (nvme_ns_ext(ns)) {
1294 return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms,
1295 ns->lbasz, ns->lbasz, dir);
1296 }
1297
1298 nvme_sg_unmap(&req->sg);
1299
1300 status = nvme_map_mptr(n, &req->sg, len, &req->cmd);
1301 if (status) {
1302 return status;
1303 }
1304
1305 return nvme_tx(n, &req->sg, ptr, len, dir);
1306}
1307
1308static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
1309 BlockCompletionFunc *cb, NvmeRequest *req)
1310{
1311 assert(req->sg.flags & NVME_SG_ALLOC);
1312
1313 if (req->sg.flags & NVME_SG_DMA) {
1314 req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
1315 cb, req);
1316 } else {
1317 req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
1318 }
1319}
1320
1321static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
1322 BlockCompletionFunc *cb, NvmeRequest *req)
1323{
1324 assert(req->sg.flags & NVME_SG_ALLOC);
1325
1326 if (req->sg.flags & NVME_SG_DMA) {
1327 req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
1328 cb, req);
1329 } else {
1330 req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
1331 }
1332}
1333
1334static void nvme_update_cq_head(NvmeCQueue *cq)
1335{
1336 pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
1337 sizeof(cq->head));
1338 trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
1339}
1340
1341static void nvme_post_cqes(void *opaque)
1342{
1343 NvmeCQueue *cq = opaque;
1344 NvmeCtrl *n = cq->ctrl;
1345 NvmeRequest *req, *next;
1346 bool pending = cq->head != cq->tail;
1347 int ret;
1348
1349 QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
1350 NvmeSQueue *sq;
1351 hwaddr addr;
1352
1353 if (n->dbbuf_enabled) {
1354 nvme_update_cq_head(cq);
1355 }
1356
1357 if (nvme_cq_full(cq)) {
1358 break;
1359 }
1360
1361 sq = req->sq;
1362 req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
1363 req->cqe.sq_id = cpu_to_le16(sq->sqid);
1364 req->cqe.sq_head = cpu_to_le16(sq->head);
1365 addr = cq->dma_addr + cq->tail * n->cqe_size;
1366 ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
1367 sizeof(req->cqe));
1368 if (ret) {
1369 trace_pci_nvme_err_addr_write(addr);
1370 trace_pci_nvme_err_cfs();
1371 stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
1372 break;
1373 }
1374 QTAILQ_REMOVE(&cq->req_list, req, entry);
1375 nvme_inc_cq_tail(cq);
1376 nvme_sg_unmap(&req->sg);
1377 QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
1378 }
1379 if (cq->tail != cq->head) {
1380 if (cq->irq_enabled && !pending) {
1381 n->cq_pending++;
1382 }
1383
1384 nvme_irq_assert(n, cq);
1385 }
1386}
1387
1388static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
1389{
1390 assert(cq->cqid == req->sq->cqid);
1391 trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
1392 le32_to_cpu(req->cqe.result),
1393 le32_to_cpu(req->cqe.dw1),
1394 req->status);
1395
1396 if (req->status) {
1397 trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns),
1398 req->status, req->cmd.opcode);
1399 }
1400
1401 QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
1402 QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
1403
1404 if (req->sq->ioeventfd_enabled) {
1405
1406 nvme_post_cqes(cq);
1407 } else {
1408
1409 timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
1410 }
1411}
1412
1413static void nvme_process_aers(void *opaque)
1414{
1415 NvmeCtrl *n = opaque;
1416 NvmeAsyncEvent *event, *next;
1417
1418 trace_pci_nvme_process_aers(n->aer_queued);
1419
1420 QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
1421 NvmeRequest *req;
1422 NvmeAerResult *result;
1423
1424
1425 if (!n->outstanding_aers) {
1426 trace_pci_nvme_no_outstanding_aers();
1427 break;
1428 }
1429
1430
1431 if (n->aer_mask & (1 << event->result.event_type)) {
1432 trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
1433 continue;
1434 }
1435
1436 QTAILQ_REMOVE(&n->aer_queue, event, entry);
1437 n->aer_queued--;
1438
1439 n->aer_mask |= 1 << event->result.event_type;
1440 n->outstanding_aers--;
1441
1442 req = n->aer_reqs[n->outstanding_aers];
1443
1444 result = (NvmeAerResult *) &req->cqe.result;
1445 result->event_type = event->result.event_type;
1446 result->event_info = event->result.event_info;
1447 result->log_page = event->result.log_page;
1448 g_free(event);
1449
1450 trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
1451 result->log_page);
1452
1453 nvme_enqueue_req_completion(&n->admin_cq, req);
1454 }
1455}
1456
1457static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
1458 uint8_t event_info, uint8_t log_page)
1459{
1460 NvmeAsyncEvent *event;
1461
1462 trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
1463
1464 if (n->aer_queued == n->params.aer_max_queued) {
1465 trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
1466 return;
1467 }
1468
1469 event = g_new(NvmeAsyncEvent, 1);
1470 event->result = (NvmeAerResult) {
1471 .event_type = event_type,
1472 .event_info = event_info,
1473 .log_page = log_page,
1474 };
1475
1476 QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
1477 n->aer_queued++;
1478
1479 nvme_process_aers(n);
1480}
1481
1482static void nvme_smart_event(NvmeCtrl *n, uint8_t event)
1483{
1484 uint8_t aer_info;
1485
1486
1487 if (!(NVME_AEC_SMART(n->features.async_config) & event)) {
1488 return;
1489 }
1490
1491 switch (event) {
1492 case NVME_SMART_SPARE:
1493 aer_info = NVME_AER_INFO_SMART_SPARE_THRESH;
1494 break;
1495 case NVME_SMART_TEMPERATURE:
1496 aer_info = NVME_AER_INFO_SMART_TEMP_THRESH;
1497 break;
1498 case NVME_SMART_RELIABILITY:
1499 case NVME_SMART_MEDIA_READ_ONLY:
1500 case NVME_SMART_FAILED_VOLATILE_MEDIA:
1501 case NVME_SMART_PMR_UNRELIABLE:
1502 aer_info = NVME_AER_INFO_SMART_RELIABILITY;
1503 break;
1504 default:
1505 return;
1506 }
1507
1508 nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO);
1509}
1510
1511static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
1512{
1513 n->aer_mask &= ~(1 << event_type);
1514 if (!QTAILQ_EMPTY(&n->aer_queue)) {
1515 nvme_process_aers(n);
1516 }
1517}
1518
1519static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
1520{
1521 uint8_t mdts = n->params.mdts;
1522
1523 if (mdts && len > n->page_size << mdts) {
1524 trace_pci_nvme_err_mdts(len);
1525 return NVME_INVALID_FIELD | NVME_DNR;
1526 }
1527
1528 return NVME_SUCCESS;
1529}
1530
1531static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba,
1532 uint32_t nlb)
1533{
1534 uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
1535
1536 if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
1537 trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze);
1538 return NVME_LBA_RANGE | NVME_DNR;
1539 }
1540
1541 return NVME_SUCCESS;
1542}
1543
1544static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba,
1545 uint32_t nlb, int flags)
1546{
1547 BlockDriverState *bs = blk_bs(ns->blkconf.blk);
1548
1549 int64_t pnum = 0, bytes = nvme_l2b(ns, nlb);
1550 int64_t offset = nvme_l2b(ns, slba);
1551 int ret;
1552
1553
1554
1555
1556
1557
1558
1559 do {
1560 bytes -= pnum;
1561
1562 ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
1563 if (ret < 0) {
1564 return ret;
1565 }
1566
1567
1568 trace_pci_nvme_block_status(offset, bytes, pnum, ret,
1569 !!(ret & BDRV_BLOCK_ZERO));
1570
1571 if (!(ret & flags)) {
1572 return 1;
1573 }
1574
1575 offset += pnum;
1576 } while (pnum != bytes);
1577
1578 return 0;
1579}
1580
1581static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
1582 uint32_t nlb)
1583{
1584 int ret;
1585 Error *err = NULL;
1586
1587 ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA);
1588 if (ret) {
1589 if (ret < 0) {
1590 error_setg_errno(&err, -ret, "unable to get block status");
1591 error_report_err(err);
1592
1593 return NVME_INTERNAL_DEV_ERROR;
1594 }
1595
1596 return NVME_DULB;
1597 }
1598
1599 return NVME_SUCCESS;
1600}
1601
1602static void nvme_aio_err(NvmeRequest *req, int ret)
1603{
1604 uint16_t status = NVME_SUCCESS;
1605 Error *local_err = NULL;
1606
1607 switch (req->cmd.opcode) {
1608 case NVME_CMD_READ:
1609 status = NVME_UNRECOVERED_READ;
1610 break;
1611 case NVME_CMD_FLUSH:
1612 case NVME_CMD_WRITE:
1613 case NVME_CMD_WRITE_ZEROES:
1614 case NVME_CMD_ZONE_APPEND:
1615 status = NVME_WRITE_FAULT;
1616 break;
1617 default:
1618 status = NVME_INTERNAL_DEV_ERROR;
1619 break;
1620 }
1621
1622 trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
1623
1624 error_setg_errno(&local_err, -ret, "aio failed");
1625 error_report_err(local_err);
1626
1627
1628
1629
1630
1631 if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
1632 return;
1633 }
1634
1635 req->status = status;
1636}
1637
1638static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
1639{
1640 return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
1641 slba / ns->zone_size;
1642}
1643
1644static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
1645{
1646 uint32_t zone_idx = nvme_zone_idx(ns, slba);
1647
1648 if (zone_idx >= ns->num_zones) {
1649 return NULL;
1650 }
1651
1652 return &ns->zone_array[zone_idx];
1653}
1654
1655static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
1656{
1657 uint64_t zslba = zone->d.zslba;
1658
1659 switch (nvme_get_zone_state(zone)) {
1660 case NVME_ZONE_STATE_EMPTY:
1661 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1662 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1663 case NVME_ZONE_STATE_CLOSED:
1664 return NVME_SUCCESS;
1665 case NVME_ZONE_STATE_FULL:
1666 trace_pci_nvme_err_zone_is_full(zslba);
1667 return NVME_ZONE_FULL;
1668 case NVME_ZONE_STATE_OFFLINE:
1669 trace_pci_nvme_err_zone_is_offline(zslba);
1670 return NVME_ZONE_OFFLINE;
1671 case NVME_ZONE_STATE_READ_ONLY:
1672 trace_pci_nvme_err_zone_is_read_only(zslba);
1673 return NVME_ZONE_READ_ONLY;
1674 default:
1675 assert(false);
1676 }
1677
1678 return NVME_INTERNAL_DEV_ERROR;
1679}
1680
1681static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
1682 uint64_t slba, uint32_t nlb)
1683{
1684 uint64_t zcap = nvme_zone_wr_boundary(zone);
1685 uint16_t status;
1686
1687 status = nvme_check_zone_state_for_write(zone);
1688 if (status) {
1689 return status;
1690 }
1691
1692 if (zone->d.za & NVME_ZA_ZRWA_VALID) {
1693 uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;
1694
1695 if (slba < zone->w_ptr || slba + nlb > ezrwa) {
1696 trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
1697 return NVME_ZONE_INVALID_WRITE;
1698 }
1699 } else {
1700 if (unlikely(slba != zone->w_ptr)) {
1701 trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
1702 zone->w_ptr);
1703 return NVME_ZONE_INVALID_WRITE;
1704 }
1705 }
1706
1707 if (unlikely((slba + nlb) > zcap)) {
1708 trace_pci_nvme_err_zone_boundary(slba, nlb, zcap);
1709 return NVME_ZONE_BOUNDARY_ERROR;
1710 }
1711
1712 return NVME_SUCCESS;
1713}
1714
1715static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
1716{
1717 switch (nvme_get_zone_state(zone)) {
1718 case NVME_ZONE_STATE_EMPTY:
1719 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1720 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1721 case NVME_ZONE_STATE_FULL:
1722 case NVME_ZONE_STATE_CLOSED:
1723 case NVME_ZONE_STATE_READ_ONLY:
1724 return NVME_SUCCESS;
1725 case NVME_ZONE_STATE_OFFLINE:
1726 trace_pci_nvme_err_zone_is_offline(zone->d.zslba);
1727 return NVME_ZONE_OFFLINE;
1728 default:
1729 assert(false);
1730 }
1731
1732 return NVME_INTERNAL_DEV_ERROR;
1733}
1734
1735static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
1736 uint32_t nlb)
1737{
1738 NvmeZone *zone;
1739 uint64_t bndry, end;
1740 uint16_t status;
1741
1742 zone = nvme_get_zone_by_slba(ns, slba);
1743 assert(zone);
1744
1745 bndry = nvme_zone_rd_boundary(ns, zone);
1746 end = slba + nlb;
1747
1748 status = nvme_check_zone_state_for_read(zone);
1749 if (status) {
1750 ;
1751 } else if (unlikely(end > bndry)) {
1752 if (!ns->params.cross_zone_read) {
1753 status = NVME_ZONE_BOUNDARY_ERROR;
1754 } else {
1755
1756
1757
1758
1759 do {
1760 zone++;
1761 status = nvme_check_zone_state_for_read(zone);
1762 if (status) {
1763 break;
1764 }
1765 } while (end > nvme_zone_rd_boundary(ns, zone));
1766 }
1767 }
1768
1769 return status;
1770}
1771
1772static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
1773{
1774 switch (nvme_get_zone_state(zone)) {
1775 case NVME_ZONE_STATE_FULL:
1776 return NVME_SUCCESS;
1777
1778 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1779 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1780 nvme_aor_dec_open(ns);
1781
1782 case NVME_ZONE_STATE_CLOSED:
1783 nvme_aor_dec_active(ns);
1784
1785 if (zone->d.za & NVME_ZA_ZRWA_VALID) {
1786 zone->d.za &= ~NVME_ZA_ZRWA_VALID;
1787 if (ns->params.numzrwa) {
1788 ns->zns.numzrwa++;
1789 }
1790 }
1791
1792
1793 case NVME_ZONE_STATE_EMPTY:
1794 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
1795 return NVME_SUCCESS;
1796
1797 default:
1798 return NVME_ZONE_INVAL_TRANSITION;
1799 }
1800}
1801
1802static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
1803{
1804 switch (nvme_get_zone_state(zone)) {
1805 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1806 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1807 nvme_aor_dec_open(ns);
1808 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
1809
1810 case NVME_ZONE_STATE_CLOSED:
1811 return NVME_SUCCESS;
1812
1813 default:
1814 return NVME_ZONE_INVAL_TRANSITION;
1815 }
1816}
1817
1818static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
1819{
1820 switch (nvme_get_zone_state(zone)) {
1821 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1822 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1823 nvme_aor_dec_open(ns);
1824
1825 case NVME_ZONE_STATE_CLOSED:
1826 nvme_aor_dec_active(ns);
1827
1828 if (zone->d.za & NVME_ZA_ZRWA_VALID) {
1829 if (ns->params.numzrwa) {
1830 ns->zns.numzrwa++;
1831 }
1832 }
1833
1834
1835 case NVME_ZONE_STATE_FULL:
1836 zone->w_ptr = zone->d.zslba;
1837 zone->d.wp = zone->w_ptr;
1838 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
1839
1840 case NVME_ZONE_STATE_EMPTY:
1841 return NVME_SUCCESS;
1842
1843 default:
1844 return NVME_ZONE_INVAL_TRANSITION;
1845 }
1846}
1847
1848static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
1849{
1850 NvmeZone *zone;
1851
1852 if (ns->params.max_open_zones &&
1853 ns->nr_open_zones == ns->params.max_open_zones) {
1854 zone = QTAILQ_FIRST(&ns->imp_open_zones);
1855 if (zone) {
1856
1857
1858
1859 QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
1860 nvme_zrm_close(ns, zone);
1861 }
1862 }
1863}
1864
1865enum {
1866 NVME_ZRM_AUTO = 1 << 0,
1867 NVME_ZRM_ZRWA = 1 << 1,
1868};
1869
1870static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
1871 NvmeZone *zone, int flags)
1872{
1873 int act = 0;
1874 uint16_t status;
1875
1876 switch (nvme_get_zone_state(zone)) {
1877 case NVME_ZONE_STATE_EMPTY:
1878 act = 1;
1879
1880
1881
1882 case NVME_ZONE_STATE_CLOSED:
1883 if (n->params.auto_transition_zones) {
1884 nvme_zrm_auto_transition_zone(ns);
1885 }
1886 status = nvme_zns_check_resources(ns, act, 1,
1887 (flags & NVME_ZRM_ZRWA) ? 1 : 0);
1888 if (status) {
1889 return status;
1890 }
1891
1892 if (act) {
1893 nvme_aor_inc_active(ns);
1894 }
1895
1896 nvme_aor_inc_open(ns);
1897
1898 if (flags & NVME_ZRM_AUTO) {
1899 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
1900 return NVME_SUCCESS;
1901 }
1902
1903
1904
1905 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
1906 if (flags & NVME_ZRM_AUTO) {
1907 return NVME_SUCCESS;
1908 }
1909
1910 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
1911
1912
1913
1914 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
1915 if (flags & NVME_ZRM_ZRWA) {
1916 ns->zns.numzrwa--;
1917
1918 zone->d.za |= NVME_ZA_ZRWA_VALID;
1919 }
1920
1921 return NVME_SUCCESS;
1922
1923 default:
1924 return NVME_ZONE_INVAL_TRANSITION;
1925 }
1926}
1927
1928static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns,
1929 NvmeZone *zone)
1930{
1931 return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO);
1932}
1933
1934static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
1935 uint32_t nlb)
1936{
1937 zone->d.wp += nlb;
1938
1939 if (zone->d.wp == nvme_zone_wr_boundary(zone)) {
1940 nvme_zrm_finish(ns, zone);
1941 }
1942}
1943
1944static void nvme_zoned_zrwa_implicit_flush(NvmeNamespace *ns, NvmeZone *zone,
1945 uint32_t nlbc)
1946{
1947 uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, ns->zns.zrwafg);
1948
1949 nlbc = nzrwafgs * ns->zns.zrwafg;
1950
1951 trace_pci_nvme_zoned_zrwa_implicit_flush(zone->d.zslba, nlbc);
1952
1953 zone->w_ptr += nlbc;
1954
1955 nvme_advance_zone_wp(ns, zone, nlbc);
1956}
1957
1958static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
1959{
1960 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
1961 NvmeZone *zone;
1962 uint64_t slba;
1963 uint32_t nlb;
1964
1965 slba = le64_to_cpu(rw->slba);
1966 nlb = le16_to_cpu(rw->nlb) + 1;
1967 zone = nvme_get_zone_by_slba(ns, slba);
1968 assert(zone);
1969
1970 if (zone->d.za & NVME_ZA_ZRWA_VALID) {
1971 uint64_t ezrwa = zone->w_ptr + ns->zns.zrwas - 1;
1972 uint64_t elba = slba + nlb - 1;
1973
1974 if (elba > ezrwa) {
1975 nvme_zoned_zrwa_implicit_flush(ns, zone, elba - ezrwa);
1976 }
1977
1978 return;
1979 }
1980
1981 nvme_advance_zone_wp(ns, zone, nlb);
1982}
1983
1984static inline bool nvme_is_write(NvmeRequest *req)
1985{
1986 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
1987
1988 return rw->opcode == NVME_CMD_WRITE ||
1989 rw->opcode == NVME_CMD_ZONE_APPEND ||
1990 rw->opcode == NVME_CMD_WRITE_ZEROES;
1991}
1992
1993static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
1994{
1995 return qemu_get_aio_context();
1996}
1997
1998static void nvme_misc_cb(void *opaque, int ret)
1999{
2000 NvmeRequest *req = opaque;
2001
2002 trace_pci_nvme_misc_cb(nvme_cid(req));
2003
2004 if (ret) {
2005 nvme_aio_err(req, ret);
2006 }
2007
2008 nvme_enqueue_req_completion(nvme_cq(req), req);
2009}
2010
2011void nvme_rw_complete_cb(void *opaque, int ret)
2012{
2013 NvmeRequest *req = opaque;
2014 NvmeNamespace *ns = req->ns;
2015 BlockBackend *blk = ns->blkconf.blk;
2016 BlockAcctCookie *acct = &req->acct;
2017 BlockAcctStats *stats = blk_get_stats(blk);
2018
2019 trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
2020
2021 if (ret) {
2022 block_acct_failed(stats, acct);
2023 nvme_aio_err(req, ret);
2024 } else {
2025 block_acct_done(stats, acct);
2026 }
2027
2028 if (ns->params.zoned && nvme_is_write(req)) {
2029 nvme_finalize_zoned_write(ns, req);
2030 }
2031
2032 nvme_enqueue_req_completion(nvme_cq(req), req);
2033}
2034
2035static void nvme_rw_cb(void *opaque, int ret)
2036{
2037 NvmeRequest *req = opaque;
2038 NvmeNamespace *ns = req->ns;
2039
2040 BlockBackend *blk = ns->blkconf.blk;
2041
2042 trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
2043
2044 if (ret) {
2045 goto out;
2046 }
2047
2048 if (ns->lbaf.ms) {
2049 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2050 uint64_t slba = le64_to_cpu(rw->slba);
2051 uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
2052 uint64_t offset = nvme_moff(ns, slba);
2053
2054 if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) {
2055 size_t mlen = nvme_m2b(ns, nlb);
2056
2057 req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen,
2058 BDRV_REQ_MAY_UNMAP,
2059 nvme_rw_complete_cb, req);
2060 return;
2061 }
2062
2063 if (nvme_ns_ext(ns) || req->cmd.mptr) {
2064 uint16_t status;
2065
2066 nvme_sg_unmap(&req->sg);
2067 status = nvme_map_mdata(nvme_ctrl(req), nlb, req);
2068 if (status) {
2069 ret = -EFAULT;
2070 goto out;
2071 }
2072
2073 if (req->cmd.opcode == NVME_CMD_READ) {
2074 return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req);
2075 }
2076
2077 return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req);
2078 }
2079 }
2080
2081out:
2082 nvme_rw_complete_cb(req, ret);
2083}
2084
2085static void nvme_verify_cb(void *opaque, int ret)
2086{
2087 NvmeBounceContext *ctx = opaque;
2088 NvmeRequest *req = ctx->req;
2089 NvmeNamespace *ns = req->ns;
2090 BlockBackend *blk = ns->blkconf.blk;
2091 BlockAcctCookie *acct = &req->acct;
2092 BlockAcctStats *stats = blk_get_stats(blk);
2093 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2094 uint64_t slba = le64_to_cpu(rw->slba);
2095 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
2096 uint16_t apptag = le16_to_cpu(rw->apptag);
2097 uint16_t appmask = le16_to_cpu(rw->appmask);
2098 uint64_t reftag = le32_to_cpu(rw->reftag);
2099 uint64_t cdw3 = le32_to_cpu(rw->cdw3);
2100 uint16_t status;
2101
2102 reftag |= cdw3 << 32;
2103
2104 trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
2105
2106 if (ret) {
2107 block_acct_failed(stats, acct);
2108 nvme_aio_err(req, ret);
2109 goto out;
2110 }
2111
2112 block_acct_done(stats, acct);
2113
2114 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
2115 status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce,
2116 ctx->mdata.iov.size, slba);
2117 if (status) {
2118 req->status = status;
2119 goto out;
2120 }
2121
2122 req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
2123 ctx->mdata.bounce, ctx->mdata.iov.size,
2124 prinfo, slba, apptag, appmask, &reftag);
2125 }
2126
2127out:
2128 qemu_iovec_destroy(&ctx->data.iov);
2129 g_free(ctx->data.bounce);
2130
2131 qemu_iovec_destroy(&ctx->mdata.iov);
2132 g_free(ctx->mdata.bounce);
2133
2134 g_free(ctx);
2135
2136 nvme_enqueue_req_completion(nvme_cq(req), req);
2137}
2138
2139
2140static void nvme_verify_mdata_in_cb(void *opaque, int ret)
2141{
2142 NvmeBounceContext *ctx = opaque;
2143 NvmeRequest *req = ctx->req;
2144 NvmeNamespace *ns = req->ns;
2145 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2146 uint64_t slba = le64_to_cpu(rw->slba);
2147 uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
2148 size_t mlen = nvme_m2b(ns, nlb);
2149 uint64_t offset = nvme_moff(ns, slba);
2150 BlockBackend *blk = ns->blkconf.blk;
2151
2152 trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk));
2153
2154 if (ret) {
2155 goto out;
2156 }
2157
2158 ctx->mdata.bounce = g_malloc(mlen);
2159
2160 qemu_iovec_reset(&ctx->mdata.iov);
2161 qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
2162
2163 req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
2164 nvme_verify_cb, ctx);
2165 return;
2166
2167out:
2168 nvme_verify_cb(ctx, ret);
2169}
2170
2171struct nvme_compare_ctx {
2172 struct {
2173 QEMUIOVector iov;
2174 uint8_t *bounce;
2175 } data;
2176
2177 struct {
2178 QEMUIOVector iov;
2179 uint8_t *bounce;
2180 } mdata;
2181};
2182
2183static void nvme_compare_mdata_cb(void *opaque, int ret)
2184{
2185 NvmeRequest *req = opaque;
2186 NvmeNamespace *ns = req->ns;
2187 NvmeCtrl *n = nvme_ctrl(req);
2188 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2189 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
2190 uint16_t apptag = le16_to_cpu(rw->apptag);
2191 uint16_t appmask = le16_to_cpu(rw->appmask);
2192 uint64_t reftag = le32_to_cpu(rw->reftag);
2193 uint64_t cdw3 = le32_to_cpu(rw->cdw3);
2194 struct nvme_compare_ctx *ctx = req->opaque;
2195 g_autofree uint8_t *buf = NULL;
2196 BlockBackend *blk = ns->blkconf.blk;
2197 BlockAcctCookie *acct = &req->acct;
2198 BlockAcctStats *stats = blk_get_stats(blk);
2199 uint16_t status = NVME_SUCCESS;
2200
2201 reftag |= cdw3 << 32;
2202
2203 trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
2204
2205 if (ret) {
2206 block_acct_failed(stats, acct);
2207 nvme_aio_err(req, ret);
2208 goto out;
2209 }
2210
2211 buf = g_malloc(ctx->mdata.iov.size);
2212
2213 status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size,
2214 NVME_TX_DIRECTION_TO_DEVICE, req);
2215 if (status) {
2216 req->status = status;
2217 goto out;
2218 }
2219
2220 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
2221 uint64_t slba = le64_to_cpu(rw->slba);
2222 uint8_t *bufp;
2223 uint8_t *mbufp = ctx->mdata.bounce;
2224 uint8_t *end = mbufp + ctx->mdata.iov.size;
2225 int16_t pil = 0;
2226
2227 status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
2228 ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
2229 slba, apptag, appmask, &reftag);
2230 if (status) {
2231 req->status = status;
2232 goto out;
2233 }
2234
2235
2236
2237
2238
2239 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
2240 pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
2241 }
2242
2243 for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) {
2244 if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) {
2245 req->status = NVME_CMP_FAILURE;
2246 goto out;
2247 }
2248 }
2249
2250 goto out;
2251 }
2252
2253 if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) {
2254 req->status = NVME_CMP_FAILURE;
2255 goto out;
2256 }
2257
2258 block_acct_done(stats, acct);
2259
2260out:
2261 qemu_iovec_destroy(&ctx->data.iov);
2262 g_free(ctx->data.bounce);
2263
2264 qemu_iovec_destroy(&ctx->mdata.iov);
2265 g_free(ctx->mdata.bounce);
2266
2267 g_free(ctx);
2268
2269 nvme_enqueue_req_completion(nvme_cq(req), req);
2270}
2271
2272static void nvme_compare_data_cb(void *opaque, int ret)
2273{
2274 NvmeRequest *req = opaque;
2275 NvmeCtrl *n = nvme_ctrl(req);
2276 NvmeNamespace *ns = req->ns;
2277 BlockBackend *blk = ns->blkconf.blk;
2278 BlockAcctCookie *acct = &req->acct;
2279 BlockAcctStats *stats = blk_get_stats(blk);
2280
2281 struct nvme_compare_ctx *ctx = req->opaque;
2282 g_autofree uint8_t *buf = NULL;
2283 uint16_t status;
2284
2285 trace_pci_nvme_compare_data_cb(nvme_cid(req));
2286
2287 if (ret) {
2288 block_acct_failed(stats, acct);
2289 nvme_aio_err(req, ret);
2290 goto out;
2291 }
2292
2293 buf = g_malloc(ctx->data.iov.size);
2294
2295 status = nvme_bounce_data(n, buf, ctx->data.iov.size,
2296 NVME_TX_DIRECTION_TO_DEVICE, req);
2297 if (status) {
2298 req->status = status;
2299 goto out;
2300 }
2301
2302 if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) {
2303 req->status = NVME_CMP_FAILURE;
2304 goto out;
2305 }
2306
2307 if (ns->lbaf.ms) {
2308 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2309 uint64_t slba = le64_to_cpu(rw->slba);
2310 uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
2311 size_t mlen = nvme_m2b(ns, nlb);
2312 uint64_t offset = nvme_moff(ns, slba);
2313
2314 ctx->mdata.bounce = g_malloc(mlen);
2315
2316 qemu_iovec_init(&ctx->mdata.iov, 1);
2317 qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
2318
2319 req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
2320 nvme_compare_mdata_cb, req);
2321 return;
2322 }
2323
2324 block_acct_done(stats, acct);
2325
2326out:
2327 qemu_iovec_destroy(&ctx->data.iov);
2328 g_free(ctx->data.bounce);
2329 g_free(ctx);
2330
2331 nvme_enqueue_req_completion(nvme_cq(req), req);
2332}
2333
2334typedef struct NvmeDSMAIOCB {
2335 BlockAIOCB common;
2336 BlockAIOCB *aiocb;
2337 NvmeRequest *req;
2338 QEMUBH *bh;
2339 int ret;
2340
2341 NvmeDsmRange *range;
2342 unsigned int nr;
2343 unsigned int idx;
2344} NvmeDSMAIOCB;
2345
2346static void nvme_dsm_cancel(BlockAIOCB *aiocb)
2347{
2348 NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common);
2349
2350
2351 iocb->idx = iocb->nr;
2352 iocb->ret = -ECANCELED;
2353
2354 if (iocb->aiocb) {
2355 blk_aio_cancel_async(iocb->aiocb);
2356 iocb->aiocb = NULL;
2357 } else {
2358
2359
2360
2361
2362 assert(iocb->idx == iocb->nr);
2363 }
2364}
2365
2366static const AIOCBInfo nvme_dsm_aiocb_info = {
2367 .aiocb_size = sizeof(NvmeDSMAIOCB),
2368 .cancel_async = nvme_dsm_cancel,
2369};
2370
2371static void nvme_dsm_bh(void *opaque)
2372{
2373 NvmeDSMAIOCB *iocb = opaque;
2374
2375 iocb->common.cb(iocb->common.opaque, iocb->ret);
2376
2377 qemu_bh_delete(iocb->bh);
2378 iocb->bh = NULL;
2379 qemu_aio_unref(iocb);
2380}
2381
2382static void nvme_dsm_cb(void *opaque, int ret);
2383
2384static void nvme_dsm_md_cb(void *opaque, int ret)
2385{
2386 NvmeDSMAIOCB *iocb = opaque;
2387 NvmeRequest *req = iocb->req;
2388 NvmeNamespace *ns = req->ns;
2389 NvmeDsmRange *range;
2390 uint64_t slba;
2391 uint32_t nlb;
2392
2393 if (ret < 0) {
2394 iocb->ret = ret;
2395 goto done;
2396 }
2397
2398 if (!ns->lbaf.ms) {
2399 nvme_dsm_cb(iocb, 0);
2400 return;
2401 }
2402
2403 range = &iocb->range[iocb->idx - 1];
2404 slba = le64_to_cpu(range->slba);
2405 nlb = le32_to_cpu(range->nlb);
2406
2407
2408
2409
2410
2411
2412 ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO);
2413 if (ret) {
2414 if (ret < 0) {
2415 iocb->ret = ret;
2416 goto done;
2417 }
2418
2419 nvme_dsm_cb(iocb, 0);
2420 return;
2421 }
2422
2423 iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba),
2424 nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP,
2425 nvme_dsm_cb, iocb);
2426 return;
2427
2428done:
2429 iocb->aiocb = NULL;
2430 qemu_bh_schedule(iocb->bh);
2431}
2432
2433static void nvme_dsm_cb(void *opaque, int ret)
2434{
2435 NvmeDSMAIOCB *iocb = opaque;
2436 NvmeRequest *req = iocb->req;
2437 NvmeCtrl *n = nvme_ctrl(req);
2438 NvmeNamespace *ns = req->ns;
2439 NvmeDsmRange *range;
2440 uint64_t slba;
2441 uint32_t nlb;
2442
2443 if (ret < 0) {
2444 iocb->ret = ret;
2445 goto done;
2446 }
2447
2448next:
2449 if (iocb->idx == iocb->nr) {
2450 goto done;
2451 }
2452
2453 range = &iocb->range[iocb->idx++];
2454 slba = le64_to_cpu(range->slba);
2455 nlb = le32_to_cpu(range->nlb);
2456
2457 trace_pci_nvme_dsm_deallocate(slba, nlb);
2458
2459 if (nlb > n->dmrsl) {
2460 trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
2461 goto next;
2462 }
2463
2464 if (nvme_check_bounds(ns, slba, nlb)) {
2465 trace_pci_nvme_err_invalid_lba_range(slba, nlb,
2466 ns->id_ns.nsze);
2467 goto next;
2468 }
2469
2470 iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba),
2471 nvme_l2b(ns, nlb),
2472 nvme_dsm_md_cb, iocb);
2473 return;
2474
2475done:
2476 iocb->aiocb = NULL;
2477 qemu_bh_schedule(iocb->bh);
2478}
2479
2480static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
2481{
2482 NvmeNamespace *ns = req->ns;
2483 NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
2484 uint32_t attr = le32_to_cpu(dsm->attributes);
2485 uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
2486 uint16_t status = NVME_SUCCESS;
2487
2488 trace_pci_nvme_dsm(nr, attr);
2489
2490 if (attr & NVME_DSMGMT_AD) {
2491 NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk,
2492 nvme_misc_cb, req);
2493
2494 iocb->req = req;
2495 iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb);
2496 iocb->ret = 0;
2497 iocb->range = g_new(NvmeDsmRange, nr);
2498 iocb->nr = nr;
2499 iocb->idx = 0;
2500
2501 status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr,
2502 req);
2503 if (status) {
2504 return status;
2505 }
2506
2507 req->aiocb = &iocb->common;
2508 nvme_dsm_cb(iocb, 0);
2509
2510 return NVME_NO_COMPLETE;
2511 }
2512
2513 return status;
2514}
2515
2516static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
2517{
2518 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
2519 NvmeNamespace *ns = req->ns;
2520 BlockBackend *blk = ns->blkconf.blk;
2521 uint64_t slba = le64_to_cpu(rw->slba);
2522 uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
2523 size_t len = nvme_l2b(ns, nlb);
2524 int64_t offset = nvme_l2b(ns, slba);
2525 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
2526 uint32_t reftag = le32_to_cpu(rw->reftag);
2527 NvmeBounceContext *ctx = NULL;
2528 uint16_t status;
2529
2530 trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb);
2531
2532 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
2533 status = nvme_check_prinfo(ns, prinfo, slba, reftag);
2534 if (status) {
2535 return status;
2536 }
2537
2538 if (prinfo & NVME_PRINFO_PRACT) {
2539 return NVME_INVALID_PROT_INFO | NVME_DNR;
2540 }
2541 }
2542
2543 if (len > n->page_size << n->params.vsl) {
2544 return NVME_INVALID_FIELD | NVME_DNR;
2545 }
2546
2547 status = nvme_check_bounds(ns, slba, nlb);
2548 if (status) {
2549 return status;
2550 }
2551
2552 if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
2553 status = nvme_check_dulbe(ns, slba, nlb);
2554 if (status) {
2555 return status;
2556 }
2557 }
2558
2559 ctx = g_new0(NvmeBounceContext, 1);
2560 ctx->req = req;
2561
2562 ctx->data.bounce = g_malloc(len);
2563
2564 qemu_iovec_init(&ctx->data.iov, 1);
2565 qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
2566
2567 block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
2568 BLOCK_ACCT_READ);
2569
2570 req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
2571 nvme_verify_mdata_in_cb, ctx);
2572 return NVME_NO_COMPLETE;
2573}
2574
2575typedef struct NvmeCopyAIOCB {
2576 BlockAIOCB common;
2577 BlockAIOCB *aiocb;
2578 NvmeRequest *req;
2579 QEMUBH *bh;
2580 int ret;
2581
2582 void *ranges;
2583 unsigned int format;
2584 int nr;
2585 int idx;
2586
2587 uint8_t *bounce;
2588 QEMUIOVector iov;
2589 struct {
2590 BlockAcctCookie read;
2591 BlockAcctCookie write;
2592 } acct;
2593
2594 uint64_t reftag;
2595 uint64_t slba;
2596
2597 NvmeZone *zone;
2598} NvmeCopyAIOCB;
2599
2600static void nvme_copy_cancel(BlockAIOCB *aiocb)
2601{
2602 NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common);
2603
2604 iocb->ret = -ECANCELED;
2605
2606 if (iocb->aiocb) {
2607 blk_aio_cancel_async(iocb->aiocb);
2608 iocb->aiocb = NULL;
2609 }
2610}
2611
2612static const AIOCBInfo nvme_copy_aiocb_info = {
2613 .aiocb_size = sizeof(NvmeCopyAIOCB),
2614 .cancel_async = nvme_copy_cancel,
2615};
2616
2617static void nvme_copy_bh(void *opaque)
2618{
2619 NvmeCopyAIOCB *iocb = opaque;
2620 NvmeRequest *req = iocb->req;
2621 NvmeNamespace *ns = req->ns;
2622 BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk);
2623
2624 if (iocb->idx != iocb->nr) {
2625 req->cqe.result = cpu_to_le32(iocb->idx);
2626 }
2627
2628 qemu_iovec_destroy(&iocb->iov);
2629 g_free(iocb->bounce);
2630
2631 qemu_bh_delete(iocb->bh);
2632 iocb->bh = NULL;
2633
2634 if (iocb->ret < 0) {
2635 block_acct_failed(stats, &iocb->acct.read);
2636 block_acct_failed(stats, &iocb->acct.write);
2637 } else {
2638 block_acct_done(stats, &iocb->acct.read);
2639 block_acct_done(stats, &iocb->acct.write);
2640 }
2641
2642 iocb->common.cb(iocb->common.opaque, iocb->ret);
2643 qemu_aio_unref(iocb);
2644}
2645
2646static void nvme_copy_cb(void *opaque, int ret);
2647
2648static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
2649 uint64_t *slba, uint32_t *nlb,
2650 uint16_t *apptag,
2651 uint16_t *appmask,
2652 uint64_t *reftag)
2653{
2654 NvmeCopySourceRangeFormat0 *_ranges = ranges;
2655
2656 if (slba) {
2657 *slba = le64_to_cpu(_ranges[idx].slba);
2658 }
2659
2660 if (nlb) {
2661 *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
2662 }
2663
2664 if (apptag) {
2665 *apptag = le16_to_cpu(_ranges[idx].apptag);
2666 }
2667
2668 if (appmask) {
2669 *appmask = le16_to_cpu(_ranges[idx].appmask);
2670 }
2671
2672 if (reftag) {
2673 *reftag = le32_to_cpu(_ranges[idx].reftag);
2674 }
2675}
2676
2677static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
2678 uint64_t *slba, uint32_t *nlb,
2679 uint16_t *apptag,
2680 uint16_t *appmask,
2681 uint64_t *reftag)
2682{
2683 NvmeCopySourceRangeFormat1 *_ranges = ranges;
2684
2685 if (slba) {
2686 *slba = le64_to_cpu(_ranges[idx].slba);
2687 }
2688
2689 if (nlb) {
2690 *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
2691 }
2692
2693 if (apptag) {
2694 *apptag = le16_to_cpu(_ranges[idx].apptag);
2695 }
2696
2697 if (appmask) {
2698 *appmask = le16_to_cpu(_ranges[idx].appmask);
2699 }
2700
2701 if (reftag) {
2702 *reftag = 0;
2703
2704 *reftag |= (uint64_t)_ranges[idx].sr[4] << 40;
2705 *reftag |= (uint64_t)_ranges[idx].sr[5] << 32;
2706 *reftag |= (uint64_t)_ranges[idx].sr[6] << 24;
2707 *reftag |= (uint64_t)_ranges[idx].sr[7] << 16;
2708 *reftag |= (uint64_t)_ranges[idx].sr[8] << 8;
2709 *reftag |= (uint64_t)_ranges[idx].sr[9];
2710 }
2711}
2712
2713static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
2714 uint64_t *slba, uint32_t *nlb,
2715 uint16_t *apptag, uint16_t *appmask,
2716 uint64_t *reftag)
2717{
2718 switch (format) {
2719 case NVME_COPY_FORMAT_0:
2720 nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag,
2721 appmask, reftag);
2722 break;
2723
2724 case NVME_COPY_FORMAT_1:
2725 nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag,
2726 appmask, reftag);
2727 break;
2728
2729 default:
2730 abort();
2731 }
2732}
2733
2734static void nvme_copy_out_completed_cb(void *opaque, int ret)
2735{
2736 NvmeCopyAIOCB *iocb = opaque;
2737 NvmeRequest *req = iocb->req;
2738 NvmeNamespace *ns = req->ns;
2739 uint32_t nlb;
2740
2741 nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
2742 &nlb, NULL, NULL, NULL);
2743
2744 if (ret < 0) {
2745 iocb->ret = ret;
2746 goto out;
2747 } else if (iocb->ret < 0) {
2748 goto out;
2749 }
2750
2751 if (ns->params.zoned) {
2752 nvme_advance_zone_wp(ns, iocb->zone, nlb);
2753 }
2754
2755 iocb->idx++;
2756 iocb->slba += nlb;
2757out:
2758 nvme_copy_cb(iocb, iocb->ret);
2759}
2760
2761static void nvme_copy_out_cb(void *opaque, int ret)
2762{
2763 NvmeCopyAIOCB *iocb = opaque;
2764 NvmeRequest *req = iocb->req;
2765 NvmeNamespace *ns = req->ns;
2766 uint32_t nlb;
2767 size_t mlen;
2768 uint8_t *mbounce;
2769
2770 if (ret < 0) {
2771 iocb->ret = ret;
2772 goto out;
2773 } else if (iocb->ret < 0) {
2774 goto out;
2775 }
2776
2777 if (!ns->lbaf.ms) {
2778 nvme_copy_out_completed_cb(iocb, 0);
2779 return;
2780 }
2781
2782 nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
2783 &nlb, NULL, NULL, NULL);
2784
2785 mlen = nvme_m2b(ns, nlb);
2786 mbounce = iocb->bounce + nvme_l2b(ns, nlb);
2787
2788 qemu_iovec_reset(&iocb->iov);
2789 qemu_iovec_add(&iocb->iov, mbounce, mlen);
2790
2791 iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba),
2792 &iocb->iov, 0, nvme_copy_out_completed_cb,
2793 iocb);
2794
2795 return;
2796
2797out:
2798 nvme_copy_cb(iocb, ret);
2799}
2800
2801static void nvme_copy_in_completed_cb(void *opaque, int ret)
2802{
2803 NvmeCopyAIOCB *iocb = opaque;
2804 NvmeRequest *req = iocb->req;
2805 NvmeNamespace *ns = req->ns;
2806 uint32_t nlb;
2807 uint64_t slba;
2808 uint16_t apptag, appmask;
2809 uint64_t reftag;
2810 size_t len;
2811 uint16_t status;
2812
2813 if (ret < 0) {
2814 iocb->ret = ret;
2815 goto out;
2816 } else if (iocb->ret < 0) {
2817 goto out;
2818 }
2819
2820 nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
2821 &nlb, &apptag, &appmask, &reftag);
2822 len = nvme_l2b(ns, nlb);
2823
2824 trace_pci_nvme_copy_out(iocb->slba, nlb);
2825
2826 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
2827 NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
2828
2829 uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
2830 uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
2831
2832 size_t mlen = nvme_m2b(ns, nlb);
2833 uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);
2834
2835 status = nvme_dif_mangle_mdata(ns, mbounce, mlen, slba);
2836 if (status) {
2837 goto invalid;
2838 }
2839 status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor,
2840 slba, apptag, appmask, &reftag);
2841 if (status) {
2842 goto invalid;
2843 }
2844
2845 apptag = le16_to_cpu(copy->apptag);
2846 appmask = le16_to_cpu(copy->appmask);
2847
2848 if (prinfow & NVME_PRINFO_PRACT) {
2849 status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag);
2850 if (status) {
2851 goto invalid;
2852 }
2853
2854 nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen,
2855 apptag, &iocb->reftag);
2856 } else {
2857 status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen,
2858 prinfow, iocb->slba, apptag, appmask,
2859 &iocb->reftag);
2860 if (status) {
2861 goto invalid;
2862 }
2863 }
2864 }
2865
2866 status = nvme_check_bounds(ns, iocb->slba, nlb);
2867 if (status) {
2868 goto invalid;
2869 }
2870
2871 if (ns->params.zoned) {
2872 status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
2873 if (status) {
2874 goto invalid;
2875 }
2876
2877 if (!(iocb->zone->d.za & NVME_ZA_ZRWA_VALID)) {
2878 iocb->zone->w_ptr += nlb;
2879 }
2880 }
2881
2882 qemu_iovec_reset(&iocb->iov);
2883 qemu_iovec_add(&iocb->iov, iocb->bounce, len);
2884
2885 iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba),
2886 &iocb->iov, 0, nvme_copy_out_cb, iocb);
2887
2888 return;
2889
2890invalid:
2891 req->status = status;
2892 iocb->aiocb = NULL;
2893 if (iocb->bh) {
2894 qemu_bh_schedule(iocb->bh);
2895 }
2896
2897 return;
2898
2899out:
2900 nvme_copy_cb(iocb, ret);
2901}
2902
2903static void nvme_copy_in_cb(void *opaque, int ret)
2904{
2905 NvmeCopyAIOCB *iocb = opaque;
2906 NvmeRequest *req = iocb->req;
2907 NvmeNamespace *ns = req->ns;
2908 uint64_t slba;
2909 uint32_t nlb;
2910
2911 if (ret < 0) {
2912 iocb->ret = ret;
2913 goto out;
2914 } else if (iocb->ret < 0) {
2915 goto out;
2916 }
2917
2918 if (!ns->lbaf.ms) {
2919 nvme_copy_in_completed_cb(iocb, 0);
2920 return;
2921 }
2922
2923 nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
2924 &nlb, NULL, NULL, NULL);
2925
2926 qemu_iovec_reset(&iocb->iov);
2927 qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb),
2928 nvme_m2b(ns, nlb));
2929
2930 iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba),
2931 &iocb->iov, 0, nvme_copy_in_completed_cb,
2932 iocb);
2933 return;
2934
2935out:
2936 nvme_copy_cb(iocb, iocb->ret);
2937}
2938
2939static void nvme_copy_cb(void *opaque, int ret)
2940{
2941 NvmeCopyAIOCB *iocb = opaque;
2942 NvmeRequest *req = iocb->req;
2943 NvmeNamespace *ns = req->ns;
2944 uint64_t slba;
2945 uint32_t nlb;
2946 size_t len;
2947 uint16_t status;
2948
2949 if (ret < 0) {
2950 iocb->ret = ret;
2951 goto done;
2952 } else if (iocb->ret < 0) {
2953 goto done;
2954 }
2955
2956 if (iocb->idx == iocb->nr) {
2957 goto done;
2958 }
2959
2960 nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
2961 &nlb, NULL, NULL, NULL);
2962 len = nvme_l2b(ns, nlb);
2963
2964 trace_pci_nvme_copy_source_range(slba, nlb);
2965
2966 if (nlb > le16_to_cpu(ns->id_ns.mssrl)) {
2967 status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
2968 goto invalid;
2969 }
2970
2971 status = nvme_check_bounds(ns, slba, nlb);
2972 if (status) {
2973 goto invalid;
2974 }
2975
2976 if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
2977 status = nvme_check_dulbe(ns, slba, nlb);
2978 if (status) {
2979 goto invalid;
2980 }
2981 }
2982
2983 if (ns->params.zoned) {
2984 status = nvme_check_zone_read(ns, slba, nlb);
2985 if (status) {
2986 goto invalid;
2987 }
2988 }
2989
2990 qemu_iovec_reset(&iocb->iov);
2991 qemu_iovec_add(&iocb->iov, iocb->bounce, len);
2992
2993 iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
2994 &iocb->iov, 0, nvme_copy_in_cb, iocb);
2995 return;
2996
2997invalid:
2998 req->status = status;
2999done:
3000 iocb->aiocb = NULL;
3001 if (iocb->bh) {
3002 qemu_bh_schedule(iocb->bh);
3003 }
3004}
3005
3006
3007static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
3008{
3009 NvmeNamespace *ns = req->ns;
3010 NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
3011 NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk,
3012 nvme_misc_cb, req);
3013 uint16_t nr = copy->nr + 1;
3014 uint8_t format = copy->control[0] & 0xf;
3015 uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
3016 uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
3017 size_t len = sizeof(NvmeCopySourceRangeFormat0);
3018
3019 uint16_t status;
3020
3021 trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
3022
3023 iocb->ranges = NULL;
3024 iocb->zone = NULL;
3025
3026 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
3027 ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
3028 status = NVME_INVALID_FIELD | NVME_DNR;
3029 goto invalid;
3030 }
3031
3032 if (!(n->id_ctrl.ocfs & (1 << format))) {
3033 trace_pci_nvme_err_copy_invalid_format(format);
3034 status = NVME_INVALID_FIELD | NVME_DNR;
3035 goto invalid;
3036 }
3037
3038 if (nr > ns->id_ns.msrc + 1) {
3039 status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
3040 goto invalid;
3041 }
3042
3043 if (ns->pif && format != 0x1) {
3044 status = NVME_INVALID_FORMAT | NVME_DNR;
3045 goto invalid;
3046 }
3047
3048 if (ns->pif) {
3049 len = sizeof(NvmeCopySourceRangeFormat1);
3050 }
3051
3052 iocb->format = format;
3053 iocb->ranges = g_malloc_n(nr, len);
3054 status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req);
3055 if (status) {
3056 goto invalid;
3057 }
3058
3059 iocb->slba = le64_to_cpu(copy->sdlba);
3060
3061 if (ns->params.zoned) {
3062 iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba);
3063 if (!iocb->zone) {
3064 status = NVME_LBA_RANGE | NVME_DNR;
3065 goto invalid;
3066 }
3067
3068 status = nvme_zrm_auto(n, ns, iocb->zone);
3069 if (status) {
3070 goto invalid;
3071 }
3072 }
3073
3074 iocb->req = req;
3075 iocb->bh = qemu_bh_new(nvme_copy_bh, iocb);
3076 iocb->ret = 0;
3077 iocb->nr = nr;
3078 iocb->idx = 0;
3079 iocb->reftag = le32_to_cpu(copy->reftag);
3080 iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
3081 iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
3082 ns->lbasz + ns->lbaf.ms);
3083
3084 qemu_iovec_init(&iocb->iov, 1);
3085
3086 block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0,
3087 BLOCK_ACCT_READ);
3088 block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0,
3089 BLOCK_ACCT_WRITE);
3090
3091 req->aiocb = &iocb->common;
3092 nvme_copy_cb(iocb, 0);
3093
3094 return NVME_NO_COMPLETE;
3095
3096invalid:
3097 g_free(iocb->ranges);
3098 qemu_aio_unref(iocb);
3099 return status;
3100}
3101
3102static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
3103{
3104 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
3105 NvmeNamespace *ns = req->ns;
3106 BlockBackend *blk = ns->blkconf.blk;
3107 uint64_t slba = le64_to_cpu(rw->slba);
3108 uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
3109 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
3110 size_t data_len = nvme_l2b(ns, nlb);
3111 size_t len = data_len;
3112 int64_t offset = nvme_l2b(ns, slba);
3113 struct nvme_compare_ctx *ctx = NULL;
3114 uint16_t status;
3115
3116 trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
3117
3118 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) {
3119 return NVME_INVALID_PROT_INFO | NVME_DNR;
3120 }
3121
3122 if (nvme_ns_ext(ns)) {
3123 len += nvme_m2b(ns, nlb);
3124 }
3125
3126 status = nvme_check_mdts(n, len);
3127 if (status) {
3128 return status;
3129 }
3130
3131 status = nvme_check_bounds(ns, slba, nlb);
3132 if (status) {
3133 return status;
3134 }
3135
3136 if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
3137 status = nvme_check_dulbe(ns, slba, nlb);
3138 if (status) {
3139 return status;
3140 }
3141 }
3142
3143 status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
3144 if (status) {
3145 return status;
3146 }
3147
3148 ctx = g_new(struct nvme_compare_ctx, 1);
3149 ctx->data.bounce = g_malloc(data_len);
3150
3151 req->opaque = ctx;
3152
3153 qemu_iovec_init(&ctx->data.iov, 1);
3154 qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
3155
3156 block_acct_start(blk_get_stats(blk), &req->acct, data_len,
3157 BLOCK_ACCT_READ);
3158 req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
3159 nvme_compare_data_cb, req);
3160
3161 return NVME_NO_COMPLETE;
3162}
3163
3164typedef struct NvmeFlushAIOCB {
3165 BlockAIOCB common;
3166 BlockAIOCB *aiocb;
3167 NvmeRequest *req;
3168 QEMUBH *bh;
3169 int ret;
3170
3171 NvmeNamespace *ns;
3172 uint32_t nsid;
3173 bool broadcast;
3174} NvmeFlushAIOCB;
3175
3176static void nvme_flush_cancel(BlockAIOCB *acb)
3177{
3178 NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common);
3179
3180 iocb->ret = -ECANCELED;
3181
3182 if (iocb->aiocb) {
3183 blk_aio_cancel_async(iocb->aiocb);
3184 }
3185}
3186
3187static const AIOCBInfo nvme_flush_aiocb_info = {
3188 .aiocb_size = sizeof(NvmeFlushAIOCB),
3189 .cancel_async = nvme_flush_cancel,
3190 .get_aio_context = nvme_get_aio_context,
3191};
3192
3193static void nvme_flush_ns_cb(void *opaque, int ret)
3194{
3195 NvmeFlushAIOCB *iocb = opaque;
3196 NvmeNamespace *ns = iocb->ns;
3197
3198 if (ret < 0) {
3199 iocb->ret = ret;
3200 goto out;
3201 } else if (iocb->ret < 0) {
3202 goto out;
3203 }
3204
3205 if (ns) {
3206 trace_pci_nvme_flush_ns(iocb->nsid);
3207
3208 iocb->ns = NULL;
3209 iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb);
3210 return;
3211 }
3212
3213out:
3214 iocb->aiocb = NULL;
3215 qemu_bh_schedule(iocb->bh);
3216}
3217
3218static void nvme_flush_bh(void *opaque)
3219{
3220 NvmeFlushAIOCB *iocb = opaque;
3221 NvmeRequest *req = iocb->req;
3222 NvmeCtrl *n = nvme_ctrl(req);
3223 int i;
3224
3225 if (iocb->ret < 0) {
3226 goto done;
3227 }
3228
3229 if (iocb->broadcast) {
3230 for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
3231 iocb->ns = nvme_ns(n, i);
3232 if (iocb->ns) {
3233 iocb->nsid = i;
3234 break;
3235 }
3236 }
3237 }
3238
3239 if (!iocb->ns) {
3240 goto done;
3241 }
3242
3243 nvme_flush_ns_cb(iocb, 0);
3244 return;
3245
3246done:
3247 qemu_bh_delete(iocb->bh);
3248 iocb->bh = NULL;
3249
3250 iocb->common.cb(iocb->common.opaque, iocb->ret);
3251
3252 qemu_aio_unref(iocb);
3253
3254 return;
3255}
3256
3257static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
3258{
3259 NvmeFlushAIOCB *iocb;
3260 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
3261 uint16_t status;
3262
3263 iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req);
3264
3265 iocb->req = req;
3266 iocb->bh = qemu_bh_new(nvme_flush_bh, iocb);
3267 iocb->ret = 0;
3268 iocb->ns = NULL;
3269 iocb->nsid = 0;
3270 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
3271
3272 if (!iocb->broadcast) {
3273 if (!nvme_nsid_valid(n, nsid)) {
3274 status = NVME_INVALID_NSID | NVME_DNR;
3275 goto out;
3276 }
3277
3278 iocb->ns = nvme_ns(n, nsid);
3279 if (!iocb->ns) {
3280 status = NVME_INVALID_FIELD | NVME_DNR;
3281 goto out;
3282 }
3283
3284 iocb->nsid = nsid;
3285 }
3286
3287 req->aiocb = &iocb->common;
3288 qemu_bh_schedule(iocb->bh);
3289
3290 return NVME_NO_COMPLETE;
3291
3292out:
3293 qemu_bh_delete(iocb->bh);
3294 iocb->bh = NULL;
3295 qemu_aio_unref(iocb);
3296
3297 return status;
3298}
3299
3300static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
3301{
3302 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
3303 NvmeNamespace *ns = req->ns;
3304 uint64_t slba = le64_to_cpu(rw->slba);
3305 uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
3306 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
3307 uint64_t data_size = nvme_l2b(ns, nlb);
3308 uint64_t mapped_size = data_size;
3309 uint64_t data_offset;
3310 BlockBackend *blk = ns->blkconf.blk;
3311 uint16_t status;
3312
3313 if (nvme_ns_ext(ns)) {
3314 mapped_size += nvme_m2b(ns, nlb);
3315
3316 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
3317 bool pract = prinfo & NVME_PRINFO_PRACT;
3318
3319 if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
3320 mapped_size = data_size;
3321 }
3322 }
3323 }
3324
3325 trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba);
3326
3327 status = nvme_check_mdts(n, mapped_size);
3328 if (status) {
3329 goto invalid;
3330 }
3331
3332 status = nvme_check_bounds(ns, slba, nlb);
3333 if (status) {
3334 goto invalid;
3335 }
3336
3337 if (ns->params.zoned) {
3338 status = nvme_check_zone_read(ns, slba, nlb);
3339 if (status) {
3340 trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status);
3341 goto invalid;
3342 }
3343 }
3344
3345 if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
3346 status = nvme_check_dulbe(ns, slba, nlb);
3347 if (status) {
3348 goto invalid;
3349 }
3350 }
3351
3352 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
3353 return nvme_dif_rw(n, req);
3354 }
3355
3356 status = nvme_map_data(n, nlb, req);
3357 if (status) {
3358 goto invalid;
3359 }
3360
3361 data_offset = nvme_l2b(ns, slba);
3362
3363 block_acct_start(blk_get_stats(blk), &req->acct, data_size,
3364 BLOCK_ACCT_READ);
3365 nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
3366 return NVME_NO_COMPLETE;
3367
3368invalid:
3369 block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
3370 return status | NVME_DNR;
3371}
3372
3373static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
3374 bool wrz)
3375{
3376 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
3377 NvmeNamespace *ns = req->ns;
3378 uint64_t slba = le64_to_cpu(rw->slba);
3379 uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
3380 uint16_t ctrl = le16_to_cpu(rw->control);
3381 uint8_t prinfo = NVME_RW_PRINFO(ctrl);
3382 uint64_t data_size = nvme_l2b(ns, nlb);
3383 uint64_t mapped_size = data_size;
3384 uint64_t data_offset;
3385 NvmeZone *zone;
3386 NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe;
3387 BlockBackend *blk = ns->blkconf.blk;
3388 uint16_t status;
3389
3390 if (nvme_ns_ext(ns)) {
3391 mapped_size += nvme_m2b(ns, nlb);
3392
3393 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
3394 bool pract = prinfo & NVME_PRINFO_PRACT;
3395
3396 if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
3397 mapped_size -= nvme_m2b(ns, nlb);
3398 }
3399 }
3400 }
3401
3402 trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
3403 nvme_nsid(ns), nlb, mapped_size, slba);
3404
3405 if (!wrz) {
3406 status = nvme_check_mdts(n, mapped_size);
3407 if (status) {
3408 goto invalid;
3409 }
3410 }
3411
3412 status = nvme_check_bounds(ns, slba, nlb);
3413 if (status) {
3414 goto invalid;
3415 }
3416
3417 if (ns->params.zoned) {
3418 zone = nvme_get_zone_by_slba(ns, slba);
3419 assert(zone);
3420
3421 if (append) {
3422 bool piremap = !!(ctrl & NVME_RW_PIREMAP);
3423
3424 if (unlikely(zone->d.za & NVME_ZA_ZRWA_VALID)) {
3425 return NVME_INVALID_ZONE_OP | NVME_DNR;
3426 }
3427
3428 if (unlikely(slba != zone->d.zslba)) {
3429 trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
3430 status = NVME_INVALID_FIELD;
3431 goto invalid;
3432 }
3433
3434 if (n->params.zasl &&
3435 data_size > (uint64_t)n->page_size << n->params.zasl) {
3436 trace_pci_nvme_err_zasl(data_size);
3437 return NVME_INVALID_FIELD | NVME_DNR;
3438 }
3439
3440 slba = zone->w_ptr;
3441 rw->slba = cpu_to_le64(slba);
3442 res->slba = cpu_to_le64(slba);
3443
3444 switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
3445 case NVME_ID_NS_DPS_TYPE_1:
3446 if (!piremap) {
3447 return NVME_INVALID_PROT_INFO | NVME_DNR;
3448 }
3449
3450
3451
3452 case NVME_ID_NS_DPS_TYPE_2:
3453 if (piremap) {
3454 uint32_t reftag = le32_to_cpu(rw->reftag);
3455 rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba));
3456 }
3457
3458 break;
3459
3460 case NVME_ID_NS_DPS_TYPE_3:
3461 if (piremap) {
3462 return NVME_INVALID_PROT_INFO | NVME_DNR;
3463 }
3464
3465 break;
3466 }
3467 }
3468
3469 status = nvme_check_zone_write(ns, zone, slba, nlb);
3470 if (status) {
3471 goto invalid;
3472 }
3473
3474 status = nvme_zrm_auto(n, ns, zone);
3475 if (status) {
3476 goto invalid;
3477 }
3478
3479 if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
3480 zone->w_ptr += nlb;
3481 }
3482 }
3483
3484 data_offset = nvme_l2b(ns, slba);
3485
3486 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
3487 return nvme_dif_rw(n, req);
3488 }
3489
3490 if (!wrz) {
3491 status = nvme_map_data(n, nlb, req);
3492 if (status) {
3493 goto invalid;
3494 }
3495
3496 block_acct_start(blk_get_stats(blk), &req->acct, data_size,
3497 BLOCK_ACCT_WRITE);
3498 nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
3499 } else {
3500 req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
3501 BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
3502 req);
3503 }
3504
3505 return NVME_NO_COMPLETE;
3506
3507invalid:
3508 block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
3509 return status | NVME_DNR;
3510}
3511
3512static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
3513{
3514 return nvme_do_write(n, req, false, false);
3515}
3516
3517static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
3518{
3519 return nvme_do_write(n, req, false, true);
3520}
3521
3522static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
3523{
3524 return nvme_do_write(n, req, true, false);
3525}
3526
3527static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
3528 uint64_t *slba, uint32_t *zone_idx)
3529{
3530 uint32_t dw10 = le32_to_cpu(c->cdw10);
3531 uint32_t dw11 = le32_to_cpu(c->cdw11);
3532
3533 if (!ns->params.zoned) {
3534 trace_pci_nvme_err_invalid_opc(c->opcode);
3535 return NVME_INVALID_OPCODE | NVME_DNR;
3536 }
3537
3538 *slba = ((uint64_t)dw11) << 32 | dw10;
3539 if (unlikely(*slba >= ns->id_ns.nsze)) {
3540 trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze);
3541 *slba = 0;
3542 return NVME_LBA_RANGE | NVME_DNR;
3543 }
3544
3545 *zone_idx = nvme_zone_idx(ns, *slba);
3546 assert(*zone_idx < ns->num_zones);
3547
3548 return NVME_SUCCESS;
3549}
3550
3551typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState,
3552 NvmeRequest *);
3553
3554enum NvmeZoneProcessingMask {
3555 NVME_PROC_CURRENT_ZONE = 0,
3556 NVME_PROC_OPENED_ZONES = 1 << 0,
3557 NVME_PROC_CLOSED_ZONES = 1 << 1,
3558 NVME_PROC_READ_ONLY_ZONES = 1 << 2,
3559 NVME_PROC_FULL_ZONES = 1 << 3,
3560};
3561
3562static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
3563 NvmeZoneState state, NvmeRequest *req)
3564{
3565 NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
3566 int flags = 0;
3567
3568 if (cmd->zsflags & NVME_ZSFLAG_ZRWA_ALLOC) {
3569 uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
3570
3571 if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
3572 return NVME_INVALID_ZONE_OP | NVME_DNR;
3573 }
3574
3575 if (zone->w_ptr % ns->zns.zrwafg) {
3576 return NVME_NOZRWA | NVME_DNR;
3577 }
3578
3579 flags = NVME_ZRM_ZRWA;
3580 }
3581
3582 return nvme_zrm_open_flags(nvme_ctrl(req), ns, zone, flags);
3583}
3584
3585static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
3586 NvmeZoneState state, NvmeRequest *req)
3587{
3588 return nvme_zrm_close(ns, zone);
3589}
3590
3591static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
3592 NvmeZoneState state, NvmeRequest *req)
3593{
3594 return nvme_zrm_finish(ns, zone);
3595}
3596
3597static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
3598 NvmeZoneState state, NvmeRequest *req)
3599{
3600 switch (state) {
3601 case NVME_ZONE_STATE_READ_ONLY:
3602 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE);
3603
3604 case NVME_ZONE_STATE_OFFLINE:
3605 return NVME_SUCCESS;
3606 default:
3607 return NVME_ZONE_INVAL_TRANSITION;
3608 }
3609}
3610
3611static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone)
3612{
3613 uint16_t status;
3614 uint8_t state = nvme_get_zone_state(zone);
3615
3616 if (state == NVME_ZONE_STATE_EMPTY) {
3617 status = nvme_aor_check(ns, 1, 0);
3618 if (status) {
3619 return status;
3620 }
3621 nvme_aor_inc_active(ns);
3622 zone->d.za |= NVME_ZA_ZD_EXT_VALID;
3623 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
3624 return NVME_SUCCESS;
3625 }
3626
3627 return NVME_ZONE_INVAL_TRANSITION;
3628}
3629
3630static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
3631 enum NvmeZoneProcessingMask proc_mask,
3632 op_handler_t op_hndlr, NvmeRequest *req)
3633{
3634 uint16_t status = NVME_SUCCESS;
3635 NvmeZoneState zs = nvme_get_zone_state(zone);
3636 bool proc_zone;
3637
3638 switch (zs) {
3639 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
3640 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
3641 proc_zone = proc_mask & NVME_PROC_OPENED_ZONES;
3642 break;
3643 case NVME_ZONE_STATE_CLOSED:
3644 proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES;
3645 break;
3646 case NVME_ZONE_STATE_READ_ONLY:
3647 proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES;
3648 break;
3649 case NVME_ZONE_STATE_FULL:
3650 proc_zone = proc_mask & NVME_PROC_FULL_ZONES;
3651 break;
3652 default:
3653 proc_zone = false;
3654 }
3655
3656 if (proc_zone) {
3657 status = op_hndlr(ns, zone, zs, req);
3658 }
3659
3660 return status;
3661}
3662
3663static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
3664 enum NvmeZoneProcessingMask proc_mask,
3665 op_handler_t op_hndlr, NvmeRequest *req)
3666{
3667 NvmeZone *next;
3668 uint16_t status = NVME_SUCCESS;
3669 int i;
3670
3671 if (!proc_mask) {
3672 status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req);
3673 } else {
3674 if (proc_mask & NVME_PROC_CLOSED_ZONES) {
3675 QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
3676 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
3677 req);
3678 if (status && status != NVME_NO_COMPLETE) {
3679 goto out;
3680 }
3681 }
3682 }
3683 if (proc_mask & NVME_PROC_OPENED_ZONES) {
3684 QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
3685 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
3686 req);
3687 if (status && status != NVME_NO_COMPLETE) {
3688 goto out;
3689 }
3690 }
3691
3692 QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
3693 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
3694 req);
3695 if (status && status != NVME_NO_COMPLETE) {
3696 goto out;
3697 }
3698 }
3699 }
3700 if (proc_mask & NVME_PROC_FULL_ZONES) {
3701 QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) {
3702 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
3703 req);
3704 if (status && status != NVME_NO_COMPLETE) {
3705 goto out;
3706 }
3707 }
3708 }
3709
3710 if (proc_mask & NVME_PROC_READ_ONLY_ZONES) {
3711 for (i = 0; i < ns->num_zones; i++, zone++) {
3712 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
3713 req);
3714 if (status && status != NVME_NO_COMPLETE) {
3715 goto out;
3716 }
3717 }
3718 }
3719 }
3720
3721out:
3722 return status;
3723}
3724
3725typedef struct NvmeZoneResetAIOCB {
3726 BlockAIOCB common;
3727 BlockAIOCB *aiocb;
3728 NvmeRequest *req;
3729 QEMUBH *bh;
3730 int ret;
3731
3732 bool all;
3733 int idx;
3734 NvmeZone *zone;
3735} NvmeZoneResetAIOCB;
3736
3737static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
3738{
3739 NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
3740 NvmeRequest *req = iocb->req;
3741 NvmeNamespace *ns = req->ns;
3742
3743 iocb->idx = ns->num_zones;
3744
3745 iocb->ret = -ECANCELED;
3746
3747 if (iocb->aiocb) {
3748 blk_aio_cancel_async(iocb->aiocb);
3749 iocb->aiocb = NULL;
3750 }
3751}
3752
3753static const AIOCBInfo nvme_zone_reset_aiocb_info = {
3754 .aiocb_size = sizeof(NvmeZoneResetAIOCB),
3755 .cancel_async = nvme_zone_reset_cancel,
3756};
3757
3758static void nvme_zone_reset_bh(void *opaque)
3759{
3760 NvmeZoneResetAIOCB *iocb = opaque;
3761
3762 iocb->common.cb(iocb->common.opaque, iocb->ret);
3763
3764 qemu_bh_delete(iocb->bh);
3765 iocb->bh = NULL;
3766 qemu_aio_unref(iocb);
3767}
3768
3769static void nvme_zone_reset_cb(void *opaque, int ret);
3770
3771static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
3772{
3773 NvmeZoneResetAIOCB *iocb = opaque;
3774 NvmeRequest *req = iocb->req;
3775 NvmeNamespace *ns = req->ns;
3776 int64_t moff;
3777 int count;
3778
3779 if (ret < 0) {
3780 nvme_zone_reset_cb(iocb, ret);
3781 return;
3782 }
3783
3784 if (!ns->lbaf.ms) {
3785 nvme_zone_reset_cb(iocb, 0);
3786 return;
3787 }
3788
3789 moff = nvme_moff(ns, iocb->zone->d.zslba);
3790 count = nvme_m2b(ns, ns->zone_size);
3791
3792 iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
3793 BDRV_REQ_MAY_UNMAP,
3794 nvme_zone_reset_cb, iocb);
3795 return;
3796}
3797
3798static void nvme_zone_reset_cb(void *opaque, int ret)
3799{
3800 NvmeZoneResetAIOCB *iocb = opaque;
3801 NvmeRequest *req = iocb->req;
3802 NvmeNamespace *ns = req->ns;
3803
3804 if (ret < 0) {
3805 iocb->ret = ret;
3806 goto done;
3807 }
3808
3809 if (iocb->zone) {
3810 nvme_zrm_reset(ns, iocb->zone);
3811
3812 if (!iocb->all) {
3813 goto done;
3814 }
3815 }
3816
3817 while (iocb->idx < ns->num_zones) {
3818 NvmeZone *zone = &ns->zone_array[iocb->idx++];
3819
3820 switch (nvme_get_zone_state(zone)) {
3821 case NVME_ZONE_STATE_EMPTY:
3822 if (!iocb->all) {
3823 goto done;
3824 }
3825
3826 continue;
3827
3828 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
3829 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
3830 case NVME_ZONE_STATE_CLOSED:
3831 case NVME_ZONE_STATE_FULL:
3832 iocb->zone = zone;
3833 break;
3834
3835 default:
3836 continue;
3837 }
3838
3839 trace_pci_nvme_zns_zone_reset(zone->d.zslba);
3840
3841 iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
3842 nvme_l2b(ns, zone->d.zslba),
3843 nvme_l2b(ns, ns->zone_size),
3844 BDRV_REQ_MAY_UNMAP,
3845 nvme_zone_reset_epilogue_cb,
3846 iocb);
3847 return;
3848 }
3849
3850done:
3851 iocb->aiocb = NULL;
3852 if (iocb->bh) {
3853 qemu_bh_schedule(iocb->bh);
3854 }
3855}
3856
3857static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone,
3858 uint64_t elba, NvmeRequest *req)
3859{
3860 NvmeNamespace *ns = req->ns;
3861 uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
3862 uint64_t wp = zone->d.wp;
3863 uint32_t nlb = elba - wp + 1;
3864 uint16_t status;
3865
3866
3867 if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
3868 return NVME_INVALID_ZONE_OP | NVME_DNR;
3869 }
3870
3871 if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
3872 return NVME_INVALID_FIELD | NVME_DNR;
3873 }
3874
3875 if (elba < wp || elba > wp + ns->zns.zrwas) {
3876 return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR;
3877 }
3878
3879 if (nlb % ns->zns.zrwafg) {
3880 return NVME_INVALID_FIELD | NVME_DNR;
3881 }
3882
3883 status = nvme_zrm_auto(n, ns, zone);
3884 if (status) {
3885 return status;
3886 }
3887
3888 zone->w_ptr += nlb;
3889
3890 nvme_advance_zone_wp(ns, zone, nlb);
3891
3892 return NVME_SUCCESS;
3893}
3894
3895static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
3896{
3897 NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
3898 NvmeNamespace *ns = req->ns;
3899 NvmeZone *zone;
3900 NvmeZoneResetAIOCB *iocb;
3901 uint8_t *zd_ext;
3902 uint64_t slba = 0;
3903 uint32_t zone_idx = 0;
3904 uint16_t status;
3905 uint8_t action = cmd->zsa;
3906 bool all;
3907 enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;
3908
3909 all = cmd->zsflags & NVME_ZSFLAG_SELECT_ALL;
3910
3911 req->status = NVME_SUCCESS;
3912
3913 if (!all) {
3914 status = nvme_get_mgmt_zone_slba_idx(ns, &req->cmd, &slba, &zone_idx);
3915 if (status) {
3916 return status;
3917 }
3918 }
3919
3920 zone = &ns->zone_array[zone_idx];
3921 if (slba != zone->d.zslba && action != NVME_ZONE_ACTION_ZRWA_FLUSH) {
3922 trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba);
3923 return NVME_INVALID_FIELD | NVME_DNR;
3924 }
3925
3926 switch (action) {
3927
3928 case NVME_ZONE_ACTION_OPEN:
3929 if (all) {
3930 proc_mask = NVME_PROC_CLOSED_ZONES;
3931 }
3932 trace_pci_nvme_open_zone(slba, zone_idx, all);
3933 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req);
3934 break;
3935
3936 case NVME_ZONE_ACTION_CLOSE:
3937 if (all) {
3938 proc_mask = NVME_PROC_OPENED_ZONES;
3939 }
3940 trace_pci_nvme_close_zone(slba, zone_idx, all);
3941 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req);
3942 break;
3943
3944 case NVME_ZONE_ACTION_FINISH:
3945 if (all) {
3946 proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES;
3947 }
3948 trace_pci_nvme_finish_zone(slba, zone_idx, all);
3949 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req);
3950 break;
3951
3952 case NVME_ZONE_ACTION_RESET:
3953 trace_pci_nvme_reset_zone(slba, zone_idx, all);
3954
3955 iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
3956 nvme_misc_cb, req);
3957
3958 iocb->req = req;
3959 iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb);
3960 iocb->ret = 0;
3961 iocb->all = all;
3962 iocb->idx = zone_idx;
3963 iocb->zone = NULL;
3964
3965 req->aiocb = &iocb->common;
3966 nvme_zone_reset_cb(iocb, 0);
3967
3968 return NVME_NO_COMPLETE;
3969
3970 case NVME_ZONE_ACTION_OFFLINE:
3971 if (all) {
3972 proc_mask = NVME_PROC_READ_ONLY_ZONES;
3973 }
3974 trace_pci_nvme_offline_zone(slba, zone_idx, all);
3975 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req);
3976 break;
3977
3978 case NVME_ZONE_ACTION_SET_ZD_EXT:
3979 trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
3980 if (all || !ns->params.zd_extension_size) {
3981 return NVME_INVALID_FIELD | NVME_DNR;
3982 }
3983 zd_ext = nvme_get_zd_extension(ns, zone_idx);
3984 status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req);
3985 if (status) {
3986 trace_pci_nvme_err_zd_extension_map_error(zone_idx);
3987 return status;
3988 }
3989
3990 status = nvme_set_zd_ext(ns, zone);
3991 if (status == NVME_SUCCESS) {
3992 trace_pci_nvme_zd_extension_set(zone_idx);
3993 return status;
3994 }
3995 break;
3996
3997 case NVME_ZONE_ACTION_ZRWA_FLUSH:
3998 if (all) {
3999 return NVME_INVALID_FIELD | NVME_DNR;
4000 }
4001
4002 return nvme_zone_mgmt_send_zrwa_flush(n, zone, slba, req);
4003
4004 default:
4005 trace_pci_nvme_err_invalid_mgmt_action(action);
4006 status = NVME_INVALID_FIELD;
4007 }
4008
4009 if (status == NVME_ZONE_INVAL_TRANSITION) {
4010 trace_pci_nvme_err_invalid_zone_state_transition(action, slba,
4011 zone->d.za);
4012 }
4013 if (status) {
4014 status |= NVME_DNR;
4015 }
4016
4017 return status;
4018}
4019
4020static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl)
4021{
4022 NvmeZoneState zs = nvme_get_zone_state(zl);
4023
4024 switch (zafs) {
4025 case NVME_ZONE_REPORT_ALL:
4026 return true;
4027 case NVME_ZONE_REPORT_EMPTY:
4028 return zs == NVME_ZONE_STATE_EMPTY;
4029 case NVME_ZONE_REPORT_IMPLICITLY_OPEN:
4030 return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN;
4031 case NVME_ZONE_REPORT_EXPLICITLY_OPEN:
4032 return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN;
4033 case NVME_ZONE_REPORT_CLOSED:
4034 return zs == NVME_ZONE_STATE_CLOSED;
4035 case NVME_ZONE_REPORT_FULL:
4036 return zs == NVME_ZONE_STATE_FULL;
4037 case NVME_ZONE_REPORT_READ_ONLY:
4038 return zs == NVME_ZONE_STATE_READ_ONLY;
4039 case NVME_ZONE_REPORT_OFFLINE:
4040 return zs == NVME_ZONE_STATE_OFFLINE;
4041 default:
4042 return false;
4043 }
4044}
4045
4046static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
4047{
4048 NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
4049 NvmeNamespace *ns = req->ns;
4050
4051 uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2;
4052 uint32_t dw13 = le32_to_cpu(cmd->cdw13);
4053 uint32_t zone_idx, zra, zrasf, partial;
4054 uint64_t max_zones, nr_zones = 0;
4055 uint16_t status;
4056 uint64_t slba;
4057 NvmeZoneDescr *z;
4058 NvmeZone *zone;
4059 NvmeZoneReportHeader *header;
4060 void *buf, *buf_p;
4061 size_t zone_entry_sz;
4062 int i;
4063
4064 req->status = NVME_SUCCESS;
4065
4066 status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx);
4067 if (status) {
4068 return status;
4069 }
4070
4071 zra = dw13 & 0xff;
4072 if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) {
4073 return NVME_INVALID_FIELD | NVME_DNR;
4074 }
4075 if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) {
4076 return NVME_INVALID_FIELD | NVME_DNR;
4077 }
4078
4079 zrasf = (dw13 >> 8) & 0xff;
4080 if (zrasf > NVME_ZONE_REPORT_OFFLINE) {
4081 return NVME_INVALID_FIELD | NVME_DNR;
4082 }
4083
4084 if (data_size < sizeof(NvmeZoneReportHeader)) {
4085 return NVME_INVALID_FIELD | NVME_DNR;
4086 }
4087
4088 status = nvme_check_mdts(n, data_size);
4089 if (status) {
4090 return status;
4091 }
4092
4093 partial = (dw13 >> 16) & 0x01;
4094
4095 zone_entry_sz = sizeof(NvmeZoneDescr);
4096 if (zra == NVME_ZONE_REPORT_EXTENDED) {
4097 zone_entry_sz += ns->params.zd_extension_size;
4098 }
4099
4100 max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz;
4101 buf = g_malloc0(data_size);
4102
4103 zone = &ns->zone_array[zone_idx];
4104 for (i = zone_idx; i < ns->num_zones; i++) {
4105 if (partial && nr_zones >= max_zones) {
4106 break;
4107 }
4108 if (nvme_zone_matches_filter(zrasf, zone++)) {
4109 nr_zones++;
4110 }
4111 }
4112 header = (NvmeZoneReportHeader *)buf;
4113 header->nr_zones = cpu_to_le64(nr_zones);
4114
4115 buf_p = buf + sizeof(NvmeZoneReportHeader);
4116 for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) {
4117 zone = &ns->zone_array[zone_idx];
4118 if (nvme_zone_matches_filter(zrasf, zone)) {
4119 z = (NvmeZoneDescr *)buf_p;
4120 buf_p += sizeof(NvmeZoneDescr);
4121
4122 z->zt = zone->d.zt;
4123 z->zs = zone->d.zs;
4124 z->zcap = cpu_to_le64(zone->d.zcap);
4125 z->zslba = cpu_to_le64(zone->d.zslba);
4126 z->za = zone->d.za;
4127
4128 if (nvme_wp_is_valid(zone)) {
4129 z->wp = cpu_to_le64(zone->d.wp);
4130 } else {
4131 z->wp = cpu_to_le64(~0ULL);
4132 }
4133
4134 if (zra == NVME_ZONE_REPORT_EXTENDED) {
4135 if (zone->d.za & NVME_ZA_ZD_EXT_VALID) {
4136 memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx),
4137 ns->params.zd_extension_size);
4138 }
4139 buf_p += ns->params.zd_extension_size;
4140 }
4141
4142 max_zones--;
4143 }
4144 }
4145
4146 status = nvme_c2h(n, (uint8_t *)buf, data_size, req);
4147
4148 g_free(buf);
4149
4150 return status;
4151}
4152
4153static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
4154{
4155 NvmeNamespace *ns;
4156 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
4157
4158 trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
4159 req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
4160
4161 if (!nvme_nsid_valid(n, nsid)) {
4162 return NVME_INVALID_NSID | NVME_DNR;
4163 }
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184 if (req->cmd.opcode == NVME_CMD_FLUSH) {
4185 return nvme_flush(n, req);
4186 }
4187
4188 ns = nvme_ns(n, nsid);
4189 if (unlikely(!ns)) {
4190 return NVME_INVALID_FIELD | NVME_DNR;
4191 }
4192
4193 if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
4194 trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
4195 return NVME_INVALID_OPCODE | NVME_DNR;
4196 }
4197
4198 if (ns->status) {
4199 return ns->status;
4200 }
4201
4202 if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
4203 return NVME_INVALID_FIELD;
4204 }
4205
4206 req->ns = ns;
4207
4208 switch (req->cmd.opcode) {
4209 case NVME_CMD_WRITE_ZEROES:
4210 return nvme_write_zeroes(n, req);
4211 case NVME_CMD_ZONE_APPEND:
4212 return nvme_zone_append(n, req);
4213 case NVME_CMD_WRITE:
4214 return nvme_write(n, req);
4215 case NVME_CMD_READ:
4216 return nvme_read(n, req);
4217 case NVME_CMD_COMPARE:
4218 return nvme_compare(n, req);
4219 case NVME_CMD_DSM:
4220 return nvme_dsm(n, req);
4221 case NVME_CMD_VERIFY:
4222 return nvme_verify(n, req);
4223 case NVME_CMD_COPY:
4224 return nvme_copy(n, req);
4225 case NVME_CMD_ZONE_MGMT_SEND:
4226 return nvme_zone_mgmt_send(n, req);
4227 case NVME_CMD_ZONE_MGMT_RECV:
4228 return nvme_zone_mgmt_recv(n, req);
4229 default:
4230 assert(false);
4231 }
4232
4233 return NVME_INVALID_OPCODE | NVME_DNR;
4234}
4235
4236static void nvme_cq_notifier(EventNotifier *e)
4237{
4238 NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
4239 NvmeCtrl *n = cq->ctrl;
4240
4241 if (!event_notifier_test_and_clear(e)) {
4242 return;
4243 }
4244
4245 nvme_update_cq_head(cq);
4246
4247 if (cq->tail == cq->head) {
4248 if (cq->irq_enabled) {
4249 n->cq_pending--;
4250 }
4251
4252 nvme_irq_deassert(n, cq);
4253 }
4254
4255 nvme_post_cqes(cq);
4256}
4257
4258static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
4259{
4260 NvmeCtrl *n = cq->ctrl;
4261 uint16_t offset = (cq->cqid << 3) + (1 << 2);
4262 int ret;
4263
4264 ret = event_notifier_init(&cq->notifier, 0);
4265 if (ret < 0) {
4266 return ret;
4267 }
4268
4269 event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
4270 memory_region_add_eventfd(&n->iomem,
4271 0x1000 + offset, 4, false, 0, &cq->notifier);
4272
4273 return 0;
4274}
4275
4276static void nvme_sq_notifier(EventNotifier *e)
4277{
4278 NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);
4279
4280 if (!event_notifier_test_and_clear(e)) {
4281 return;
4282 }
4283
4284 nvme_process_sq(sq);
4285}
4286
4287static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
4288{
4289 NvmeCtrl *n = sq->ctrl;
4290 uint16_t offset = sq->sqid << 3;
4291 int ret;
4292
4293 ret = event_notifier_init(&sq->notifier, 0);
4294 if (ret < 0) {
4295 return ret;
4296 }
4297
4298 event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
4299 memory_region_add_eventfd(&n->iomem,
4300 0x1000 + offset, 4, false, 0, &sq->notifier);
4301
4302 return 0;
4303}
4304
4305static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
4306{
4307 uint16_t offset = sq->sqid << 3;
4308
4309 n->sq[sq->sqid] = NULL;
4310 timer_free(sq->timer);
4311 if (sq->ioeventfd_enabled) {
4312 memory_region_del_eventfd(&n->iomem,
4313 0x1000 + offset, 4, false, 0, &sq->notifier);
4314 event_notifier_set_handler(&sq->notifier, NULL);
4315 event_notifier_cleanup(&sq->notifier);
4316 }
4317 g_free(sq->io_req);
4318 if (sq->sqid) {
4319 g_free(sq);
4320 }
4321}
4322
4323static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
4324{
4325 NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
4326 NvmeRequest *r, *next;
4327 NvmeSQueue *sq;
4328 NvmeCQueue *cq;
4329 uint16_t qid = le16_to_cpu(c->qid);
4330
4331 if (unlikely(!qid || nvme_check_sqid(n, qid))) {
4332 trace_pci_nvme_err_invalid_del_sq(qid);
4333 return NVME_INVALID_QID | NVME_DNR;
4334 }
4335
4336 trace_pci_nvme_del_sq(qid);
4337
4338 sq = n->sq[qid];
4339 while (!QTAILQ_EMPTY(&sq->out_req_list)) {
4340 r = QTAILQ_FIRST(&sq->out_req_list);
4341 assert(r->aiocb);
4342 blk_aio_cancel(r->aiocb);
4343 }
4344
4345 assert(QTAILQ_EMPTY(&sq->out_req_list));
4346
4347 if (!nvme_check_cqid(n, sq->cqid)) {
4348 cq = n->cq[sq->cqid];
4349 QTAILQ_REMOVE(&cq->sq_list, sq, entry);
4350
4351 nvme_post_cqes(cq);
4352 QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
4353 if (r->sq == sq) {
4354 QTAILQ_REMOVE(&cq->req_list, r, entry);
4355 QTAILQ_INSERT_TAIL(&sq->req_list, r, entry);
4356 }
4357 }
4358 }
4359
4360 nvme_free_sq(sq, n);
4361 return NVME_SUCCESS;
4362}
4363
4364static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
4365 uint16_t sqid, uint16_t cqid, uint16_t size)
4366{
4367 int i;
4368 NvmeCQueue *cq;
4369
4370 sq->ctrl = n;
4371 sq->dma_addr = dma_addr;
4372 sq->sqid = sqid;
4373 sq->size = size;
4374 sq->cqid = cqid;
4375 sq->head = sq->tail = 0;
4376 sq->io_req = g_new0(NvmeRequest, sq->size);
4377
4378 QTAILQ_INIT(&sq->req_list);
4379 QTAILQ_INIT(&sq->out_req_list);
4380 for (i = 0; i < sq->size; i++) {
4381 sq->io_req[i].sq = sq;
4382 QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
4383 }
4384 sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
4385
4386 if (n->dbbuf_enabled) {
4387 sq->db_addr = n->dbbuf_dbs + (sqid << 3);
4388 sq->ei_addr = n->dbbuf_eis + (sqid << 3);
4389
4390 if (n->params.ioeventfd && sq->sqid != 0) {
4391 if (!nvme_init_sq_ioeventfd(sq)) {
4392 sq->ioeventfd_enabled = true;
4393 }
4394 }
4395 }
4396
4397 assert(n->cq[cqid]);
4398 cq = n->cq[cqid];
4399 QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
4400 n->sq[sqid] = sq;
4401}
4402
4403static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
4404{
4405 NvmeSQueue *sq;
4406 NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd;
4407
4408 uint16_t cqid = le16_to_cpu(c->cqid);
4409 uint16_t sqid = le16_to_cpu(c->sqid);
4410 uint16_t qsize = le16_to_cpu(c->qsize);
4411 uint16_t qflags = le16_to_cpu(c->sq_flags);
4412 uint64_t prp1 = le64_to_cpu(c->prp1);
4413
4414 trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
4415
4416 if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
4417 trace_pci_nvme_err_invalid_create_sq_cqid(cqid);
4418 return NVME_INVALID_CQID | NVME_DNR;
4419 }
4420 if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) {
4421 trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
4422 return NVME_INVALID_QID | NVME_DNR;
4423 }
4424 if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
4425 trace_pci_nvme_err_invalid_create_sq_size(qsize);
4426 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
4427 }
4428 if (unlikely(prp1 & (n->page_size - 1))) {
4429 trace_pci_nvme_err_invalid_create_sq_addr(prp1);
4430 return NVME_INVALID_PRP_OFFSET | NVME_DNR;
4431 }
4432 if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
4433 trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
4434 return NVME_INVALID_FIELD | NVME_DNR;
4435 }
4436 sq = g_malloc0(sizeof(*sq));
4437 nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
4438 return NVME_SUCCESS;
4439}
4440
4441struct nvme_stats {
4442 uint64_t units_read;
4443 uint64_t units_written;
4444 uint64_t read_commands;
4445 uint64_t write_commands;
4446};
4447
4448static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
4449{
4450 BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
4451
4452 stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
4453 stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
4454 stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
4455 stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
4456}
4457
4458static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
4459 uint64_t off, NvmeRequest *req)
4460{
4461 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
4462 struct nvme_stats stats = { 0 };
4463 NvmeSmartLog smart = { 0 };
4464 uint32_t trans_len;
4465 NvmeNamespace *ns;
4466 time_t current_ms;
4467
4468 if (off >= sizeof(smart)) {
4469 return NVME_INVALID_FIELD | NVME_DNR;
4470 }
4471
4472 if (nsid != 0xffffffff) {
4473 ns = nvme_ns(n, nsid);
4474 if (!ns) {
4475 return NVME_INVALID_NSID | NVME_DNR;
4476 }
4477 nvme_set_blk_stats(ns, &stats);
4478 } else {
4479 int i;
4480
4481 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
4482 ns = nvme_ns(n, i);
4483 if (!ns) {
4484 continue;
4485 }
4486 nvme_set_blk_stats(ns, &stats);
4487 }
4488 }
4489
4490 trans_len = MIN(sizeof(smart) - off, buf_len);
4491 smart.critical_warning = n->smart_critical_warning;
4492
4493 smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
4494 1000));
4495 smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
4496 1000));
4497 smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
4498 smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);
4499
4500 smart.temperature = cpu_to_le16(n->temperature);
4501
4502 if ((n->temperature >= n->features.temp_thresh_hi) ||
4503 (n->temperature <= n->features.temp_thresh_low)) {
4504 smart.critical_warning |= NVME_SMART_TEMPERATURE;
4505 }
4506
4507 current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
4508 smart.power_on_hours[0] =
4509 cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
4510
4511 if (!rae) {
4512 nvme_clear_events(n, NVME_AER_TYPE_SMART);
4513 }
4514
4515 return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req);
4516}
4517
4518static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
4519 NvmeRequest *req)
4520{
4521 uint32_t trans_len;
4522 NvmeFwSlotInfoLog fw_log = {
4523 .afi = 0x1,
4524 };
4525
4526 if (off >= sizeof(fw_log)) {
4527 return NVME_INVALID_FIELD | NVME_DNR;
4528 }
4529
4530 strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
4531 trans_len = MIN(sizeof(fw_log) - off, buf_len);
4532
4533 return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req);
4534}
4535
4536static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
4537 uint64_t off, NvmeRequest *req)
4538{
4539 uint32_t trans_len;
4540 NvmeErrorLog errlog;
4541
4542 if (off >= sizeof(errlog)) {
4543 return NVME_INVALID_FIELD | NVME_DNR;
4544 }
4545
4546 if (!rae) {
4547 nvme_clear_events(n, NVME_AER_TYPE_ERROR);
4548 }
4549
4550 memset(&errlog, 0x0, sizeof(errlog));
4551 trans_len = MIN(sizeof(errlog) - off, buf_len);
4552
4553 return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req);
4554}
4555
4556static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
4557 uint64_t off, NvmeRequest *req)
4558{
4559 uint32_t nslist[1024];
4560 uint32_t trans_len;
4561 int i = 0;
4562 uint32_t nsid;
4563
4564 if (off >= sizeof(nslist)) {
4565 trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist));
4566 return NVME_INVALID_FIELD | NVME_DNR;
4567 }
4568
4569 memset(nslist, 0x0, sizeof(nslist));
4570 trans_len = MIN(sizeof(nslist) - off, buf_len);
4571
4572 while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
4573 NVME_CHANGED_NSID_SIZE) {
4574
4575
4576
4577
4578 if (i == ARRAY_SIZE(nslist)) {
4579 memset(nslist, 0x0, sizeof(nslist));
4580 nslist[0] = 0xffffffff;
4581 break;
4582 }
4583
4584 nslist[i++] = nsid;
4585 clear_bit(nsid, n->changed_nsids);
4586 }
4587
4588
4589
4590
4591
4592 if (nslist[0] == 0xffffffff) {
4593 bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE);
4594 }
4595
4596 if (!rae) {
4597 nvme_clear_events(n, NVME_AER_TYPE_NOTICE);
4598 }
4599
4600 return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req);
4601}
4602
4603static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
4604 uint64_t off, NvmeRequest *req)
4605{
4606 NvmeEffectsLog log = {};
4607 const uint32_t *src_iocs = NULL;
4608 uint32_t trans_len;
4609
4610 if (off >= sizeof(log)) {
4611 trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log));
4612 return NVME_INVALID_FIELD | NVME_DNR;
4613 }
4614
4615 switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
4616 case NVME_CC_CSS_NVM:
4617 src_iocs = nvme_cse_iocs_nvm;
4618
4619 case NVME_CC_CSS_ADMIN_ONLY:
4620 break;
4621 case NVME_CC_CSS_CSI:
4622 switch (csi) {
4623 case NVME_CSI_NVM:
4624 src_iocs = nvme_cse_iocs_nvm;
4625 break;
4626 case NVME_CSI_ZONED:
4627 src_iocs = nvme_cse_iocs_zoned;
4628 break;
4629 }
4630 }
4631
4632 memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs));
4633
4634 if (src_iocs) {
4635 memcpy(log.iocs, src_iocs, sizeof(log.iocs));
4636 }
4637
4638 trans_len = MIN(sizeof(log) - off, buf_len);
4639
4640 return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
4641}
4642
4643static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
4644{
4645 NvmeCmd *cmd = &req->cmd;
4646
4647 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
4648 uint32_t dw11 = le32_to_cpu(cmd->cdw11);
4649 uint32_t dw12 = le32_to_cpu(cmd->cdw12);
4650 uint32_t dw13 = le32_to_cpu(cmd->cdw13);
4651 uint8_t lid = dw10 & 0xff;
4652 uint8_t lsp = (dw10 >> 8) & 0xf;
4653 uint8_t rae = (dw10 >> 15) & 0x1;
4654 uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24;
4655 uint32_t numdl, numdu;
4656 uint64_t off, lpol, lpou;
4657 size_t len;
4658 uint16_t status;
4659
4660 numdl = (dw10 >> 16);
4661 numdu = (dw11 & 0xffff);
4662 lpol = dw12;
4663 lpou = dw13;
4664
4665 len = (((numdu << 16) | numdl) + 1) << 2;
4666 off = (lpou << 32ULL) | lpol;
4667
4668 if (off & 0x3) {
4669 return NVME_INVALID_FIELD | NVME_DNR;
4670 }
4671
4672 trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
4673
4674 status = nvme_check_mdts(n, len);
4675 if (status) {
4676 return status;
4677 }
4678
4679 switch (lid) {
4680 case NVME_LOG_ERROR_INFO:
4681 return nvme_error_info(n, rae, len, off, req);
4682 case NVME_LOG_SMART_INFO:
4683 return nvme_smart_info(n, rae, len, off, req);
4684 case NVME_LOG_FW_SLOT_INFO:
4685 return nvme_fw_log_info(n, len, off, req);
4686 case NVME_LOG_CHANGED_NSLIST:
4687 return nvme_changed_nslist(n, rae, len, off, req);
4688 case NVME_LOG_CMD_EFFECTS:
4689 return nvme_cmd_effects(n, csi, len, off, req);
4690 default:
4691 trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
4692 return NVME_INVALID_FIELD | NVME_DNR;
4693 }
4694}
4695
4696static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
4697{
4698 uint16_t offset = (cq->cqid << 3) + (1 << 2);
4699
4700 n->cq[cq->cqid] = NULL;
4701 timer_free(cq->timer);
4702 if (cq->ioeventfd_enabled) {
4703 memory_region_del_eventfd(&n->iomem,
4704 0x1000 + offset, 4, false, 0, &cq->notifier);
4705 event_notifier_set_handler(&cq->notifier, NULL);
4706 event_notifier_cleanup(&cq->notifier);
4707 }
4708 if (msix_enabled(&n->parent_obj)) {
4709 msix_vector_unuse(&n->parent_obj, cq->vector);
4710 }
4711 if (cq->cqid) {
4712 g_free(cq);
4713 }
4714}
4715
4716static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
4717{
4718 NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
4719 NvmeCQueue *cq;
4720 uint16_t qid = le16_to_cpu(c->qid);
4721
4722 if (unlikely(!qid || nvme_check_cqid(n, qid))) {
4723 trace_pci_nvme_err_invalid_del_cq_cqid(qid);
4724 return NVME_INVALID_CQID | NVME_DNR;
4725 }
4726
4727 cq = n->cq[qid];
4728 if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
4729 trace_pci_nvme_err_invalid_del_cq_notempty(qid);
4730 return NVME_INVALID_QUEUE_DEL;
4731 }
4732
4733 if (cq->irq_enabled && cq->tail != cq->head) {
4734 n->cq_pending--;
4735 }
4736
4737 nvme_irq_deassert(n, cq);
4738 trace_pci_nvme_del_cq(qid);
4739 nvme_free_cq(cq, n);
4740 return NVME_SUCCESS;
4741}
4742
4743static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
4744 uint16_t cqid, uint16_t vector, uint16_t size,
4745 uint16_t irq_enabled)
4746{
4747 int ret;
4748
4749 if (msix_enabled(&n->parent_obj)) {
4750 ret = msix_vector_use(&n->parent_obj, vector);
4751 assert(ret == 0);
4752 }
4753 cq->ctrl = n;
4754 cq->cqid = cqid;
4755 cq->size = size;
4756 cq->dma_addr = dma_addr;
4757 cq->phase = 1;
4758 cq->irq_enabled = irq_enabled;
4759 cq->vector = vector;
4760 cq->head = cq->tail = 0;
4761 QTAILQ_INIT(&cq->req_list);
4762 QTAILQ_INIT(&cq->sq_list);
4763 if (n->dbbuf_enabled) {
4764 cq->db_addr = n->dbbuf_dbs + (cqid << 3) + (1 << 2);
4765 cq->ei_addr = n->dbbuf_eis + (cqid << 3) + (1 << 2);
4766
4767 if (n->params.ioeventfd && cqid != 0) {
4768 if (!nvme_init_cq_ioeventfd(cq)) {
4769 cq->ioeventfd_enabled = true;
4770 }
4771 }
4772 }
4773 n->cq[cqid] = cq;
4774 cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
4775}
4776
4777static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
4778{
4779 NvmeCQueue *cq;
4780 NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd;
4781 uint16_t cqid = le16_to_cpu(c->cqid);
4782 uint16_t vector = le16_to_cpu(c->irq_vector);
4783 uint16_t qsize = le16_to_cpu(c->qsize);
4784 uint16_t qflags = le16_to_cpu(c->cq_flags);
4785 uint64_t prp1 = le64_to_cpu(c->prp1);
4786
4787 trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
4788 NVME_CQ_FLAGS_IEN(qflags) != 0);
4789
4790 if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
4791 trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
4792 return NVME_INVALID_QID | NVME_DNR;
4793 }
4794 if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
4795 trace_pci_nvme_err_invalid_create_cq_size(qsize);
4796 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
4797 }
4798 if (unlikely(prp1 & (n->page_size - 1))) {
4799 trace_pci_nvme_err_invalid_create_cq_addr(prp1);
4800 return NVME_INVALID_PRP_OFFSET | NVME_DNR;
4801 }
4802 if (unlikely(!msix_enabled(&n->parent_obj) && vector)) {
4803 trace_pci_nvme_err_invalid_create_cq_vector(vector);
4804 return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
4805 }
4806 if (unlikely(vector >= n->conf_msix_qsize)) {
4807 trace_pci_nvme_err_invalid_create_cq_vector(vector);
4808 return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
4809 }
4810 if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
4811 trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
4812 return NVME_INVALID_FIELD | NVME_DNR;
4813 }
4814
4815 cq = g_malloc0(sizeof(*cq));
4816 nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
4817 NVME_CQ_FLAGS_IEN(qflags));
4818
4819
4820
4821
4822
4823
4824 n->qs_created = true;
4825 return NVME_SUCCESS;
4826}
4827
4828static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
4829{
4830 uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
4831
4832 return nvme_c2h(n, id, sizeof(id), req);
4833}
4834
4835static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
4836{
4837 trace_pci_nvme_identify_ctrl();
4838
4839 return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req);
4840}
4841
4842static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
4843{
4844 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4845 uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
4846 NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id;
4847
4848 trace_pci_nvme_identify_ctrl_csi(c->csi);
4849
4850 switch (c->csi) {
4851 case NVME_CSI_NVM:
4852 id_nvm->vsl = n->params.vsl;
4853 id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
4854 break;
4855
4856 case NVME_CSI_ZONED:
4857 ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl;
4858 break;
4859
4860 default:
4861 return NVME_INVALID_FIELD | NVME_DNR;
4862 }
4863
4864 return nvme_c2h(n, id, sizeof(id), req);
4865}
4866
4867static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
4868{
4869 NvmeNamespace *ns;
4870 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4871 uint32_t nsid = le32_to_cpu(c->nsid);
4872
4873 trace_pci_nvme_identify_ns(nsid);
4874
4875 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
4876 return NVME_INVALID_NSID | NVME_DNR;
4877 }
4878
4879 ns = nvme_ns(n, nsid);
4880 if (unlikely(!ns)) {
4881 if (!active) {
4882 ns = nvme_subsys_ns(n->subsys, nsid);
4883 if (!ns) {
4884 return nvme_rpt_empty_id_struct(n, req);
4885 }
4886 } else {
4887 return nvme_rpt_empty_id_struct(n, req);
4888 }
4889 }
4890
4891 if (active || ns->csi == NVME_CSI_NVM) {
4892 return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
4893 }
4894
4895 return NVME_INVALID_CMD_SET | NVME_DNR;
4896}
4897
4898static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
4899 bool attached)
4900{
4901 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4902 uint32_t nsid = le32_to_cpu(c->nsid);
4903 uint16_t min_id = le16_to_cpu(c->ctrlid);
4904 uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
4905 uint16_t *ids = &list[1];
4906 NvmeNamespace *ns;
4907 NvmeCtrl *ctrl;
4908 int cntlid, nr_ids = 0;
4909
4910 trace_pci_nvme_identify_ctrl_list(c->cns, min_id);
4911
4912 if (!n->subsys) {
4913 return NVME_INVALID_FIELD | NVME_DNR;
4914 }
4915
4916 if (attached) {
4917 if (nsid == NVME_NSID_BROADCAST) {
4918 return NVME_INVALID_FIELD | NVME_DNR;
4919 }
4920
4921 ns = nvme_subsys_ns(n->subsys, nsid);
4922 if (!ns) {
4923 return NVME_INVALID_FIELD | NVME_DNR;
4924 }
4925 }
4926
4927 for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
4928 ctrl = nvme_subsys_ctrl(n->subsys, cntlid);
4929 if (!ctrl) {
4930 continue;
4931 }
4932
4933 if (attached && !nvme_ns(ctrl, nsid)) {
4934 continue;
4935 }
4936
4937 ids[nr_ids++] = cntlid;
4938 }
4939
4940 list[0] = nr_ids;
4941
4942 return nvme_c2h(n, (uint8_t *)list, sizeof(list), req);
4943}
4944
4945static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
4946{
4947 trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid));
4948
4949 return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap,
4950 sizeof(NvmePriCtrlCap), req);
4951}
4952
4953static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
4954{
4955 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4956 uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
4957 uint16_t min_id = le16_to_cpu(c->ctrlid);
4958 uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl;
4959 NvmeSecCtrlList list = {0};
4960 uint8_t i;
4961
4962 for (i = 0; i < num_sec_ctrl; i++) {
4963 if (n->sec_ctrl_list.sec[i].scid >= min_id) {
4964 list.numcntl = num_sec_ctrl - i;
4965 memcpy(&list.sec, n->sec_ctrl_list.sec + i,
4966 list.numcntl * sizeof(NvmeSecCtrlEntry));
4967 break;
4968 }
4969 }
4970
4971 trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl);
4972
4973 return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
4974}
4975
4976static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
4977 bool active)
4978{
4979 NvmeNamespace *ns;
4980 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
4981 uint32_t nsid = le32_to_cpu(c->nsid);
4982
4983 trace_pci_nvme_identify_ns_csi(nsid, c->csi);
4984
4985 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
4986 return NVME_INVALID_NSID | NVME_DNR;
4987 }
4988
4989 ns = nvme_ns(n, nsid);
4990 if (unlikely(!ns)) {
4991 if (!active) {
4992 ns = nvme_subsys_ns(n->subsys, nsid);
4993 if (!ns) {
4994 return nvme_rpt_empty_id_struct(n, req);
4995 }
4996 } else {
4997 return nvme_rpt_empty_id_struct(n, req);
4998 }
4999 }
5000
5001 if (c->csi == NVME_CSI_NVM) {
5002 return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm),
5003 req);
5004 } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
5005 return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
5006 req);
5007 }
5008
5009 return NVME_INVALID_FIELD | NVME_DNR;
5010}
5011
5012static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
5013 bool active)
5014{
5015 NvmeNamespace *ns;
5016 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5017 uint32_t min_nsid = le32_to_cpu(c->nsid);
5018 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5019 static const int data_len = sizeof(list);
5020 uint32_t *list_ptr = (uint32_t *)list;
5021 int i, j = 0;
5022
5023 trace_pci_nvme_identify_nslist(min_nsid);
5024
5025
5026
5027
5028
5029
5030
5031 if (min_nsid >= NVME_NSID_BROADCAST - 1) {
5032 return NVME_INVALID_NSID | NVME_DNR;
5033 }
5034
5035 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5036 ns = nvme_ns(n, i);
5037 if (!ns) {
5038 if (!active) {
5039 ns = nvme_subsys_ns(n->subsys, i);
5040 if (!ns) {
5041 continue;
5042 }
5043 } else {
5044 continue;
5045 }
5046 }
5047 if (ns->params.nsid <= min_nsid) {
5048 continue;
5049 }
5050 list_ptr[j++] = cpu_to_le32(ns->params.nsid);
5051 if (j == data_len / sizeof(uint32_t)) {
5052 break;
5053 }
5054 }
5055
5056 return nvme_c2h(n, list, data_len, req);
5057}
5058
5059static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
5060 bool active)
5061{
5062 NvmeNamespace *ns;
5063 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5064 uint32_t min_nsid = le32_to_cpu(c->nsid);
5065 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5066 static const int data_len = sizeof(list);
5067 uint32_t *list_ptr = (uint32_t *)list;
5068 int i, j = 0;
5069
5070 trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi);
5071
5072
5073
5074
5075 if (min_nsid >= NVME_NSID_BROADCAST - 1) {
5076 return NVME_INVALID_NSID | NVME_DNR;
5077 }
5078
5079 if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) {
5080 return NVME_INVALID_FIELD | NVME_DNR;
5081 }
5082
5083 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5084 ns = nvme_ns(n, i);
5085 if (!ns) {
5086 if (!active) {
5087 ns = nvme_subsys_ns(n->subsys, i);
5088 if (!ns) {
5089 continue;
5090 }
5091 } else {
5092 continue;
5093 }
5094 }
5095 if (ns->params.nsid <= min_nsid || c->csi != ns->csi) {
5096 continue;
5097 }
5098 list_ptr[j++] = cpu_to_le32(ns->params.nsid);
5099 if (j == data_len / sizeof(uint32_t)) {
5100 break;
5101 }
5102 }
5103
5104 return nvme_c2h(n, list, data_len, req);
5105}
5106
5107static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
5108{
5109 NvmeNamespace *ns;
5110 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5111 uint32_t nsid = le32_to_cpu(c->nsid);
5112 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5113 uint8_t *pos = list;
5114 struct {
5115 NvmeIdNsDescr hdr;
5116 uint8_t v[NVME_NIDL_UUID];
5117 } QEMU_PACKED uuid = {};
5118 struct {
5119 NvmeIdNsDescr hdr;
5120 uint64_t v;
5121 } QEMU_PACKED eui64 = {};
5122 struct {
5123 NvmeIdNsDescr hdr;
5124 uint8_t v;
5125 } QEMU_PACKED csi = {};
5126
5127 trace_pci_nvme_identify_ns_descr_list(nsid);
5128
5129 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
5130 return NVME_INVALID_NSID | NVME_DNR;
5131 }
5132
5133 ns = nvme_ns(n, nsid);
5134 if (unlikely(!ns)) {
5135 return NVME_INVALID_FIELD | NVME_DNR;
5136 }
5137
5138 if (!qemu_uuid_is_null(&ns->params.uuid)) {
5139 uuid.hdr.nidt = NVME_NIDT_UUID;
5140 uuid.hdr.nidl = NVME_NIDL_UUID;
5141 memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
5142 memcpy(pos, &uuid, sizeof(uuid));
5143 pos += sizeof(uuid);
5144 }
5145
5146 if (ns->params.eui64) {
5147 eui64.hdr.nidt = NVME_NIDT_EUI64;
5148 eui64.hdr.nidl = NVME_NIDL_EUI64;
5149 eui64.v = cpu_to_be64(ns->params.eui64);
5150 memcpy(pos, &eui64, sizeof(eui64));
5151 pos += sizeof(eui64);
5152 }
5153
5154 csi.hdr.nidt = NVME_NIDT_CSI;
5155 csi.hdr.nidl = NVME_NIDL_CSI;
5156 csi.v = ns->csi;
5157 memcpy(pos, &csi, sizeof(csi));
5158 pos += sizeof(csi);
5159
5160 return nvme_c2h(n, list, sizeof(list), req);
5161}
5162
5163static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
5164{
5165 uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
5166 static const int data_len = sizeof(list);
5167
5168 trace_pci_nvme_identify_cmd_set();
5169
5170 NVME_SET_CSI(*list, NVME_CSI_NVM);
5171 NVME_SET_CSI(*list, NVME_CSI_ZONED);
5172
5173 return nvme_c2h(n, list, data_len, req);
5174}
5175
5176static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
5177{
5178 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
5179
5180 trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid),
5181 c->csi);
5182
5183 switch (c->cns) {
5184 case NVME_ID_CNS_NS:
5185 return nvme_identify_ns(n, req, true);
5186 case NVME_ID_CNS_NS_PRESENT:
5187 return nvme_identify_ns(n, req, false);
5188 case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
5189 return nvme_identify_ctrl_list(n, req, true);
5190 case NVME_ID_CNS_CTRL_LIST:
5191 return nvme_identify_ctrl_list(n, req, false);
5192 case NVME_ID_CNS_PRIMARY_CTRL_CAP:
5193 return nvme_identify_pri_ctrl_cap(n, req);
5194 case NVME_ID_CNS_SECONDARY_CTRL_LIST:
5195 return nvme_identify_sec_ctrl_list(n, req);
5196 case NVME_ID_CNS_CS_NS:
5197 return nvme_identify_ns_csi(n, req, true);
5198 case NVME_ID_CNS_CS_NS_PRESENT:
5199 return nvme_identify_ns_csi(n, req, false);
5200 case NVME_ID_CNS_CTRL:
5201 return nvme_identify_ctrl(n, req);
5202 case NVME_ID_CNS_CS_CTRL:
5203 return nvme_identify_ctrl_csi(n, req);
5204 case NVME_ID_CNS_NS_ACTIVE_LIST:
5205 return nvme_identify_nslist(n, req, true);
5206 case NVME_ID_CNS_NS_PRESENT_LIST:
5207 return nvme_identify_nslist(n, req, false);
5208 case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
5209 return nvme_identify_nslist_csi(n, req, true);
5210 case NVME_ID_CNS_CS_NS_PRESENT_LIST:
5211 return nvme_identify_nslist_csi(n, req, false);
5212 case NVME_ID_CNS_NS_DESCR_LIST:
5213 return nvme_identify_ns_descr_list(n, req);
5214 case NVME_ID_CNS_IO_COMMAND_SET:
5215 return nvme_identify_cmd_set(n, req);
5216 default:
5217 trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
5218 return NVME_INVALID_FIELD | NVME_DNR;
5219 }
5220}
5221
5222static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
5223{
5224 uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
5225
5226 req->cqe.result = 1;
5227 if (nvme_check_sqid(n, sqid)) {
5228 return NVME_INVALID_FIELD | NVME_DNR;
5229 }
5230
5231 return NVME_SUCCESS;
5232}
5233
5234static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
5235{
5236 trace_pci_nvme_setfeat_timestamp(ts);
5237
5238 n->host_timestamp = le64_to_cpu(ts);
5239 n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
5240}
5241
5242static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
5243{
5244 uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
5245 uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms;
5246
5247 union nvme_timestamp {
5248 struct {
5249 uint64_t timestamp:48;
5250 uint64_t sync:1;
5251 uint64_t origin:3;
5252 uint64_t rsvd1:12;
5253 };
5254 uint64_t all;
5255 };
5256
5257 union nvme_timestamp ts;
5258 ts.all = 0;
5259 ts.timestamp = n->host_timestamp + elapsed_time;
5260
5261
5262 ts.origin = n->host_timestamp ? 0x01 : 0x00;
5263
5264 trace_pci_nvme_getfeat_timestamp(ts.all);
5265
5266 return cpu_to_le64(ts.all);
5267}
5268
5269static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
5270{
5271 uint64_t timestamp = nvme_get_timestamp(n);
5272
5273 return nvme_c2h(n, (uint8_t *)×tamp, sizeof(timestamp), req);
5274}
5275
5276static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
5277{
5278 NvmeCmd *cmd = &req->cmd;
5279 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
5280 uint32_t dw11 = le32_to_cpu(cmd->cdw11);
5281 uint32_t nsid = le32_to_cpu(cmd->nsid);
5282 uint32_t result;
5283 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
5284 NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
5285 uint16_t iv;
5286 NvmeNamespace *ns;
5287 int i;
5288
5289 static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
5290 [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
5291 };
5292
5293 trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11);
5294
5295 if (!nvme_feature_support[fid]) {
5296 return NVME_INVALID_FIELD | NVME_DNR;
5297 }
5298
5299 if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
5300 if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
5301
5302
5303
5304
5305
5306
5307
5308 return NVME_INVALID_NSID | NVME_DNR;
5309 }
5310
5311 if (!nvme_ns(n, nsid)) {
5312 return NVME_INVALID_FIELD | NVME_DNR;
5313 }
5314 }
5315
5316 switch (sel) {
5317 case NVME_GETFEAT_SELECT_CURRENT:
5318 break;
5319 case NVME_GETFEAT_SELECT_SAVED:
5320
5321 case NVME_GETFEAT_SELECT_DEFAULT:
5322 goto defaults;
5323 case NVME_GETFEAT_SELECT_CAP:
5324 result = nvme_feature_cap[fid];
5325 goto out;
5326 }
5327
5328 switch (fid) {
5329 case NVME_TEMPERATURE_THRESHOLD:
5330 result = 0;
5331
5332
5333
5334
5335
5336 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5337 goto out;
5338 }
5339
5340 switch (NVME_TEMP_THSEL(dw11)) {
5341 case NVME_TEMP_THSEL_OVER:
5342 result = n->features.temp_thresh_hi;
5343 goto out;
5344 case NVME_TEMP_THSEL_UNDER:
5345 result = n->features.temp_thresh_low;
5346 goto out;
5347 }
5348
5349 return NVME_INVALID_FIELD | NVME_DNR;
5350 case NVME_ERROR_RECOVERY:
5351 if (!nvme_nsid_valid(n, nsid)) {
5352 return NVME_INVALID_NSID | NVME_DNR;
5353 }
5354
5355 ns = nvme_ns(n, nsid);
5356 if (unlikely(!ns)) {
5357 return NVME_INVALID_FIELD | NVME_DNR;
5358 }
5359
5360 result = ns->features.err_rec;
5361 goto out;
5362 case NVME_VOLATILE_WRITE_CACHE:
5363 result = 0;
5364 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5365 ns = nvme_ns(n, i);
5366 if (!ns) {
5367 continue;
5368 }
5369
5370 result = blk_enable_write_cache(ns->blkconf.blk);
5371 if (result) {
5372 break;
5373 }
5374 }
5375 trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
5376 goto out;
5377 case NVME_ASYNCHRONOUS_EVENT_CONF:
5378 result = n->features.async_config;
5379 goto out;
5380 case NVME_TIMESTAMP:
5381 return nvme_get_feature_timestamp(n, req);
5382 case NVME_HOST_BEHAVIOR_SUPPORT:
5383 return nvme_c2h(n, (uint8_t *)&n->features.hbs,
5384 sizeof(n->features.hbs), req);
5385 default:
5386 break;
5387 }
5388
5389defaults:
5390 switch (fid) {
5391 case NVME_TEMPERATURE_THRESHOLD:
5392 result = 0;
5393
5394 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5395 break;
5396 }
5397
5398 if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) {
5399 result = NVME_TEMPERATURE_WARNING;
5400 }
5401
5402 break;
5403 case NVME_NUMBER_OF_QUEUES:
5404 result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16);
5405 trace_pci_nvme_getfeat_numq(result);
5406 break;
5407 case NVME_INTERRUPT_VECTOR_CONF:
5408 iv = dw11 & 0xffff;
5409 if (iv >= n->conf_ioqpairs + 1) {
5410 return NVME_INVALID_FIELD | NVME_DNR;
5411 }
5412
5413 result = iv;
5414 if (iv == n->admin_cq.vector) {
5415 result |= NVME_INTVC_NOCOALESCING;
5416 }
5417 break;
5418 default:
5419 result = nvme_feature_default[fid];
5420 break;
5421 }
5422
5423out:
5424 req->cqe.result = cpu_to_le32(result);
5425 return NVME_SUCCESS;
5426}
5427
5428static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
5429{
5430 uint16_t ret;
5431 uint64_t timestamp;
5432
5433 ret = nvme_h2c(n, (uint8_t *)×tamp, sizeof(timestamp), req);
5434 if (ret) {
5435 return ret;
5436 }
5437
5438 nvme_set_timestamp(n, timestamp);
5439
5440 return NVME_SUCCESS;
5441}
5442
5443static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
5444{
5445 NvmeNamespace *ns = NULL;
5446
5447 NvmeCmd *cmd = &req->cmd;
5448 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
5449 uint32_t dw11 = le32_to_cpu(cmd->cdw11);
5450 uint32_t nsid = le32_to_cpu(cmd->nsid);
5451 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
5452 uint8_t save = NVME_SETFEAT_SAVE(dw10);
5453 uint16_t status;
5454 int i;
5455
5456 trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
5457
5458 if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) {
5459 return NVME_FID_NOT_SAVEABLE | NVME_DNR;
5460 }
5461
5462 if (!nvme_feature_support[fid]) {
5463 return NVME_INVALID_FIELD | NVME_DNR;
5464 }
5465
5466 if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
5467 if (nsid != NVME_NSID_BROADCAST) {
5468 if (!nvme_nsid_valid(n, nsid)) {
5469 return NVME_INVALID_NSID | NVME_DNR;
5470 }
5471
5472 ns = nvme_ns(n, nsid);
5473 if (unlikely(!ns)) {
5474 return NVME_INVALID_FIELD | NVME_DNR;
5475 }
5476 }
5477 } else if (nsid && nsid != NVME_NSID_BROADCAST) {
5478 if (!nvme_nsid_valid(n, nsid)) {
5479 return NVME_INVALID_NSID | NVME_DNR;
5480 }
5481
5482 return NVME_FEAT_NOT_NS_SPEC | NVME_DNR;
5483 }
5484
5485 if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) {
5486 return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
5487 }
5488
5489 switch (fid) {
5490 case NVME_TEMPERATURE_THRESHOLD:
5491 if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
5492 break;
5493 }
5494
5495 switch (NVME_TEMP_THSEL(dw11)) {
5496 case NVME_TEMP_THSEL_OVER:
5497 n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
5498 break;
5499 case NVME_TEMP_THSEL_UNDER:
5500 n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
5501 break;
5502 default:
5503 return NVME_INVALID_FIELD | NVME_DNR;
5504 }
5505
5506 if ((n->temperature >= n->features.temp_thresh_hi) ||
5507 (n->temperature <= n->features.temp_thresh_low)) {
5508 nvme_smart_event(n, NVME_SMART_TEMPERATURE);
5509 }
5510
5511 break;
5512 case NVME_ERROR_RECOVERY:
5513 if (nsid == NVME_NSID_BROADCAST) {
5514 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5515 ns = nvme_ns(n, i);
5516
5517 if (!ns) {
5518 continue;
5519 }
5520
5521 if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
5522 ns->features.err_rec = dw11;
5523 }
5524 }
5525
5526 break;
5527 }
5528
5529 assert(ns);
5530 if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
5531 ns->features.err_rec = dw11;
5532 }
5533 break;
5534 case NVME_VOLATILE_WRITE_CACHE:
5535 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5536 ns = nvme_ns(n, i);
5537 if (!ns) {
5538 continue;
5539 }
5540
5541 if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) {
5542 blk_flush(ns->blkconf.blk);
5543 }
5544
5545 blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1);
5546 }
5547
5548 break;
5549
5550 case NVME_NUMBER_OF_QUEUES:
5551 if (n->qs_created) {
5552 return NVME_CMD_SEQ_ERROR | NVME_DNR;
5553 }
5554
5555
5556
5557
5558
5559 if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
5560 return NVME_INVALID_FIELD | NVME_DNR;
5561 }
5562
5563 trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1,
5564 ((dw11 >> 16) & 0xffff) + 1,
5565 n->conf_ioqpairs,
5566 n->conf_ioqpairs);
5567 req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) |
5568 ((n->conf_ioqpairs - 1) << 16));
5569 break;
5570 case NVME_ASYNCHRONOUS_EVENT_CONF:
5571 n->features.async_config = dw11;
5572 break;
5573 case NVME_TIMESTAMP:
5574 return nvme_set_feature_timestamp(n, req);
5575 case NVME_HOST_BEHAVIOR_SUPPORT:
5576 status = nvme_h2c(n, (uint8_t *)&n->features.hbs,
5577 sizeof(n->features.hbs), req);
5578 if (status) {
5579 return status;
5580 }
5581
5582 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
5583 ns = nvme_ns(n, i);
5584
5585 if (!ns) {
5586 continue;
5587 }
5588
5589 ns->id_ns.nlbaf = ns->nlbaf - 1;
5590 if (!n->features.hbs.lbafee) {
5591 ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
5592 }
5593 }
5594
5595 return status;
5596 case NVME_COMMAND_SET_PROFILE:
5597 if (dw11 & 0x1ff) {
5598 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
5599 return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
5600 }
5601 break;
5602 default:
5603 return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
5604 }
5605 return NVME_SUCCESS;
5606}
5607
5608static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
5609{
5610 trace_pci_nvme_aer(nvme_cid(req));
5611
5612 if (n->outstanding_aers > n->params.aerl) {
5613 trace_pci_nvme_aer_aerl_exceeded();
5614 return NVME_AER_LIMIT_EXCEEDED;
5615 }
5616
5617 n->aer_reqs[n->outstanding_aers] = req;
5618 n->outstanding_aers++;
5619
5620 if (!QTAILQ_EMPTY(&n->aer_queue)) {
5621 nvme_process_aers(n);
5622 }
5623
5624 return NVME_NO_COMPLETE;
5625}
5626
5627static void nvme_update_dmrsl(NvmeCtrl *n)
5628{
5629 int nsid;
5630
5631 for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
5632 NvmeNamespace *ns = nvme_ns(n, nsid);
5633 if (!ns) {
5634 continue;
5635 }
5636
5637 n->dmrsl = MIN_NON_ZERO(n->dmrsl,
5638 BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
5639 }
5640}
5641
5642static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
5643{
5644 uint32_t cc = ldl_le_p(&n->bar.cc);
5645
5646 ns->iocs = nvme_cse_iocs_none;
5647 switch (ns->csi) {
5648 case NVME_CSI_NVM:
5649 if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
5650 ns->iocs = nvme_cse_iocs_nvm;
5651 }
5652 break;
5653 case NVME_CSI_ZONED:
5654 if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
5655 ns->iocs = nvme_cse_iocs_zoned;
5656 } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
5657 ns->iocs = nvme_cse_iocs_nvm;
5658 }
5659 break;
5660 }
5661}
5662
5663static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
5664{
5665 NvmeNamespace *ns;
5666 NvmeCtrl *ctrl;
5667 uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
5668 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
5669 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5670 uint8_t sel = dw10 & 0xf;
5671 uint16_t *nr_ids = &list[0];
5672 uint16_t *ids = &list[1];
5673 uint16_t ret;
5674 int i;
5675
5676 trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
5677
5678 if (!nvme_nsid_valid(n, nsid)) {
5679 return NVME_INVALID_NSID | NVME_DNR;
5680 }
5681
5682 ns = nvme_subsys_ns(n->subsys, nsid);
5683 if (!ns) {
5684 return NVME_INVALID_FIELD | NVME_DNR;
5685 }
5686
5687 ret = nvme_h2c(n, (uint8_t *)list, 4096, req);
5688 if (ret) {
5689 return ret;
5690 }
5691
5692 if (!*nr_ids) {
5693 return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
5694 }
5695
5696 *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1);
5697 for (i = 0; i < *nr_ids; i++) {
5698 ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
5699 if (!ctrl) {
5700 return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
5701 }
5702
5703 switch (sel) {
5704 case NVME_NS_ATTACHMENT_ATTACH:
5705 if (nvme_ns(ctrl, nsid)) {
5706 return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
5707 }
5708
5709 if (ns->attached && !ns->params.shared) {
5710 return NVME_NS_PRIVATE | NVME_DNR;
5711 }
5712
5713 nvme_attach_ns(ctrl, ns);
5714 nvme_select_iocs_ns(ctrl, ns);
5715
5716 break;
5717
5718 case NVME_NS_ATTACHMENT_DETACH:
5719 if (!nvme_ns(ctrl, nsid)) {
5720 return NVME_NS_NOT_ATTACHED | NVME_DNR;
5721 }
5722
5723 ctrl->namespaces[nsid] = NULL;
5724 ns->attached--;
5725
5726 nvme_update_dmrsl(ctrl);
5727
5728 break;
5729
5730 default:
5731 return NVME_INVALID_FIELD | NVME_DNR;
5732 }
5733
5734
5735
5736
5737
5738 if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
5739 nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
5740 NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
5741 NVME_LOG_CHANGED_NSLIST);
5742 }
5743 }
5744
5745 return NVME_SUCCESS;
5746}
5747
5748typedef struct NvmeFormatAIOCB {
5749 BlockAIOCB common;
5750 BlockAIOCB *aiocb;
5751 QEMUBH *bh;
5752 NvmeRequest *req;
5753 int ret;
5754
5755 NvmeNamespace *ns;
5756 uint32_t nsid;
5757 bool broadcast;
5758 int64_t offset;
5759
5760 uint8_t lbaf;
5761 uint8_t mset;
5762 uint8_t pi;
5763 uint8_t pil;
5764} NvmeFormatAIOCB;
5765
5766static void nvme_format_bh(void *opaque);
5767
5768static void nvme_format_cancel(BlockAIOCB *aiocb)
5769{
5770 NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common);
5771
5772 if (iocb->aiocb) {
5773 blk_aio_cancel_async(iocb->aiocb);
5774 }
5775}
5776
5777static const AIOCBInfo nvme_format_aiocb_info = {
5778 .aiocb_size = sizeof(NvmeFormatAIOCB),
5779 .cancel_async = nvme_format_cancel,
5780 .get_aio_context = nvme_get_aio_context,
5781};
5782
5783static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
5784 uint8_t pi, uint8_t pil)
5785{
5786 uint8_t lbafl = lbaf & 0xf;
5787 uint8_t lbafu = lbaf >> 4;
5788
5789 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
5790
5791 ns->id_ns.dps = (pil << 3) | pi;
5792 ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
5793
5794 nvme_ns_init_format(ns);
5795}
5796
5797static void nvme_format_ns_cb(void *opaque, int ret)
5798{
5799 NvmeFormatAIOCB *iocb = opaque;
5800 NvmeNamespace *ns = iocb->ns;
5801 int bytes;
5802
5803 if (ret < 0) {
5804 iocb->ret = ret;
5805 goto done;
5806 }
5807
5808 assert(ns);
5809
5810 if (iocb->offset < ns->size) {
5811 bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset);
5812
5813 iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset,
5814 bytes, BDRV_REQ_MAY_UNMAP,
5815 nvme_format_ns_cb, iocb);
5816
5817 iocb->offset += bytes;
5818 return;
5819 }
5820
5821 nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
5822 ns->status = 0x0;
5823 iocb->ns = NULL;
5824 iocb->offset = 0;
5825
5826done:
5827 iocb->aiocb = NULL;
5828 qemu_bh_schedule(iocb->bh);
5829}
5830
5831static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi)
5832{
5833 if (ns->params.zoned) {
5834 return NVME_INVALID_FORMAT | NVME_DNR;
5835 }
5836
5837 if (lbaf > ns->id_ns.nlbaf) {
5838 return NVME_INVALID_FORMAT | NVME_DNR;
5839 }
5840
5841 if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
5842 return NVME_INVALID_FORMAT | NVME_DNR;
5843 }
5844
5845 if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
5846 return NVME_INVALID_FIELD | NVME_DNR;
5847 }
5848
5849 return NVME_SUCCESS;
5850}
5851
5852static void nvme_format_bh(void *opaque)
5853{
5854 NvmeFormatAIOCB *iocb = opaque;
5855 NvmeRequest *req = iocb->req;
5856 NvmeCtrl *n = nvme_ctrl(req);
5857 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5858 uint8_t lbaf = dw10 & 0xf;
5859 uint8_t pi = (dw10 >> 5) & 0x7;
5860 uint16_t status;
5861 int i;
5862
5863 if (iocb->ret < 0) {
5864 goto done;
5865 }
5866
5867 if (iocb->broadcast) {
5868 for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
5869 iocb->ns = nvme_ns(n, i);
5870 if (iocb->ns) {
5871 iocb->nsid = i;
5872 break;
5873 }
5874 }
5875 }
5876
5877 if (!iocb->ns) {
5878 goto done;
5879 }
5880
5881 status = nvme_format_check(iocb->ns, lbaf, pi);
5882 if (status) {
5883 req->status = status;
5884 goto done;
5885 }
5886
5887 iocb->ns->status = NVME_FORMAT_IN_PROGRESS;
5888 nvme_format_ns_cb(iocb, 0);
5889 return;
5890
5891done:
5892 qemu_bh_delete(iocb->bh);
5893 iocb->bh = NULL;
5894
5895 iocb->common.cb(iocb->common.opaque, iocb->ret);
5896
5897 qemu_aio_unref(iocb);
5898}
5899
5900static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
5901{
5902 NvmeFormatAIOCB *iocb;
5903 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
5904 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
5905 uint8_t lbaf = dw10 & 0xf;
5906 uint8_t mset = (dw10 >> 4) & 0x1;
5907 uint8_t pi = (dw10 >> 5) & 0x7;
5908 uint8_t pil = (dw10 >> 8) & 0x1;
5909 uint8_t lbafu = (dw10 >> 12) & 0x3;
5910 uint16_t status;
5911
5912 iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
5913
5914 iocb->req = req;
5915 iocb->bh = qemu_bh_new(nvme_format_bh, iocb);
5916 iocb->ret = 0;
5917 iocb->ns = NULL;
5918 iocb->nsid = 0;
5919 iocb->lbaf = lbaf;
5920 iocb->mset = mset;
5921 iocb->pi = pi;
5922 iocb->pil = pil;
5923 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
5924 iocb->offset = 0;
5925
5926 if (n->features.hbs.lbafee) {
5927 iocb->lbaf |= lbafu << 4;
5928 }
5929
5930 if (!iocb->broadcast) {
5931 if (!nvme_nsid_valid(n, nsid)) {
5932 status = NVME_INVALID_NSID | NVME_DNR;
5933 goto out;
5934 }
5935
5936 iocb->ns = nvme_ns(n, nsid);
5937 if (!iocb->ns) {
5938 status = NVME_INVALID_FIELD | NVME_DNR;
5939 goto out;
5940 }
5941 }
5942
5943 req->aiocb = &iocb->common;
5944 qemu_bh_schedule(iocb->bh);
5945
5946 return NVME_NO_COMPLETE;
5947
5948out:
5949 qemu_bh_delete(iocb->bh);
5950 iocb->bh = NULL;
5951 qemu_aio_unref(iocb);
5952 return status;
5953}
5954
5955static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total,
5956 int *num_prim, int *num_sec)
5957{
5958 *num_total = le32_to_cpu(rt ?
5959 n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt);
5960 *num_prim = le16_to_cpu(rt ?
5961 n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap);
5962 *num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa);
5963}
5964
5965static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req,
5966 uint16_t cntlid, uint8_t rt,
5967 int nr)
5968{
5969 int num_total, num_prim, num_sec;
5970
5971 if (cntlid != n->cntlid) {
5972 return NVME_INVALID_CTRL_ID | NVME_DNR;
5973 }
5974
5975 nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
5976
5977 if (nr > num_total) {
5978 return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
5979 }
5980
5981 if (nr > num_total - num_sec) {
5982 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
5983 }
5984
5985 if (rt) {
5986 n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr);
5987 } else {
5988 n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr);
5989 }
5990
5991 req->cqe.result = cpu_to_le32(nr);
5992 return req->status;
5993}
5994
5995static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl,
5996 uint8_t rt, int nr)
5997{
5998 int prev_nr, prev_total;
5999
6000 if (rt) {
6001 prev_nr = le16_to_cpu(sctrl->nvi);
6002 prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa);
6003 sctrl->nvi = cpu_to_le16(nr);
6004 n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr);
6005 } else {
6006 prev_nr = le16_to_cpu(sctrl->nvq);
6007 prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa);
6008 sctrl->nvq = cpu_to_le16(nr);
6009 n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr);
6010 }
6011}
6012
6013static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req,
6014 uint16_t cntlid, uint8_t rt, int nr)
6015{
6016 int num_total, num_prim, num_sec, num_free, diff, limit;
6017 NvmeSecCtrlEntry *sctrl;
6018
6019 sctrl = nvme_sctrl_for_cntlid(n, cntlid);
6020 if (!sctrl) {
6021 return NVME_INVALID_CTRL_ID | NVME_DNR;
6022 }
6023
6024 if (sctrl->scs) {
6025 return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
6026 }
6027
6028 limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm);
6029 if (nr > limit) {
6030 return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
6031 }
6032
6033 nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
6034 num_free = num_total - num_prim - num_sec;
6035 diff = nr - le16_to_cpu(rt ? sctrl->nvi : sctrl->nvq);
6036
6037 if (diff > num_free) {
6038 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
6039 }
6040
6041 nvme_update_virt_res(n, sctrl, rt, nr);
6042 req->cqe.result = cpu_to_le32(nr);
6043
6044 return req->status;
6045}
6046
6047static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online)
6048{
6049 NvmeCtrl *sn = NULL;
6050 NvmeSecCtrlEntry *sctrl;
6051 int vf_index;
6052
6053 sctrl = nvme_sctrl_for_cntlid(n, cntlid);
6054 if (!sctrl) {
6055 return NVME_INVALID_CTRL_ID | NVME_DNR;
6056 }
6057
6058 if (!pci_is_vf(&n->parent_obj)) {
6059 vf_index = le16_to_cpu(sctrl->vfn) - 1;
6060 sn = NVME(pcie_sriov_get_vf_at_index(&n->parent_obj, vf_index));
6061 }
6062
6063 if (online) {
6064 if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) {
6065 return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
6066 }
6067
6068 if (!sctrl->scs) {
6069 sctrl->scs = 0x1;
6070 nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
6071 }
6072 } else {
6073 nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_INTERRUPT, 0);
6074 nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_QUEUE, 0);
6075
6076 if (sctrl->scs) {
6077 sctrl->scs = 0x0;
6078 if (sn) {
6079 nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
6080 }
6081 }
6082 }
6083
6084 return NVME_SUCCESS;
6085}
6086
6087static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req)
6088{
6089 uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
6090 uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
6091 uint8_t act = dw10 & 0xf;
6092 uint8_t rt = (dw10 >> 8) & 0x7;
6093 uint16_t cntlid = (dw10 >> 16) & 0xffff;
6094 int nr = dw11 & 0xffff;
6095
6096 trace_pci_nvme_virt_mngmt(nvme_cid(req), act, cntlid, rt ? "VI" : "VQ", nr);
6097
6098 if (rt != NVME_VIRT_RES_QUEUE && rt != NVME_VIRT_RES_INTERRUPT) {
6099 return NVME_INVALID_RESOURCE_ID | NVME_DNR;
6100 }
6101
6102 switch (act) {
6103 case NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN:
6104 return nvme_assign_virt_res_to_sec(n, req, cntlid, rt, nr);
6105 case NVME_VIRT_MNGMT_ACTION_PRM_ALLOC:
6106 return nvme_assign_virt_res_to_prim(n, req, cntlid, rt, nr);
6107 case NVME_VIRT_MNGMT_ACTION_SEC_ONLINE:
6108 return nvme_virt_set_state(n, cntlid, true);
6109 case NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE:
6110 return nvme_virt_set_state(n, cntlid, false);
6111 default:
6112 return NVME_INVALID_FIELD | NVME_DNR;
6113 }
6114}
6115
6116static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
6117{
6118 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
6119 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
6120 int i;
6121
6122
6123 if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
6124 return NVME_INVALID_FIELD | NVME_DNR;
6125 }
6126
6127
6128 n->dbbuf_dbs = dbs_addr;
6129 n->dbbuf_eis = eis_addr;
6130 n->dbbuf_enabled = true;
6131
6132 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6133 NvmeSQueue *sq = n->sq[i];
6134 NvmeCQueue *cq = n->cq[i];
6135
6136 if (sq) {
6137
6138
6139
6140
6141
6142 sq->db_addr = dbs_addr + (i << 3);
6143 sq->ei_addr = eis_addr + (i << 3);
6144 pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
6145 sizeof(sq->tail));
6146
6147 if (n->params.ioeventfd && sq->sqid != 0) {
6148 if (!nvme_init_sq_ioeventfd(sq)) {
6149 sq->ioeventfd_enabled = true;
6150 }
6151 }
6152 }
6153
6154 if (cq) {
6155
6156 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
6157 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
6158 pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
6159 sizeof(cq->head));
6160
6161 if (n->params.ioeventfd && cq->cqid != 0) {
6162 if (!nvme_init_cq_ioeventfd(cq)) {
6163 cq->ioeventfd_enabled = true;
6164 }
6165 }
6166 }
6167 }
6168
6169 trace_pci_nvme_dbbuf_config(dbs_addr, eis_addr);
6170
6171 return NVME_SUCCESS;
6172}
6173
6174static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
6175{
6176 trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
6177 nvme_adm_opc_str(req->cmd.opcode));
6178
6179 if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
6180 trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
6181 return NVME_INVALID_OPCODE | NVME_DNR;
6182 }
6183
6184
6185 if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) {
6186 return NVME_INVALID_FIELD | NVME_DNR;
6187 }
6188
6189 if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
6190 return NVME_INVALID_FIELD;
6191 }
6192
6193 switch (req->cmd.opcode) {
6194 case NVME_ADM_CMD_DELETE_SQ:
6195 return nvme_del_sq(n, req);
6196 case NVME_ADM_CMD_CREATE_SQ:
6197 return nvme_create_sq(n, req);
6198 case NVME_ADM_CMD_GET_LOG_PAGE:
6199 return nvme_get_log(n, req);
6200 case NVME_ADM_CMD_DELETE_CQ:
6201 return nvme_del_cq(n, req);
6202 case NVME_ADM_CMD_CREATE_CQ:
6203 return nvme_create_cq(n, req);
6204 case NVME_ADM_CMD_IDENTIFY:
6205 return nvme_identify(n, req);
6206 case NVME_ADM_CMD_ABORT:
6207 return nvme_abort(n, req);
6208 case NVME_ADM_CMD_SET_FEATURES:
6209 return nvme_set_feature(n, req);
6210 case NVME_ADM_CMD_GET_FEATURES:
6211 return nvme_get_feature(n, req);
6212 case NVME_ADM_CMD_ASYNC_EV_REQ:
6213 return nvme_aer(n, req);
6214 case NVME_ADM_CMD_NS_ATTACHMENT:
6215 return nvme_ns_attachment(n, req);
6216 case NVME_ADM_CMD_VIRT_MNGMT:
6217 return nvme_virt_mngmt(n, req);
6218 case NVME_ADM_CMD_DBBUF_CONFIG:
6219 return nvme_dbbuf_config(n, req);
6220 case NVME_ADM_CMD_FORMAT_NVM:
6221 return nvme_format(n, req);
6222 default:
6223 assert(false);
6224 }
6225
6226 return NVME_INVALID_OPCODE | NVME_DNR;
6227}
6228
6229static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
6230{
6231 pci_dma_write(&sq->ctrl->parent_obj, sq->ei_addr, &sq->tail,
6232 sizeof(sq->tail));
6233 trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
6234}
6235
6236static void nvme_update_sq_tail(NvmeSQueue *sq)
6237{
6238 pci_dma_read(&sq->ctrl->parent_obj, sq->db_addr, &sq->tail,
6239 sizeof(sq->tail));
6240 trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
6241}
6242
6243static void nvme_process_sq(void *opaque)
6244{
6245 NvmeSQueue *sq = opaque;
6246 NvmeCtrl *n = sq->ctrl;
6247 NvmeCQueue *cq = n->cq[sq->cqid];
6248
6249 uint16_t status;
6250 hwaddr addr;
6251 NvmeCmd cmd;
6252 NvmeRequest *req;
6253
6254 if (n->dbbuf_enabled) {
6255 nvme_update_sq_tail(sq);
6256 }
6257
6258 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
6259 addr = sq->dma_addr + sq->head * n->sqe_size;
6260 if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
6261 trace_pci_nvme_err_addr_read(addr);
6262 trace_pci_nvme_err_cfs();
6263 stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
6264 break;
6265 }
6266 nvme_inc_sq_head(sq);
6267
6268 req = QTAILQ_FIRST(&sq->req_list);
6269 QTAILQ_REMOVE(&sq->req_list, req, entry);
6270 QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
6271 nvme_req_clear(req);
6272 req->cqe.cid = cmd.cid;
6273 memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
6274
6275 status = sq->sqid ? nvme_io_cmd(n, req) :
6276 nvme_admin_cmd(n, req);
6277 if (status != NVME_NO_COMPLETE) {
6278 req->status = status;
6279 nvme_enqueue_req_completion(cq, req);
6280 }
6281
6282 if (n->dbbuf_enabled) {
6283 nvme_update_sq_eventidx(sq);
6284 nvme_update_sq_tail(sq);
6285 }
6286 }
6287}
6288
6289static void nvme_update_msixcap_ts(PCIDevice *pci_dev, uint32_t table_size)
6290{
6291 uint8_t *config;
6292
6293 if (!msix_present(pci_dev)) {
6294 return;
6295 }
6296
6297 assert(table_size > 0 && table_size <= pci_dev->msix_entries_nr);
6298
6299 config = pci_dev->config + pci_dev->msix_cap;
6300 pci_set_word_by_mask(config + PCI_MSIX_FLAGS, PCI_MSIX_FLAGS_QSIZE,
6301 table_size - 1);
6302}
6303
6304static void nvme_activate_virt_res(NvmeCtrl *n)
6305{
6306 PCIDevice *pci_dev = &n->parent_obj;
6307 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
6308 NvmeSecCtrlEntry *sctrl;
6309
6310
6311 if (pci_is_vf(pci_dev)) {
6312 sctrl = nvme_sctrl(n);
6313 cap->vqprt = sctrl->nvq;
6314 cap->viprt = sctrl->nvi;
6315 n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
6316 n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
6317 } else {
6318 cap->vqrfap = n->next_pri_ctrl_cap.vqrfap;
6319 cap->virfap = n->next_pri_ctrl_cap.virfap;
6320 n->conf_ioqpairs = le16_to_cpu(cap->vqprt) +
6321 le16_to_cpu(cap->vqrfap) - 1;
6322 n->conf_msix_qsize = le16_to_cpu(cap->viprt) +
6323 le16_to_cpu(cap->virfap);
6324 }
6325}
6326
6327static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
6328{
6329 PCIDevice *pci_dev = &n->parent_obj;
6330 NvmeSecCtrlEntry *sctrl;
6331 NvmeNamespace *ns;
6332 int i;
6333
6334 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6335 ns = nvme_ns(n, i);
6336 if (!ns) {
6337 continue;
6338 }
6339
6340 nvme_ns_drain(ns);
6341 }
6342
6343 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6344 if (n->sq[i] != NULL) {
6345 nvme_free_sq(n->sq[i], n);
6346 }
6347 }
6348 for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
6349 if (n->cq[i] != NULL) {
6350 nvme_free_cq(n->cq[i], n);
6351 }
6352 }
6353
6354 while (!QTAILQ_EMPTY(&n->aer_queue)) {
6355 NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
6356 QTAILQ_REMOVE(&n->aer_queue, event, entry);
6357 g_free(event);
6358 }
6359
6360 if (n->params.sriov_max_vfs) {
6361 if (!pci_is_vf(pci_dev)) {
6362 for (i = 0; i < n->sec_ctrl_list.numcntl; i++) {
6363 sctrl = &n->sec_ctrl_list.sec[i];
6364 nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
6365 }
6366
6367 if (rst != NVME_RESET_CONTROLLER) {
6368 pcie_sriov_pf_disable_vfs(pci_dev);
6369 }
6370 }
6371
6372 if (rst != NVME_RESET_CONTROLLER) {
6373 nvme_activate_virt_res(n);
6374 }
6375 }
6376
6377 n->aer_queued = 0;
6378 n->aer_mask = 0;
6379 n->outstanding_aers = 0;
6380 n->qs_created = false;
6381
6382 nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
6383
6384 if (pci_is_vf(pci_dev)) {
6385 sctrl = nvme_sctrl(n);
6386
6387 stl_le_p(&n->bar.csts, sctrl->scs ? 0 : NVME_CSTS_FAILED);
6388 } else {
6389 stl_le_p(&n->bar.csts, 0);
6390 }
6391
6392 stl_le_p(&n->bar.intms, 0);
6393 stl_le_p(&n->bar.intmc, 0);
6394 stl_le_p(&n->bar.cc, 0);
6395
6396 n->dbbuf_dbs = 0;
6397 n->dbbuf_eis = 0;
6398 n->dbbuf_enabled = false;
6399}
6400
6401static void nvme_ctrl_shutdown(NvmeCtrl *n)
6402{
6403 NvmeNamespace *ns;
6404 int i;
6405
6406 if (n->pmr.dev) {
6407 memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
6408 }
6409
6410 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6411 ns = nvme_ns(n, i);
6412 if (!ns) {
6413 continue;
6414 }
6415
6416 nvme_ns_shutdown(ns);
6417 }
6418}
6419
6420static void nvme_select_iocs(NvmeCtrl *n)
6421{
6422 NvmeNamespace *ns;
6423 int i;
6424
6425 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
6426 ns = nvme_ns(n, i);
6427 if (!ns) {
6428 continue;
6429 }
6430
6431 nvme_select_iocs_ns(n, ns);
6432 }
6433}
6434
6435static int nvme_start_ctrl(NvmeCtrl *n)
6436{
6437 uint64_t cap = ldq_le_p(&n->bar.cap);
6438 uint32_t cc = ldl_le_p(&n->bar.cc);
6439 uint32_t aqa = ldl_le_p(&n->bar.aqa);
6440 uint64_t asq = ldq_le_p(&n->bar.asq);
6441 uint64_t acq = ldq_le_p(&n->bar.acq);
6442 uint32_t page_bits = NVME_CC_MPS(cc) + 12;
6443 uint32_t page_size = 1 << page_bits;
6444 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
6445
6446 if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
6447 trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi),
6448 le16_to_cpu(sctrl->nvq),
6449 sctrl->scs ? "ONLINE" :
6450 "OFFLINE");
6451 return -1;
6452 }
6453 if (unlikely(n->cq[0])) {
6454 trace_pci_nvme_err_startfail_cq();
6455 return -1;
6456 }
6457 if (unlikely(n->sq[0])) {
6458 trace_pci_nvme_err_startfail_sq();
6459 return -1;
6460 }
6461 if (unlikely(asq & (page_size - 1))) {
6462 trace_pci_nvme_err_startfail_asq_misaligned(asq);
6463 return -1;
6464 }
6465 if (unlikely(acq & (page_size - 1))) {
6466 trace_pci_nvme_err_startfail_acq_misaligned(acq);
6467 return -1;
6468 }
6469 if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) {
6470 trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc));
6471 return -1;
6472 }
6473 if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) {
6474 trace_pci_nvme_err_startfail_page_too_small(
6475 NVME_CC_MPS(cc),
6476 NVME_CAP_MPSMIN(cap));
6477 return -1;
6478 }
6479 if (unlikely(NVME_CC_MPS(cc) >
6480 NVME_CAP_MPSMAX(cap))) {
6481 trace_pci_nvme_err_startfail_page_too_large(
6482 NVME_CC_MPS(cc),
6483 NVME_CAP_MPSMAX(cap));
6484 return -1;
6485 }
6486 if (unlikely(NVME_CC_IOCQES(cc) <
6487 NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
6488 trace_pci_nvme_err_startfail_cqent_too_small(
6489 NVME_CC_IOCQES(cc),
6490 NVME_CTRL_CQES_MIN(cap));
6491 return -1;
6492 }
6493 if (unlikely(NVME_CC_IOCQES(cc) >
6494 NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
6495 trace_pci_nvme_err_startfail_cqent_too_large(
6496 NVME_CC_IOCQES(cc),
6497 NVME_CTRL_CQES_MAX(cap));
6498 return -1;
6499 }
6500 if (unlikely(NVME_CC_IOSQES(cc) <
6501 NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
6502 trace_pci_nvme_err_startfail_sqent_too_small(
6503 NVME_CC_IOSQES(cc),
6504 NVME_CTRL_SQES_MIN(cap));
6505 return -1;
6506 }
6507 if (unlikely(NVME_CC_IOSQES(cc) >
6508 NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
6509 trace_pci_nvme_err_startfail_sqent_too_large(
6510 NVME_CC_IOSQES(cc),
6511 NVME_CTRL_SQES_MAX(cap));
6512 return -1;
6513 }
6514 if (unlikely(!NVME_AQA_ASQS(aqa))) {
6515 trace_pci_nvme_err_startfail_asqent_sz_zero();
6516 return -1;
6517 }
6518 if (unlikely(!NVME_AQA_ACQS(aqa))) {
6519 trace_pci_nvme_err_startfail_acqent_sz_zero();
6520 return -1;
6521 }
6522
6523 n->page_bits = page_bits;
6524 n->page_size = page_size;
6525 n->max_prp_ents = n->page_size / sizeof(uint64_t);
6526 n->cqe_size = 1 << NVME_CC_IOCQES(cc);
6527 n->sqe_size = 1 << NVME_CC_IOSQES(cc);
6528 nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
6529 nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
6530
6531 nvme_set_timestamp(n, 0ULL);
6532
6533 nvme_select_iocs(n);
6534
6535 return 0;
6536}
6537
6538static void nvme_cmb_enable_regs(NvmeCtrl *n)
6539{
6540 uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc);
6541 uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz);
6542
6543 NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
6544 NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
6545 NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
6546 stl_le_p(&n->bar.cmbloc, cmbloc);
6547
6548 NVME_CMBSZ_SET_SQS(cmbsz, 1);
6549 NVME_CMBSZ_SET_CQS(cmbsz, 0);
6550 NVME_CMBSZ_SET_LISTS(cmbsz, 1);
6551 NVME_CMBSZ_SET_RDS(cmbsz, 1);
6552 NVME_CMBSZ_SET_WDS(cmbsz, 1);
6553 NVME_CMBSZ_SET_SZU(cmbsz, 2);
6554 NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
6555 stl_le_p(&n->bar.cmbsz, cmbsz);
6556}
6557
6558static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
6559 unsigned size)
6560{
6561 uint64_t cap = ldq_le_p(&n->bar.cap);
6562 uint32_t cc = ldl_le_p(&n->bar.cc);
6563 uint32_t intms = ldl_le_p(&n->bar.intms);
6564 uint32_t csts = ldl_le_p(&n->bar.csts);
6565 uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts);
6566
6567 if (unlikely(offset & (sizeof(uint32_t) - 1))) {
6568 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
6569 "MMIO write not 32-bit aligned,"
6570 " offset=0x%"PRIx64"", offset);
6571
6572 }
6573
6574 if (unlikely(size < sizeof(uint32_t))) {
6575 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
6576 "MMIO write smaller than 32-bits,"
6577 " offset=0x%"PRIx64", size=%u",
6578 offset, size);
6579
6580 }
6581
6582 switch (offset) {
6583 case NVME_REG_INTMS:
6584 if (unlikely(msix_enabled(&(n->parent_obj)))) {
6585 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
6586 "undefined access to interrupt mask set"
6587 " when MSI-X is enabled");
6588
6589 }
6590 intms |= data;
6591 stl_le_p(&n->bar.intms, intms);
6592 n->bar.intmc = n->bar.intms;
6593 trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
6594 nvme_irq_check(n);
6595 break;
6596 case NVME_REG_INTMC:
6597 if (unlikely(msix_enabled(&(n->parent_obj)))) {
6598 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
6599 "undefined access to interrupt mask clr"
6600 " when MSI-X is enabled");
6601
6602 }
6603 intms &= ~data;
6604 stl_le_p(&n->bar.intms, intms);
6605 n->bar.intmc = n->bar.intms;
6606 trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
6607 nvme_irq_check(n);
6608 break;
6609 case NVME_REG_CC:
6610 stl_le_p(&n->bar.cc, data);
6611
6612 trace_pci_nvme_mmio_cfg(data & 0xffffffff);
6613
6614 if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
6615 trace_pci_nvme_mmio_shutdown_set();
6616 nvme_ctrl_shutdown(n);
6617 csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
6618 csts |= NVME_CSTS_SHST_COMPLETE;
6619 } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
6620 trace_pci_nvme_mmio_shutdown_cleared();
6621 csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
6622 }
6623
6624 if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
6625 if (unlikely(nvme_start_ctrl(n))) {
6626 trace_pci_nvme_err_startfail();
6627 csts = NVME_CSTS_FAILED;
6628 } else {
6629 trace_pci_nvme_mmio_start_success();
6630 csts = NVME_CSTS_READY;
6631 }
6632 } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
6633 trace_pci_nvme_mmio_stopped();
6634 nvme_ctrl_reset(n, NVME_RESET_CONTROLLER);
6635
6636 break;
6637 }
6638
6639 stl_le_p(&n->bar.csts, csts);
6640
6641 break;
6642 case NVME_REG_CSTS:
6643 if (data & (1 << 4)) {
6644 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
6645 "attempted to W1C CSTS.NSSRO"
6646 " but CAP.NSSRS is zero (not supported)");
6647 } else if (data != 0) {
6648 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts,
6649 "attempted to set a read only bit"
6650 " of controller status");
6651 }
6652 break;
6653 case NVME_REG_NSSR:
6654 if (data == 0x4e564d65) {
6655 trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
6656 } else {
6657
6658 return;
6659 }
6660 break;
6661 case NVME_REG_AQA:
6662 stl_le_p(&n->bar.aqa, data);
6663 trace_pci_nvme_mmio_aqattr(data & 0xffffffff);
6664 break;
6665 case NVME_REG_ASQ:
6666 stn_le_p(&n->bar.asq, size, data);
6667 trace_pci_nvme_mmio_asqaddr(data);
6668 break;
6669 case NVME_REG_ASQ + 4:
6670 stl_le_p((uint8_t *)&n->bar.asq + 4, data);
6671 trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq));
6672 break;
6673 case NVME_REG_ACQ:
6674 trace_pci_nvme_mmio_acqaddr(data);
6675 stn_le_p(&n->bar.acq, size, data);
6676 break;
6677 case NVME_REG_ACQ + 4:
6678 stl_le_p((uint8_t *)&n->bar.acq + 4, data);
6679 trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq));
6680 break;
6681 case NVME_REG_CMBLOC:
6682 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved,
6683 "invalid write to reserved CMBLOC"
6684 " when CMBSZ is zero, ignored");
6685 return;
6686 case NVME_REG_CMBSZ:
6687 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
6688 "invalid write to read only CMBSZ, ignored");
6689 return;
6690 case NVME_REG_CMBMSC:
6691 if (!NVME_CAP_CMBS(cap)) {
6692 return;
6693 }
6694
6695 stn_le_p(&n->bar.cmbmsc, size, data);
6696 n->cmb.cmse = false;
6697
6698 if (NVME_CMBMSC_CRE(data)) {
6699 nvme_cmb_enable_regs(n);
6700
6701 if (NVME_CMBMSC_CMSE(data)) {
6702 uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc);
6703 hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT;
6704 if (cba + int128_get64(n->cmb.mem.size) < cba) {
6705 uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts);
6706 NVME_CMBSTS_SET_CBAI(cmbsts, 1);
6707 stl_le_p(&n->bar.cmbsts, cmbsts);
6708 return;
6709 }
6710
6711 n->cmb.cba = cba;
6712 n->cmb.cmse = true;
6713 }
6714 } else {
6715 n->bar.cmbsz = 0;
6716 n->bar.cmbloc = 0;
6717 }
6718
6719 return;
6720 case NVME_REG_CMBMSC + 4:
6721 stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data);
6722 return;
6723
6724 case NVME_REG_PMRCAP:
6725 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly,
6726 "invalid write to PMRCAP register, ignored");
6727 return;
6728 case NVME_REG_PMRCTL:
6729 if (!NVME_CAP_PMRS(cap)) {
6730 return;
6731 }
6732
6733 stl_le_p(&n->bar.pmrctl, data);
6734 if (NVME_PMRCTL_EN(data)) {
6735 memory_region_set_enabled(&n->pmr.dev->mr, true);
6736 pmrsts = 0;
6737 } else {
6738 memory_region_set_enabled(&n->pmr.dev->mr, false);
6739 NVME_PMRSTS_SET_NRDY(pmrsts, 1);
6740 n->pmr.cmse = false;
6741 }
6742 stl_le_p(&n->bar.pmrsts, pmrsts);
6743 return;
6744 case NVME_REG_PMRSTS:
6745 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly,
6746 "invalid write to PMRSTS register, ignored");
6747 return;
6748 case NVME_REG_PMREBS:
6749 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly,
6750 "invalid write to PMREBS register, ignored");
6751 return;
6752 case NVME_REG_PMRSWTP:
6753 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly,
6754 "invalid write to PMRSWTP register, ignored");
6755 return;
6756 case NVME_REG_PMRMSCL:
6757 if (!NVME_CAP_PMRS(cap)) {
6758 return;
6759 }
6760
6761 stl_le_p(&n->bar.pmrmscl, data);
6762 n->pmr.cmse = false;
6763
6764 if (NVME_PMRMSCL_CMSE(data)) {
6765 uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu);
6766 hwaddr cba = pmrmscu << 32 |
6767 (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT);
6768 if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
6769 NVME_PMRSTS_SET_CBAI(pmrsts, 1);
6770 stl_le_p(&n->bar.pmrsts, pmrsts);
6771 return;
6772 }
6773
6774 n->pmr.cmse = true;
6775 n->pmr.cba = cba;
6776 }
6777
6778 return;
6779 case NVME_REG_PMRMSCU:
6780 if (!NVME_CAP_PMRS(cap)) {
6781 return;
6782 }
6783
6784 stl_le_p(&n->bar.pmrmscu, data);
6785 return;
6786 default:
6787 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
6788 "invalid MMIO write,"
6789 " offset=0x%"PRIx64", data=%"PRIx64"",
6790 offset, data);
6791 break;
6792 }
6793}
6794
6795static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
6796{
6797 NvmeCtrl *n = (NvmeCtrl *)opaque;
6798 uint8_t *ptr = (uint8_t *)&n->bar;
6799
6800 trace_pci_nvme_mmio_read(addr, size);
6801
6802 if (unlikely(addr & (sizeof(uint32_t) - 1))) {
6803 NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32,
6804 "MMIO read not 32-bit aligned,"
6805 " offset=0x%"PRIx64"", addr);
6806
6807 } else if (unlikely(size < sizeof(uint32_t))) {
6808 NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall,
6809 "MMIO read smaller than 32-bits,"
6810 " offset=0x%"PRIx64"", addr);
6811
6812 }
6813
6814 if (addr > sizeof(n->bar) - size) {
6815 NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs,
6816 "MMIO read beyond last register,"
6817 " offset=0x%"PRIx64", returning 0", addr);
6818
6819 return 0;
6820 }
6821
6822 if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
6823 addr != NVME_REG_CSTS) {
6824 trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
6825 return 0;
6826 }
6827
6828
6829
6830
6831
6832
6833 if (addr == NVME_REG_PMRSTS &&
6834 (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) {
6835 memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
6836 }
6837
6838 return ldn_le_p(ptr + addr, size);
6839}
6840
6841static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
6842{
6843 uint32_t qid;
6844
6845 if (unlikely(addr & ((1 << 2) - 1))) {
6846 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
6847 "doorbell write not 32-bit aligned,"
6848 " offset=0x%"PRIx64", ignoring", addr);
6849 return;
6850 }
6851
6852 if (((addr - 0x1000) >> 2) & 1) {
6853
6854
6855 uint16_t new_head = val & 0xffff;
6856 int start_sqs;
6857 NvmeCQueue *cq;
6858
6859 qid = (addr - (0x1000 + (1 << 2))) >> 3;
6860 if (unlikely(nvme_check_cqid(n, qid))) {
6861 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq,
6862 "completion queue doorbell write"
6863 " for nonexistent queue,"
6864 " sqid=%"PRIu32", ignoring", qid);
6865
6866
6867
6868
6869
6870
6871
6872
6873
6874
6875
6876
6877
6878
6879 if (n->outstanding_aers) {
6880 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6881 NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
6882 NVME_LOG_ERROR_INFO);
6883 }
6884
6885 return;
6886 }
6887
6888 cq = n->cq[qid];
6889 if (unlikely(new_head >= cq->size)) {
6890 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead,
6891 "completion queue doorbell write value"
6892 " beyond queue size, sqid=%"PRIu32","
6893 " new_head=%"PRIu16", ignoring",
6894 qid, new_head);
6895
6896 if (n->outstanding_aers) {
6897 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6898 NVME_AER_INFO_ERR_INVALID_DB_VALUE,
6899 NVME_LOG_ERROR_INFO);
6900 }
6901
6902 return;
6903 }
6904
6905 trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head);
6906
6907 start_sqs = nvme_cq_full(cq) ? 1 : 0;
6908 cq->head = new_head;
6909 if (!qid && n->dbbuf_enabled) {
6910 pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
6911 sizeof(cq->head));
6912 }
6913 if (start_sqs) {
6914 NvmeSQueue *sq;
6915 QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
6916 timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
6917 }
6918 timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
6919 }
6920
6921 if (cq->tail == cq->head) {
6922 if (cq->irq_enabled) {
6923 n->cq_pending--;
6924 }
6925
6926 nvme_irq_deassert(n, cq);
6927 }
6928 } else {
6929
6930
6931 uint16_t new_tail = val & 0xffff;
6932 NvmeSQueue *sq;
6933
6934 qid = (addr - 0x1000) >> 3;
6935 if (unlikely(nvme_check_sqid(n, qid))) {
6936 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq,
6937 "submission queue doorbell write"
6938 " for nonexistent queue,"
6939 " sqid=%"PRIu32", ignoring", qid);
6940
6941 if (n->outstanding_aers) {
6942 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6943 NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
6944 NVME_LOG_ERROR_INFO);
6945 }
6946
6947 return;
6948 }
6949
6950 sq = n->sq[qid];
6951 if (unlikely(new_tail >= sq->size)) {
6952 NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail,
6953 "submission queue doorbell write value"
6954 " beyond queue size, sqid=%"PRIu32","
6955 " new_tail=%"PRIu16", ignoring",
6956 qid, new_tail);
6957
6958 if (n->outstanding_aers) {
6959 nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
6960 NVME_AER_INFO_ERR_INVALID_DB_VALUE,
6961 NVME_LOG_ERROR_INFO);
6962 }
6963
6964 return;
6965 }
6966
6967 trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);
6968
6969 sq->tail = new_tail;
6970 if (!qid && n->dbbuf_enabled) {
6971
6972
6973
6974
6975
6976
6977
6978
6979
6980
6981
6982
6983
6984 pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
6985 sizeof(sq->tail));
6986 }
6987 timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
6988 }
6989}
6990
6991static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
6992 unsigned size)
6993{
6994 NvmeCtrl *n = (NvmeCtrl *)opaque;
6995
6996 trace_pci_nvme_mmio_write(addr, data, size);
6997
6998 if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
6999 addr != NVME_REG_CSTS) {
7000 trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
7001 return;
7002 }
7003
7004 if (addr < sizeof(n->bar)) {
7005 nvme_write_bar(n, addr, data, size);
7006 } else {
7007 nvme_process_db(n, addr, data);
7008 }
7009}
7010
7011static const MemoryRegionOps nvme_mmio_ops = {
7012 .read = nvme_mmio_read,
7013 .write = nvme_mmio_write,
7014 .endianness = DEVICE_LITTLE_ENDIAN,
7015 .impl = {
7016 .min_access_size = 2,
7017 .max_access_size = 8,
7018 },
7019};
7020
7021static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
7022 unsigned size)
7023{
7024 NvmeCtrl *n = (NvmeCtrl *)opaque;
7025 stn_le_p(&n->cmb.buf[addr], size, data);
7026}
7027
7028static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size)
7029{
7030 NvmeCtrl *n = (NvmeCtrl *)opaque;
7031 return ldn_le_p(&n->cmb.buf[addr], size);
7032}
7033
7034static const MemoryRegionOps nvme_cmb_ops = {
7035 .read = nvme_cmb_read,
7036 .write = nvme_cmb_write,
7037 .endianness = DEVICE_LITTLE_ENDIAN,
7038 .impl = {
7039 .min_access_size = 1,
7040 .max_access_size = 8,
7041 },
7042};
7043
7044static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
7045{
7046 NvmeParams *params = &n->params;
7047
7048 if (params->num_queues) {
7049 warn_report("num_queues is deprecated; please use max_ioqpairs "
7050 "instead");
7051
7052 params->max_ioqpairs = params->num_queues - 1;
7053 }
7054
7055 if (n->namespace.blkconf.blk && n->subsys) {
7056 error_setg(errp, "subsystem support is unavailable with legacy "
7057 "namespace ('drive' property)");
7058 return;
7059 }
7060
7061 if (params->max_ioqpairs < 1 ||
7062 params->max_ioqpairs > NVME_MAX_IOQPAIRS) {
7063 error_setg(errp, "max_ioqpairs must be between 1 and %d",
7064 NVME_MAX_IOQPAIRS);
7065 return;
7066 }
7067
7068 if (params->msix_qsize < 1 ||
7069 params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) {
7070 error_setg(errp, "msix_qsize must be between 1 and %d",
7071 PCI_MSIX_FLAGS_QSIZE + 1);
7072 return;
7073 }
7074
7075 if (!params->serial) {
7076 error_setg(errp, "serial property not set");
7077 return;
7078 }
7079
7080 if (n->pmr.dev) {
7081 if (host_memory_backend_is_mapped(n->pmr.dev)) {
7082 error_setg(errp, "can't use already busy memdev: %s",
7083 object_get_canonical_path_component(OBJECT(n->pmr.dev)));
7084 return;
7085 }
7086
7087 if (!is_power_of_2(n->pmr.dev->size)) {
7088 error_setg(errp, "pmr backend size needs to be power of 2 in size");
7089 return;
7090 }
7091
7092 host_memory_backend_set_mapped(n->pmr.dev, true);
7093 }
7094
7095 if (n->params.zasl > n->params.mdts) {
7096 error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less "
7097 "than or equal to mdts (Maximum Data Transfer Size)");
7098 return;
7099 }
7100
7101 if (!n->params.vsl) {
7102 error_setg(errp, "vsl must be non-zero");
7103 return;
7104 }
7105
7106 if (params->sriov_max_vfs) {
7107 if (!n->subsys) {
7108 error_setg(errp, "subsystem is required for the use of SR-IOV");
7109 return;
7110 }
7111
7112 if (params->sriov_max_vfs > NVME_MAX_VFS) {
7113 error_setg(errp, "sriov_max_vfs must be between 0 and %d",
7114 NVME_MAX_VFS);
7115 return;
7116 }
7117
7118 if (params->cmb_size_mb) {
7119 error_setg(errp, "CMB is not supported with SR-IOV");
7120 return;
7121 }
7122
7123 if (n->pmr.dev) {
7124 error_setg(errp, "PMR is not supported with SR-IOV");
7125 return;
7126 }
7127
7128 if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
7129 error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
7130 " must be set for the use of SR-IOV");
7131 return;
7132 }
7133
7134 if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
7135 error_setg(errp, "sriov_vq_flexible must be greater than or equal"
7136 " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
7137 return;
7138 }
7139
7140 if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
7141 error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
7142 " greater than or equal to 2");
7143 return;
7144 }
7145
7146 if (params->sriov_vi_flexible < params->sriov_max_vfs) {
7147 error_setg(errp, "sriov_vi_flexible must be greater than or equal"
7148 " to %d (sriov_max_vfs)", params->sriov_max_vfs);
7149 return;
7150 }
7151
7152 if (params->msix_qsize < params->sriov_vi_flexible + 1) {
7153 error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
7154 " greater than or equal to 1");
7155 return;
7156 }
7157
7158 if (params->sriov_max_vi_per_vf &&
7159 (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
7160 error_setg(errp, "sriov_max_vi_per_vf must meet:"
7161 " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
7162 " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
7163 return;
7164 }
7165
7166 if (params->sriov_max_vq_per_vf &&
7167 (params->sriov_max_vq_per_vf < 2 ||
7168 (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
7169 error_setg(errp, "sriov_max_vq_per_vf must meet:"
7170 " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
7171 " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
7172 return;
7173 }
7174 }
7175}
7176
7177static void nvme_init_state(NvmeCtrl *n)
7178{
7179 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
7180 NvmeSecCtrlList *list = &n->sec_ctrl_list;
7181 NvmeSecCtrlEntry *sctrl;
7182 uint8_t max_vfs;
7183 int i;
7184
7185 if (pci_is_vf(&n->parent_obj)) {
7186 sctrl = nvme_sctrl(n);
7187 max_vfs = 0;
7188 n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
7189 n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
7190 } else {
7191 max_vfs = n->params.sriov_max_vfs;
7192 n->conf_ioqpairs = n->params.max_ioqpairs;
7193 n->conf_msix_qsize = n->params.msix_qsize;
7194 }
7195
7196 n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
7197 n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
7198 n->temperature = NVME_TEMPERATURE;
7199 n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
7200 n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
7201 n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
7202 QTAILQ_INIT(&n->aer_queue);
7203
7204 list->numcntl = cpu_to_le16(max_vfs);
7205 for (i = 0; i < max_vfs; i++) {
7206 sctrl = &list->sec[i];
7207 sctrl->pcid = cpu_to_le16(n->cntlid);
7208 sctrl->vfn = cpu_to_le16(i + 1);
7209 }
7210
7211 cap->cntlid = cpu_to_le16(n->cntlid);
7212 cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
7213
7214 if (pci_is_vf(&n->parent_obj)) {
7215 cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
7216 } else {
7217 cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
7218 n->params.sriov_vq_flexible);
7219 cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
7220 cap->vqrfap = cap->vqfrt;
7221 cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
7222 cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
7223 cpu_to_le16(n->params.sriov_max_vq_per_vf) :
7224 cap->vqfrt / MAX(max_vfs, 1);
7225 }
7226
7227 if (pci_is_vf(&n->parent_obj)) {
7228 cap->viprt = cpu_to_le16(n->conf_msix_qsize);
7229 } else {
7230 cap->viprt = cpu_to_le16(n->params.msix_qsize -
7231 n->params.sriov_vi_flexible);
7232 cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
7233 cap->virfap = cap->vifrt;
7234 cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
7235 cap->vifrsm = n->params.sriov_max_vi_per_vf ?
7236 cpu_to_le16(n->params.sriov_max_vi_per_vf) :
7237 cap->vifrt / MAX(max_vfs, 1);
7238 }
7239}
7240
7241static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
7242{
7243 uint64_t cmb_size = n->params.cmb_size_mb * MiB;
7244 uint64_t cap = ldq_le_p(&n->bar.cap);
7245
7246 n->cmb.buf = g_malloc0(cmb_size);
7247 memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n,
7248 "nvme-cmb", cmb_size);
7249 pci_register_bar(pci_dev, NVME_CMB_BIR,
7250 PCI_BASE_ADDRESS_SPACE_MEMORY |
7251 PCI_BASE_ADDRESS_MEM_TYPE_64 |
7252 PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem);
7253
7254 NVME_CAP_SET_CMBS(cap, 1);
7255 stq_le_p(&n->bar.cap, cap);
7256
7257 if (n->params.legacy_cmb) {
7258 nvme_cmb_enable_regs(n);
7259 n->cmb.cmse = true;
7260 }
7261}
7262
7263static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
7264{
7265 uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap);
7266
7267 NVME_PMRCAP_SET_RDS(pmrcap, 1);
7268 NVME_PMRCAP_SET_WDS(pmrcap, 1);
7269 NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR);
7270
7271 NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02);
7272 NVME_PMRCAP_SET_CMSS(pmrcap, 1);
7273 stl_le_p(&n->bar.pmrcap, pmrcap);
7274
7275 pci_register_bar(pci_dev, NVME_PMR_BIR,
7276 PCI_BASE_ADDRESS_SPACE_MEMORY |
7277 PCI_BASE_ADDRESS_MEM_TYPE_64 |
7278 PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr);
7279
7280 memory_region_set_enabled(&n->pmr.dev->mr, false);
7281}
7282
7283static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
7284 unsigned *msix_table_offset,
7285 unsigned *msix_pba_offset)
7286{
7287 uint64_t bar_size, msix_table_size, msix_pba_size;
7288
7289 bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
7290 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
7291
7292 if (msix_table_offset) {
7293 *msix_table_offset = bar_size;
7294 }
7295
7296 msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs;
7297 bar_size += msix_table_size;
7298 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
7299
7300 if (msix_pba_offset) {
7301 *msix_pba_offset = bar_size;
7302 }
7303
7304 msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
7305 bar_size += msix_pba_size;
7306
7307 bar_size = pow2ceil(bar_size);
7308 return bar_size;
7309}
7310
7311static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
7312{
7313 uint16_t vf_dev_id = n->params.use_intel_id ?
7314 PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
7315 NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
7316 uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
7317 le16_to_cpu(cap->vifrsm),
7318 NULL, NULL);
7319
7320 pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
7321 n->params.sriov_max_vfs, n->params.sriov_max_vfs,
7322 NVME_VF_OFFSET, NVME_VF_STRIDE);
7323
7324 pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
7325 PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
7326}
7327
7328static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
7329{
7330 Error *err = NULL;
7331 int ret;
7332
7333 ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
7334 PCI_PM_SIZEOF, &err);
7335 if (err) {
7336 error_report_err(err);
7337 return ret;
7338 }
7339
7340 pci_set_word(pci_dev->config + offset + PCI_PM_PMC,
7341 PCI_PM_CAP_VER_1_2);
7342 pci_set_word(pci_dev->config + offset + PCI_PM_CTRL,
7343 PCI_PM_CTRL_NO_SOFT_RESET);
7344 pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL,
7345 PCI_PM_CTRL_STATE_MASK);
7346
7347 return 0;
7348}
7349
7350static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
7351{
7352 uint8_t *pci_conf = pci_dev->config;
7353 uint64_t bar_size;
7354 unsigned msix_table_offset, msix_pba_offset;
7355 int ret;
7356
7357 Error *err = NULL;
7358
7359 pci_conf[PCI_INTERRUPT_PIN] = 1;
7360 pci_config_set_prog_interface(pci_conf, 0x2);
7361
7362 if (n->params.use_intel_id) {
7363 pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
7364 pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME);
7365 } else {
7366 pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
7367 pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
7368 }
7369
7370 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
7371 nvme_add_pm_capability(pci_dev, 0x60);
7372 pcie_endpoint_cap_init(pci_dev, 0x80);
7373 pcie_cap_flr_init(pci_dev);
7374 if (n->params.sriov_max_vfs) {
7375 pcie_ari_init(pci_dev, 0x100, 1);
7376 }
7377
7378
7379 bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
7380 &msix_table_offset, &msix_pba_offset);
7381
7382 memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
7383 memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
7384 msix_table_offset);
7385 memory_region_add_subregion(&n->bar0, 0, &n->iomem);
7386
7387 if (pci_is_vf(pci_dev)) {
7388 pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
7389 } else {
7390 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
7391 PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
7392 }
7393 ret = msix_init(pci_dev, n->params.msix_qsize,
7394 &n->bar0, 0, msix_table_offset,
7395 &n->bar0, 0, msix_pba_offset, 0, &err);
7396 if (ret < 0) {
7397 if (ret == -ENOTSUP) {
7398 warn_report_err(err);
7399 } else {
7400 error_propagate(errp, err);
7401 return ret;
7402 }
7403 }
7404
7405 nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
7406
7407 if (n->params.cmb_size_mb) {
7408 nvme_init_cmb(n, pci_dev);
7409 }
7410
7411 if (n->pmr.dev) {
7412 nvme_init_pmr(n, pci_dev);
7413 }
7414
7415 if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
7416 nvme_init_sriov(n, pci_dev, 0x120);
7417 }
7418
7419 return 0;
7420}
7421
7422static void nvme_init_subnqn(NvmeCtrl *n)
7423{
7424 NvmeSubsystem *subsys = n->subsys;
7425 NvmeIdCtrl *id = &n->id_ctrl;
7426
7427 if (!subsys) {
7428 snprintf((char *)id->subnqn, sizeof(id->subnqn),
7429 "nqn.2019-08.org.qemu:%s", n->params.serial);
7430 } else {
7431 pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn);
7432 }
7433}
7434
7435static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
7436{
7437 NvmeIdCtrl *id = &n->id_ctrl;
7438 uint8_t *pci_conf = pci_dev->config;
7439 uint64_t cap = ldq_le_p(&n->bar.cap);
7440 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
7441
7442 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
7443 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
7444 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
7445 strpadcpy((char *)id->fr, sizeof(id->fr), QEMU_VERSION, ' ');
7446 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
7447
7448 id->cntlid = cpu_to_le16(n->cntlid);
7449
7450 id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
7451 id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
7452
7453 id->rab = 6;
7454
7455 if (n->params.use_intel_id) {
7456 id->ieee[0] = 0xb3;
7457 id->ieee[1] = 0x02;
7458 id->ieee[2] = 0x00;
7459 } else {
7460 id->ieee[0] = 0x00;
7461 id->ieee[1] = 0x54;
7462 id->ieee[2] = 0x52;
7463 }
7464
7465 id->mdts = n->params.mdts;
7466 id->ver = cpu_to_le32(NVME_SPEC_VER);
7467 id->oacs =
7468 cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF);
7469 id->cntrltype = 0x1;
7470
7471
7472
7473
7474
7475
7476
7477
7478
7479
7480
7481
7482 id->acl = 3;
7483 id->aerl = n->params.aerl;
7484 id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
7485 id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED;
7486
7487
7488 id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
7489 id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
7490
7491 id->sqes = (0x6 << 4) | 0x6;
7492 id->cqes = (0x4 << 4) | 0x4;
7493 id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
7494 id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
7495 NVME_ONCS_FEATURES | NVME_ONCS_DSM |
7496 NVME_ONCS_COMPARE | NVME_ONCS_COPY);
7497
7498
7499
7500
7501
7502
7503
7504
7505 id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
7506
7507 id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
7508 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
7509
7510 nvme_init_subnqn(n);
7511
7512 id->psd[0].mp = cpu_to_le16(0x9c4);
7513 id->psd[0].enlat = cpu_to_le32(0x10);
7514 id->psd[0].exlat = cpu_to_le32(0x4);
7515
7516 if (n->subsys) {
7517 id->cmic |= NVME_CMIC_MULTI_CTRL;
7518 }
7519
7520 NVME_CAP_SET_MQES(cap, 0x7ff);
7521 NVME_CAP_SET_CQR(cap, 1);
7522 NVME_CAP_SET_TO(cap, 0xf);
7523 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
7524 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP);
7525 NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY);
7526 NVME_CAP_SET_MPSMAX(cap, 4);
7527 NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
7528 NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
7529 stq_le_p(&n->bar.cap, cap);
7530
7531 stl_le_p(&n->bar.vs, NVME_SPEC_VER);
7532 n->bar.intmc = n->bar.intms = 0;
7533
7534 if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
7535 stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
7536 }
7537}
7538
7539static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
7540{
7541 int cntlid;
7542
7543 if (!n->subsys) {
7544 return 0;
7545 }
7546
7547 cntlid = nvme_subsys_register_ctrl(n, errp);
7548 if (cntlid < 0) {
7549 return -1;
7550 }
7551
7552 n->cntlid = cntlid;
7553
7554 return 0;
7555}
7556
7557void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
7558{
7559 uint32_t nsid = ns->params.nsid;
7560 assert(nsid && nsid <= NVME_MAX_NAMESPACES);
7561
7562 n->namespaces[nsid] = ns;
7563 ns->attached++;
7564
7565 n->dmrsl = MIN_NON_ZERO(n->dmrsl,
7566 BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
7567}
7568
7569static void nvme_realize(PCIDevice *pci_dev, Error **errp)
7570{
7571 NvmeCtrl *n = NVME(pci_dev);
7572 NvmeNamespace *ns;
7573 Error *local_err = NULL;
7574 NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
7575
7576 if (pci_is_vf(pci_dev)) {
7577
7578
7579
7580
7581 memcpy(&n->params, &pn->params, sizeof(NvmeParams));
7582 n->subsys = pn->subsys;
7583 }
7584
7585 nvme_check_constraints(n, &local_err);
7586 if (local_err) {
7587 error_propagate(errp, local_err);
7588 return;
7589 }
7590
7591 qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS,
7592 &pci_dev->qdev, n->parent_obj.qdev.id);
7593
7594 if (nvme_init_subsys(n, errp)) {
7595 error_propagate(errp, local_err);
7596 return;
7597 }
7598 nvme_init_state(n);
7599 if (nvme_init_pci(n, pci_dev, errp)) {
7600 return;
7601 }
7602 nvme_init_ctrl(n, pci_dev);
7603
7604
7605 if (n->namespace.blkconf.blk) {
7606 ns = &n->namespace;
7607 ns->params.nsid = 1;
7608
7609 if (nvme_ns_setup(ns, errp)) {
7610 return;
7611 }
7612
7613 nvme_attach_ns(n, ns);
7614 }
7615}
7616
7617static void nvme_exit(PCIDevice *pci_dev)
7618{
7619 NvmeCtrl *n = NVME(pci_dev);
7620 NvmeNamespace *ns;
7621 int i;
7622
7623 nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
7624
7625 if (n->subsys) {
7626 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
7627 ns = nvme_ns(n, i);
7628 if (ns) {
7629 ns->attached--;
7630 }
7631 }
7632
7633 nvme_subsys_unregister_ctrl(n->subsys, n);
7634 }
7635
7636 g_free(n->cq);
7637 g_free(n->sq);
7638 g_free(n->aer_reqs);
7639
7640 if (n->params.cmb_size_mb) {
7641 g_free(n->cmb.buf);
7642 }
7643
7644 if (n->pmr.dev) {
7645 host_memory_backend_set_mapped(n->pmr.dev, false);
7646 }
7647
7648 if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
7649 pcie_sriov_pf_exit(pci_dev);
7650 }
7651
7652 msix_uninit(pci_dev, &n->bar0, &n->bar0);
7653 memory_region_del_subregion(&n->bar0, &n->iomem);
7654}
7655
7656static Property nvme_props[] = {
7657 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
7658 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
7659 HostMemoryBackend *),
7660 DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
7661 NvmeSubsystem *),
7662 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
7663 DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
7664 DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
7665 DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
7666 DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
7667 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
7668 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
7669 DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
7670 DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7),
7671 DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
7672 DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
7673 DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
7674 DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
7675 DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
7676 params.auto_transition_zones, true),
7677 DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
7678 DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
7679 params.sriov_vq_flexible, 0),
7680 DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
7681 params.sriov_vi_flexible, 0),
7682 DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
7683 params.sriov_max_vi_per_vf, 0),
7684 DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
7685 params.sriov_max_vq_per_vf, 0),
7686 DEFINE_PROP_END_OF_LIST(),
7687};
7688
7689static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
7690 void *opaque, Error **errp)
7691{
7692 NvmeCtrl *n = NVME(obj);
7693 uint8_t value = n->smart_critical_warning;
7694
7695 visit_type_uint8(v, name, &value, errp);
7696}
7697
7698static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
7699 void *opaque, Error **errp)
7700{
7701 NvmeCtrl *n = NVME(obj);
7702 uint8_t value, old_value, cap = 0, index, event;
7703
7704 if (!visit_type_uint8(v, name, &value, errp)) {
7705 return;
7706 }
7707
7708 cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY
7709 | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA;
7710 if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) {
7711 cap |= NVME_SMART_PMR_UNRELIABLE;
7712 }
7713
7714 if ((value & cap) != value) {
7715 error_setg(errp, "unsupported smart critical warning bits: 0x%x",
7716 value & ~cap);
7717 return;
7718 }
7719
7720 old_value = n->smart_critical_warning;
7721 n->smart_critical_warning = value;
7722
7723
7724 for (index = 0; index < NVME_SMART_WARN_MAX; index++) {
7725 event = 1 << index;
7726 if (value & ~old_value & event)
7727 nvme_smart_event(n, event);
7728 }
7729}
7730
7731static void nvme_pci_reset(DeviceState *qdev)
7732{
7733 PCIDevice *pci_dev = PCI_DEVICE(qdev);
7734 NvmeCtrl *n = NVME(pci_dev);
7735
7736 trace_pci_nvme_pci_reset();
7737 nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
7738}
7739
7740static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
7741 uint32_t val, int len)
7742{
7743 NvmeCtrl *n = NVME(dev);
7744 NvmeSecCtrlEntry *sctrl;
7745 uint16_t sriov_cap = dev->exp.sriov_cap;
7746 uint32_t off = address - sriov_cap;
7747 int i, num_vfs;
7748
7749 if (!sriov_cap) {
7750 return;
7751 }
7752
7753 if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
7754 if (!(val & PCI_SRIOV_CTRL_VFE)) {
7755 num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
7756 for (i = 0; i < num_vfs; i++) {
7757 sctrl = &n->sec_ctrl_list.sec[i];
7758 nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
7759 }
7760 }
7761 }
7762}
7763
7764static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
7765 uint32_t val, int len)
7766{
7767 nvme_sriov_pre_write_ctrl(dev, address, val, len);
7768 pci_default_write_config(dev, address, val, len);
7769 pcie_cap_flr_write_config(dev, address, val, len);
7770}
7771
7772static const VMStateDescription nvme_vmstate = {
7773 .name = "nvme",
7774 .unmigratable = 1,
7775};
7776
7777static void nvme_class_init(ObjectClass *oc, void *data)
7778{
7779 DeviceClass *dc = DEVICE_CLASS(oc);
7780 PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
7781
7782 pc->realize = nvme_realize;
7783 pc->config_write = nvme_pci_write_config;
7784 pc->exit = nvme_exit;
7785 pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
7786 pc->revision = 2;
7787
7788 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
7789 dc->desc = "Non-Volatile Memory Express";
7790 device_class_set_props(dc, nvme_props);
7791 dc->vmsd = &nvme_vmstate;
7792 dc->reset = nvme_pci_reset;
7793}
7794
7795static void nvme_instance_init(Object *obj)
7796{
7797 NvmeCtrl *n = NVME(obj);
7798
7799 device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex,
7800 "bootindex", "/namespace@1,0",
7801 DEVICE(obj));
7802
7803 object_property_add(obj, "smart_critical_warning", "uint8",
7804 nvme_get_smart_warning,
7805 nvme_set_smart_warning, NULL, NULL);
7806}
7807
7808static const TypeInfo nvme_info = {
7809 .name = TYPE_NVME,
7810 .parent = TYPE_PCI_DEVICE,
7811 .instance_size = sizeof(NvmeCtrl),
7812 .instance_init = nvme_instance_init,
7813 .class_init = nvme_class_init,
7814 .interfaces = (InterfaceInfo[]) {
7815 { INTERFACE_PCIE_DEVICE },
7816 { }
7817 },
7818};
7819
7820static const TypeInfo nvme_bus_info = {
7821 .name = TYPE_NVME_BUS,
7822 .parent = TYPE_BUS,
7823 .instance_size = sizeof(NvmeBus),
7824};
7825
7826static void nvme_register_types(void)
7827{
7828 type_register_static(&nvme_info);
7829 type_register_static(&nvme_bus_info);
7830}
7831
7832type_init(nvme_register_types)
7833