1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "qemu/osdep.h"
18#include "hw/pci/msi.h"
19#include "hw/pci/msix.h"
20#include "hw/pci/pci.h"
21#include "hw/xen/xen.h"
22#include "sysemu/xen.h"
23#include "migration/qemu-file-types.h"
24#include "migration/vmstate.h"
25#include "qemu/range.h"
26#include "qapi/error.h"
27#include "trace.h"
28
29
/*
 * The code below accesses the MSI-X flags one byte at a time.
 * MSIX_CONTROL_OFFSET is the offset, relative to the capability start, of
 * the second byte of the flags word; the two masks are the enable and
 * mask-all flag bits shifted down by 8 so they apply to that byte.
 */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
33
34static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
35{
36 uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
37 MSIMessage msg;
38
39 msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
40 msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
41 return msg;
42}
43
44MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
45{
46 return dev->msix_prepare_message(dev, vector);
47}
48
49
50
51
52
53void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
54{
55 uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
56
57 pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
58 pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
59 table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
60}
61
/* Return the bit mask selecting @vector inside its pending-bit byte. */
static uint8_t msix_pending_mask(int vector)
{
    int bit = vector % 8;

    return 1 << bit;
}
66
67static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
68{
69 return dev->msix_pba + vector / 8;
70}
71
72static int msix_is_pending(PCIDevice *dev, int vector)
73{
74 return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
75}
76
77void msix_set_pending(PCIDevice *dev, unsigned int vector)
78{
79 *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
80}
81
82void msix_clr_pending(PCIDevice *dev, int vector)
83{
84 *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
85}
86
87static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
88{
89 unsigned offset = vector * PCI_MSIX_ENTRY_SIZE;
90 uint8_t *data = &dev->msix_table[offset + PCI_MSIX_ENTRY_DATA];
91
92
93 if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) {
94 return false;
95 }
96 return fmask || dev->msix_table[offset + PCI_MSIX_ENTRY_VECTOR_CTRL] &
97 PCI_MSIX_ENTRY_CTRL_MASKBIT;
98}
99
100bool msix_is_masked(PCIDevice *dev, unsigned int vector)
101{
102 return msix_vector_masked(dev, vector, dev->msix_function_masked);
103}
104
105static void msix_fire_vector_notifier(PCIDevice *dev,
106 unsigned int vector, bool is_masked)
107{
108 MSIMessage msg;
109 int ret;
110
111 if (!dev->msix_vector_use_notifier) {
112 return;
113 }
114 if (is_masked) {
115 dev->msix_vector_release_notifier(dev, vector);
116 } else {
117 msg = msix_get_message(dev, vector);
118 ret = dev->msix_vector_use_notifier(dev, vector, msg);
119 assert(ret >= 0);
120 }
121}
122
123static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
124{
125 bool is_masked = msix_is_masked(dev, vector);
126
127 if (is_masked == was_masked) {
128 return;
129 }
130
131 msix_fire_vector_notifier(dev, vector, is_masked);
132
133 if (!is_masked && msix_is_pending(dev, vector)) {
134 msix_clr_pending(dev, vector);
135 msix_notify(dev, vector);
136 }
137}
138
139void msix_set_mask(PCIDevice *dev, int vector, bool mask)
140{
141 unsigned offset;
142 bool was_masked;
143
144 assert(vector < dev->msix_entries_nr);
145
146 offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
147
148 was_masked = msix_is_masked(dev, vector);
149
150 if (mask) {
151 dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
152 } else {
153 dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
154 }
155
156 msix_handle_mask_update(dev, vector, was_masked);
157}
158
159static bool msix_masked(PCIDevice *dev)
160{
161 return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
162}
163
164static void msix_update_function_masked(PCIDevice *dev)
165{
166 dev->msix_function_masked = !msix_enabled(dev) || msix_masked(dev);
167}
168
169
170void msix_write_config(PCIDevice *dev, uint32_t addr,
171 uint32_t val, int len)
172{
173 unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
174 int vector;
175 bool was_masked;
176
177 if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
178 return;
179 }
180
181 trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
182
183 was_masked = dev->msix_function_masked;
184 msix_update_function_masked(dev);
185
186 if (!msix_enabled(dev)) {
187 return;
188 }
189
190 pci_device_deassert_intx(dev);
191
192 if (dev->msix_function_masked == was_masked) {
193 return;
194 }
195
196 for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
197 msix_handle_mask_update(dev, vector,
198 msix_vector_masked(dev, vector, was_masked));
199 }
200}
201
202static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
203 unsigned size)
204{
205 PCIDevice *dev = opaque;
206
207 assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
208 return pci_get_long(dev->msix_table + addr);
209}
210
211static void msix_table_mmio_write(void *opaque, hwaddr addr,
212 uint64_t val, unsigned size)
213{
214 PCIDevice *dev = opaque;
215 int vector = addr / PCI_MSIX_ENTRY_SIZE;
216 bool was_masked;
217
218 assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
219
220 was_masked = msix_is_masked(dev, vector);
221 pci_set_long(dev->msix_table + addr, val);
222 msix_handle_mask_update(dev, vector, was_masked);
223}
224
225static const MemoryRegionOps msix_table_mmio_ops = {
226 .read = msix_table_mmio_read,
227 .write = msix_table_mmio_write,
228 .endianness = DEVICE_LITTLE_ENDIAN,
229 .valid = {
230 .min_access_size = 4,
231 .max_access_size = 8,
232 },
233 .impl = {
234 .max_access_size = 4,
235 },
236};
237
238static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
239 unsigned size)
240{
241 PCIDevice *dev = opaque;
242 if (dev->msix_vector_poll_notifier) {
243 unsigned vector_start = addr * 8;
244 unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
245 dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
246 }
247
248 return pci_get_long(dev->msix_pba + addr);
249}
250
/*
 * MMIO write handler for the Pending Bit Array: the body is empty, so
 * writes to the PBA are silently discarded.
 */
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
{
}
255
256static const MemoryRegionOps msix_pba_mmio_ops = {
257 .read = msix_pba_mmio_read,
258 .write = msix_pba_mmio_write,
259 .endianness = DEVICE_LITTLE_ENDIAN,
260 .valid = {
261 .min_access_size = 4,
262 .max_access_size = 8,
263 },
264 .impl = {
265 .max_access_size = 4,
266 },
267};
268
269static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
270{
271 int vector;
272
273 for (vector = 0; vector < nentries; ++vector) {
274 unsigned offset =
275 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
276 bool was_masked = msix_is_masked(dev, vector);
277
278 dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
279 msix_handle_mask_update(dev, vector, was_masked);
280 }
281}
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303int msix_init(struct PCIDevice *dev, unsigned short nentries,
304 MemoryRegion *table_bar, uint8_t table_bar_nr,
305 unsigned table_offset, MemoryRegion *pba_bar,
306 uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
307 Error **errp)
308{
309 int cap;
310 unsigned table_size, pba_size;
311 uint8_t *config;
312
313
314 if (!msi_nonbroken) {
315 error_setg(errp, "MSI-X is not supported by interrupt controller");
316 return -ENOTSUP;
317 }
318
319 if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
320 error_setg(errp, "The number of MSI-X vectors is invalid");
321 return -EINVAL;
322 }
323
324 table_size = nentries * PCI_MSIX_ENTRY_SIZE;
325 pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
326
327
328 if ((table_bar_nr == pba_bar_nr &&
329 ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
330 table_offset + table_size > memory_region_size(table_bar) ||
331 pba_offset + pba_size > memory_region_size(pba_bar) ||
332 (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
333 error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
334 " or don't align");
335 return -EINVAL;
336 }
337
338 cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
339 cap_pos, MSIX_CAP_LENGTH, errp);
340 if (cap < 0) {
341 return cap;
342 }
343
344 dev->msix_cap = cap;
345 dev->cap_present |= QEMU_PCI_CAP_MSIX;
346 config = dev->config + cap;
347
348 pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
349 dev->msix_entries_nr = nentries;
350 dev->msix_function_masked = true;
351
352 pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
353 pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
354
355
356 dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
357 MSIX_MASKALL_MASK;
358
359 dev->msix_table = g_malloc0(table_size);
360 dev->msix_pba = g_malloc0(pba_size);
361 dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
362
363 msix_mask_all(dev, nentries);
364
365 memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev,
366 "msix-table", table_size);
367 memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
368 memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev,
369 "msix-pba", pba_size);
370 memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
371
372 dev->msix_prepare_message = msix_prepare_message;
373
374 return 0;
375}
376
377int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
378 uint8_t bar_nr, Error **errp)
379{
380 int ret;
381 char *name;
382 uint32_t bar_size = 4096;
383 uint32_t bar_pba_offset = bar_size / 2;
384 uint32_t bar_pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
385
386
387
388
389
390
391
392
393 if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) {
394 bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE;
395 }
396
397 if (bar_pba_offset + bar_pba_size > 4096) {
398 bar_size = bar_pba_offset + bar_pba_size;
399 }
400
401 bar_size = pow2ceil(bar_size);
402
403 name = g_strdup_printf("%s-msix", dev->name);
404 memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size);
405 g_free(name);
406
407 ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
408 0, &dev->msix_exclusive_bar,
409 bar_nr, bar_pba_offset,
410 0, errp);
411 if (ret) {
412 return ret;
413 }
414
415 pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
416 &dev->msix_exclusive_bar);
417
418 return 0;
419}
420
421static void msix_free_irq_entries(PCIDevice *dev)
422{
423 int vector;
424
425 for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
426 dev->msix_entry_used[vector] = 0;
427 msix_clr_pending(dev, vector);
428 }
429}
430
431static void msix_clear_all_vectors(PCIDevice *dev)
432{
433 int vector;
434
435 for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
436 msix_clr_pending(dev, vector);
437 }
438}
439
440
441void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
442{
443 if (!msix_present(dev)) {
444 return;
445 }
446 pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
447 dev->msix_cap = 0;
448 msix_free_irq_entries(dev);
449 dev->msix_entries_nr = 0;
450 memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
451 g_free(dev->msix_pba);
452 dev->msix_pba = NULL;
453 memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
454 g_free(dev->msix_table);
455 dev->msix_table = NULL;
456 g_free(dev->msix_entry_used);
457 dev->msix_entry_used = NULL;
458 dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
459 dev->msix_prepare_message = NULL;
460}
461
462void msix_uninit_exclusive_bar(PCIDevice *dev)
463{
464 if (msix_present(dev)) {
465 msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
466 }
467}
468
469void msix_save(PCIDevice *dev, QEMUFile *f)
470{
471 unsigned n = dev->msix_entries_nr;
472
473 if (!msix_present(dev)) {
474 return;
475 }
476
477 qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
478 qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
479}
480
481
482void msix_load(PCIDevice *dev, QEMUFile *f)
483{
484 unsigned n = dev->msix_entries_nr;
485 unsigned int vector;
486
487 if (!msix_present(dev)) {
488 return;
489 }
490
491 msix_clear_all_vectors(dev);
492 qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
493 qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
494 msix_update_function_masked(dev);
495
496 for (vector = 0; vector < n; vector++) {
497 msix_handle_mask_update(dev, vector, true);
498 }
499}
500
501
502int msix_present(PCIDevice *dev)
503{
504 return dev->cap_present & QEMU_PCI_CAP_MSIX;
505}
506
507
508int msix_enabled(PCIDevice *dev)
509{
510 return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
511 (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
512 MSIX_ENABLE_MASK);
513}
514
515
516void msix_notify(PCIDevice *dev, unsigned vector)
517{
518 MSIMessage msg;
519
520 assert(vector < dev->msix_entries_nr);
521
522 if (!dev->msix_entry_used[vector]) {
523 return;
524 }
525
526 if (msix_is_masked(dev, vector)) {
527 msix_set_pending(dev, vector);
528 return;
529 }
530
531 msg = msix_get_message(dev, vector);
532
533 msi_send_message(dev, msg);
534}
535
536void msix_reset(PCIDevice *dev)
537{
538 if (!msix_present(dev)) {
539 return;
540 }
541 msix_clear_all_vectors(dev);
542 dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
543 ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
544 memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
545 memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
546 msix_mask_all(dev, dev->msix_entries_nr);
547}
548
549
550
551
552
553
554
555
556
557
558void msix_vector_use(PCIDevice *dev, unsigned vector)
559{
560 assert(vector < dev->msix_entries_nr);
561 dev->msix_entry_used[vector]++;
562}
563
564
565void msix_vector_unuse(PCIDevice *dev, unsigned vector)
566{
567 assert(vector < dev->msix_entries_nr);
568 if (!dev->msix_entry_used[vector]) {
569 return;
570 }
571 if (--dev->msix_entry_used[vector]) {
572 return;
573 }
574 msix_clr_pending(dev, vector);
575}
576
577void msix_unuse_all_vectors(PCIDevice *dev)
578{
579 if (!msix_present(dev)) {
580 return;
581 }
582 msix_free_irq_entries(dev);
583}
584
585unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
586{
587 return dev->msix_entries_nr;
588}
589
590static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
591{
592 MSIMessage msg;
593
594 if (msix_is_masked(dev, vector)) {
595 return 0;
596 }
597 msg = msix_get_message(dev, vector);
598 return dev->msix_vector_use_notifier(dev, vector, msg);
599}
600
601static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
602{
603 if (msix_is_masked(dev, vector)) {
604 return;
605 }
606 dev->msix_vector_release_notifier(dev, vector);
607}
608
609int msix_set_vector_notifiers(PCIDevice *dev,
610 MSIVectorUseNotifier use_notifier,
611 MSIVectorReleaseNotifier release_notifier,
612 MSIVectorPollNotifier poll_notifier)
613{
614 int vector, ret;
615
616 assert(use_notifier && release_notifier);
617
618 dev->msix_vector_use_notifier = use_notifier;
619 dev->msix_vector_release_notifier = release_notifier;
620 dev->msix_vector_poll_notifier = poll_notifier;
621
622 if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
623 (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
624 for (vector = 0; vector < dev->msix_entries_nr; vector++) {
625 ret = msix_set_notifier_for_vector(dev, vector);
626 if (ret < 0) {
627 goto undo;
628 }
629 }
630 }
631 if (dev->msix_vector_poll_notifier) {
632 dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
633 }
634 return 0;
635
636undo:
637 while (--vector >= 0) {
638 msix_unset_notifier_for_vector(dev, vector);
639 }
640 dev->msix_vector_use_notifier = NULL;
641 dev->msix_vector_release_notifier = NULL;
642 return ret;
643}
644
645void msix_unset_vector_notifiers(PCIDevice *dev)
646{
647 int vector;
648
649 assert(dev->msix_vector_use_notifier &&
650 dev->msix_vector_release_notifier);
651
652 if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
653 (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
654 for (vector = 0; vector < dev->msix_entries_nr; vector++) {
655 msix_unset_notifier_for_vector(dev, vector);
656 }
657 }
658 dev->msix_vector_use_notifier = NULL;
659 dev->msix_vector_release_notifier = NULL;
660 dev->msix_vector_poll_notifier = NULL;
661}
662
663static int put_msix_state(QEMUFile *f, void *pv, size_t size,
664 const VMStateField *field, JSONWriter *vmdesc)
665{
666 msix_save(pv, f);
667
668 return 0;
669}
670
671static int get_msix_state(QEMUFile *f, void *pv, size_t size,
672 const VMStateField *field)
673{
674 msix_load(pv, f);
675 return 0;
676}
677
678static VMStateInfo vmstate_info_msix = {
679 .name = "msix state",
680 .get = get_msix_state,
681 .put = put_msix_state,
682};
683
684const VMStateDescription vmstate_msix = {
685 .name = "msix",
686 .fields = (VMStateField[]) {
687 {
688 .name = "msix",
689 .version_id = 0,
690 .field_exists = NULL,
691 .size = 0,
692 .info = &vmstate_info_msix,
693 .flags = VMS_SINGLE,
694 .offset = 0,
695 },
696 VMSTATE_END_OF_LIST()
697 }
698};
699