/*
 * Physical memory management for system emulation: RAM block
 * allocation and the dispatch structures that route guest physical
 * memory accesses to MemoryRegions.
 */

#include "qemu/osdep.h"
#include "exec/page-vary.h"
#include "qapi/error.h"

#include "qemu/cutils.h"
#include "qemu/cacheflush.h"
#include "qemu/hbitmap.h"
#include "qemu/madvise.h"

#ifdef CONFIG_TCG
#include "hw/core/tcg-cpu-ops.h"
#endif

#include "exec/exec-all.h"
#include "exec/target_page.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "hw/boards.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm.h"
#include "sysemu/tcg.h"
#include "sysemu/qtest.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "sysemu/hostmem.h"
#include "sysemu/hw_accel.h"
#include "sysemu/xen-mapcache.h"
#include "trace/trace-root.h"

#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
#endif

#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "exec/translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/pmem.h"

#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

#include "monitor/monitor.h"

#ifdef CONFIG_LIBDAXCTL
#include <daxctl/libdaxctl.h>
#endif

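/*
 * ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */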
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

static MemoryRegion io_mem_unassigned;

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip at this level, in units of P_L2_BITS; 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (when a leaf) or phys_map_nodes (when not) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* The physical address space modelled by the page map is 64 bits wide. */
#define ADDR_SPACE_BITS 64

/* Size of one level of the multi-level physical page map. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    MemoryRegionSection *mru_section;
    /*
     * This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    FlatView *fv;
    hwaddr base;
    uint16_t sub_section[];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
static void tcg_commit(MemoryListener *listener);

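/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: CPU which this address space belongs to
 * @as: address space itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */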
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

struct DirtyBitmapSnapshot {
    ram_addr_t start;
    ram_addr_t end;
    unsigned long dirty[];
};

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
        alloc_hint = map->nodes_nb_alloc;
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, uint64_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, uint64_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

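/*
 * Compact a non leaf page entry. Simply detect that the entry has a single
 * child, and then avoid traversing one level when it is found.
 */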
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (P_L2_LEVELS >= (1 << 6) &&
        lp->skip + p[valid_ptr].skip >= (1 << 6)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /*
         * If our only child is a leaf, make this a leaf.
         *
         * By design, we should have made this node a leaf to begin with so we
         * should never reach here.  But since it's so simple to handle this,
         * let's do it just in case we change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

void address_space_dispatch_compact(AddressSpaceDispatch *d)
{
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /*
     * Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return int128_gethi(section->size) ||
           range_covers_byte(section->offset_within_address_space,
                             int128_getlo(section->size), addr);
}

static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
{
    PhysPageEntry lp = d->phys_map, *p;
    Node *nodes = d->map.nodes;
    MemoryRegionSection *sections = d->map.sections;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = qatomic_read(&d->mru_section);
    subpage_t *subpage;

    if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
        !section_covers_addr(section, addr)) {
        section = phys_page_find(d, addr);
        qatomic_set(&d->mru_section, section);
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /*
     * MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

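/**
 * address_space_translate_iommu - translate an address through an IOMMU
 * memory region and then via the target address space.
 *
 * @iommu_mr: the IOMMU memory region that we start the translation from
 * @xlat: on input the address to translate; on output the translated
 *        address offset within the destination memory region
 * @plen_out: valid read/write length of the translated address; cannot
 *            be %NULL
 * @page_mask_out: page mask for the translated address, useful because
 *                 IOMMU translations may cover huge pages; may be %NULL
 *                 if the caller doesn't care
 * @is_write: whether the translation operation is for write
 * @is_mmio: whether this can be MMIO, set true if it can
 * @target_as: the address space targeted by the IOMMU
 * @attrs: transaction attributes
 *
 * This function is called from RCU critical section.
 */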
static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr,
                                                          hwaddr *xlat,
                                                          hwaddr *plen_out,
                                                          hwaddr *page_mask_out,
                                                          bool is_write,
                                                          bool is_mmio,
                                                          AddressSpace **target_as,
                                                          MemTxAttrs attrs)
{
    MemoryRegionSection *section;
    hwaddr page_mask = (hwaddr)-1;

    do {
        hwaddr addr = *xlat;
        IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
        int iommu_idx = 0;
        IOMMUTLBEntry iotlb;

        if (imrc->attrs_to_index) {
            iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
        }

        iotlb = imrc->translate(iommu_mr, addr, is_write ?
                                IOMMU_WO : IOMMU_RO, iommu_idx);

        if (!(iotlb.perm & (1 << is_write))) {
            goto unassigned;
        }

        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        page_mask &= iotlb.addr_mask;
        *plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1);
        *target_as = iotlb.target_as;

        section = address_space_translate_internal(
                address_space_to_dispatch(iotlb.target_as), addr, xlat,
                plen_out, is_mmio);

        iommu_mr = memory_region_get_iommu(section->mr);
    } while (unlikely(iommu_mr));

    if (page_mask_out) {
        *page_mask_out = page_mask;
    }
    return *section;

unassigned:
    return (MemoryRegionSection) { .mr = &io_mem_unassigned };
}

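/**
 * flatview_do_translate - translate an address in FlatView
 *
 * @fv: the flat view that we want to translate on
 * @addr: the address to be translated in above address space
 * @xlat: the translated address offset within memory region; cannot be %NULL
 * @plen_out: valid read/write length of the translated address; may be
 *            %NULL when we don't care about it
 * @page_mask_out: page mask for the translated address; may be %NULL
 * @is_write: whether the translation operation is for write
 * @is_mmio: whether this can be MMIO, set true if it can
 * @target_as: the address space targeted by the IOMMU
 * @attrs: memory transaction attributes
 *
 * This function is called from RCU critical section.
 */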
static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                                 hwaddr addr,
                                                 hwaddr *xlat,
                                                 hwaddr *plen_out,
                                                 hwaddr *page_mask_out,
                                                 bool is_write,
                                                 bool is_mmio,
                                                 AddressSpace **target_as,
                                                 MemTxAttrs attrs)
{
    MemoryRegionSection *section;
    IOMMUMemoryRegion *iommu_mr;
    hwaddr plen = (hwaddr)(-1);

    if (!plen_out) {
        plen_out = &plen;
    }

    section = address_space_translate_internal(
            flatview_to_dispatch(fv), addr, xlat,
            plen_out, is_mmio);

    iommu_mr = memory_region_get_iommu(section->mr);
    if (unlikely(iommu_mr)) {
        return address_space_translate_iommu(iommu_mr, xlat,
                                             plen_out, page_mask_out,
                                             is_write, is_mmio,
                                             target_as, attrs);
    }
    if (page_mask_out) {
        /* Not behind an IOMMU, use default page size. */
        *page_mask_out = ~TARGET_PAGE_MASK;
    }

    return *section;
}

/* Called from RCU critical section */
IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
                                            bool is_write, MemTxAttrs attrs)
{
    MemoryRegionSection section;
    hwaddr xlat, page_mask;

    /*
     * This can only be used for translations where the TLB is not used:
     * plen is unused (we pass NULL) and is_mmio is false.
     */
    section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
                                    NULL, &page_mask, is_write, false, &as,
                                    attrs);

    /* Illegal translation */
    if (section.mr == &io_mem_unassigned) {
        goto iotlb_fail;
    }

    /* Convert memory region offset into address space offset */
    xlat += section.offset_within_address_space -
            section.offset_within_region;

    return (IOMMUTLBEntry) {
        .target_as = as,
        .iova = addr & ~page_mask,
        .translated_addr = xlat & ~page_mask,
        .addr_mask = page_mask,
        /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
        .perm = IOMMU_RW,
    };

iotlb_fail:
    return (IOMMUTLBEntry) {0};
}

/* Called from RCU critical section */
MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool is_write,
                                 MemTxAttrs attrs)
{
    MemoryRegion *mr;
    MemoryRegionSection section;
    AddressSpace *as = NULL;

    /* This can be MMIO, so setup MMIO bit. */
    section = flatview_do_translate(fv, addr, xlat, plen, NULL,
                                    is_write, true, &as, attrs);
    mr = section.mr;

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    return mr;
}

typedef struct TCGIOMMUNotifier {
    IOMMUNotifier n;
    MemoryRegion *mr;
    CPUState *cpu;
    int iommu_idx;
    bool active;
} TCGIOMMUNotifier;

static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

    if (!notifier->active) {
        return;
    }
    tlb_flush(notifier->cpu);
    notifier->active = false;
    /*
     * The notifier is left on the list (inactive) so it can be reused
     * without reallocation the next time this IOMMU index is registered
     * via tcg_register_iommu_notifier().
     */
}

static void tcg_register_iommu_notifier(CPUState *cpu,
                                        IOMMUMemoryRegion *iommu_mr,
                                        int iommu_idx)
{
    /*
     * Make sure this CPU has an iommu_notifier registered for this
     * IOMMU/IOMMU index combination, so that we can flush its TLB
     * when the IOMMU tells us the mappings we've cached have changed.
     */
    MemoryRegion *mr = MEMORY_REGION(iommu_mr);
    TCGIOMMUNotifier *notifier = NULL;
    int i;

    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
        notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
        if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
            break;
        }
    }
    if (i == cpu->iommu_notifiers->len) {
        /* Not found, add a new entry at the end of the array */
        cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
        notifier = g_new0(TCGIOMMUNotifier, 1);
        g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier;

        notifier->mr = mr;
        notifier->iommu_idx = iommu_idx;
        notifier->cpu = cpu;
        /*
         * Rather than trying to register interest in the specific part
         * of the iommu's address space that we've accessed and then
         * expand it later as subsequent accesses touch more of it, we
         * just register interest in the whole thing, on the assumption
         * that iommu reconfiguration will be rare.
         */
        iommu_notifier_init(&notifier->n,
                            tcg_iommu_unmap_notify,
                            IOMMU_NOTIFIER_UNMAP,
                            0,
                            HWADDR_MAX,
                            iommu_idx);
        memory_region_register_iommu_notifier(notifier->mr, &notifier->n,
                                              &error_fatal);
    }

    if (!notifier->active) {
        notifier->active = true;
    }
}

void tcg_iommu_free_notifier_list(CPUState *cpu)
{
    /* Destroy the CPU's notifier list */
    int i;
    TCGIOMMUNotifier *notifier;

    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
        notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
        g_free(notifier);
    }
    g_array_free(cpu->iommu_notifiers, true);
}

void tcg_iommu_init_notifier_list(CPUState *cpu)
{
    cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *));
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
                                  hwaddr *xlat, hwaddr *plen,
                                  MemTxAttrs attrs, int *prot)
{
    MemoryRegionSection *section;
    IOMMUMemoryRegion *iommu_mr;
    IOMMUMemoryRegionClass *imrc;
    IOMMUTLBEntry iotlb;
    int iommu_idx;
    hwaddr addr = orig_addr;
    AddressSpaceDispatch *d =
        qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

    for (;;) {
        section = address_space_translate_internal(d, addr, &addr, plen, false);

        iommu_mr = memory_region_get_iommu(section->mr);
        if (!iommu_mr) {
            break;
        }

        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
        /*
         * We need all the permissions, so pass IOMMU_NONE so the IOMMU
         * doesn't short-cut its translation table walk.
         */
        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        /*
         * Update the caller's prot bits to remove permissions the IOMMU
         * is giving us a failure response for. If we get down to no
         * permissions left at all we can give up now.
         */
        if (!(iotlb.perm & IOMMU_RO)) {
            *prot &= ~(PAGE_READ | PAGE_EXEC);
        }
        if (!(iotlb.perm & IOMMU_WO)) {
            *prot &= ~PAGE_WRITE;
        }

        if (!*prot) {
            goto translate_fail;
        }

        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
    }

    assert(!memory_region_is_iommu(section->mr));
    *xlat = addr;
    return section;

translate_fail:
    /*
     * We should be given a page-aligned address -- certainly
     * tlb_set_page_with_attrs() does so.  The page offset of xlat
     * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
     * The page portion of xlat will be logged by memory_region_access_valid()
     * when this memory access is rejected, so use the original untranslated
     * address.
     */
    assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
    *xlat = orig_addr;
    return &d->map.sections[PHYS_SECTION_UNASSIGNED];
}

void cpu_address_space_init(CPUState *cpu, int asidx,
                            const char *prefix, MemoryRegion *mr)
{
    CPUAddressSpace *newas;
    AddressSpace *as = g_new0(AddressSpace, 1);
    char *as_name;

    assert(mr);
    as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index);
    address_space_init(as, mr, as_name);
    g_free(as_name);

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
        newas->tcg_as_listener.commit = tcg_commit;
        newas->tcg_as_listener.name = "tcg";
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}

/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = qatomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    RAMBLOCK_FOREACH(block) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /*
     * It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * qatomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

/* Note: start and end must be within the same ram block.  */
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    assert(tcg_enabled());
    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    RCU_READ_LOCK_GUARD();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page, start_page;
    bool dirty = false;
    RAMBlock *ramblock;
    uint64_t mr_offset, mr_size;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    start_page = start >> TARGET_PAGE_BITS;
    page = start_page;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
        ramblock = qemu_get_ram_block(start);
        /* Range sanity check on the ramblock */
        assert(start >= ramblock->offset &&
               start + length <= ramblock->offset + ramblock->used_length);

        while (page < end) {
            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long num = MIN(end - page,
                                    DIRTY_MEMORY_BLOCK_SIZE - offset);

            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                                  offset, num);
            page += num;
        }

        mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
        mr_size = (end - start_page) << TARGET_PAGE_BITS;
        memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
    }

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
{
    DirtyMemoryBlocks *blocks;
    ram_addr_t start = memory_region_get_ram_addr(mr) + offset;
    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
    ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
    DirtyBitmapSnapshot *snap;
    unsigned long page, end, dest;

    snap = g_malloc0(sizeof(*snap) +
                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
    snap->start = first;
    snap->end = last;

    page = first >> TARGET_PAGE_BITS;
    end = last >> TARGET_PAGE_BITS;
    dest = 0;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        while (page < end) {
            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long num = MIN(end - page,
                                    DIRTY_MEMORY_BLOCK_SIZE - offset);

            assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
            assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
            offset >>= BITS_PER_LEVEL;

            bitmap_copy_and_clear_atomic(snap->dirty + dest,
                                         blocks->blocks[idx] + offset,
                                         num);
            page += num;
            dest += num >> BITS_PER_LEVEL;
        }
    }

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    memory_region_clear_dirty_bitmap(mr, offset, length);

    return snap;
}

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length)
{
    unsigned long page, end;

    assert(start >= snap->start);
    assert(start + length <= snap->end);

    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
    page = (start - snap->start) >> TARGET_PAGE_BITS;

    while (page < end) {
        if (test_bit(page, snap->dirty)) {
            return true;
        }
        page++;
    }
    return false;
}

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
    return section - d->map.sections;
}

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section);
static subpage_t *subpage_init(FlatView *fv, hwaddr base);

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /*
     * The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(FlatView *fv, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(fv);
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
                  & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(fv, base);
        subsection.fv = fv;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(FlatView *fv,
                               MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(fv);
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

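/*
 * The range in *section* may look like this:
 *
 *      |s|PPPPPPP|s|
 *
 * where s stands for subpage and P for page.
 */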
void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
{
    MemoryRegionSection remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    /* register first subpage */
    if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space)
                        - remain.offset_within_address_space;

        MemoryRegionSection now = remain;
        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(fv, &now);
        if (int128_eq(remain.size, now.size)) {
            return;
        }
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
    }

    /* register whole pages */
    if (int128_ge(remain.size, page_size)) {
        MemoryRegionSection now = remain;
        now.size = int128_and(now.size, int128_neg(page_size));
        register_multipage(fv, &now);
        if (int128_eq(remain.size, now.size)) {
            return;
        }
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
    }

    /* register last subpage */
    register_subpage(fv, &remain);
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

GString *ram_block_format(void)
{
    RAMBlock *block;
    char *psize;
    GString *buf = g_string_new("");

    RCU_READ_LOCK_GUARD();
    g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n",
                           "Block Name", "PSize", "Offset", "Used", "Total",
                           "HVA", "RO");

    RAMBLOCK_FOREACH(block) {
        psize = size_to_str(block->page_size);
        g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
                               " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n",
                               block->idstr, psize,
                               (uint64_t)block->offset,
                               (uint64_t)block->used_length,
                               (uint64_t)block->max_length,
                               (uint64_t)(uintptr_t)block->host,
                               block->mr->readonly ? "ro" : "rw");

        g_free(psize);
    }

    return buf;
}

static int find_min_backend_pagesize(Object *obj, void *opaque)
{
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        HostMemoryBackend *backend = MEMORY_BACKEND(obj);
        long hpsize = host_memory_backend_pagesize(backend);

        if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) {
            *hpsize_min = hpsize;
        }
    }

    return 0;
}

static int find_max_backend_pagesize(Object *obj, void *opaque)
{
    long *hpsize_max = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        HostMemoryBackend *backend = MEMORY_BACKEND(obj);
        long hpsize = host_memory_backend_pagesize(backend);

        if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) {
            *hpsize_max = hpsize;
        }
    }

    return 0;
}

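/*
 * TODO: We assume right now that all mapped host memory backends are
 * used as RAM, however some might be used for different purposes.
 */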
long qemu_minrampagesize(void)
{
    long hpsize = LONG_MAX;
    Object *memdev_root = object_resolve_path("/objects", NULL);

    object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
    return hpsize;
}

long qemu_maxrampagesize(void)
{
    long pagesize = 0;
    Object *memdev_root = object_resolve_path("/objects", NULL);

    object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize);
    return pagesize;
}

#ifdef CONFIG_POSIX
static int64_t get_file_size(int fd)
{
    int64_t size;
#if defined(__linux__)
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -errno;
    }

    /* Special handling for devdax character devices */
    if (S_ISCHR(st.st_mode)) {
        g_autofree char *subsystem_path = NULL;
        g_autofree char *subsystem = NULL;

        subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem",
                                         major(st.st_rdev), minor(st.st_rdev));
        subsystem = g_file_read_link(subsystem_path, NULL);

        if (subsystem && g_str_has_suffix(subsystem, "/dax")) {
            g_autofree char *size_path = NULL;
            g_autofree char *size_str = NULL;

            size_path = g_strdup_printf("/sys/dev/char/%d:%d/size",
                                        major(st.st_rdev), minor(st.st_rdev));

            if (g_file_get_contents(size_path, &size_str, NULL, NULL)) {
                return g_ascii_strtoll(size_str, NULL, 0);
            }
        }
    }
#endif /* defined(__linux__) */

    /* st.st_size may be zero for special files yet lseek(2) works */
    size = lseek(fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}

static int64_t get_file_align(int fd)
{
    int64_t align = -1;
#if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -errno;
    }

    /* Special handling for devdax character devices */
    if (S_ISCHR(st.st_mode)) {
        g_autofree char *path = NULL;
        g_autofree char *rpath = NULL;
        struct daxctl_ctx *ctx;
        struct daxctl_region *region;
        int rc = 0;

        path = g_strdup_printf("/sys/dev/char/%d:%d",
                               major(st.st_rdev), minor(st.st_rdev));
        rpath = realpath(path, NULL);
        if (!rpath) {
            return -errno;
        }

        rc = daxctl_new(&ctx);
        if (rc) {
            return -1;
        }

        daxctl_region_foreach(ctx, region) {
            if (strstr(rpath, daxctl_region_get_path(region))) {
                align = daxctl_region_get_align(region);
                break;
            }
        }
        daxctl_unref(ctx);
    }
#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */

    return align;
}

static int file_ram_open(const char *path,
                         const char *region_name,
                         bool readonly,
                         bool *created,
                         Error **errp)
{
    char *filename;
    char *sanitized_name;
    char *c;
    int fd = -1;

    *created = false;
    for (;;) {
        fd = open(path, readonly ? O_RDONLY : O_RDWR);
        if (fd >= 0) {
            /* @path names an existing file, use it */
            break;
        }
        if (errno == ENOENT) {
            /* @path names a file that doesn't exist, create it */
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                *created = true;
                break;
            }
        } else if (errno == EISDIR) {
            /* @path names a directory, create a file there */
            /* Make name safe to use with mkstemp by replacing '/' with '_' */
            sanitized_name = g_strdup(region_name);
            for (c = sanitized_name; *c != '\0'; c++) {
                if (*c == '/') {
                    *c = '_';
                }
            }

            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                       sanitized_name);
            g_free(sanitized_name);

            fd = mkstemp(filename);
            if (fd >= 0) {
                unlink(filename);
                g_free(filename);
                break;
            }
            g_free(filename);
        }
        if (errno != EEXIST && errno != EINTR) {
            error_setg_errno(errp, errno,
                             "can't open backing store %s for guest RAM",
                             path);
            return -1;
        }
        /*
         * Try again on EINTR and EEXIST.  The latter happens when
         * something else creates the file between our two open().
         */
    }

    return fd;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            int fd,
                            bool readonly,
                            bool truncate,
                            off_t offset,
                            Error **errp)
{
    uint32_t qemu_map_flags;
    void *area;

    block->page_size = qemu_fd_getpagesize(fd);
    if (block->mr->align % block->page_size) {
        error_setg(errp, "alignment 0x%" PRIx64
                   " must be multiples of page size 0x%zx",
                   block->mr->align, block->page_size);
        return NULL;
    } else if (block->mr->align && !is_power_of_2(block->mr->align)) {
        error_setg(errp, "alignment 0x%" PRIx64
                   " must be a power of two", block->mr->align);
        return NULL;
    } else if (offset % block->page_size) {
        error_setg(errp, "offset 0x%" PRIx64
                   " must be multiples of page size 0x%zx",
                   offset, block->page_size);
        return NULL;
    }
    block->mr->align = MAX(block->page_size, block->mr->align);
#if defined(__s390x__)
    if (kvm_enabled()) {
        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
    }
#endif

    if (memory < block->page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than page size 0x%zx",
                   memory, block->page_size);
        return NULL;
    }

    memory = ROUND_UP(memory, block->page_size);

    /*
     * ftruncate is not supported by hugetlbfs in older hosts, so don't
     * bother bailing out on errors; if anything goes wrong with it under
     * other filesystems, mmap will fail.
     *
     * Do not truncate the non-empty backend file to avoid corrupting the
     * existing data in the file.  Disabling shrinking is not enough: for
     * example, the current vNVDIMM implementation stores the guest NVDIMM
     * labels at the end of the backend file.  If the backend file is later
     * extended, QEMU will not be able to find those labels.  Therefore,
     * only truncate the backend file when it has just been created.
     */
    if (truncate && ftruncate(fd, offset + memory)) {
        perror("ftruncate");
    }

    qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
    qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
    qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
    qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
    area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for guest RAM");
        return NULL;
    }

    block->fd = fd;
    block->fd_offset = offset;
    return area;
}
#endif /* CONFIG_POSIX */

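/*
 * Allocate space within the ram_addr_t space that governs the
 * dirty bitmaps.  Called with the ramlist lock held.
 */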
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    RAMBLOCK_FOREACH(block) {
        ram_addr_t candidate, next = RAM_ADDR_MAX;

        /*
         * Align blocks to start on a 'long' in the bitmap
         * which makes the bitmap sync'ing take the fast path.
         */
        candidate = block->offset + block->max_length;
        candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS);

        /*
         * Search for the closest following block
         * and find the gap.
         */
        RAMBLOCK_FOREACH(next_block) {
            if (next_block->offset >= candidate) {
                next = MIN(next, next_block->offset);
            }
        }

        /*
         * If it fits remember our place and remember the size
         * of the gap, but keep going so that we might find a smaller
         * gap to fill so avoiding fragmentation.
         */
        if (next - candidate >= size && next - candidate < mingap) {
            offset = candidate;
            mingap = next - candidate;
        }

        trace_find_ram_offset_loop(size, candidate, offset, next, mingap);
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    trace_find_ram_offset(size, offset);

    return offset;
}

static unsigned long last_ram_page(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    RCU_READ_LOCK_GUARD();
    RAMBLOCK_FOREACH(block) {
        last = MAX(last, block->offset + block->max_length);
    }
    return last >> TARGET_PAGE_BITS;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                    "but dump_guest_core=off specified\n");
        }
    }
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

void *qemu_ram_get_host_addr(RAMBlock *rb)
{
    return rb->host;
}

ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
{
    return rb->offset;
}

ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
{
    return rb->used_length;
}

ram_addr_t qemu_ram_get_max_length(RAMBlock *rb)
{
    return rb->max_length;
}

bool qemu_ram_is_shared(RAMBlock *rb)
{
    return rb->flags & RAM_SHARED;
}

bool qemu_ram_is_noreserve(RAMBlock *rb)
{
    return rb->flags & RAM_NORESERVE;
}

/* Note: Only set at the start of postcopy */
bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
{
    return rb->flags & RAM_UF_ZEROPAGE;
}

void qemu_ram_set_uf_zeroable(RAMBlock *rb)
{
    rb->flags |= RAM_UF_ZEROPAGE;
}

bool qemu_ram_is_migratable(RAMBlock *rb)
{
    return rb->flags & RAM_MIGRATABLE;
}

void qemu_ram_set_migratable(RAMBlock *rb)
{
    rb->flags |= RAM_MIGRATABLE;
}

void qemu_ram_unset_migratable(RAMBlock *rb)
{
    rb->flags &= ~RAM_MIGRATABLE;
}

bool qemu_ram_is_named_file(RAMBlock *rb)
{
    return rb->flags & RAM_NAMED_FILE;
}

int qemu_ram_get_fd(RAMBlock *rb)
{
    return rb->fd;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
    RAMBlock *block;

    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    RCU_READ_LOCK_GUARD();
    RAMBLOCK_FOREACH(block) {
        if (block != new_block &&
            !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(RAMBlock *block)
{
    /*
     * FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
}

size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}

/* Returns the largest size of page in use */
size_t qemu_ram_pagesize_largest(void)
{
    RAMBlock *block;
    size_t largest = 0;

    RAMBLOCK_FOREACH(block) {
        largest = MAX(largest, qemu_ram_pagesize(block));
    }

    return largest;
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

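/*
 * Resizing RAM while migrating can result in the migration being canceled.
 * Care has to be taken if the guest might have already detected the memory.
 *
 * As memory core doesn't know how is memory accessed, it is up to
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */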
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
    const ram_addr_t oldsize = block->used_length;
    const ram_addr_t unaligned_size = newsize;

    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        /*
         * We don't have to resize the ram block (which only knows aligned
         * sizes), so trigger the notifier and resize callback manually if
         * the unaligned size changed.
         */
        if (unaligned_size != memory_region_size(block->mr)) {
            memory_region_set_size(block->mr, unaligned_size);
            if (block->resized) {
                block->resized(block->idstr, unaligned_size, block->host);
            }
        }
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Size mismatch: %s: 0x" RAM_ADDR_FMT
                         " != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Size too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    /* Notify before modifying the ram block and touching the bitmaps. */
    if (block->host) {
        ram_block_notify_resize(block->host, oldsize, newsize);
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, unaligned_size);
    if (block->resized) {
        block->resized(block->idstr, unaligned_size, block->host);
    }
    return 0;
}

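/*
 * Trigger sync on the given ram block for range [start, start + length]
 * with the backing store if one is available.
 * Otherwise no-op.
 * @Note: this is supposed to be a synchronous op.
 */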
void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
{
    /* The requested range should fit in within the block range */
    g_assert((start + length) <= block->used_length);

#ifdef CONFIG_LIBPMEM
    /* The lack of support for pmem should not block the sync */
    if (ramblock_is_pmem(block)) {
        void *addr = ramblock_ptr(block, start);
        pmem_persist(addr, length);
        return;
    }
#endif
    if (block->fd >= 0) {
        /*
         * Case there is no support for PMEM or the memory has not been
         * specified as persistent (or is not one) - use the msync.
         * Less optimal but still achieves the same goal.
         */
        void *addr = ramblock_ptr(block, start);
        if (qemu_msync(addr, length, block->fd)) {
            warn_report("%s: failed to sync memory range: start: "
                        RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
                        __func__, start, length);
        }
    }
}

/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}

static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    const bool noreserve = qemu_ram_is_noreserve(new_block);
    const bool shared = qemu_ram_is_shared(new_block);
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_page();

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = qemu_anon_ram_alloc(new_block->max_length,
                                                  &new_block->mr->align,
                                                  shared, noreserve);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
                       (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /*
     * Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    RAMBLOCK_FOREACH(block) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /*
         * MADV_DONTFORK is also needed by KVM in absence of synchronous MMU.
         * Configure it unless the machine is a qtest server, in which case
         * KVM is not used and it may be forked (eg for fuzzing purposes).
         */
        if (!qtest_enabled()) {
            qemu_madvise(new_block->host, new_block->max_length,
                         QEMU_MADV_DONTFORK);
        }
        ram_block_notify_add(new_block->host, new_block->used_length,
                             new_block->max_length);
    }
}

#ifdef CONFIG_POSIX
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool readonly, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;
    int64_t file_size, file_align;

    /* Just support these ram flags by now. */
    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
                          RAM_PROTECTED | RAM_NAMED_FILE)) == 0);

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }

    size = HOST_PAGE_ALIGN(size);
    file_size = get_file_size(fd);
    if (file_size > offset && file_size < (offset + size)) {
        error_setg(errp, "backing store size 0x%" PRIx64
                   " does not match 'size' option 0x" RAM_ADDR_FMT,
                   file_size, size);
        return NULL;
    }

    file_align = get_file_align(fd);
    if (file_align > 0 && file_align > mr->align) {
        error_setg(errp, "backing store align 0x%" PRIx64
                   " is larger than 'align' option 0x%" PRIx64,
                   file_align, mr->align);
        return NULL;
    }

    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = ram_flags;
    new_block->host = file_ram_alloc(new_block, size, fd, readonly,
                                     !file_size, offset, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, bool readonly, Error **errp)
{
    int fd;
    bool created;
    RAMBlock *block;

    fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
                       errp);
    if (fd < 0) {
        return NULL;
    }

    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly,
                                   errp);
    if (!block) {
        if (created) {
            unlink(mem_path);
        }
        close(fd);
        return NULL;
    }

    return block;
}
#endif /* CONFIG_POSIX */

static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                  void (*resized)(const char*,
                                                  uint64_t length,
                                                  void *host),
                                  void *host, uint32_t ram_flags,
                                  MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
                          RAM_NORESERVE)) == 0);
    assert(!host ^ (ram_flags & RAM_PREALLOC));

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->page_size = qemu_real_host_page_size();
    new_block->host = host;
    new_block->flags = ram_flags;
    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr,
                                   errp);
}

RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
                         MemoryRegion *mr, Error **errp)
{
    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
    return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}

RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
                                   RAM_RESIZEABLE, mr, errp);
}

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->fd, block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    if (block->host) {
        ram_block_notify_remove(block->host, block->used_length,
                                block->max_length);
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    RAMBLOCK_FOREACH(block) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                flags |= block->flags & RAM_SHARED ?
                         MAP_SHARED : MAP_PRIVATE;
                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
                if (block->fd >= 0) {
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset + block->fd_offset);
                } else {
                    flags |= MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    error_report("Could not remap addr: "
                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
                                 length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

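/*
 * Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */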
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
    RAMBlock *block = ram_block;

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }

    if (xen_enabled() && block->host == NULL) {
        /*
         * We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0, false);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
    }
    return ramblock_ptr(block, addr);
}

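/*
 * Return a host pointer to guest's ram.
 * Similar to qemu_map_ram_ptr but takes a size argument.
 *
 * Called within RCU critical section.
 */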
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
                                 hwaddr *size, bool lock)
{
    RAMBlock *block = ram_block;
    if (*size == 0) {
        return NULL;
    }

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }
    *size = MIN(*size, block->max_length - addr);

    if (xen_enabled() && block->host == NULL) {
        /*
         * We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, lock, lock);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
    }

    return ramblock_ptr(block, addr);
}

/* Return the offset of a hostpointer within a ramblock */
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
{
    ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
    assert((uintptr_t)host >= (uintptr_t)rb->host);
    assert(res < rb->max_length);

    return res;
}

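/*
 * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting
 * the pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */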
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        ram_addr_t ram_addr;
        RCU_READ_LOCK_GUARD();
        ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(ram_addr);
        if (block) {
            *offset = ram_addr - block->offset;
        }
        return block;
    }

    RCU_READ_LOCK_GUARD();
    block = qatomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    RAMBLOCK_FOREACH(block) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    return block;
}

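/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */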
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

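/*
 * Some of the system routines need to translate from a host pointer
 * (typically a TLB entry) back to a ram offset.
 */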
ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
    RAMBlock *block;
    ram_addr_t offset;

    block = qemu_ram_block_from_host(ptr, false, &offset);
    if (!block) {
        return RAM_ADDR_INVALID;
    }

    return block->offset + offset;
}

ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                 MemTxAttrs attrs, void *buf, hwaddr len);
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
                                  const void *buf, hwaddr len);
static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
                                  bool is_write, MemTxAttrs attrs);

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len);
    if (res) {
        return res;
    }
    *data = ldn_p(buf, len);
    return MEMTX_OK;
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " HWADDR_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    stn_p(buf, len, value);
    return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write,
                            MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return flatview_access_valid(subpage->fv, addr + subpage->base,
                                 len, is_write, attrs);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(FlatView *fv, hwaddr base)
{
    subpage_t *mmio;

    /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */
    mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
    mmio->fv = fv;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
{
    assert(fv);
    MemoryRegionSection section = {
        .fv = fv,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}
2409
2410MemoryRegionSection *iotlb_to_section(CPUState *cpu,
2411 hwaddr index, MemTxAttrs attrs)
2412{
2413 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2414 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2415 AddressSpaceDispatch *d = qatomic_rcu_read(&cpuas->memory_dispatch);
2416 int section_index = index & ~TARGET_PAGE_MASK;
2417 MemoryRegionSection *ret;
2418
2419 assert(section_index < d->map.sections_nb);
2420 ret = d->map.sections + section_index;
2421 assert(ret->mr);
2422 assert(ret->mr->ops);
2423
2424 return ret;
2425}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
{
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, fv, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };

    return d;
}

void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

static void do_nothing(CPUState *cpu, run_on_cpu_data d)
{
}

static void tcg_log_global_after_sync(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;

    /* Wait for the CPU to end the current TB.  This avoids the following
     * incorrect race:
     *
     *      vCPU                         migration
     *      ----------------------       -------------------------
     *      TLB check -> slow path
     *        notdirty_mem_write
     *          write to RAM
     *          mark dirty
     *                                   clear dirty flag
     *      TLB check -> fast path
     *                                   read memory
     *        write to RAM
     *
     * by pushing the migration thread's memory read after the vCPU thread
     * has written the memory.
     */
    if (replay_mode == REPLAY_MODE_NONE) {
        /*
         * VGA can make calls to this function while updating the screen.
         * In record/replay mode this causes a deadlock, because
         * run_on_cpu waits for the rr mutex. Therefore no races are
         * possible in this case and there is no need for run_on_cpu when
         * record/replay is enabled.
         */
        cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
        run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    assert(tcg_enabled());
    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.  We reload
     * the dispatch pointer now because cpu_reloading_memory_map() may
     * have split the RCU critical section.  */
    d = address_space_to_dispatch(cpuas->as);
    qatomic_rcu_set(&cpuas->memory_dispatch, d);
    tlb_flush(cpuas->cpu);
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    addr += memory_region_get_ram_addr(mr);

    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        assert(tcg_enabled());
        tb_invalidate_phys_range(addr, addr + length - 1);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
{
    /*
     * In principle this function would work on other memory region types too,
     * but the ROM device use case is the only one where this operation is
     * necessary.  Other memory regions are accessed with
     * address_space_read/write() and do not need flushing.
     */
    assert(memory_region_is_romd(mr));

    invalidate_and_set_dirty(mr, addr, size);
}

int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}
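
/*
 * Worked example (hypothetical device, not part of the dispatch code):
 * with ops->valid.max_access_size = 4 and impl.unaligned = false, an
 * 8-byte transfer starting at 0x1006 is split by repeated calls:
 *
 *     memory_access_size(mr, 8, 0x1006) -> 2   (address only 2-aligned)
 *     memory_access_size(mr, 6, 0x1008) -> 4   (capped at the maximum)
 *     memory_access_size(mr, 2, 0x100c) -> 2
 *
 * so the access loops below issue a 2-, a 4- and a 2-byte transaction.
 */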

bool prepare_mmio_access(MemoryRegion *mr)
{
    bool release_lock = false;

    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        qemu_flush_coalesced_mmio_buffer();
    }

    return release_lock;
}
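
/*
 * Sketch of the expected caller pattern ('mr', 'addr' and 'val' are
 * hypothetical): the BQL is acquired only if this thread does not
 * already hold it, and the same caller must drop it afterwards:
 *
 *     bool release_lock = prepare_mmio_access(mr);
 *     memory_region_dispatch_write(mr, addr, val, MO_32,
 *                                  MEMTXATTRS_UNSPECIFIED);
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *     }
 */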

/*
 * flatview_access_allowed
 * @mr: #MemoryRegion to be accessed
 * @attrs: memory transaction attributes
 * @addr: memory address
 * @len: the number of bytes to access
 *
 * Check if a memory transaction is allowed.
 *
 * Returns: true if transaction is allowed, false if denied
 */
static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
                                    hwaddr addr, hwaddr len)
{
    if (likely(!attrs.memory)) {
        return true;
    }
    if (memory_region_is_ram(mr)) {
        return true;
    }
    qemu_log_mask(LOG_GUEST_ERROR,
                  "Invalid access to non-RAM device at "
                  "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
                  "region '%s'\n", addr, len, memory_region_name(mr));
    return false;
}

/* Called within RCU critical section.  */
static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
                                           MemTxAttrs attrs,
                                           const void *ptr,
                                           hwaddr len, hwaddr addr1,
                                           hwaddr l, MemoryRegion *mr)
{
    uint8_t *ram_ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;
    const uint8_t *buf = ptr;

    for (;;) {
        if (!flatview_access_allowed(mr, attrs, addr1, l)) {
            result |= MEMTX_ACCESS_ERROR;
            /* Keep going. */
        } else if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            val = ldn_he_p(buf, l);
            result |= memory_region_dispatch_write(mr, addr1, val,
                                                   size_memop(l), attrs);
        } else {
            /* RAM case */
            ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memmove(ram_ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
    }

    return result;
}

/* Called from RCU critical section.  */
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
                                  const void *buf, hwaddr len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;

    l = len;
    mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
    if (!flatview_access_allowed(mr, attrs, addr, len)) {
        return MEMTX_ACCESS_ERROR;
    }
    return flatview_write_continue(fv, addr, attrs, buf, len,
                                   addr1, l, mr);
}

/* Called within RCU critical section.  */
MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
                                   MemTxAttrs attrs, void *ptr,
                                   hwaddr len, hwaddr addr1, hwaddr l,
                                   MemoryRegion *mr)
{
    uint8_t *ram_ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;
    uint8_t *buf = ptr;

    fuzz_dma_read_cb(addr, len, mr);
    for (;;) {
        if (!flatview_access_allowed(mr, attrs, addr1, l)) {
            result |= MEMTX_ACCESS_ERROR;
            /* Keep going. */
        } else if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            result |= memory_region_dispatch_read(mr, addr1, &val,
                                                  size_memop(l), attrs);
            stn_he_p(buf, l, val);
        } else {
            /* RAM case */
            ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(buf, ram_ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
    }

    return result;
}

/* Called from RCU critical section.  */
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                 MemTxAttrs attrs, void *buf, hwaddr len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;

    l = len;
    mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
    if (!flatview_access_allowed(mr, attrs, addr, len)) {
        return MEMTX_ACCESS_ERROR;
    }
    return flatview_read_continue(fv, addr, attrs, buf, len,
                                  addr1, l, mr);
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, void *buf, hwaddr len)
{
    MemTxResult result = MEMTX_OK;
    FlatView *fv;

    if (len > 0) {
        RCU_READ_LOCK_GUARD();
        fv = address_space_to_flatview(as);
        result = flatview_read(fv, addr, attrs, buf, len);
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
                                MemTxAttrs attrs,
                                const void *buf, hwaddr len)
{
    MemTxResult result = MEMTX_OK;
    FlatView *fv;

    if (len > 0) {
        RCU_READ_LOCK_GUARD();
        fv = address_space_to_flatview(as);
        result = flatview_write(fv, addr, attrs, buf, len);
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             void *buf, hwaddr len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, buf, len);
    } else {
        return address_space_read_full(as, addr, attrs, buf, len);
    }
}
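
/*
 * Usage sketch (address and data are hypothetical): a device model
 * performing a bus-master write checks the transaction result rather
 * than assuming the access succeeded:
 *
 *     uint32_t data = cpu_to_le32(0x12345678);
 *     MemTxResult r = address_space_write(&address_space_memory, 0x40000,
 *                                         MEMTXATTRS_UNSPECIFIED,
 *                                         &data, sizeof(data));
 *     if (r != MEMTX_OK) {
 *         // report a failed bus transaction, e.g. via a device status bit
 *     }
 */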

MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
                              uint8_t c, hwaddr len, MemTxAttrs attrs)
{
#define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    int l;
    MemTxResult error = MEMTX_OK;

    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
        error |= address_space_write(as, addr, attrs, fillbuf, l);
        len -= l;
        addr += l;
    }

    return error;
}
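
/*
 * Usage sketch (hypothetical range): zeroing a guest-physical buffer
 * without allocating a scratch buffer of the full length:
 *
 *     address_space_set(&address_space_memory, 0x80000, 0, 0x10000,
 *                       MEMTXATTRS_UNSPECIFIED);
 *
 * The fill is issued in FILLBUF_SIZE chunks, and the OR-combined
 * MemTxResult reports an error if any chunk failed.
 */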

void cpu_physical_memory_rw(hwaddr addr, void *buf,
                            hwaddr len, bool is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
                                                           hwaddr addr,
                                                           MemTxAttrs attrs,
                                                           const void *ptr,
                                                           hwaddr len,
                                                           enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ram_ptr;
    hwaddr addr1;
    MemoryRegion *mr;
    const uint8_t *buf = ptr;

    RCU_READ_LOCK_GUARD();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true, attrs);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ram_ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return MEMTX_OK;
}

/* used for ROM loading : can write in RAM and ROM */
MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs,
                                    const void *buf, hwaddr len)
{
    return address_space_write_rom_internal(as, addr, attrs,
                                            buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, hwaddr len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    address_space_write_rom_internal(&address_space_memory,
                                     start, MEMTXATTRS_UNSPECIFIED,
                                     NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    /* Write map_client_list before reading in_use.  */
    smp_mb();
    if (!qatomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}
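
/*
 * Sketch of the map-client protocol ('retry_bh' is a hypothetical QEMUBH):
 * a caller that loses the race for the single bounce buffer registers a
 * bottom half and retries the mapping once address_space_unmap() releases
 * the buffer and reschedules all registered clients:
 *
 *     mem = address_space_map(as, addr, &len, is_write, attrs);
 *     if (!mem) {
 *         cpu_register_map_client(retry_bh);
 *     }
 */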

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made to it after this point.  The target
     * page size must therefore be finalized before the I/O memory and the
     * memory map are initialized below.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
                                  bool is_write, MemTxAttrs attrs)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr,
                                hwaddr len, bool is_write,
                                MemTxAttrs attrs)
{
    FlatView *fv;

    RCU_READ_LOCK_GUARD();
    fv = address_space_to_flatview(as);
    return flatview_access_valid(fv, addr, len, is_write, attrs);
}

static hwaddr
flatview_extend_translation(FlatView *fv, hwaddr addr,
                            hwaddr target_len,
                            MemoryRegion *mr, hwaddr base, hwaddr len,
                            bool is_write, MemTxAttrs attrs)
{
    hwaddr done = 0;
    hwaddr xlat;
    MemoryRegion *this_mr;

    for (;;) {
        target_len -= len;
        addr += len;
        done += len;
        if (target_len == 0) {
            return done;
        }

        len = target_len;
        this_mr = flatview_translate(fv, addr, &xlat,
                                     &len, is_write, attrs);
        if (this_mr != mr || xlat != base + done) {
            return done;
        }
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write,
                        MemTxAttrs attrs)
{
    hwaddr len = *plen;
    hwaddr l, xlat;
    MemoryRegion *mr;
    FlatView *fv;

    if (len == 0) {
        return NULL;
    }

    l = len;
    RCU_READ_LOCK_GUARD();
    fv = address_space_to_flatview(as);
    mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);

    if (!memory_access_is_direct(mr, is_write)) {
        if (qatomic_xchg(&bounce.in_use, true)) {
            *plen = 0;
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
                          bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    memory_region_ref(mr);
    *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
                                        l, is_write, attrs);
    fuzz_dma_read_cb(addr, *plen, mr);
    return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write is true.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         bool is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = memory_region_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    /* Clear in_use before reading map_client_list.  */
    qatomic_set_mb(&bounce.in_use, false);
    cpu_notify_map_clients();
}
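
/*
 * Sketch of a zero-copy DMA transfer ('as', 'dma_addr' and 'size' are
 * hypothetical, as is the helper filling the buffer; real callers loop
 * because *plen may come back smaller than requested):
 *
 *     hwaddr plen = size;
 *     uint8_t *p = address_space_map(as, dma_addr, &plen, true,
 *                                    MEMTXATTRS_UNSPECIFIED);
 *     if (p) {
 *         produce_device_data(p, plen);
 *         address_space_unmap(as, p, plen, true, plen);
 *     }
 */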

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              bool is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write,
                             MEMTXATTRS_UNSPECIFIED);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               bool is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len,
                               is_write, access_len);
}

#define ARG1_DECL AddressSpace *as
#define ARG1 as
#define SUFFIX
#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
#define RCU_READ_LOCK(...) rcu_read_lock()
#define RCU_READ_UNLOCK(...) rcu_read_unlock()
#include "memory_ldst.c.inc"

int64_t address_space_cache_init(MemoryRegionCache *cache,
                                 AddressSpace *as,
                                 hwaddr addr,
                                 hwaddr len,
                                 bool is_write)
{
    AddressSpaceDispatch *d;
    hwaddr l;
    MemoryRegion *mr;
    Int128 diff;

    assert(len > 0);

    l = len;
    cache->fv = address_space_get_flatview(as);
    d = flatview_to_dispatch(cache->fv);
    cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true);

    /*
     * cache->xlat is now relative to cache->mrs.mr, not to the section itself.
     * Take that into account to compute how many bytes are there between
     * cache->xlat and the end of the section.
     */
    diff = int128_sub(cache->mrs.size,
                      int128_make64(cache->xlat - cache->mrs.offset_within_region));
    l = int128_get64(int128_min(diff, int128_make64(l)));

    mr = cache->mrs.mr;
    memory_region_ref(mr);
    if (memory_access_is_direct(mr, is_write)) {
        /* We don't care about the memory attributes here as we're only
         * doing this if we found actual RAM, which behaves the same
         * regardless of attributes; so UNSPECIFIED is fine.
         */
        l = flatview_extend_translation(cache->fv, addr, len, mr,
                                        cache->xlat, l, is_write,
                                        MEMTXATTRS_UNSPECIFIED);
        cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true);
    } else {
        cache->ptr = NULL;
    }

    cache->len = l;
    cache->is_write = is_write;
    return l;
}

void address_space_cache_invalidate(MemoryRegionCache *cache,
                                    hwaddr addr,
                                    hwaddr access_len)
{
    assert(cache->is_write);
    if (likely(cache->ptr)) {
        invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len);
    }
}

void address_space_cache_destroy(MemoryRegionCache *cache)
{
    if (!cache->mrs.mr) {
        return;
    }

    if (xen_enabled()) {
        xen_invalidate_map_cache_entry(cache->ptr);
    }
    memory_region_unref(cache->mrs.mr);
    flatview_unref(cache->fv);
    cache->mrs.mr = NULL;
    cache->fv = NULL;
}
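
/*
 * Usage sketch: caching a translation for repeated accesses to one guest
 * structure, as virtio does for its rings ('as' and 'ring_addr' are
 * hypothetical; error handling elided):
 *
 *     MemoryRegionCache cache = MEMORY_REGION_CACHE_INVALID;
 *     uint16_t idx;
 *
 *     if (address_space_cache_init(&cache, as, ring_addr, sizeof(idx),
 *                                  false) >= sizeof(idx)) {
 *         address_space_read_cached(&cache, 0, &idx, sizeof(idx));
 *     }
 *     address_space_cache_destroy(&cache);
 */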

/* Called from RCU critical section.  This function has the same
 * semantics as address_space_translate, but it only works on a
 * predefined range of a MemoryRegion that was mapped with
 * address_space_cache_init.
 */
static inline MemoryRegion *address_space_translate_cached(
    MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
    hwaddr *plen, bool is_write, MemTxAttrs attrs)
{
    MemoryRegionSection section;
    MemoryRegion *mr;
    IOMMUMemoryRegion *iommu_mr;
    AddressSpace *target_as;

    assert(!cache->ptr);
    *xlat = addr + cache->xlat;

    mr = cache->mrs.mr;
    iommu_mr = memory_region_get_iommu(mr);
    if (!iommu_mr) {
        /* MMIO region.  */
        return mr;
    }

    section = address_space_translate_iommu(iommu_mr, xlat, plen,
                                            NULL, is_write, true,
                                            &target_as, attrs);
    return section.mr;
}

/* Called from RCU critical section. address_space_read_cached uses this
 * out-of-line function when the target is an MMIO or IOMMU region.
 */
MemTxResult
address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
                               void *buf, hwaddr len)
{
    hwaddr addr1, l;
    MemoryRegion *mr;

    l = len;
    mr = address_space_translate_cached(cache, addr, &addr1, &l, false,
                                        MEMTXATTRS_UNSPECIFIED);
    return flatview_read_continue(cache->fv,
                                  addr, MEMTXATTRS_UNSPECIFIED, buf, len,
                                  addr1, l, mr);
}

/* Called from RCU critical section. address_space_write_cached uses this
 * out-of-line function when the target is an MMIO or IOMMU region.
 */
MemTxResult
address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
                                const void *buf, hwaddr len)
{
    hwaddr addr1, l;
    MemoryRegion *mr;

    l = len;
    mr = address_space_translate_cached(cache, addr, &addr1, &l, true,
                                        MEMTXATTRS_UNSPECIFIED);
    return flatview_write_continue(cache->fv,
                                   addr, MEMTXATTRS_UNSPECIFIED, buf, len,
                                   addr1, l, mr);
}

#define ARG1_DECL MemoryRegionCache *cache
#define ARG1 cache
#define SUFFIX _cached_slow
#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
#define RCU_READ_LOCK() ((void)0)
#define RCU_READ_UNLOCK() ((void)0)
#include "memory_ldst.c.inc"

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
                        void *ptr, size_t len, bool is_write)
{
    hwaddr phys_addr;
    vaddr l, page;
    uint8_t *buf = ptr;

    cpu_synchronize_state(cpu);
    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;
        MemTxResult res;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
                                          attrs, buf, l);
        } else {
            res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
                                     attrs, buf, l);
        }
        if (res != MEMTX_OK) {
            return -1;
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
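
/*
 * Usage sketch: this is the path the gdbstub and the monitor's virtual
 * memory dump take ('cpu' and 'pc' are hypothetical):
 *
 *     uint32_t insn;
 *     if (cpu_memory_rw_debug(cpu, pc, &insn, sizeof(insn), false) < 0) {
 *         // address not mapped in the guest MMU
 *     }
 */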

/*
 * Allows code that needs to deal with migration bitmaps etc to still be
 * built target independent.
 */
size_t qemu_target_page_size(void)
{
    return TARGET_PAGE_SIZE;
}

int qemu_target_page_mask(void)
{
    return TARGET_PAGE_MASK;
}

int qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

int qemu_target_page_bits_min(void)
{
    return TARGET_PAGE_BITS_MIN;
}

/* Convert target pages to MiB (2**20). */
size_t qemu_target_pages_to_MiB(size_t pages)
{
    int page_bits = TARGET_PAGE_BITS;

    /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
    g_assert(page_bits < 20);

    return pages >> (20 - page_bits);
}
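
/*
 * Worked example, assuming the common TARGET_PAGE_BITS = 12 (4 KiB pages):
 * qemu_target_pages_to_MiB(512) = 512 >> (20 - 12) = 2, matching
 * 512 * 4 KiB = 2 MiB.  The shift truncates, so 511 pages yield 1 MiB.
 */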

bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    RCU_READ_LOCK_GUARD();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false,
                                 MEMTXATTRS_UNSPECIFIED);

    return !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();
    RAMBLOCK_FOREACH(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

/*
 * Unmap pages of memory from start to start+length such that
 * they a) read as 0, b) trigger whatever fault mechanism
 * the OS provides for postcopy.
 * The pages must be unmapped by the end of the function.
 *
 * Returns: 0 on success, non-0 on failure.
 */
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
{
    int ret = -1;

    uint8_t *host_startaddr = rb->host + start;

    if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
        error_report("ram_block_discard_range: Unaligned start address: %p",
                     host_startaddr);
        goto err;
    }

    if ((start + length) <= rb->max_length) {
        bool need_madvise, need_fallocate;
        if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
            error_report("ram_block_discard_range: Unaligned length: %zx",
                         length);
            goto err;
        }

        errno = ENOTSUP; /* If we are missing MADVISE etc */

        /* The logic here is messy;
         *    madvise DONTNEED fails for hugepages
         *    fallocate works on hugepages and shmem
         *    shared anonymous memory requires madvise REMOVE
         */
        need_madvise = (rb->page_size == qemu_host_page_size);
        need_fallocate = rb->fd != -1;
        if (need_fallocate) {
            /* For a file, this causes the area of the file to be zero'd
             * if read, and for hugetlbfs also causes it to be unmapped
             * so a userfault will trigger.
             */
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
            /*
             * We'll discard data from the actual file, even though we only
             * have a MAP_PRIVATE mapping, possibly messing with other
             * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
             * change that behavior without violating the promised
             * semantics of ram_block_discard_range().
             *
             * Only warn, because it works as long as nobody else uses that
             * file.
             */
            if (!qemu_ram_is_shared(rb)) {
                warn_report_once("ram_block_discard_range: Discarding RAM"
                                 " in private file mappings is possibly"
                                 " dangerous, because it will modify the"
                                 " underlying file and will affect other"
                                 " users of the file");
            }

            ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                            start, length);
            if (ret) {
                ret = -errno;
                error_report("ram_block_discard_range: Failed to fallocate "
                             "%s:%" PRIx64 " +%zx (%d)",
                             rb->idstr, start, length, ret);
                goto err;
            }
#else
            ret = -ENOSYS;
            error_report("ram_block_discard_range: fallocate not available/file"
                         "%s:%" PRIx64 " +%zx (%d)",
                         rb->idstr, start, length, ret);
            goto err;
#endif
        }
        if (need_madvise) {
            /* For normal RAM this causes it to be unmapped,
             * for shared memory it causes the local mapping to disappear
             * and to fall back on the file contents (which we just
             * fallocate'd away).
             */
#if defined(CONFIG_MADVISE)
            if (qemu_ram_is_shared(rb) && rb->fd < 0) {
                ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
            } else {
                ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
            }
            if (ret) {
                ret = -errno;
                error_report("ram_block_discard_range: Failed to discard range "
                             "%s:%" PRIx64 " +%zx (%d)",
                             rb->idstr, start, length, ret);
                goto err;
            }
#else
            ret = -ENOSYS;
            error_report("ram_block_discard_range: MADVISE not available"
                         "%s:%" PRIx64 " +%zx (%d)",
                         rb->idstr, start, length, ret);
            goto err;
#endif
        }
        trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
                                      need_madvise, need_fallocate, ret);
    } else {
        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
                     "/%zx/" RAM_ADDR_FMT")",
                     rb->idstr, start, length, rb->max_length);
    }

err:
    return ret;
}

bool ramblock_is_pmem(RAMBlock *rb)
{
    return rb->flags & RAM_PMEM;
}

static void mtree_print_phys_entries(int start, int end, int skip, int ptr)
{
    if (start == end - 1) {
        qemu_printf("\t%3d      ", start);
    } else {
        qemu_printf("\t%3d..%-3d ", start, end - 1);
    }
    qemu_printf(" skip=%d ", skip);
    if (ptr == PHYS_MAP_NODE_NIL) {
        qemu_printf(" ptr=NIL");
    } else if (!skip) {
        qemu_printf(" ptr=#%d", ptr);
    } else {
        qemu_printf(" ptr=[%d]", ptr);
    }
    qemu_printf("\n");
}

#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
                           int128_sub((size), int128_one())) : 0)

void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
{
    int i;

    qemu_printf("  Dispatch\n");
    qemu_printf("    Physical sections\n");

    for (i = 0; i < d->map.sections_nb; ++i) {
        MemoryRegionSection *s = d->map.sections + i;
        const char *names[] = { " [unassigned]", " [not dirty]",
                                " [ROM]", " [watch]" };

        qemu_printf("      #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx
                    " %s%s%s%s%s",
                    i,
                    s->offset_within_address_space,
                    s->offset_within_address_space + MR_SIZE(s->size),
                    s->mr->name ? s->mr->name : "(noname)",
                    i < ARRAY_SIZE(names) ? names[i] : "",
                    s->mr == root ? " [ROOT]" : "",
                    s == d->mru_section ? " [MRU]" : "",
                    s->mr->is_iommu ? " [iommu]" : "");

        if (s->mr->alias) {
            qemu_printf(" alias=%s", s->mr->alias->name ?
                        s->mr->alias->name : "noname");
        }
        qemu_printf("\n");
    }

    qemu_printf("    Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n",
                P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip);
    for (i = 0; i < d->map.nodes_nb; ++i) {
        int j, jprev;
        PhysPageEntry prev;
        Node *n = d->map.nodes + i;

        qemu_printf("      [%d]\n", i);

        for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) {
            PhysPageEntry *pe = *n + j;

            if (pe->ptr == prev.ptr && pe->skip == prev.skip) {
                continue;
            }

            mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);

            jprev = j;
            prev = *pe;
        }

        if (jprev != ARRAY_SIZE(*n)) {
            mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);
        }
    }
}

/* Require any discards to work. */
static unsigned int ram_block_discard_required_cnt;
/* Require only coordinated discards to work. */
static unsigned int ram_block_coordinated_discard_required_cnt;
/* Disable any discards. */
static unsigned int ram_block_discard_disabled_cnt;
/* Disable only uncoordinated discards. */
static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
static QemuMutex ram_block_discard_disable_mutex;

static void ram_block_discard_disable_mutex_lock(void)
{
    static gsize initialized;

    if (g_once_init_enter(&initialized)) {
        qemu_mutex_init(&ram_block_discard_disable_mutex);
        g_once_init_leave(&initialized, 1);
    }
    qemu_mutex_lock(&ram_block_discard_disable_mutex);
}

static void ram_block_discard_disable_mutex_unlock(void)
{
    qemu_mutex_unlock(&ram_block_discard_disable_mutex);
}

int ram_block_discard_disable(bool state)
{
    int ret = 0;

    ram_block_discard_disable_mutex_lock();
    if (!state) {
        ram_block_discard_disabled_cnt--;
    } else if (ram_block_discard_required_cnt ||
               ram_block_coordinated_discard_required_cnt) {
        ret = -EBUSY;
    } else {
        ram_block_discard_disabled_cnt++;
    }
    ram_block_discard_disable_mutex_unlock();
    return ret;
}

int ram_block_uncoordinated_discard_disable(bool state)
{
    int ret = 0;

    ram_block_discard_disable_mutex_lock();
    if (!state) {
        ram_block_uncoordinated_discard_disabled_cnt--;
    } else if (ram_block_discard_required_cnt) {
        ret = -EBUSY;
    } else {
        ram_block_uncoordinated_discard_disabled_cnt++;
    }
    ram_block_discard_disable_mutex_unlock();
    return ret;
}

int ram_block_discard_require(bool state)
{
    int ret = 0;

    ram_block_discard_disable_mutex_lock();
    if (!state) {
        ram_block_discard_required_cnt--;
    } else if (ram_block_discard_disabled_cnt ||
               ram_block_uncoordinated_discard_disabled_cnt) {
        ret = -EBUSY;
    } else {
        ram_block_discard_required_cnt++;
    }
    ram_block_discard_disable_mutex_unlock();
    return ret;
}

int ram_block_coordinated_discard_require(bool state)
{
    int ret = 0;

    ram_block_discard_disable_mutex_lock();
    if (!state) {
        ram_block_coordinated_discard_required_cnt--;
    } else if (ram_block_discard_disabled_cnt) {
        ret = -EBUSY;
    } else {
        ram_block_coordinated_discard_required_cnt++;
    }
    ram_block_discard_disable_mutex_unlock();
    return ret;
}
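
/*
 * Usage sketch: a device whose operation depends on discarding RAM
 * (coordinated discards, in the style of virtio-mem) brackets its
 * lifetime with a require/unrequire pair, and backs off if another
 * user has discards disabled:
 *
 *     if (ram_block_coordinated_discard_require(true)) {
 *         // -EBUSY: discards are disabled, fail device realization
 *     }
 *     ...
 *     ram_block_coordinated_discard_require(false);
 */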

bool ram_block_discard_is_disabled(void)
{
    return qatomic_read(&ram_block_discard_disabled_cnt) ||
           qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
}

bool ram_block_discard_is_required(void)
{
    return qatomic_read(&ram_block_discard_required_cnt) ||
           qatomic_read(&ram_block_coordinated_discard_required_cnt);
}