qemu/target/i386/hax/hax-mem.c
/*
 * HAX memory mapping operations
 *
 * Copyright (c) 2015-16 Intel Corporation
 * Copyright 2016 Google, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"

#include "hax-accel-ops.h"
#include "qemu/queue.h"

#define DEBUG_HAX_MEM 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX_MEM) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/**
 * HAXMapping: describes a pending guest physical memory mapping
 *
 * @start_pa: a guest physical address marking the start of the region; must be
 *            page-aligned
 * @size: the size of the region in bytes; must be a multiple of the page size
 * @host_va: the host virtual address of the start of the mapping
 * @flags: mapping parameters, e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
 * @entry: additional fields for linking #HAXMapping instances together
 */
typedef struct HAXMapping {
    uint64_t start_pa;
    uint32_t size;
    uint64_t host_va;
    int flags;
    QTAILQ_ENTRY(HAXMapping) entry;
} HAXMapping;

/*
 * A doubly-linked list (actually a tail queue) of the pending page mappings
 * for the ongoing memory transaction.
 *
 * It is used to optimize the number of page mapping updates done through the
 * kernel module. For example, it is effective when a driver is digging an MMIO
 * hole inside an existing memory mapping: the transaction consists of a
 * deletion of the whole region, then the addition of the two remaining RAM
 * areas around the hole, and finally the commit. During the commit, only the
 * removal of the pages from the MMIO hole is sent to the kernel, after the
 * result of the deletion and additions has been computed locally.
 */
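/*
 * Illustrative example (hypothetical addresses): starting from a single RAM
 * mapping covering 0x00000000..0x00100000, digging a one-page MMIO hole at
 * 0x000c0000 queues three pending updates:
 *     - 0x00000000..0x00100000   (removal of the whole region)
 *     + 0x00000000..0x000c0000   (re-addition below the hole)
 *     + 0x000c1000..0x00100000   (re-addition above the hole)
 * Once merged locally, the commit issues a single hax_set_ram() call that
 * unmaps only 0x000c0000..0x000c1000.
 */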
static QTAILQ_HEAD(, HAXMapping) mappings =
    QTAILQ_HEAD_INITIALIZER(mappings);

/**
 * hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
 */
static void hax_mapping_dump_list(void)
{
    HAXMapping *entry;

    DPRINTF("%s updates:\n", __func__);
    QTAILQ_FOREACH(entry, &mappings, entry) {
        DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
                "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
                entry->start_pa, entry->start_pa + entry->size, entry->host_va,
                entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
    }
}

static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
                                      uint32_t size, uint64_t host_va,
                                      uint8_t flags)
{
    HAXMapping *entry;

    entry = g_malloc0(sizeof(*entry));
    entry->start_pa = start_pa;
    entry->size = size;
    entry->host_va = host_va;
    entry->flags = flags;
    if (!next) {
        QTAILQ_INSERT_TAIL(&mappings, entry, entry);
    } else {
        QTAILQ_INSERT_BEFORE(next, entry, entry);
    }
}

static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
                                    uint8_t flags)
{
    /*
     * The two updates cancel out iff they map the same host_va and their
     * flags differ only in HAX_RAM_INFO_INVALID, i.e. the region is removed
     * and then re-added (or vice versa) without any change to the read-only
     * attribute.
     */
    bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;

    return (entry->host_va == host_va) && nop_flags;
}

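/**
 * hax_update_mapping: folds one mapping update into the pending list
 *
 * Walks @mappings (kept sorted by start_pa and non-overlapping), splitting or
 * trimming existing entries that intersect [@start_pa, @start_pa + @size),
 * dropping pairs that hax_mapping_is_opposite() identifies as no-ops, and
 * inserting whatever remains of the new update.
 */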
static void hax_update_mapping(uint64_t start_pa, uint32_t size,
                               uint64_t host_va, uint8_t flags)
{
    uint64_t end_pa = start_pa + size;
    HAXMapping *entry, *next;

    QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
        uint32_t chunk_sz;
        if (start_pa >= entry->start_pa + entry->size) {
            continue;
        }
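        /* queue the part of the new mapping that precedes this entry */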
        if (start_pa < entry->start_pa) {
            chunk_sz = end_pa <= entry->start_pa ? size
                                                 : entry->start_pa - start_pa;
            hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                      host_va, flags);
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        } else if (start_pa > entry->start_pa) {
            /* split the existing chunk at start_pa */
            chunk_sz = start_pa - entry->start_pa;
            hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
                                      entry->host_va, entry->flags);
            entry->start_pa += chunk_sz;
            entry->host_va += chunk_sz;
            entry->size -= chunk_sz;
        }
        /* now start_pa == entry->start_pa */
        chunk_sz = MIN(size, entry->size);
        if (chunk_sz) {
            bool nop = hax_mapping_is_opposite(entry, host_va, flags);
            bool partial = chunk_sz < entry->size;
            if (partial) {
                /* remove the beginning of the existing chunk */
                entry->start_pa += chunk_sz;
                entry->host_va += chunk_sz;
                entry->size -= chunk_sz;
                if (!nop) {
                    hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                              host_va, flags);
                }
            } else { /* affects the full mapping entry */
                if (nop) { /* no change to this mapping, remove it */
                    QTAILQ_REMOVE(&mappings, entry, entry);
                    g_free(entry);
                } else { /* update mapping properties */
                    entry->host_va = host_va;
                    entry->flags = flags;
                }
            }
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        }
        if (!size) { /* we are done */
            break;
        }
    }
    if (size) { /* add the leftover */
        hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
    }
}

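/**
 * hax_process_section: translates a memory section into mapping updates
 *
 * Clips @section to host page boundaries and queues the resulting range via
 * hax_update_mapping(); @flags is 0 for an addition and HAX_RAM_INFO_INVALID
 * for a removal, with HAX_RAM_INFO_ROM added here for ROM regions.
 */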
static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;
    uint32_t max_mapping_size;

    /* We only care about RAM and ROM regions */
    if (!memory_region_is_ram(mr)) {
        if (memory_region_is_romd(mr)) {
            /* HAXM kernel module does not support ROMD yet */
            warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
                        start_pa, start_pa + size);
        }
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. (Cf
     * kvm_set_phys_mem() in kvm-all.c).
     */
    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
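    /*
     * e.g. with 4 KiB pages, start_pa == 0x12345 gives
     * delta == (0x1000 - 0x345) & 0xfff == 0xcbb, so start_pa is rounded up
     * to 0x13000 below and size shrinks accordingly.
     */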
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;
    if (memory_region_is_rom(section->mr)) {
        flags |= HAX_RAM_INFO_ROM;
    }

    /*
     * The kernel module interface uses 32-bit sizes:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
     *
     * If the mapping size is longer than 32 bits, we can't process it in one
     * call into the kernel. Instead, we split the mapping into smaller ones,
     * and call hax_update_mapping() on each.
     */
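    /* the largest page-aligned value that fits in 32 bits, e.g. 0xfffff000 */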
    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
    while (size > max_mapping_size) {
        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
        start_pa += max_mapping_size;
        size -= max_mapping_size;
        host_va += max_mapping_size;
    }
    /* Now size <= max_mapping_size */
    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
}

static void hax_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    hax_process_section(section, 0);
}

static void hax_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hax_process_section(section, HAX_RAM_INFO_INVALID);
    memory_region_unref(section->mr);
}

static void hax_transaction_begin(MemoryListener *listener)
{
    g_assert(QTAILQ_EMPTY(&mappings));
}

static void hax_transaction_commit(MemoryListener *listener)
{
    if (!QTAILQ_EMPTY(&mappings)) {
        HAXMapping *entry, *next;

        if (DEBUG_HAX_MEM) {
            hax_mapping_dump_list();
        }
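        /* flush every pending update to the kernel module, then drop it */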
        QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
            if (entry->flags & HAX_RAM_INFO_INVALID) {
                /* for unmapping, put the values expected by the kernel */
                entry->flags = HAX_RAM_INFO_INVALID;
                entry->host_va = 0;
            }
            if (hax_set_ram(entry->start_pa, entry->size,
                            entry->host_va, entry->flags)) {
                fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
                        PRIx32 " flags %02x\n", __func__, entry->start_pa,
                        entry->size, entry->flags);
            }
            QTAILQ_REMOVE(&mappings, entry, entry);
            g_free(entry);
        }
    }
}

/* currently we fake the dirty bitmap sync, always dirty */
static void hax_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        /* Skip MMIO regions */
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener hax_memory_listener = {
    .name = "hax",
    .begin = hax_transaction_begin,
    .commit = hax_transaction_commit,
    .region_add = hax_region_add,
    .region_del = hax_region_del,
    .log_sync = hax_log_sync,
    .priority = 10,
};

static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
                                size_t max_size)
{
    /*
     * We must register each RAM block with the HAXM kernel module, or
     * hax_set_ram() will fail for any mapping into the RAM block:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
     *
     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
     * host physical pages for the RAM block as part of this registration
     * process, hence the name hax_populate_ram().
     */
    if (hax_populate_ram((uint64_t)(uintptr_t)host, max_size) < 0) {
        fprintf(stderr, "HAX failed to populate RAM\n");
        abort();
    }
}

static struct RAMBlockNotifier hax_ram_notifier = {
    .ram_block_added = hax_ram_block_added,
};

void hax_memory_init(void)
{
    ram_block_notifier_add(&hax_ram_notifier);
    memory_listener_register(&hax_memory_listener, &address_space_memory);
}