qemu/hw/i386/xen/xen-mapcache.c
/*
 * Copyright (C) 2011       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen_backend.h"
#include "qemu/bitmap.h"

#include <xen/hvm/params.h>

#include "sysemu/xen-mapcache.h"
#include "trace.h"


//#define MAPCACHE_DEBUG

#ifdef MAPCACHE_DEBUG
#  define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif

#if HOST_LONG_BITS == 32
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL << 31) /* 2GB cap */
#else
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL << 35) /* 32GB cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
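
/*
 * With the values above, a bucket covers 64KiB of guest-physical address
 * space on 32-bit hosts (shift 16) and 1MiB on 64-bit hosts (shift 20).
 * Illustrative sketch of the address split (64-bit host, shift 20):
 *
 *   phys_addr      = 0x12345678
 *   address_index  = phys_addr >> MCACHE_BUCKET_SHIFT     = 0x123
 *   address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1) = 0x45678
 */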

/*
 * This is the size of the virtual address space reserved to QEMU that
 * will not be used by the mapcache.
 * Empirical tests showed QEMU using roughly 75MB beyond max_mcache_size,
 * hence the 80MB reservation.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

typedef struct MapCacheEntry {
    hwaddr paddr_index;           /* guest address >> MCACHE_BUCKET_SHIFT */
    uint8_t *vaddr_base;          /* host mapping of this bucket */
    unsigned long *valid_mapping; /* per-page bitmap of successful maps */
    uint8_t lock;                 /* number of outstanding locked users */
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;   /* hash-collision chain */
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;

static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

/* Return 1 if all bits in [nr, nr + size) of @addr are set, 0 otherwise. */
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    return res >= nr + size;
}

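/*
 * Size the mapcache against RLIMIT_AS and allocate the bucket array.
 * When running as root, the address-space limit is lifted to infinity;
 * otherwise the cache is capped so that the mapcache plus QEMU's own
 * NON_MCACHE_MEMORY_SIZE head-room stay within the hard limit.
 *
 * Illustrative numbers (64-bit host): max_mcache_size = 32GiB covers
 * 2^23 4KiB pages; each 1MiB bucket covers 2^8 of those pages, so
 * nr_buckets = 2^15 = 32768 and the entry array is nr_buckets *
 * sizeof(MapCacheEntry), rounded up to a page boundary.
 */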
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_malloc0(sizeof (MapCache));

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

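/*
 * (Re)establish the host mapping backing one cache entry.  Any previous
 * mapping is torn down first (with a RAM-block remove notification if it
 * was a real foreign mapping).  Real mappings go through
 * xenforeignmemory_map2(); "dummy" entries are plain anonymous memory
 * used when a foreign mapping cannot be created yet.  err[] reports
 * per-page mapping failures, which are folded into valid_mapping below.
 */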
static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t));
    err = g_malloc0(nb_pfn * sizeof (int));

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size);
        }
        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
    }

    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE, 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately due to certain circumstances (e.g. right
         * after resume).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED, -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) *
            BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

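/*
 * Look up (and, if needed, create) the mapping for a guest-physical
 * range.  The bucket index is hashed into the entry array and collisions
 * are chained through entry->next.  A zero size means "one bucket,
 * unlocked"; that fast path is served from last_entry when possible.
 * Locked requests pin the entry via entry->lock and record a reverse
 * mapping (vaddr -> paddr) so the pointer can be translated back or
 * invalidated later.
 */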
static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* cache_size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    /* Walk the collision chain for a usable mapping, remembering the
     * first unlocked entry as a candidate for reuse. */
    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_malloc0(sizeof (MapCacheEntry));
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                    test_bit_size >> XC_PAGE_SHIFT,
                    entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
        entry->lock++;
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
    return mapcache->last_entry->vaddr_base + address_offset;
}

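/*
 * Public entry point: same contract as xen_map_cache_unlocked(), but
 * takes the mapcache lock around the lookup.
 */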
uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

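/*
 * Translate a pointer previously returned by a locked xen_map_cache()
 * call back into a guest RAM address by scanning the reverse-mapping
 * list.  The pointer must come from a locked mapping: an unknown
 * pointer is a fatal bug and aborts.
 */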
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
             ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

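/*
 * Drop one lock reference on the entry that backs @buffer.  The
 * reverse-mapping record is removed unconditionally; the mapping itself
 * is unmapped and freed only once its lock count reaches zero, and only
 * if it is a chained entry rather than the bucket head.
 */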
static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        DPRINTF("%s, could not find %p\n", __func__, buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
        return;
    }
    entry->lock--;
    /* Keep the mapping if it is still locked, or if it is the bucket
     * head (pentry == NULL), which lives in the entry array itself and
     * cannot be unlinked and freed. */
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("unmap fails");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

void xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(buffer);
    mapcache_unlock();
}

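/*
 * Flush every unlocked mapping, e.g. when the guest changes its physmap.
 * Pending block I/O is drained first so no AIO completion can touch a
 * mapping that is about to disappear; locked DMA mappings are reported
 * but deliberately left in place.
 */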
void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
                " "TARGET_FMT_plx" -> %p is present\n",
                reventry->paddr_index, reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

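/*
 * Swap the backing of an existing (typically dummy) entry from
 * old_phys_addr to new_phys_addr, reusing the entry's virtual address.
 * The lookup requires the same bucket-aligned size the entry was
 * created with; only whole-entry replacement is supported.
 */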
static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to update an entry for "TARGET_FMT_plx \
                " that is not in the mapcache!\n", old_phys_addr);
        return NULL;
    }

    address_index  = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \
            " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n",
                old_phys_addr);
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}