qemu/hw/i386/xen/xen-mapcache.c
/*
 * Copyright (C) 2011       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen-legacy-backend.h"
#include "qemu/bitmap.h"

#include "sysemu/xen-mapcache.h"
#include "trace.h"


//#define MAPCACHE_DEBUG

#ifdef MAPCACHE_DEBUG
#  define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif

#if HOST_LONG_BITS == 32
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL << 31) /* 2GB Cap */
#else
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL << 35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)

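/*
 * The cache is a hash table of bucket chains: a guest physical address
 * selects the entry at (address >> MCACHE_BUCKET_SHIFT) % nr_buckets, and
 * collisions are chained through MapCacheEntry::next. Each bucket covers
 * MCACHE_BUCKET_SIZE of guest address space: 64KB (16 pages of
 * XC_PAGE_SIZE) on 32-bit hosts, 1MB (256 pages) on 64-bit hosts.
 */
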
/* This is the size of the virtual address space reserved for QEMU that
 * will not be used by the MapCache.
 * Empirical tests have shown that QEMU uses about 75MB more than
 * max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

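/*
 * A MapCacheEntry describes one mapped bucket: which bucket it covers
 * (paddr_index), where it is mapped in QEMU's address space (vaddr_base),
 * a bitmap of the pages within the bucket that were mapped successfully
 * (valid_mapping), and a count of outstanding locked users. Entries with
 * XEN_MAPCACHE_ENTRY_DUMMY are anonymous placeholder mappings rather than
 * real foreign mappings. MapCacheRev records a locked mapping so it can
 * be looked up again by the pointer handed back to the caller.
 */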
typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint8_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;

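/*
 * All cache state is protected by mapcache->lock; the *_unlocked helpers
 * below assume the lock is already held by the caller.
 */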
static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

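/*
 * Returns 1 iff every bit in [nr, nr + size) is set in the bitmap, i.e.
 * iff the whole requested page range of a bucket was mapped successfully.
 */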
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    if (res >= nr + size) {
        return 1;
    } else {
        return 0;
    }
}

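/*
 * Set up the global map cache. When running as root the cache may grow to
 * MCACHE_MAX_SIZE; otherwise it is sized from RLIMIT_AS, minus the
 * NON_MCACHE_MEMORY_SIZE reserve. For example, on a 64-bit host with the
 * 32GB cap: 2^35 >> 12 = 2^23 pages, grouped 256 to a bucket, gives
 * nr_buckets = 32768.
 */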
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_malloc0(sizeof (MapCache));

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

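/*
 * (Re)populate one cache entry: tear down any previous mapping, then map
 * nb_pfn guest frames starting at the bucket's first frame. If dummy is
 * true, an anonymous mapping stands in for the foreign mapping (used when
 * the real frames cannot be mapped yet, e.g. during an incoming
 * migration). Per-page mapping failures reported in err[] are recorded in
 * entry->valid_mapping.
 */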
static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t));
    err = g_malloc0(nb_pfn * sizeof (int));

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size);
        }
        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("munmap failed");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
    }

    /*
     * If the caller has requested the mapping at a specific address use
     * MAP_FIXED to make sure it's honored.
     */
    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE,
                                           vaddr ? MAP_FIXED : 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately due to certain circumstances (e.g. on resume).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
                          -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    /*
     * Gate the notification on the type of the new mapping rather than on
     * entry->flags, which still describes the previous mapping at this
     * point (the flags are only updated below).
     */
    if (!dummy) {
        ram_block_notify_add(vaddr_base, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = g_malloc0(sizeof(unsigned long) *
            BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

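/*
 * Look up (and, if needed, create) a mapping for phys_addr. The fast path
 * reuses last_entry for small unlocked requests that hit the same bucket
 * again. Otherwise the bucket's chain is searched for a matching entry;
 * on a miss, the first unlocked entry found is remapped, or a new entry
 * is appended to the chain. If the pages still cannot be mapped, the
 * address is retried via the physmap translation hook
 * (XEN_COMPAT_PHYSMAP), then as a dummy mapping while an incoming
 * migration is in progress, before failing with NULL.
 */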
static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* cache_size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_malloc0(sizeof (MapCacheEntry));
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                    test_bit_size >> XC_PAGE_SHIFT,
                    entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
        entry->lock++;
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
    return mapcache->last_entry->vaddr_base + address_offset;
}

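/*
 * Public entry point; takes the mapcache lock around the lookup.
 *
 * lock == 0 returns a transient pointer that is only safe to use until
 * the cache is next updated; a non-zero lock pins the mapping until
 * xen_invalidate_map_cache_entry() is called on the returned pointer,
 * with dma marking the pinned mapping as in use for DMA.
 *
 * A minimal usage sketch (hypothetical address, size and data buffer):
 *
 *     uint8_t *p = xen_map_cache(0x100000, 4096, 1, false);
 *     if (p) {
 *         memcpy(p, data, 4096);              // access guest memory
 *         xen_invalidate_map_cache_entry(p);  // drop the pin
 *     }
 */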
uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

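/*
 * Translate a pointer previously returned by a locked xen_map_cache()
 * back to a guest ram_addr_t by searching the reverse (locked_entries)
 * list; aborts if the pointer is not a currently locked mapping.
 */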
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index,
                    reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
             ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

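/*
 * Drop one lock reference for buffer: remove its reverse-map record and,
 * when the lock count of a non-head entry reaches zero, unmap the bucket
 * and unlink it from its chain. Chain heads live in the entry array
 * itself and therefore stay in place.
 */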
static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        DPRINTF("%s, could not find %p\n", __func__, buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("munmap failed");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

void xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(buffer);
    mapcache_unlock();
}

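/*
 * Flush the whole cache, e.g. when the guest changes its physmap: every
 * unlocked bucket is unmapped and reset. Locked entries, which may still
 * be referenced by in-flight I/O, are left in place, with a warning
 * printed for the ones that were mapped for DMA.
 */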
void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
                " "TARGET_FMT_plx" -> %p is present\n",
                reventry->paddr_index, reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("munmap failed");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

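/*
 * Swap the backing of the existing entry for old_phys_addr so that it
 * maps new_phys_addr instead, reusing the same virtual address range
 * (xen_remap_bucket maps with MAP_FIXED at entry->vaddr_base). Used to
 * turn dummy mappings created during an incoming migration into real
 * foreign mappings once the frames become mappable.
 */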
static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to update an entry for "TARGET_FMT_plx \
                " that is not in the mapcache!\n", old_phys_addr);
        return NULL;
    }

    address_index  = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \
            " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n",
                old_phys_addr);
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}