qemu/hw/i386/xen/xen-mapcache.c
/*
 * Copyright (C) 2011       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen-legacy-backend.h"
#include "qemu/bitmap.h"

#include <xen/hvm/params.h>

#include "sysemu/xen-mapcache.h"
#include "trace.h"


//#define MAPCACHE_DEBUG

#ifdef MAPCACHE_DEBUG
#  define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif

#if HOST_LONG_BITS == 32
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#else
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
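
/*
 * The cache is organised as buckets of MCACHE_BUCKET_SIZE bytes:
 * 64KiB (1UL << 16) on 32-bit hosts, 1MiB (1UL << 20) on 64-bit hosts,
 * capped at 2GiB and 32GiB of total mappings respectively.  A guest
 * physical address therefore splits as:
 *
 *     bucket index  = phys_addr >> MCACHE_BUCKET_SHIFT
 *     bucket offset = phys_addr & (MCACHE_BUCKET_SIZE - 1)
 */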

/* This is the size of the virtual address space reserved for QEMU that
 * will not be used by the MapCache.
 * Empirical tests show that QEMU uses about 75MB more than
 * max_mcache_size, so reserve 80MB to be safe.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

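/*
 * One cache entry per mapped bucket; entries that hash to the same
 * slot are chained via @next.  @valid_mapping has one bit per
 * XC_PAGE_SIZE page, set when that page was mapped successfully, and
 * @lock counts the callers currently holding the mapping.
 */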
typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint8_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

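/*
 * The cache proper: an array of bucket chains indexed by
 * paddr_index % nr_buckets, plus a reverse list (locked_entries) for
 * translating locked virtual addresses back to guest physical
 * addresses.
 */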
typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;

static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

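/*
 * Return 1 if all @size bits starting at bit @nr are set in @addr,
 * 0 otherwise.  Used to check that every page of a requested range
 * was successfully mapped.
 */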
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    return res >= nr + size;
}

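/*
 * Allocate and initialise the global mapcache.  When running as root
 * the RLIMIT_AS address-space limit is lifted and the compile-time
 * MCACHE_MAX_SIZE cap applies; otherwise the cache is sized so that it
 * fits under the existing limit with NON_MCACHE_MEMORY_SIZE left over
 * for the rest of QEMU.
 */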
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_malloc0(sizeof (MapCache));

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

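    /*
     * nr_buckets = ceil(max_mcache_size / MCACHE_BUCKET_SIZE),
     * computed in XC_PAGE_SIZE units.  E.g. on a 64-bit host with the
     * full 32GiB cache this yields 32768 buckets of 1MiB each.
     */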
    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

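/*
 * (Re)populate @entry: tear down any existing mapping, then map the
 * guest frames that make up the bucket at @address_index.  If @vaddr
 * is non-NULL the new mapping is placed at exactly that address with
 * MAP_FIXED.  With @dummy set, anonymous memory is mapped instead of
 * foreign frames and the entry is flagged XEN_MAPCACHE_ENTRY_DUMMY so
 * it can be replaced by a real mapping later.
 */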
static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t));
    err = g_malloc0(nb_pfn * sizeof (int));

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size);
        }
        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
    }

    /*
     * If the caller has requested the mapping at a specific address use
     * MAP_FIXED to make sure it's honored.
     */
    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE,
                                           vaddr ? MAP_FIXED : 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * Create a dummy mapping when we cannot create a foreign
         * mapping immediately, e.g. because the guest is still being
         * restored (see the RUN_STATE_INMIGRATE check in
         * xen_map_cache_unlocked()).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
                          -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) *
            BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

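/*
 * Look up, mapping on demand, the virtual address backing guest
 * physical address @phys_addr.  A non-zero @lock pins the mapping and
 * records a reverse entry (with @dma noting DMA use) until it is
 * released with xen_invalidate_map_cache_entry().  A @size of zero
 * requests a transient, bucket-sized mapping that may be recycled by
 * any later call.
 */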
static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

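    /*
     * Fast path: repeated accesses to the same bucket can reuse
     * last_entry without walking the hash chain; only unlocked,
     * zero-size (single page) requests qualify.
     */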
    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* cache_size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

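    /*
     * Walk the collision chain looking for an entry that matches the
     * request and is fully mapped, remembering the first unlocked
     * entry as a reuse candidate.  For unlocked requests an unlocked
     * head entry is taken over directly, without walking the chain.
     */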
    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_malloc0(sizeof (MapCacheEntry));
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                    test_bit_size >> XC_PAGE_SHIFT,
                    entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

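    /*
     * Success: pin the mapping when requested and record a reverse
     * entry so the buffer can later be found by virtual address in
     * xen_ram_addr_from_mapcache() and
     * xen_invalidate_map_cache_entry().
     */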
    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
        entry->lock++;
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
    return mapcache->last_entry->vaddr_base + address_offset;
}

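/*
 * Public, thread-safe entry point: translate guest physical address
 * @phys_addr into a pointer usable by QEMU.
 *
 * Illustrative usage only (the addresses are made up): a device model
 * wanting a locked DMA view of one page at 0x100000 could do
 *
 *     uint8_t *p = xen_map_cache(0x100000, XC_PAGE_SIZE, 1, true);
 *     ... access p[0 .. XC_PAGE_SIZE - 1] ...
 *     xen_invalidate_map_cache_entry(p);
 */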
uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

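/*
 * Translate a pointer previously returned by a locked xen_map_cache()
 * call back into a guest ram_addr_t.  The pointer must match a live
 * reverse entry exactly; an unknown pointer is a fatal error.
 */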
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index,
                    reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
             ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

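/*
 * Release one lock reference on the mapping backing @buffer.  When the
 * last reference is dropped the entry is unmapped and freed, except
 * for the first entry of a bucket chain (pentry == NULL), which lives
 * in the bucket array itself and is kept for reuse.
 */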
static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        DPRINTF("%s, could not find %p\n", __func__, buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("unmap fails");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

void xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(buffer);
    mapcache_unlock();
}

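/*
 * Flush the whole cache, e.g. after the guest's physical memory map
 * changes.  Pending block I/O is drained first; locked mappings are
 * left in place, and any still locked for DMA at this point are
 * reported on stderr.
 */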
void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
                " "TARGET_FMT_plx" -> %p is present\n",
                reventry->paddr_index, reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

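/*
 * Remap the (dummy) entry that covers @old_phys_addr so that it maps
 * the real guest frames at @new_phys_addr, keeping the entry's
 * existing virtual address so that pointers already handed out remain
 * valid.
 */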
static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to update an entry for "TARGET_FMT_plx \
                " that is not in the mapcache!\n", old_phys_addr);
        return NULL;
    }

    address_index  = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \
            " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                test_bit_size >> XC_PAGE_SHIFT,
                entry->valid_mapping)) {
        DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n",
                old_phys_addr);
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}