qemu/include/exec/ram_addr.h
/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once; currently only
     * maintained and used in postcopy, where it is used to send the
     * dirtymap at the start of the postcopy phase
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * bitmap to track already cleared dirty bitmap.  When the bit is
     * set, it means the corresponding memory chunk needs a log-clear.
     * Set this to non-NULL to enable the capability to postpone
     * and split clearing of dirty bitmap on the remote node (e.g.,
     * KVM).  The bitmap will be set only when doing global sync.
     *
     * NOTE: this bitmap is different from the other bitmaps in that
     * one bit can represent multiple guest pages (which is decided by
     * the `clear_bmap_shift' variable below).  On the destination
     * side, this should always be NULL, and the variable
     * `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
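
/*
 * Worked example (illustrative): with shift = 6 each clear_bmap bit
 * covers 2^6 = 64 guest pages, so a range of 1000 guest pages needs
 * DIV_ROUND_UP(1000, 64) = 16 bits in the clear bitmap.
 */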

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}
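
/*
 * Usage sketch (illustrative only, not part of this header): before a
 * migration thread sends a page it can check whether the chunk covering
 * that page still has a postponed log-clear pending on the remote node,
 * and perform the clear lazily.  send_page() is a hypothetical sender;
 * "page" is the page number within the block.
 *
 *     if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
 *         uint64_t chunk_pages = 1UL << rb->clear_bmap_shift;
 *         uint64_t chunk_start = QEMU_ALIGN_DOWN(page, chunk_pages);
 *
 *         memory_region_clear_dirty_bitmap(rb->mr,
 *                                          chunk_start << TARGET_PAGE_BITS,
 *                                          chunk_pages << TARGET_PAGE_BITS);
 *     }
 *     send_page(rb, page);
 */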

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              of, or a bitwise OR of, the following values:
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
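
/*
 * Usage sketch (illustrative only, not part of this header): backing a
 * guest RAM region with a persistent-memory device, following the
 * parameter description above.  "/dev/dax0.0" is just an example path
 * and "mr" and "size" are assumed to be set up by the caller.
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(size, mr,
 *                                             RAM_SHARED | RAM_PMEM,
 *                                             "/dev/dax0.0", &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */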

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}

#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */
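
/*
 * Usage sketch (illustrative only, not part of this header): a dirty-log
 * consumer such as a KVM slot walker would hand the little-endian bitmap
 * it fetched from the kernel straight to the helper above.  "slot" is a
 * hypothetical structure carrying the fetched bitmap, the region's
 * ram_addr_t base and its size in target pages.
 *
 *     cpu_physical_memory_set_dirty_lebitmap(slot->dirty_bmap,
 *                                            slot->ram_start,
 *                                            slot->npages);
 */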

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages; we will also split the clearing
             * into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
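
/*
 * Usage sketch (illustrative only, not part of this header): how a
 * migration-style caller might fold the global dirty log into a block's
 * per-block bitmap.  The function must be called inside an RCU read
 * section; "total_dirty" is a hypothetical counter kept by the caller.
 *
 *     uint64_t real_dirty = 0;
 *     uint64_t newly_dirty;
 *
 *     rcu_read_lock();
 *     newly_dirty = cpu_physical_memory_sync_dirty_bitmap(
 *             rb, 0, rb->used_length, &real_dirty);
 *     rcu_read_unlock();
 *
 *     total_dirty += newly_dirty;
 */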
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */