qemu/util/mmap-alloc.c
/*
 * Support for RAM backed by mmapped host memory.
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#ifdef CONFIG_LINUX
#include <linux/mman.h>
#else  /* !CONFIG_LINUX */
#define MAP_SYNC              0x0
#define MAP_SHARED_VALIDATE   0x0
#endif /* CONFIG_LINUX */

#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"

#define HUGETLBFS_MAGIC       0x958458f6

#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif

size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    if (fd != -1) {
        do {
            ret = fstatfs(fd, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
            return fs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return qemu_real_host_page_size;
}

size_t qemu_mempath_getpagesize(const char *mem_path)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    if (mem_path) {
        do {
            ret = statfs(mem_path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                    strerror(errno));
            exit(1);
        }

        if (fs.f_type == HUGETLBFS_MAGIC) {
            /* It's a hugetlbfs file; return the huge page size */
            return fs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return qemu_real_host_page_size;
}
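
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * combine the two probes above to pick an mmap alignment for a backend.
 * The helper name and the fallback policy are assumptions for illustration.
 */
static inline size_t example_backend_alignment(int fd, const char *mem_path)
{
    /* Prefer the fd when we have one; it reflects the file actually opened */
    size_t pagesize = (fd != -1) ? qemu_fd_getpagesize(fd)
                                 : qemu_mempath_getpagesize(mem_path);

    /* Any mmap alignment must be at least the backing page size */
    return MAX(pagesize, (size_t)qemu_real_host_page_size);
}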

void *qemu_ram_mmap(int fd,
                    size_t size,
                    size_t align,
                    bool readonly,
                    bool shared,
                    bool is_pmem,
                    off_t map_offset)
{
    int prot;
    int flags;
    int map_sync_flags = 0;
    int guardfd;
    size_t offset;
    size_t pagesize;
    size_t total;
    void *guardptr;
    void *ptr;

    /*
     * Note: this always allocates at least one extra page of virtual address
     * space, even if size is already aligned.
     */
    total = size + align;
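
    /*
     * Illustrative layout of the reservation once the steps below have run:
     * the leading padding up to the first align-multiple is unmapped again,
     * and a single trailing PROT_NONE page is kept as a guard page.
     *
     *  guardptr       ptr = guardptr + offset
     *  |<-- offset -->|<--------- size --------->|<-- pagesize -->|
     *  [  unmapped    ][      usable RAM         ][   PROT_NONE   ]
     */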

#if defined(__powerpc64__) && defined(__linux__)
    /*
     * On ppc64, mappings in the same segment (aka slice) must share the same
     * page size. Since we will be re-allocating part of this segment from the
     * supplied fd, we must make sure to use the same page size; to this end
     * we mmap the supplied fd for the guard region as well. In that case we
     * set MAP_NORESERVE to avoid allocating backing store memory. We skip
     * this when using the system page size, in which case anonymous memory
     * is fine.
     */
    flags = MAP_PRIVATE;
    pagesize = qemu_fd_getpagesize(fd);
    if (fd == -1 || pagesize == qemu_real_host_page_size) {
        guardfd = -1;
        flags |= MAP_ANONYMOUS;
    } else {
        guardfd = fd;
        flags |= MAP_NORESERVE;
    }
#else
    guardfd = -1;
    pagesize = qemu_real_host_page_size;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#endif

    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);

    if (guardptr == MAP_FAILED) {
        return MAP_FAILED;
    }

    assert(is_power_of_2(align));
    /* Always align to host page size */
    assert(align >= pagesize);

    flags = MAP_FIXED;
    flags |= fd == -1 ? MAP_ANONYMOUS : 0;
    flags |= shared ? MAP_SHARED : MAP_PRIVATE;
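
    /*
     * For pmem backends, MAP_SYNC guarantees that stores to the mapping
     * reach the persistence domain without a further flush; the kernel
     * only honours MAP_SYNC together with MAP_SHARED_VALIDATE, so the two
     * flags are requested as a pair below.
     */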
    if (shared && is_pmem) {
        map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
    }
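
    /*
     * Worked example (illustrative): with guardptr == 0x7f0000001000 and
     * align == 2 MiB, QEMU_ALIGN_UP() yields 0x7f0000200000, so offset ==
     * 0x1ff000 bytes of leading padding, which is unmapped further below.
     */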
    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;

    prot = PROT_READ | (readonly ? 0 : PROT_WRITE);

    ptr = mmap(guardptr + offset, size, prot,
               flags | map_sync_flags, fd, map_offset);

    if (ptr == MAP_FAILED && map_sync_flags) {
        if (errno == ENOTSUP) {
            char *proc_link, *file_name;
            int len;
            proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
            file_name = g_malloc0(PATH_MAX);
            len = readlink(proc_link, file_name, PATH_MAX - 1);
            if (len < 0) {
                len = 0;
            }
            file_name[len] = '\0';
            fprintf(stderr, "Warning: requesting persistence across crashes "
                    "for backend file %s failed. Proceeding without "
                    "persistence, data might become corrupted in case of host "
                    "crash.\n", file_name);
            g_free(proc_link);
            g_free(file_name);
        }
        /*
         * If mmap() failed with MAP_SHARED_VALIDATE | MAP_SYNC, retry
         * without these flags for compatibility with hosts that do not
         * support them.
         */
        ptr = mmap(guardptr + offset, size, prot, flags, fd, map_offset);
    }

    if (ptr == MAP_FAILED) {
        munmap(guardptr, total);
        return MAP_FAILED;
    }

    if (offset > 0) {
        munmap(guardptr, offset);
    }

    /*
     * Leave a single PROT_NONE page allocated after the RAM block, to serve
     * as a guard page protecting against potential buffer overflows.
     */
    total -= offset;
    if (total > size + pagesize) {
        munmap(ptr + size + pagesize, total - size - pagesize);
    }

    return ptr;
}

void qemu_ram_munmap(int fd, void *ptr, size_t size)
{
    size_t pagesize;

    if (ptr) {
        /* Unmap both the RAM block and the guard page */
#if defined(__powerpc64__) && defined(__linux__)
        pagesize = qemu_fd_getpagesize(fd);
#else
        pagesize = qemu_real_host_page_size;
#endif
        munmap(ptr, size + pagesize);
    }
}
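
/*
 * Illustrative usage sketch, not part of the original file: map a private,
 * anonymous RAM block at the minimal legal alignment, then release it.
 * The function name and error handling are assumptions for illustration.
 */
static inline void example_map_anonymous_ram(void)
{
    size_t size = 64 * 1024 * 1024;   /* 64 MiB */
    void *ptr = qemu_ram_mmap(-1 /* anonymous */, size,
                              qemu_real_host_page_size /* align */,
                              false /* readonly */, false /* shared */,
                              false /* is_pmem */, 0 /* map_offset */);

    if (ptr == MAP_FAILED) {
        fprintf(stderr, "qemu_ram_mmap failed: %s\n", strerror(errno));
        return;
    }

    /* ... use the RAM block ... */

    qemu_ram_munmap(-1, ptr, size);
}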