/* qemu/util/mmap-alloc.c */
/*
 * Support for RAM backed by mmapped host memory.
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
  12
  13#ifdef CONFIG_LINUX
  14#include <linux/mman.h>
  15#else  /* !CONFIG_LINUX */
  16#define MAP_SYNC              0x0
  17#define MAP_SHARED_VALIDATE   0x0
  18#endif /* CONFIG_LINUX */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/mmap-alloc.h"
  22#include "qemu/host-utils.h"
  23
  24#define HUGETLBFS_MAGIC       0x958458f6
  25
  26#ifdef CONFIG_LINUX
  27#include <sys/vfs.h>
  28#endif
  29
  30size_t qemu_fd_getpagesize(int fd)
  31{
  32#ifdef CONFIG_LINUX
  33    struct statfs fs;
  34    int ret;
  35
  36    if (fd != -1) {
  37        do {
  38            ret = fstatfs(fd, &fs);
  39        } while (ret != 0 && errno == EINTR);
  40
  41        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
  42            return fs.f_bsize;
  43        }
  44    }
  45#ifdef __sparc__
  46    /* SPARC Linux needs greater alignment than the pagesize */
  47    return QEMU_VMALLOC_ALIGN;
  48#endif
  49#endif
  50
  51    return getpagesize();
  52}
  53
  54size_t qemu_mempath_getpagesize(const char *mem_path)
  55{
  56#ifdef CONFIG_LINUX
  57    struct statfs fs;
  58    int ret;
  59
  60    if (mem_path) {
  61        do {
  62            ret = statfs(mem_path, &fs);
  63        } while (ret != 0 && errno == EINTR);
  64
  65        if (ret != 0) {
  66            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
  67                    strerror(errno));
  68            exit(1);
  69        }
  70
  71        if (fs.f_type == HUGETLBFS_MAGIC) {
  72            /* It's hugepage, return the huge page size */
  73            return fs.f_bsize;
  74        }
  75    }
  76#ifdef __sparc__
  77    /* SPARC Linux needs greater alignment than the pagesize */
  78    return QEMU_VMALLOC_ALIGN;
  79#endif
  80#endif
  81
  82    return getpagesize();
  83}
  84
  85void *qemu_ram_mmap(int fd,
  86                    size_t size,
  87                    size_t align,
  88                    bool shared,
  89                    bool is_pmem)
  90{
  91    int flags;
  92    int map_sync_flags = 0;
  93    int guardfd;
  94    size_t offset;
  95    size_t pagesize;
  96    size_t total;
  97    void *guardptr;
  98    void *ptr;
  99
 100    /*
 101     * Note: this always allocates at least one extra page of virtual address
 102     * space, even if size is already aligned.
 103     */
 104    total = size + align;
 105
 106#if defined(__powerpc64__) && defined(__linux__)
 107    /* On ppc64 mappings in the same segment (aka slice) must share the same
 108     * page size. Since we will be re-allocating part of this segment
 109     * from the supplied fd, we should make sure to use the same page size, to
 110     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
 111     * avoid allocating backing store memory.
 112     * We do this unless we are using the system page size, in which case
 113     * anonymous memory is OK.
 114     */
 115    flags = MAP_PRIVATE;
 116    pagesize = qemu_fd_getpagesize(fd);
 117    if (fd == -1 || pagesize == getpagesize()) {
 118        guardfd = -1;
 119        flags |= MAP_ANONYMOUS;
 120    } else {
 121        guardfd = fd;
 122        flags |= MAP_NORESERVE;
 123    }
 124#else
 125    guardfd = -1;
 126    pagesize = getpagesize();
 127    flags = MAP_PRIVATE | MAP_ANONYMOUS;
 128#endif
 129
 130    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
 131
 132    if (guardptr == MAP_FAILED) {
 133        return MAP_FAILED;
 134    }
 135
 136    assert(is_power_of_2(align));
 137    /* Always align to host page size */
 138    assert(align >= pagesize);
 139
 140    flags = MAP_FIXED;
 141    flags |= fd == -1 ? MAP_ANONYMOUS : 0;
 142    flags |= shared ? MAP_SHARED : MAP_PRIVATE;
 143    if (shared && is_pmem) {
 144        map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
 145    }
 146
 147    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
 148
 149    ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
 150               flags | map_sync_flags, fd, 0);
 151
 152    if (ptr == MAP_FAILED && map_sync_flags) {
 153        if (errno == ENOTSUP) {
 154            char *proc_link, *file_name;
 155            int len;
 156            proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
 157            file_name = g_malloc0(PATH_MAX);
 158            len = readlink(proc_link, file_name, PATH_MAX - 1);
 159            if (len < 0) {
 160                len = 0;
 161            }
 162            file_name[len] = '\0';
 163            fprintf(stderr, "Warning: requesting persistence across crashes "
 164                    "for backend file %s failed. Proceeding without "
 165                    "persistence, data might become corrupted in case of host "
 166                    "crash.\n", file_name);
 167            g_free(proc_link);
 168            g_free(file_name);
 169        }
 170        /*
 171         * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
 172         * we will remove these flags to handle compatibility.
 173         */
 174        ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
 175                   flags, fd, 0);
 176    }
 177
 178    if (ptr == MAP_FAILED) {
 179        munmap(guardptr, total);
 180        return MAP_FAILED;
 181    }
 182
 183    if (offset > 0) {
 184        munmap(guardptr, offset);
 185    }
 186
 187    /*
 188     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
 189     * a guard page guarding against potential buffer overflows.
 190     */
 191    total -= offset;
 192    if (total > size + pagesize) {
 193        munmap(ptr + size + pagesize, total - size - pagesize);
 194    }
 195
 196    return ptr;
 197}
 198
 199void qemu_ram_munmap(int fd, void *ptr, size_t size)
 200{
 201    size_t pagesize;
 202
 203    if (ptr) {
 204        /* Unmap both the RAM block and the guard page */
 205#if defined(__powerpc64__) && defined(__linux__)
 206        pagesize = qemu_fd_getpagesize(fd);
 207#else
 208        pagesize = getpagesize();
 209#endif
 210        munmap(ptr, size + pagesize);
 211    }
 212}
 213