qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "hw/boards.h"
  16#include "qapi/error.h"
  17#include "qapi/qapi-builtin-visit.h"
  18#include "qapi/visitor.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21#include "qemu/mmap-alloc.h"
  22
  23#ifdef CONFIG_NUMA
  24#include <numaif.h>
  25QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  29#endif
  30
  31static void
  32host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  33                             void *opaque, Error **errp)
  34{
  35    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  36    uint64_t value = backend->size;
  37
  38    visit_type_size(v, name, &value, errp);
  39}
  40
  41static void
  42host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  43                             void *opaque, Error **errp)
  44{
  45    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  46    Error *local_err = NULL;
  47    uint64_t value;
  48
  49    if (host_memory_backend_mr_inited(backend)) {
  50        error_setg(&local_err, "cannot change property value");
  51        goto out;
  52    }
  53
  54    visit_type_size(v, name, &value, &local_err);
  55    if (local_err) {
  56        goto out;
  57    }
  58    if (!value) {
  59        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
  60                   PRIu64 "'", object_get_typename(obj), name, value);
  61        goto out;
  62    }
  63    backend->size = value;
  64out:
  65    error_propagate(errp, local_err);
  66}
  67
  68static void
  69host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  70                                   void *opaque, Error **errp)
  71{
  72    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  73    uint16List *host_nodes = NULL;
  74    uint16List **node = &host_nodes;
  75    unsigned long value;
  76
  77    value = find_first_bit(backend->host_nodes, MAX_NODES);
  78    if (value == MAX_NODES) {
  79        return;
  80    }
  81
  82    *node = g_malloc0(sizeof(**node));
  83    (*node)->value = value;
  84    node = &(*node)->next;
  85
  86    do {
  87        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  88        if (value == MAX_NODES) {
  89            break;
  90        }
  91
  92        *node = g_malloc0(sizeof(**node));
  93        (*node)->value = value;
  94        node = &(*node)->next;
  95    } while (true);
  96
  97    visit_type_uint16List(v, name, &host_nodes, errp);
  98}
  99
 100static void
 101host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 102                                   void *opaque, Error **errp)
 103{
 104#ifdef CONFIG_NUMA
 105    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 106    uint16List *l = NULL;
 107
 108    visit_type_uint16List(v, name, &l, errp);
 109
 110    while (l) {
 111        bitmap_set(backend->host_nodes, l->value, 1);
 112        l = l->next;
 113    }
 114#else
 115    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 116#endif
 117}
 118
 119static int
 120host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 121{
 122    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 123    return backend->policy;
 124}
 125
 126static void
 127host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 128{
 129    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 130    backend->policy = policy;
 131
 132#ifndef CONFIG_NUMA
 133    if (policy != HOST_MEM_POLICY_DEFAULT) {
 134        error_setg(errp, "NUMA policies are not supported by this QEMU");
 135    }
 136#endif
 137}
 138
 139static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 140{
 141    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 142
 143    return backend->merge;
 144}
 145
 146static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 147{
 148    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 149
 150    if (!host_memory_backend_mr_inited(backend)) {
 151        backend->merge = value;
 152        return;
 153    }
 154
 155    if (value != backend->merge) {
 156        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 157        uint64_t sz = memory_region_size(&backend->mr);
 158
 159        qemu_madvise(ptr, sz,
 160                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 161        backend->merge = value;
 162    }
 163}
 164
 165static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 166{
 167    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 168
 169    return backend->dump;
 170}
 171
 172static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 173{
 174    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 175
 176    if (!host_memory_backend_mr_inited(backend)) {
 177        backend->dump = value;
 178        return;
 179    }
 180
 181    if (value != backend->dump) {
 182        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 183        uint64_t sz = memory_region_size(&backend->mr);
 184
 185        qemu_madvise(ptr, sz,
 186                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 187        backend->dump = value;
 188    }
 189}
 190
 191static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 192{
 193    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 194
 195    return backend->prealloc || backend->force_prealloc;
 196}
 197
 198static void host_memory_backend_set_prealloc(Object *obj, bool value,
 199                                             Error **errp)
 200{
 201    Error *local_err = NULL;
 202    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 203
 204    if (backend->force_prealloc) {
 205        if (value) {
 206            error_setg(errp,
 207                       "remove -mem-prealloc to use the prealloc property");
 208            return;
 209        }
 210    }
 211
 212    if (!host_memory_backend_mr_inited(backend)) {
 213        backend->prealloc = value;
 214        return;
 215    }
 216
 217    if (value && !backend->prealloc) {
 218        int fd = memory_region_get_fd(&backend->mr);
 219        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 220        uint64_t sz = memory_region_size(&backend->mr);
 221
 222        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
 223        if (local_err) {
 224            error_propagate(errp, local_err);
 225            return;
 226        }
 227        backend->prealloc = true;
 228    }
 229}
 230
 231static void host_memory_backend_init(Object *obj)
 232{
 233    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 234    MachineState *machine = MACHINE(qdev_get_machine());
 235
 236    backend->merge = machine_mem_merge(machine);
 237    backend->dump = machine_dump_guest_core(machine);
 238    backend->prealloc = mem_prealloc;
 239}
 240
 241bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 242{
 243    /*
 244     * NOTE: We forbid zero-length memory backend, so here zero means
 245     * "we haven't inited the backend memory region yet".
 246     */
 247    return memory_region_size(&backend->mr) != 0;
 248}
 249
 250MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 251{
 252    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 253}
 254
 255void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 256{
 257    backend->is_mapped = mapped;
 258}
 259
 260bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 261{
 262    return backend->is_mapped;
 263}
 264
 265#ifdef __linux__
 266size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 267{
 268    Object *obj = OBJECT(memdev);
 269    char *path = object_property_get_str(obj, "mem-path", NULL);
 270    size_t pagesize = qemu_mempath_getpagesize(path);
 271
 272    g_free(path);
 273    return pagesize;
 274}
 275#else
 276size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 277{
 278    return getpagesize();
 279}
 280#endif
 281
 282static void
 283host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 284{
 285    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 286    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 287    Error *local_err = NULL;
 288    void *ptr;
 289    uint64_t sz;
 290
 291    if (bc->alloc) {
 292        bc->alloc(backend, &local_err);
 293        if (local_err) {
 294            goto out;
 295        }
 296
 297        ptr = memory_region_get_ram_ptr(&backend->mr);
 298        sz = memory_region_size(&backend->mr);
 299
 300        if (backend->merge) {
 301            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 302        }
 303        if (!backend->dump) {
 304            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 305        }
 306#ifdef CONFIG_NUMA
 307        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 308        /* lastbit == MAX_NODES means maxnode = 0 */
 309        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 310        /* ensure policy won't be ignored in case memory is preallocated
 311         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 312         * this doesn't catch hugepage case. */
 313        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 314
 315        /* check for invalid host-nodes and policies and give more verbose
 316         * error messages than mbind(). */
 317        if (maxnode && backend->policy == MPOL_DEFAULT) {
 318            error_setg(errp, "host-nodes must be empty for policy default,"
 319                       " or you should explicitly specify a policy other"
 320                       " than default");
 321            return;
 322        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 323            error_setg(errp, "host-nodes must be set for policy %s",
 324                       HostMemPolicy_str(backend->policy));
 325            return;
 326        }
 327
 328        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 329         * as argument to mbind() due to an old Linux bug (feature?) which
 330         * cuts off the last specified node. This means backend->host_nodes
 331         * must have MAX_NODES+1 bits available.
 332         */
 333        assert(sizeof(backend->host_nodes) >=
 334               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 335        assert(maxnode <= MAX_NODES);
 336        if (mbind(ptr, sz, backend->policy,
 337                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
 338            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 339                error_setg_errno(errp, errno,
 340                                 "cannot bind memory to host NUMA nodes");
 341                return;
 342            }
 343        }
 344#endif
 345        /* Preallocate memory after the NUMA policy has been instantiated.
 346         * This is necessary to guarantee memory is allocated with
 347         * specified NUMA policy in place.
 348         */
 349        if (backend->prealloc) {
 350            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 351                            smp_cpus, &local_err);
 352            if (local_err) {
 353                goto out;
 354            }
 355        }
 356    }
 357out:
 358    error_propagate(errp, local_err);
 359}
 360
 361static bool
 362host_memory_backend_can_be_deleted(UserCreatable *uc)
 363{
 364    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 365        return false;
 366    } else {
 367        return true;
 368    }
 369}
 370
 371static bool host_memory_backend_get_share(Object *o, Error **errp)
 372{
 373    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 374
 375    return backend->share;
 376}
 377
 378static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 379{
 380    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 381
 382    if (host_memory_backend_mr_inited(backend)) {
 383        error_setg(errp, "cannot change property value");
 384        return;
 385    }
 386    backend->share = value;
 387}
 388
 389static void
 390host_memory_backend_class_init(ObjectClass *oc, void *data)
 391{
 392    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 393
 394    ucc->complete = host_memory_backend_memory_complete;
 395    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 396
 397    object_class_property_add_bool(oc, "merge",
 398        host_memory_backend_get_merge,
 399        host_memory_backend_set_merge, &error_abort);
 400    object_class_property_set_description(oc, "merge",
 401        "Mark memory as mergeable", &error_abort);
 402    object_class_property_add_bool(oc, "dump",
 403        host_memory_backend_get_dump,
 404        host_memory_backend_set_dump, &error_abort);
 405    object_class_property_set_description(oc, "dump",
 406        "Set to 'off' to exclude from core dump", &error_abort);
 407    object_class_property_add_bool(oc, "prealloc",
 408        host_memory_backend_get_prealloc,
 409        host_memory_backend_set_prealloc, &error_abort);
 410    object_class_property_set_description(oc, "prealloc",
 411        "Preallocate memory", &error_abort);
 412    object_class_property_add(oc, "size", "int",
 413        host_memory_backend_get_size,
 414        host_memory_backend_set_size,
 415        NULL, NULL, &error_abort);
 416    object_class_property_set_description(oc, "size",
 417        "Size of the memory region (ex: 500M)", &error_abort);
 418    object_class_property_add(oc, "host-nodes", "int",
 419        host_memory_backend_get_host_nodes,
 420        host_memory_backend_set_host_nodes,
 421        NULL, NULL, &error_abort);
 422    object_class_property_set_description(oc, "host-nodes",
 423        "Binds memory to the list of NUMA host nodes", &error_abort);
 424    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 425        &HostMemPolicy_lookup,
 426        host_memory_backend_get_policy,
 427        host_memory_backend_set_policy, &error_abort);
 428    object_class_property_set_description(oc, "policy",
 429        "Set the NUMA policy", &error_abort);
 430    object_class_property_add_bool(oc, "share",
 431        host_memory_backend_get_share, host_memory_backend_set_share,
 432        &error_abort);
 433    object_class_property_set_description(oc, "share",
 434        "Mark the memory as private to QEMU or shared", &error_abort);
 435}
 436
 437static const TypeInfo host_memory_backend_info = {
 438    .name = TYPE_MEMORY_BACKEND,
 439    .parent = TYPE_OBJECT,
 440    .abstract = true,
 441    .class_size = sizeof(HostMemoryBackendClass),
 442    .class_init = host_memory_backend_class_init,
 443    .instance_size = sizeof(HostMemoryBackend),
 444    .instance_init = host_memory_backend_init,
 445    .interfaces = (InterfaceInfo[]) {
 446        { TYPE_USER_CREATABLE },
 447        { }
 448    }
 449};
 450
 451static void register_types(void)
 452{
 453    type_register_static(&host_memory_backend_info);
 454}
 455
 456type_init(register_types);
 457