qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12#include "qemu/osdep.h"
  13#include "sysemu/hostmem.h"
  14#include "hw/boards.h"
  15#include "qapi/error.h"
  16#include "qapi/visitor.h"
  17#include "qapi-types.h"
  18#include "qapi-visit.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21
  22#ifdef CONFIG_NUMA
  23#include <numaif.h>
  24QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  25QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  28#endif
  29
  30static void
  31host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  32                             void *opaque, Error **errp)
  33{
  34    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  35    uint64_t value = backend->size;
  36
  37    visit_type_size(v, name, &value, errp);
  38}
  39
  40static void
  41host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  42                             void *opaque, Error **errp)
  43{
  44    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  45    Error *local_err = NULL;
  46    uint64_t value;
  47
  48    if (memory_region_size(&backend->mr)) {
  49        error_setg(&local_err, "cannot change property value");
  50        goto out;
  51    }
  52
  53    visit_type_size(v, name, &value, &local_err);
  54    if (local_err) {
  55        goto out;
  56    }
  57    if (!value) {
  58        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
  59                   PRIu64 "'", object_get_typename(obj), name, value);
  60        goto out;
  61    }
  62    backend->size = value;
  63out:
  64    error_propagate(errp, local_err);
  65}
  66
  67static uint16List **host_memory_append_node(uint16List **node,
  68                                            unsigned long value)
  69{
  70     *node = g_malloc0(sizeof(**node));
  71     (*node)->value = value;
  72     return &(*node)->next;
  73}
  74
  75static void
  76host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  77                                   void *opaque, Error **errp)
  78{
  79    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  80    uint16List *host_nodes = NULL;
  81    uint16List **node = &host_nodes;
  82    unsigned long value;
  83
  84    value = find_first_bit(backend->host_nodes, MAX_NODES);
  85
  86    node = host_memory_append_node(node, value);
  87
  88    if (value == MAX_NODES) {
  89        goto out;
  90    }
  91
  92    do {
  93        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  94        if (value == MAX_NODES) {
  95            break;
  96        }
  97
  98        node = host_memory_append_node(node, value);
  99    } while (true);
 100
 101out:
 102    visit_type_uint16List(v, name, &host_nodes, errp);
 103}
 104
 105static void
 106host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 107                                   void *opaque, Error **errp)
 108{
 109#ifdef CONFIG_NUMA
 110    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 111    uint16List *l = NULL;
 112
 113    visit_type_uint16List(v, name, &l, errp);
 114
 115    while (l) {
 116        bitmap_set(backend->host_nodes, l->value, 1);
 117        l = l->next;
 118    }
 119#else
 120    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 121#endif
 122}
 123
 124static int
 125host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 126{
 127    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 128    return backend->policy;
 129}
 130
 131static void
 132host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 133{
 134    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 135    backend->policy = policy;
 136
 137#ifndef CONFIG_NUMA
 138    if (policy != HOST_MEM_POLICY_DEFAULT) {
 139        error_setg(errp, "NUMA policies are not supported by this QEMU");
 140    }
 141#endif
 142}
 143
 144static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 145{
 146    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 147
 148    return backend->merge;
 149}
 150
 151static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 152{
 153    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 154
 155    if (!memory_region_size(&backend->mr)) {
 156        backend->merge = value;
 157        return;
 158    }
 159
 160    if (value != backend->merge) {
 161        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 162        uint64_t sz = memory_region_size(&backend->mr);
 163
 164        qemu_madvise(ptr, sz,
 165                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 166        backend->merge = value;
 167    }
 168}
 169
 170static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 171{
 172    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 173
 174    return backend->dump;
 175}
 176
 177static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 178{
 179    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 180
 181    if (!memory_region_size(&backend->mr)) {
 182        backend->dump = value;
 183        return;
 184    }
 185
 186    if (value != backend->dump) {
 187        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 188        uint64_t sz = memory_region_size(&backend->mr);
 189
 190        qemu_madvise(ptr, sz,
 191                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 192        backend->dump = value;
 193    }
 194}
 195
 196static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 197{
 198    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 199
 200    return backend->prealloc || backend->force_prealloc;
 201}
 202
 203static void host_memory_backend_set_prealloc(Object *obj, bool value,
 204                                             Error **errp)
 205{
 206    Error *local_err = NULL;
 207    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 208
 209    if (backend->force_prealloc) {
 210        if (value) {
 211            error_setg(errp,
 212                       "remove -mem-prealloc to use the prealloc property");
 213            return;
 214        }
 215    }
 216
 217    if (!memory_region_size(&backend->mr)) {
 218        backend->prealloc = value;
 219        return;
 220    }
 221
 222    if (value && !backend->prealloc) {
 223        int fd = memory_region_get_fd(&backend->mr);
 224        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 225        uint64_t sz = memory_region_size(&backend->mr);
 226
 227        os_mem_prealloc(fd, ptr, sz, &local_err);
 228        if (local_err) {
 229            error_propagate(errp, local_err);
 230            return;
 231        }
 232        backend->prealloc = true;
 233    }
 234}
 235
 236static void host_memory_backend_init(Object *obj)
 237{
 238    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 239    MachineState *machine = MACHINE(qdev_get_machine());
 240
 241    backend->merge = machine_mem_merge(machine);
 242    backend->dump = machine_dump_guest_core(machine);
 243    backend->prealloc = mem_prealloc;
 244
 245    object_property_add_bool(obj, "merge",
 246                        host_memory_backend_get_merge,
 247                        host_memory_backend_set_merge, NULL);
 248    object_property_add_bool(obj, "dump",
 249                        host_memory_backend_get_dump,
 250                        host_memory_backend_set_dump, NULL);
 251    object_property_add_bool(obj, "prealloc",
 252                        host_memory_backend_get_prealloc,
 253                        host_memory_backend_set_prealloc, NULL);
 254    object_property_add(obj, "size", "int",
 255                        host_memory_backend_get_size,
 256                        host_memory_backend_set_size, NULL, NULL, NULL);
 257    object_property_add(obj, "host-nodes", "int",
 258                        host_memory_backend_get_host_nodes,
 259                        host_memory_backend_set_host_nodes, NULL, NULL, NULL);
 260    object_property_add_enum(obj, "policy", "HostMemPolicy",
 261                             HostMemPolicy_lookup,
 262                             host_memory_backend_get_policy,
 263                             host_memory_backend_set_policy, NULL);
 264}
 265
 266MemoryRegion *
 267host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 268{
 269    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
 270}
 271
 272void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 273{
 274    backend->is_mapped = mapped;
 275}
 276
 277bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 278{
 279    return backend->is_mapped;
 280}
 281
 282static void
 283host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 284{
 285    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 286    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 287    Error *local_err = NULL;
 288    void *ptr;
 289    uint64_t sz;
 290
 291    if (bc->alloc) {
 292        bc->alloc(backend, &local_err);
 293        if (local_err) {
 294            goto out;
 295        }
 296
 297        ptr = memory_region_get_ram_ptr(&backend->mr);
 298        sz = memory_region_size(&backend->mr);
 299
 300        if (backend->merge) {
 301            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 302        }
 303        if (!backend->dump) {
 304            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 305        }
 306#ifdef CONFIG_NUMA
 307        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 308        /* lastbit == MAX_NODES means maxnode = 0 */
 309        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 310        /* ensure policy won't be ignored in case memory is preallocated
 311         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 312         * this doesn't catch hugepage case. */
 313        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 314
 315        /* check for invalid host-nodes and policies and give more verbose
 316         * error messages than mbind(). */
 317        if (maxnode && backend->policy == MPOL_DEFAULT) {
 318            error_setg(errp, "host-nodes must be empty for policy default,"
 319                       " or you should explicitly specify a policy other"
 320                       " than default");
 321            return;
 322        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 323            error_setg(errp, "host-nodes must be set for policy %s",
 324                       HostMemPolicy_lookup[backend->policy]);
 325            return;
 326        }
 327
 328        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 329         * as argument to mbind() due to an old Linux bug (feature?) which
 330         * cuts off the last specified node. This means backend->host_nodes
 331         * must have MAX_NODES+1 bits available.
 332         */
 333        assert(sizeof(backend->host_nodes) >=
 334               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 335        assert(maxnode <= MAX_NODES);
 336        if (mbind(ptr, sz, backend->policy,
 337                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
 338            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 339                error_setg_errno(errp, errno,
 340                                 "cannot bind memory to host NUMA nodes");
 341                return;
 342            }
 343        }
 344#endif
 345        /* Preallocate memory after the NUMA policy has been instantiated.
 346         * This is necessary to guarantee memory is allocated with
 347         * specified NUMA policy in place.
 348         */
 349        if (backend->prealloc) {
 350            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 351                            &local_err);
 352            if (local_err) {
 353                goto out;
 354            }
 355        }
 356    }
 357out:
 358    error_propagate(errp, local_err);
 359}
 360
 361static bool
 362host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
 363{
 364    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 365        return false;
 366    } else {
 367        return true;
 368    }
 369}
 370
 371static void
 372host_memory_backend_class_init(ObjectClass *oc, void *data)
 373{
 374    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 375
 376    ucc->complete = host_memory_backend_memory_complete;
 377    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 378}
 379
 380static const TypeInfo host_memory_backend_info = {
 381    .name = TYPE_MEMORY_BACKEND,
 382    .parent = TYPE_OBJECT,
 383    .abstract = true,
 384    .class_size = sizeof(HostMemoryBackendClass),
 385    .class_init = host_memory_backend_class_init,
 386    .instance_size = sizeof(HostMemoryBackend),
 387    .instance_init = host_memory_backend_init,
 388    .interfaces = (InterfaceInfo[]) {
 389        { TYPE_USER_CREATABLE },
 390        { }
 391    }
 392};
 393
 394static void register_types(void)
 395{
 396    type_register_static(&host_memory_backend_info);
 397}
 398
 399type_init(register_types);
 400