qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12#include "qemu/osdep.h"
  13#include "sysemu/hostmem.h"
  14#include "hw/boards.h"
  15#include "qapi/error.h"
  16#include "qapi/visitor.h"
  17#include "qapi-types.h"
  18#include "qapi-visit.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21
  22#ifdef CONFIG_NUMA
  23#include <numaif.h>
  24QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  25QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  28#endif
  29
  30static void
  31host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  32                             void *opaque, Error **errp)
  33{
  34    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  35    uint64_t value = backend->size;
  36
  37    visit_type_size(v, name, &value, errp);
  38}
  39
  40static void
  41host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  42                             void *opaque, Error **errp)
  43{
  44    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  45    Error *local_err = NULL;
  46    uint64_t value;
  47
  48    if (host_memory_backend_mr_inited(backend)) {
  49        error_setg(&local_err, "cannot change property value");
  50        goto out;
  51    }
  52
  53    visit_type_size(v, name, &value, &local_err);
  54    if (local_err) {
  55        goto out;
  56    }
  57    if (!value) {
  58        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
  59                   PRIu64 "'", object_get_typename(obj), name, value);
  60        goto out;
  61    }
  62    backend->size = value;
  63out:
  64    error_propagate(errp, local_err);
  65}
  66
  67static void
  68host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  69                                   void *opaque, Error **errp)
  70{
  71    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  72    uint16List *host_nodes = NULL;
  73    uint16List **node = &host_nodes;
  74    unsigned long value;
  75
  76    value = find_first_bit(backend->host_nodes, MAX_NODES);
  77    if (value == MAX_NODES) {
  78        return;
  79    }
  80
  81    *node = g_malloc0(sizeof(**node));
  82    (*node)->value = value;
  83    node = &(*node)->next;
  84
  85    do {
  86        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  87        if (value == MAX_NODES) {
  88            break;
  89        }
  90
  91        *node = g_malloc0(sizeof(**node));
  92        (*node)->value = value;
  93        node = &(*node)->next;
  94    } while (true);
  95
  96    visit_type_uint16List(v, name, &host_nodes, errp);
  97}
  98
  99static void
 100host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 101                                   void *opaque, Error **errp)
 102{
 103#ifdef CONFIG_NUMA
 104    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 105    uint16List *l = NULL;
 106
 107    visit_type_uint16List(v, name, &l, errp);
 108
 109    while (l) {
 110        bitmap_set(backend->host_nodes, l->value, 1);
 111        l = l->next;
 112    }
 113#else
 114    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 115#endif
 116}
 117
 118static int
 119host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 120{
 121    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 122    return backend->policy;
 123}
 124
 125static void
 126host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 127{
 128    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 129    backend->policy = policy;
 130
 131#ifndef CONFIG_NUMA
 132    if (policy != HOST_MEM_POLICY_DEFAULT) {
 133        error_setg(errp, "NUMA policies are not supported by this QEMU");
 134    }
 135#endif
 136}
 137
 138static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 139{
 140    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 141
 142    return backend->merge;
 143}
 144
 145static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 146{
 147    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 148
 149    if (!host_memory_backend_mr_inited(backend)) {
 150        backend->merge = value;
 151        return;
 152    }
 153
 154    if (value != backend->merge) {
 155        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 156        uint64_t sz = memory_region_size(&backend->mr);
 157
 158        qemu_madvise(ptr, sz,
 159                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 160        backend->merge = value;
 161    }
 162}
 163
 164static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 165{
 166    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 167
 168    return backend->dump;
 169}
 170
 171static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 172{
 173    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 174
 175    if (!host_memory_backend_mr_inited(backend)) {
 176        backend->dump = value;
 177        return;
 178    }
 179
 180    if (value != backend->dump) {
 181        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 182        uint64_t sz = memory_region_size(&backend->mr);
 183
 184        qemu_madvise(ptr, sz,
 185                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 186        backend->dump = value;
 187    }
 188}
 189
 190static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 191{
 192    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 193
 194    return backend->prealloc || backend->force_prealloc;
 195}
 196
 197static void host_memory_backend_set_prealloc(Object *obj, bool value,
 198                                             Error **errp)
 199{
 200    Error *local_err = NULL;
 201    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 202
 203    if (backend->force_prealloc) {
 204        if (value) {
 205            error_setg(errp,
 206                       "remove -mem-prealloc to use the prealloc property");
 207            return;
 208        }
 209    }
 210
 211    if (!host_memory_backend_mr_inited(backend)) {
 212        backend->prealloc = value;
 213        return;
 214    }
 215
 216    if (value && !backend->prealloc) {
 217        int fd = memory_region_get_fd(&backend->mr);
 218        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 219        uint64_t sz = memory_region_size(&backend->mr);
 220
 221        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
 222        if (local_err) {
 223            error_propagate(errp, local_err);
 224            return;
 225        }
 226        backend->prealloc = true;
 227    }
 228}
 229
 230static void host_memory_backend_init(Object *obj)
 231{
 232    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 233    MachineState *machine = MACHINE(qdev_get_machine());
 234
 235    backend->merge = machine_mem_merge(machine);
 236    backend->dump = machine_dump_guest_core(machine);
 237    backend->prealloc = mem_prealloc;
 238}
 239
 240bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 241{
 242    /*
 243     * NOTE: We forbid zero-length memory backend, so here zero means
 244     * "we haven't inited the backend memory region yet".
 245     */
 246    return memory_region_size(&backend->mr) != 0;
 247}
 248
 249MemoryRegion *
 250host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 251{
 252    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 253}
 254
 255void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 256{
 257    backend->is_mapped = mapped;
 258}
 259
 260bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 261{
 262    return backend->is_mapped;
 263}
 264
 265static void
 266host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 267{
 268    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 269    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 270    Error *local_err = NULL;
 271    void *ptr;
 272    uint64_t sz;
 273
 274    if (bc->alloc) {
 275        bc->alloc(backend, &local_err);
 276        if (local_err) {
 277            goto out;
 278        }
 279
 280        ptr = memory_region_get_ram_ptr(&backend->mr);
 281        sz = memory_region_size(&backend->mr);
 282
 283        if (backend->merge) {
 284            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 285        }
 286        if (!backend->dump) {
 287            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 288        }
 289#ifdef CONFIG_NUMA
 290        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 291        /* lastbit == MAX_NODES means maxnode = 0 */
 292        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 293        /* ensure policy won't be ignored in case memory is preallocated
 294         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 295         * this doesn't catch hugepage case. */
 296        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 297
 298        /* check for invalid host-nodes and policies and give more verbose
 299         * error messages than mbind(). */
 300        if (maxnode && backend->policy == MPOL_DEFAULT) {
 301            error_setg(errp, "host-nodes must be empty for policy default,"
 302                       " or you should explicitly specify a policy other"
 303                       " than default");
 304            return;
 305        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 306            error_setg(errp, "host-nodes must be set for policy %s",
 307                       HostMemPolicy_lookup[backend->policy]);
 308            return;
 309        }
 310
 311        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 312         * as argument to mbind() due to an old Linux bug (feature?) which
 313         * cuts off the last specified node. This means backend->host_nodes
 314         * must have MAX_NODES+1 bits available.
 315         */
 316        assert(sizeof(backend->host_nodes) >=
 317               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 318        assert(maxnode <= MAX_NODES);
 319        if (mbind(ptr, sz, backend->policy,
 320                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
 321            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 322                error_setg_errno(errp, errno,
 323                                 "cannot bind memory to host NUMA nodes");
 324                return;
 325            }
 326        }
 327#endif
 328        /* Preallocate memory after the NUMA policy has been instantiated.
 329         * This is necessary to guarantee memory is allocated with
 330         * specified NUMA policy in place.
 331         */
 332        if (backend->prealloc) {
 333            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 334                            smp_cpus, &local_err);
 335            if (local_err) {
 336                goto out;
 337            }
 338        }
 339    }
 340out:
 341    error_propagate(errp, local_err);
 342}
 343
 344static bool
 345host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
 346{
 347    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 348        return false;
 349    } else {
 350        return true;
 351    }
 352}
 353
 354static char *get_id(Object *o, Error **errp)
 355{
 356    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 357
 358    return g_strdup(backend->id);
 359}
 360
 361static void set_id(Object *o, const char *str, Error **errp)
 362{
 363    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 364
 365    if (backend->id) {
 366        error_setg(errp, "cannot change property value");
 367        return;
 368    }
 369    backend->id = g_strdup(str);
 370}
 371
 372static void
 373host_memory_backend_class_init(ObjectClass *oc, void *data)
 374{
 375    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 376
 377    ucc->complete = host_memory_backend_memory_complete;
 378    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 379
 380    object_class_property_add_bool(oc, "merge",
 381        host_memory_backend_get_merge,
 382        host_memory_backend_set_merge, &error_abort);
 383    object_class_property_add_bool(oc, "dump",
 384        host_memory_backend_get_dump,
 385        host_memory_backend_set_dump, &error_abort);
 386    object_class_property_add_bool(oc, "prealloc",
 387        host_memory_backend_get_prealloc,
 388        host_memory_backend_set_prealloc, &error_abort);
 389    object_class_property_add(oc, "size", "int",
 390        host_memory_backend_get_size,
 391        host_memory_backend_set_size,
 392        NULL, NULL, &error_abort);
 393    object_class_property_add(oc, "host-nodes", "int",
 394        host_memory_backend_get_host_nodes,
 395        host_memory_backend_set_host_nodes,
 396        NULL, NULL, &error_abort);
 397    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 398        HostMemPolicy_lookup,
 399        host_memory_backend_get_policy,
 400        host_memory_backend_set_policy, &error_abort);
 401    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
 402}
 403
 404static void host_memory_backend_finalize(Object *o)
 405{
 406    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 407    g_free(backend->id);
 408}
 409
 410static const TypeInfo host_memory_backend_info = {
 411    .name = TYPE_MEMORY_BACKEND,
 412    .parent = TYPE_OBJECT,
 413    .abstract = true,
 414    .class_size = sizeof(HostMemoryBackendClass),
 415    .class_init = host_memory_backend_class_init,
 416    .instance_size = sizeof(HostMemoryBackend),
 417    .instance_init = host_memory_backend_init,
 418    .instance_finalize = host_memory_backend_finalize,
 419    .interfaces = (InterfaceInfo[]) {
 420        { TYPE_USER_CREATABLE },
 421        { }
 422    }
 423};
 424
 425static void register_types(void)
 426{
 427    type_register_static(&host_memory_backend_info);
 428}
 429
 430type_init(register_types);
 431