qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "sysemu/sysemu.h"
  16#include "hw/boards.h"
  17#include "qapi/error.h"
  18#include "qapi/qapi-builtin-visit.h"
  19#include "qapi/visitor.h"
  20#include "qemu/config-file.h"
  21#include "qom/object_interfaces.h"
  22#include "qemu/mmap-alloc.h"
  23
  24#ifdef CONFIG_NUMA
  25#include <numaif.h>
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  29QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  30#endif
  31
  32char *
  33host_memory_backend_get_name(HostMemoryBackend *backend)
  34{
  35    if (!backend->use_canonical_path) {
  36        return object_get_canonical_path_component(OBJECT(backend));
  37    }
  38
  39    return object_get_canonical_path(OBJECT(backend));
  40}
  41
  42static void
  43host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  44                             void *opaque, Error **errp)
  45{
  46    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  47    uint64_t value = backend->size;
  48
  49    visit_type_size(v, name, &value, errp);
  50}
  51
  52static void
  53host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  54                             void *opaque, Error **errp)
  55{
  56    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  57    Error *local_err = NULL;
  58    uint64_t value;
  59
  60    if (host_memory_backend_mr_inited(backend)) {
  61        error_setg(&local_err, "cannot change property %s of %s ",
  62                   name, object_get_typename(obj));
  63        goto out;
  64    }
  65
  66    visit_type_size(v, name, &value, &local_err);
  67    if (local_err) {
  68        goto out;
  69    }
  70    if (!value) {
  71        error_setg(&local_err,
  72                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  73                   name, object_get_typename(obj), value);
  74        goto out;
  75    }
  76    backend->size = value;
  77out:
  78    error_propagate(errp, local_err);
  79}
  80
  81static void
  82host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  83                                   void *opaque, Error **errp)
  84{
  85    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  86    uint16List *host_nodes = NULL;
  87    uint16List **node = &host_nodes;
  88    unsigned long value;
  89
  90    value = find_first_bit(backend->host_nodes, MAX_NODES);
  91    if (value == MAX_NODES) {
  92        goto ret;
  93    }
  94
  95    *node = g_malloc0(sizeof(**node));
  96    (*node)->value = value;
  97    node = &(*node)->next;
  98
  99    do {
 100        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
 101        if (value == MAX_NODES) {
 102            break;
 103        }
 104
 105        *node = g_malloc0(sizeof(**node));
 106        (*node)->value = value;
 107        node = &(*node)->next;
 108    } while (true);
 109
 110ret:
 111    visit_type_uint16List(v, name, &host_nodes, errp);
 112}
 113
 114static void
 115host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 116                                   void *opaque, Error **errp)
 117{
 118#ifdef CONFIG_NUMA
 119    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 120    uint16List *l, *host_nodes = NULL;
 121
 122    visit_type_uint16List(v, name, &host_nodes, errp);
 123
 124    for (l = host_nodes; l; l = l->next) {
 125        if (l->value >= MAX_NODES) {
 126            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 127            goto out;
 128        }
 129    }
 130
 131    for (l = host_nodes; l; l = l->next) {
 132        bitmap_set(backend->host_nodes, l->value, 1);
 133    }
 134
 135out:
 136    qapi_free_uint16List(host_nodes);
 137#else
 138    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 139#endif
 140}
 141
 142static int
 143host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 144{
 145    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 146    return backend->policy;
 147}
 148
 149static void
 150host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 151{
 152    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 153    backend->policy = policy;
 154
 155#ifndef CONFIG_NUMA
 156    if (policy != HOST_MEM_POLICY_DEFAULT) {
 157        error_setg(errp, "NUMA policies are not supported by this QEMU");
 158    }
 159#endif
 160}
 161
 162static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 163{
 164    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 165
 166    return backend->merge;
 167}
 168
 169static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 170{
 171    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 172
 173    if (!host_memory_backend_mr_inited(backend)) {
 174        backend->merge = value;
 175        return;
 176    }
 177
 178    if (value != backend->merge) {
 179        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 180        uint64_t sz = memory_region_size(&backend->mr);
 181
 182        qemu_madvise(ptr, sz,
 183                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 184        backend->merge = value;
 185    }
 186}
 187
 188static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 189{
 190    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 191
 192    return backend->dump;
 193}
 194
 195static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 196{
 197    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 198
 199    if (!host_memory_backend_mr_inited(backend)) {
 200        backend->dump = value;
 201        return;
 202    }
 203
 204    if (value != backend->dump) {
 205        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 206        uint64_t sz = memory_region_size(&backend->mr);
 207
 208        qemu_madvise(ptr, sz,
 209                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 210        backend->dump = value;
 211    }
 212}
 213
 214static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 215{
 216    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 217
 218    return backend->prealloc || backend->force_prealloc;
 219}
 220
 221static void host_memory_backend_set_prealloc(Object *obj, bool value,
 222                                             Error **errp)
 223{
 224    Error *local_err = NULL;
 225    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 226    MachineState *ms = MACHINE(qdev_get_machine());
 227
 228    if (backend->force_prealloc) {
 229        if (value) {
 230            error_setg(errp,
 231                       "remove -mem-prealloc to use the prealloc property");
 232            return;
 233        }
 234    }
 235
 236    if (!host_memory_backend_mr_inited(backend)) {
 237        backend->prealloc = value;
 238        return;
 239    }
 240
 241    if (value && !backend->prealloc) {
 242        int fd = memory_region_get_fd(&backend->mr);
 243        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 244        uint64_t sz = memory_region_size(&backend->mr);
 245
 246        os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err);
 247        if (local_err) {
 248            error_propagate(errp, local_err);
 249            return;
 250        }
 251        backend->prealloc = true;
 252    }
 253}
 254
 255static void host_memory_backend_init(Object *obj)
 256{
 257    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 258    MachineState *machine = MACHINE(qdev_get_machine());
 259
 260    backend->merge = machine_mem_merge(machine);
 261    backend->dump = machine_dump_guest_core(machine);
 262    backend->prealloc = mem_prealloc;
 263}
 264
 265static void host_memory_backend_post_init(Object *obj)
 266{
 267    object_apply_compat_props(obj);
 268}
 269
 270bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 271{
 272    /*
 273     * NOTE: We forbid zero-length memory backend, so here zero means
 274     * "we haven't inited the backend memory region yet".
 275     */
 276    return memory_region_size(&backend->mr) != 0;
 277}
 278
 279MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 280{
 281    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 282}
 283
 284void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 285{
 286    backend->is_mapped = mapped;
 287}
 288
 289bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 290{
 291    return backend->is_mapped;
 292}
 293
 294#ifdef __linux__
 295size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 296{
 297    Object *obj = OBJECT(memdev);
 298    char *path = object_property_get_str(obj, "mem-path", NULL);
 299    size_t pagesize = qemu_mempath_getpagesize(path);
 300
 301    g_free(path);
 302    return pagesize;
 303}
 304#else
 305size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 306{
 307    return qemu_real_host_page_size;
 308}
 309#endif
 310
 311static void
 312host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 313{
 314    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 315    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 316    MachineState *ms = MACHINE(qdev_get_machine());
 317    Error *local_err = NULL;
 318    void *ptr;
 319    uint64_t sz;
 320
 321    if (bc->alloc) {
 322        bc->alloc(backend, &local_err);
 323        if (local_err) {
 324            goto out;
 325        }
 326
 327        ptr = memory_region_get_ram_ptr(&backend->mr);
 328        sz = memory_region_size(&backend->mr);
 329
 330        if (backend->merge) {
 331            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 332        }
 333        if (!backend->dump) {
 334            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 335        }
 336#ifdef CONFIG_NUMA
 337        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 338        /* lastbit == MAX_NODES means maxnode = 0 */
 339        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 340        /* ensure policy won't be ignored in case memory is preallocated
 341         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 342         * this doesn't catch hugepage case. */
 343        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 344
 345        /* check for invalid host-nodes and policies and give more verbose
 346         * error messages than mbind(). */
 347        if (maxnode && backend->policy == MPOL_DEFAULT) {
 348            error_setg(errp, "host-nodes must be empty for policy default,"
 349                       " or you should explicitly specify a policy other"
 350                       " than default");
 351            return;
 352        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 353            error_setg(errp, "host-nodes must be set for policy %s",
 354                       HostMemPolicy_str(backend->policy));
 355            return;
 356        }
 357
 358        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 359         * as argument to mbind() due to an old Linux bug (feature?) which
 360         * cuts off the last specified node. This means backend->host_nodes
 361         * must have MAX_NODES+1 bits available.
 362         */
 363        assert(sizeof(backend->host_nodes) >=
 364               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 365        assert(maxnode <= MAX_NODES);
 366        if (mbind(ptr, sz, backend->policy,
 367                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
 368            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 369                error_setg_errno(errp, errno,
 370                                 "cannot bind memory to host NUMA nodes");
 371                return;
 372            }
 373        }
 374#endif
 375        /* Preallocate memory after the NUMA policy has been instantiated.
 376         * This is necessary to guarantee memory is allocated with
 377         * specified NUMA policy in place.
 378         */
 379        if (backend->prealloc) {
 380            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 381                            ms->smp.cpus, &local_err);
 382            if (local_err) {
 383                goto out;
 384            }
 385        }
 386    }
 387out:
 388    error_propagate(errp, local_err);
 389}
 390
 391static bool
 392host_memory_backend_can_be_deleted(UserCreatable *uc)
 393{
 394    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 395        return false;
 396    } else {
 397        return true;
 398    }
 399}
 400
 401static bool host_memory_backend_get_share(Object *o, Error **errp)
 402{
 403    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 404
 405    return backend->share;
 406}
 407
 408static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 409{
 410    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 411
 412    if (host_memory_backend_mr_inited(backend)) {
 413        error_setg(errp, "cannot change property value");
 414        return;
 415    }
 416    backend->share = value;
 417}
 418
 419static bool
 420host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 421{
 422    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 423
 424    return backend->use_canonical_path;
 425}
 426
 427static void
 428host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 429                                           Error **errp)
 430{
 431    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 432
 433    backend->use_canonical_path = value;
 434}
 435
 436static void
 437host_memory_backend_class_init(ObjectClass *oc, void *data)
 438{
 439    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 440
 441    ucc->complete = host_memory_backend_memory_complete;
 442    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 443
 444    object_class_property_add_bool(oc, "merge",
 445        host_memory_backend_get_merge,
 446        host_memory_backend_set_merge, &error_abort);
 447    object_class_property_set_description(oc, "merge",
 448        "Mark memory as mergeable", &error_abort);
 449    object_class_property_add_bool(oc, "dump",
 450        host_memory_backend_get_dump,
 451        host_memory_backend_set_dump, &error_abort);
 452    object_class_property_set_description(oc, "dump",
 453        "Set to 'off' to exclude from core dump", &error_abort);
 454    object_class_property_add_bool(oc, "prealloc",
 455        host_memory_backend_get_prealloc,
 456        host_memory_backend_set_prealloc, &error_abort);
 457    object_class_property_set_description(oc, "prealloc",
 458        "Preallocate memory", &error_abort);
 459    object_class_property_add(oc, "size", "int",
 460        host_memory_backend_get_size,
 461        host_memory_backend_set_size,
 462        NULL, NULL, &error_abort);
 463    object_class_property_set_description(oc, "size",
 464        "Size of the memory region (ex: 500M)", &error_abort);
 465    object_class_property_add(oc, "host-nodes", "int",
 466        host_memory_backend_get_host_nodes,
 467        host_memory_backend_set_host_nodes,
 468        NULL, NULL, &error_abort);
 469    object_class_property_set_description(oc, "host-nodes",
 470        "Binds memory to the list of NUMA host nodes", &error_abort);
 471    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 472        &HostMemPolicy_lookup,
 473        host_memory_backend_get_policy,
 474        host_memory_backend_set_policy, &error_abort);
 475    object_class_property_set_description(oc, "policy",
 476        "Set the NUMA policy", &error_abort);
 477    object_class_property_add_bool(oc, "share",
 478        host_memory_backend_get_share, host_memory_backend_set_share,
 479        &error_abort);
 480    object_class_property_set_description(oc, "share",
 481        "Mark the memory as private to QEMU or shared", &error_abort);
 482    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 483        host_memory_backend_get_use_canonical_path,
 484        host_memory_backend_set_use_canonical_path, &error_abort);
 485}
 486
 487static const TypeInfo host_memory_backend_info = {
 488    .name = TYPE_MEMORY_BACKEND,
 489    .parent = TYPE_OBJECT,
 490    .abstract = true,
 491    .class_size = sizeof(HostMemoryBackendClass),
 492    .class_init = host_memory_backend_class_init,
 493    .instance_size = sizeof(HostMemoryBackend),
 494    .instance_init = host_memory_backend_init,
 495    .instance_post_init = host_memory_backend_post_init,
 496    .interfaces = (InterfaceInfo[]) {
 497        { TYPE_USER_CREATABLE },
 498        { }
 499    }
 500};
 501
 502static void register_types(void)
 503{
 504    type_register_static(&host_memory_backend_info);
 505}
 506
 507type_init(register_types);
 508