qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "hw/boards.h"
  16#include "qapi/error.h"
  17#include "qapi/qapi-builtin-visit.h"
  18#include "qapi/visitor.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21#include "qemu/mmap-alloc.h"
  22
  23#ifdef CONFIG_NUMA
  24#include <numaif.h>
  25QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  29#endif
  30
  31char *
  32host_memory_backend_get_name(HostMemoryBackend *backend)
  33{
  34    if (!backend->use_canonical_path) {
  35        return object_get_canonical_path_component(OBJECT(backend));
  36    }
  37
  38    return object_get_canonical_path(OBJECT(backend));
  39}
  40
  41static void
  42host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  43                             void *opaque, Error **errp)
  44{
  45    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  46    uint64_t value = backend->size;
  47
  48    visit_type_size(v, name, &value, errp);
  49}
  50
  51static void
  52host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  53                             void *opaque, Error **errp)
  54{
  55    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  56    Error *local_err = NULL;
  57    uint64_t value;
  58
  59    if (host_memory_backend_mr_inited(backend)) {
  60        error_setg(&local_err, "cannot change property %s of %s ",
  61                   name, object_get_typename(obj));
  62        goto out;
  63    }
  64
  65    visit_type_size(v, name, &value, &local_err);
  66    if (local_err) {
  67        goto out;
  68    }
  69    if (!value) {
  70        error_setg(&local_err,
  71                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  72                   name, object_get_typename(obj), value);
  73        goto out;
  74    }
  75    backend->size = value;
  76out:
  77    error_propagate(errp, local_err);
  78}
  79
  80static void
  81host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  82                                   void *opaque, Error **errp)
  83{
  84    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  85    uint16List *host_nodes = NULL;
  86    uint16List **node = &host_nodes;
  87    unsigned long value;
  88
  89    value = find_first_bit(backend->host_nodes, MAX_NODES);
  90    if (value == MAX_NODES) {
  91        goto ret;
  92    }
  93
  94    *node = g_malloc0(sizeof(**node));
  95    (*node)->value = value;
  96    node = &(*node)->next;
  97
  98    do {
  99        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
 100        if (value == MAX_NODES) {
 101            break;
 102        }
 103
 104        *node = g_malloc0(sizeof(**node));
 105        (*node)->value = value;
 106        node = &(*node)->next;
 107    } while (true);
 108
 109ret:
 110    visit_type_uint16List(v, name, &host_nodes, errp);
 111}
 112
 113static void
 114host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 115                                   void *opaque, Error **errp)
 116{
 117#ifdef CONFIG_NUMA
 118    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 119    uint16List *l, *host_nodes = NULL;
 120
 121    visit_type_uint16List(v, name, &host_nodes, errp);
 122
 123    for (l = host_nodes; l; l = l->next) {
 124        if (l->value >= MAX_NODES) {
 125            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 126            goto out;
 127        }
 128    }
 129
 130    for (l = host_nodes; l; l = l->next) {
 131        bitmap_set(backend->host_nodes, l->value, 1);
 132    }
 133
 134out:
 135    qapi_free_uint16List(host_nodes);
 136#else
 137    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 138#endif
 139}
 140
 141static int
 142host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 143{
 144    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 145    return backend->policy;
 146}
 147
 148static void
 149host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 150{
 151    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 152    backend->policy = policy;
 153
 154#ifndef CONFIG_NUMA
 155    if (policy != HOST_MEM_POLICY_DEFAULT) {
 156        error_setg(errp, "NUMA policies are not supported by this QEMU");
 157    }
 158#endif
 159}
 160
 161static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 162{
 163    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 164
 165    return backend->merge;
 166}
 167
 168static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 169{
 170    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 171
 172    if (!host_memory_backend_mr_inited(backend)) {
 173        backend->merge = value;
 174        return;
 175    }
 176
 177    if (value != backend->merge) {
 178        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 179        uint64_t sz = memory_region_size(&backend->mr);
 180
 181        qemu_madvise(ptr, sz,
 182                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 183        backend->merge = value;
 184    }
 185}
 186
 187static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 188{
 189    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 190
 191    return backend->dump;
 192}
 193
 194static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 195{
 196    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 197
 198    if (!host_memory_backend_mr_inited(backend)) {
 199        backend->dump = value;
 200        return;
 201    }
 202
 203    if (value != backend->dump) {
 204        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 205        uint64_t sz = memory_region_size(&backend->mr);
 206
 207        qemu_madvise(ptr, sz,
 208                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 209        backend->dump = value;
 210    }
 211}
 212
 213static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 214{
 215    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 216
 217    return backend->prealloc || backend->force_prealloc;
 218}
 219
 220static void host_memory_backend_set_prealloc(Object *obj, bool value,
 221                                             Error **errp)
 222{
 223    Error *local_err = NULL;
 224    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 225    MachineState *ms = MACHINE(qdev_get_machine());
 226
 227    if (backend->force_prealloc) {
 228        if (value) {
 229            error_setg(errp,
 230                       "remove -mem-prealloc to use the prealloc property");
 231            return;
 232        }
 233    }
 234
 235    if (!host_memory_backend_mr_inited(backend)) {
 236        backend->prealloc = value;
 237        return;
 238    }
 239
 240    if (value && !backend->prealloc) {
 241        int fd = memory_region_get_fd(&backend->mr);
 242        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 243        uint64_t sz = memory_region_size(&backend->mr);
 244
 245        os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err);
 246        if (local_err) {
 247            error_propagate(errp, local_err);
 248            return;
 249        }
 250        backend->prealloc = true;
 251    }
 252}
 253
 254static void host_memory_backend_init(Object *obj)
 255{
 256    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 257    MachineState *machine = MACHINE(qdev_get_machine());
 258
 259    backend->merge = machine_mem_merge(machine);
 260    backend->dump = machine_dump_guest_core(machine);
 261    backend->prealloc = mem_prealloc;
 262}
 263
 264static void host_memory_backend_post_init(Object *obj)
 265{
 266    object_apply_compat_props(obj);
 267}
 268
 269bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 270{
 271    /*
 272     * NOTE: We forbid zero-length memory backend, so here zero means
 273     * "we haven't inited the backend memory region yet".
 274     */
 275    return memory_region_size(&backend->mr) != 0;
 276}
 277
 278MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 279{
 280    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 281}
 282
 283void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 284{
 285    backend->is_mapped = mapped;
 286}
 287
 288bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 289{
 290    return backend->is_mapped;
 291}
 292
 293#ifdef __linux__
 294size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 295{
 296    Object *obj = OBJECT(memdev);
 297    char *path = object_property_get_str(obj, "mem-path", NULL);
 298    size_t pagesize = qemu_mempath_getpagesize(path);
 299
 300    g_free(path);
 301    return pagesize;
 302}
 303#else
 304size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 305{
 306    return getpagesize();
 307}
 308#endif
 309
 310static void
 311host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 312{
 313    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 314    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 315    MachineState *ms = MACHINE(qdev_get_machine());
 316    Error *local_err = NULL;
 317    void *ptr;
 318    uint64_t sz;
 319
 320    if (bc->alloc) {
 321        bc->alloc(backend, &local_err);
 322        if (local_err) {
 323            goto out;
 324        }
 325
 326        ptr = memory_region_get_ram_ptr(&backend->mr);
 327        sz = memory_region_size(&backend->mr);
 328
 329        if (backend->merge) {
 330            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 331        }
 332        if (!backend->dump) {
 333            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 334        }
 335#ifdef CONFIG_NUMA
 336        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 337        /* lastbit == MAX_NODES means maxnode = 0 */
 338        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 339        /* ensure policy won't be ignored in case memory is preallocated
 340         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 341         * this doesn't catch hugepage case. */
 342        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 343
 344        /* check for invalid host-nodes and policies and give more verbose
 345         * error messages than mbind(). */
 346        if (maxnode && backend->policy == MPOL_DEFAULT) {
 347            error_setg(errp, "host-nodes must be empty for policy default,"
 348                       " or you should explicitly specify a policy other"
 349                       " than default");
 350            return;
 351        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 352            error_setg(errp, "host-nodes must be set for policy %s",
 353                       HostMemPolicy_str(backend->policy));
 354            return;
 355        }
 356
 357        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 358         * as argument to mbind() due to an old Linux bug (feature?) which
 359         * cuts off the last specified node. This means backend->host_nodes
 360         * must have MAX_NODES+1 bits available.
 361         */
 362        assert(sizeof(backend->host_nodes) >=
 363               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 364        assert(maxnode <= MAX_NODES);
 365        if (mbind(ptr, sz, backend->policy,
 366                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
 367            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 368                error_setg_errno(errp, errno,
 369                                 "cannot bind memory to host NUMA nodes");
 370                return;
 371            }
 372        }
 373#endif
 374        /* Preallocate memory after the NUMA policy has been instantiated.
 375         * This is necessary to guarantee memory is allocated with
 376         * specified NUMA policy in place.
 377         */
 378        if (backend->prealloc) {
 379            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 380                            ms->smp.cpus, &local_err);
 381            if (local_err) {
 382                goto out;
 383            }
 384        }
 385    }
 386out:
 387    error_propagate(errp, local_err);
 388}
 389
 390static bool
 391host_memory_backend_can_be_deleted(UserCreatable *uc)
 392{
 393    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 394        return false;
 395    } else {
 396        return true;
 397    }
 398}
 399
 400static bool host_memory_backend_get_share(Object *o, Error **errp)
 401{
 402    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 403
 404    return backend->share;
 405}
 406
 407static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 408{
 409    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 410
 411    if (host_memory_backend_mr_inited(backend)) {
 412        error_setg(errp, "cannot change property value");
 413        return;
 414    }
 415    backend->share = value;
 416}
 417
 418static bool
 419host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 420{
 421    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 422
 423    return backend->use_canonical_path;
 424}
 425
 426static void
 427host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 428                                           Error **errp)
 429{
 430    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 431
 432    backend->use_canonical_path = value;
 433}
 434
 435static void
 436host_memory_backend_class_init(ObjectClass *oc, void *data)
 437{
 438    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 439
 440    ucc->complete = host_memory_backend_memory_complete;
 441    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 442
 443    object_class_property_add_bool(oc, "merge",
 444        host_memory_backend_get_merge,
 445        host_memory_backend_set_merge, &error_abort);
 446    object_class_property_set_description(oc, "merge",
 447        "Mark memory as mergeable", &error_abort);
 448    object_class_property_add_bool(oc, "dump",
 449        host_memory_backend_get_dump,
 450        host_memory_backend_set_dump, &error_abort);
 451    object_class_property_set_description(oc, "dump",
 452        "Set to 'off' to exclude from core dump", &error_abort);
 453    object_class_property_add_bool(oc, "prealloc",
 454        host_memory_backend_get_prealloc,
 455        host_memory_backend_set_prealloc, &error_abort);
 456    object_class_property_set_description(oc, "prealloc",
 457        "Preallocate memory", &error_abort);
 458    object_class_property_add(oc, "size", "int",
 459        host_memory_backend_get_size,
 460        host_memory_backend_set_size,
 461        NULL, NULL, &error_abort);
 462    object_class_property_set_description(oc, "size",
 463        "Size of the memory region (ex: 500M)", &error_abort);
 464    object_class_property_add(oc, "host-nodes", "int",
 465        host_memory_backend_get_host_nodes,
 466        host_memory_backend_set_host_nodes,
 467        NULL, NULL, &error_abort);
 468    object_class_property_set_description(oc, "host-nodes",
 469        "Binds memory to the list of NUMA host nodes", &error_abort);
 470    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 471        &HostMemPolicy_lookup,
 472        host_memory_backend_get_policy,
 473        host_memory_backend_set_policy, &error_abort);
 474    object_class_property_set_description(oc, "policy",
 475        "Set the NUMA policy", &error_abort);
 476    object_class_property_add_bool(oc, "share",
 477        host_memory_backend_get_share, host_memory_backend_set_share,
 478        &error_abort);
 479    object_class_property_set_description(oc, "share",
 480        "Mark the memory as private to QEMU or shared", &error_abort);
 481    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 482        host_memory_backend_get_use_canonical_path,
 483        host_memory_backend_set_use_canonical_path, &error_abort);
 484}
 485
 486static const TypeInfo host_memory_backend_info = {
 487    .name = TYPE_MEMORY_BACKEND,
 488    .parent = TYPE_OBJECT,
 489    .abstract = true,
 490    .class_size = sizeof(HostMemoryBackendClass),
 491    .class_init = host_memory_backend_class_init,
 492    .instance_size = sizeof(HostMemoryBackend),
 493    .instance_init = host_memory_backend_init,
 494    .instance_post_init = host_memory_backend_post_init,
 495    .interfaces = (InterfaceInfo[]) {
 496        { TYPE_USER_CREATABLE },
 497        { }
 498    }
 499};
 500
 501static void register_types(void)
 502{
 503    type_register_static(&host_memory_backend_info);
 504}
 505
 506type_init(register_types);
 507