qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "hw/boards.h"
  16#include "qapi/error.h"
  17#include "qapi/qapi-builtin-visit.h"
  18#include "qapi/visitor.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21#include "qemu/mmap-alloc.h"
  22#include "qemu/madvise.h"
  23
  24#ifdef CONFIG_NUMA
  25#include <numaif.h>
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  29QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  30#endif
  31
  32char *
  33host_memory_backend_get_name(HostMemoryBackend *backend)
  34{
  35    if (!backend->use_canonical_path) {
  36        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
  37    }
  38
  39    return object_get_canonical_path(OBJECT(backend));
  40}
  41
  42static void
  43host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  44                             void *opaque, Error **errp)
  45{
  46    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  47    uint64_t value = backend->size;
  48
  49    visit_type_size(v, name, &value, errp);
  50}
  51
  52static void
  53host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  54                             void *opaque, Error **errp)
  55{
  56    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  57    uint64_t value;
  58
  59    if (host_memory_backend_mr_inited(backend)) {
  60        error_setg(errp, "cannot change property %s of %s ", name,
  61                   object_get_typename(obj));
  62        return;
  63    }
  64
  65    if (!visit_type_size(v, name, &value, errp)) {
  66        return;
  67    }
  68    if (!value) {
  69        error_setg(errp,
  70                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  71                   name, object_get_typename(obj), value);
  72        return;
  73    }
  74    backend->size = value;
  75}
  76
  77static void
  78host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  79                                   void *opaque, Error **errp)
  80{
  81    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  82    uint16List *host_nodes = NULL;
  83    uint16List **tail = &host_nodes;
  84    unsigned long value;
  85
  86    value = find_first_bit(backend->host_nodes, MAX_NODES);
  87    if (value == MAX_NODES) {
  88        goto ret;
  89    }
  90
  91    QAPI_LIST_APPEND(tail, value);
  92
  93    do {
  94        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  95        if (value == MAX_NODES) {
  96            break;
  97        }
  98
  99        QAPI_LIST_APPEND(tail, value);
 100    } while (true);
 101
 102ret:
 103    visit_type_uint16List(v, name, &host_nodes, errp);
 104    qapi_free_uint16List(host_nodes);
 105}
 106
 107static void
 108host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 109                                   void *opaque, Error **errp)
 110{
 111#ifdef CONFIG_NUMA
 112    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 113    uint16List *l, *host_nodes = NULL;
 114
 115    visit_type_uint16List(v, name, &host_nodes, errp);
 116
 117    for (l = host_nodes; l; l = l->next) {
 118        if (l->value >= MAX_NODES) {
 119            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 120            goto out;
 121        }
 122    }
 123
 124    for (l = host_nodes; l; l = l->next) {
 125        bitmap_set(backend->host_nodes, l->value, 1);
 126    }
 127
 128out:
 129    qapi_free_uint16List(host_nodes);
 130#else
 131    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 132#endif
 133}
 134
 135static int
 136host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 137{
 138    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 139    return backend->policy;
 140}
 141
 142static void
 143host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 144{
 145    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 146    backend->policy = policy;
 147
 148#ifndef CONFIG_NUMA
 149    if (policy != HOST_MEM_POLICY_DEFAULT) {
 150        error_setg(errp, "NUMA policies are not supported by this QEMU");
 151    }
 152#endif
 153}
 154
 155static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 156{
 157    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 158
 159    return backend->merge;
 160}
 161
 162static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 163{
 164    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 165
 166    if (!host_memory_backend_mr_inited(backend)) {
 167        backend->merge = value;
 168        return;
 169    }
 170
 171    if (value != backend->merge) {
 172        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 173        uint64_t sz = memory_region_size(&backend->mr);
 174
 175        qemu_madvise(ptr, sz,
 176                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 177        backend->merge = value;
 178    }
 179}
 180
 181static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 182{
 183    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 184
 185    return backend->dump;
 186}
 187
 188static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 189{
 190    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 191
 192    if (!host_memory_backend_mr_inited(backend)) {
 193        backend->dump = value;
 194        return;
 195    }
 196
 197    if (value != backend->dump) {
 198        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 199        uint64_t sz = memory_region_size(&backend->mr);
 200
 201        qemu_madvise(ptr, sz,
 202                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 203        backend->dump = value;
 204    }
 205}
 206
 207static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 208{
 209    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 210
 211    return backend->prealloc;
 212}
 213
 214static void host_memory_backend_set_prealloc(Object *obj, bool value,
 215                                             Error **errp)
 216{
 217    Error *local_err = NULL;
 218    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 219
 220    if (!backend->reserve && value) {
 221        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 222        return;
 223    }
 224
 225    if (!host_memory_backend_mr_inited(backend)) {
 226        backend->prealloc = value;
 227        return;
 228    }
 229
 230    if (value && !backend->prealloc) {
 231        int fd = memory_region_get_fd(&backend->mr);
 232        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 233        uint64_t sz = memory_region_size(&backend->mr);
 234
 235        qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
 236                          backend->prealloc_context, &local_err);
 237        if (local_err) {
 238            error_propagate(errp, local_err);
 239            return;
 240        }
 241        backend->prealloc = true;
 242    }
 243}
 244
 245static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
 246    const char *name, void *opaque, Error **errp)
 247{
 248    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 249    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
 250}
 251
 252static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
 253    const char *name, void *opaque, Error **errp)
 254{
 255    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 256    uint32_t value;
 257
 258    if (!visit_type_uint32(v, name, &value, errp)) {
 259        return;
 260    }
 261    if (value <= 0) {
 262        error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
 263                   object_get_typename(obj), value);
 264        return;
 265    }
 266    backend->prealloc_threads = value;
 267}
 268
 269static void host_memory_backend_init(Object *obj)
 270{
 271    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 272    MachineState *machine = MACHINE(qdev_get_machine());
 273
 274    /* TODO: convert access to globals to compat properties */
 275    backend->merge = machine_mem_merge(machine);
 276    backend->dump = machine_dump_guest_core(machine);
 277    backend->reserve = true;
 278    backend->prealloc_threads = machine->smp.cpus;
 279}
 280
 281static void host_memory_backend_post_init(Object *obj)
 282{
 283    object_apply_compat_props(obj);
 284}
 285
 286bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 287{
 288    /*
 289     * NOTE: We forbid zero-length memory backend, so here zero means
 290     * "we haven't inited the backend memory region yet".
 291     */
 292    return memory_region_size(&backend->mr) != 0;
 293}
 294
 295MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 296{
 297    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 298}
 299
 300void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 301{
 302    backend->is_mapped = mapped;
 303}
 304
 305bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 306{
 307    return backend->is_mapped;
 308}
 309
 310size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 311{
 312    size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
 313    g_assert(pagesize >= qemu_real_host_page_size());
 314    return pagesize;
 315}
 316
 317static void
 318host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 319{
 320    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 321    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 322    Error *local_err = NULL;
 323    void *ptr;
 324    uint64_t sz;
 325
 326    if (bc->alloc) {
 327        bc->alloc(backend, &local_err);
 328        if (local_err) {
 329            goto out;
 330        }
 331
 332        ptr = memory_region_get_ram_ptr(&backend->mr);
 333        sz = memory_region_size(&backend->mr);
 334
 335        if (backend->merge) {
 336            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 337        }
 338        if (!backend->dump) {
 339            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 340        }
 341#ifdef CONFIG_NUMA
 342        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 343        /* lastbit == MAX_NODES means maxnode = 0 */
 344        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 345        /* ensure policy won't be ignored in case memory is preallocated
 346         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 347         * this doesn't catch hugepage case. */
 348        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 349
 350        /* check for invalid host-nodes and policies and give more verbose
 351         * error messages than mbind(). */
 352        if (maxnode && backend->policy == MPOL_DEFAULT) {
 353            error_setg(errp, "host-nodes must be empty for policy default,"
 354                       " or you should explicitly specify a policy other"
 355                       " than default");
 356            return;
 357        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 358            error_setg(errp, "host-nodes must be set for policy %s",
 359                       HostMemPolicy_str(backend->policy));
 360            return;
 361        }
 362
 363        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 364         * as argument to mbind() due to an old Linux bug (feature?) which
 365         * cuts off the last specified node. This means backend->host_nodes
 366         * must have MAX_NODES+1 bits available.
 367         */
 368        assert(sizeof(backend->host_nodes) >=
 369               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 370        assert(maxnode <= MAX_NODES);
 371
 372        if (maxnode &&
 373            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
 374                  flags)) {
 375            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 376                error_setg_errno(errp, errno,
 377                                 "cannot bind memory to host NUMA nodes");
 378                return;
 379            }
 380        }
 381#endif
 382        /* Preallocate memory after the NUMA policy has been instantiated.
 383         * This is necessary to guarantee memory is allocated with
 384         * specified NUMA policy in place.
 385         */
 386        if (backend->prealloc) {
 387            qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
 388                              backend->prealloc_threads,
 389                              backend->prealloc_context, &local_err);
 390            if (local_err) {
 391                goto out;
 392            }
 393        }
 394    }
 395out:
 396    error_propagate(errp, local_err);
 397}
 398
 399static bool
 400host_memory_backend_can_be_deleted(UserCreatable *uc)
 401{
 402    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 403        return false;
 404    } else {
 405        return true;
 406    }
 407}
 408
 409static bool host_memory_backend_get_share(Object *o, Error **errp)
 410{
 411    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 412
 413    return backend->share;
 414}
 415
 416static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 417{
 418    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 419
 420    if (host_memory_backend_mr_inited(backend)) {
 421        error_setg(errp, "cannot change property value");
 422        return;
 423    }
 424    backend->share = value;
 425}
 426
 427#ifdef CONFIG_LINUX
 428static bool host_memory_backend_get_reserve(Object *o, Error **errp)
 429{
 430    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 431
 432    return backend->reserve;
 433}
 434
 435static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
 436{
 437    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 438
 439    if (host_memory_backend_mr_inited(backend)) {
 440        error_setg(errp, "cannot change property value");
 441        return;
 442    }
 443    if (backend->prealloc && !value) {
 444        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 445        return;
 446    }
 447    backend->reserve = value;
 448}
 449#endif /* CONFIG_LINUX */
 450
 451static bool
 452host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 453{
 454    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 455
 456    return backend->use_canonical_path;
 457}
 458
 459static void
 460host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 461                                           Error **errp)
 462{
 463    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 464
 465    backend->use_canonical_path = value;
 466}
 467
 468static void
 469host_memory_backend_class_init(ObjectClass *oc, void *data)
 470{
 471    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 472
 473    ucc->complete = host_memory_backend_memory_complete;
 474    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 475
 476    object_class_property_add_bool(oc, "merge",
 477        host_memory_backend_get_merge,
 478        host_memory_backend_set_merge);
 479    object_class_property_set_description(oc, "merge",
 480        "Mark memory as mergeable");
 481    object_class_property_add_bool(oc, "dump",
 482        host_memory_backend_get_dump,
 483        host_memory_backend_set_dump);
 484    object_class_property_set_description(oc, "dump",
 485        "Set to 'off' to exclude from core dump");
 486    object_class_property_add_bool(oc, "prealloc",
 487        host_memory_backend_get_prealloc,
 488        host_memory_backend_set_prealloc);
 489    object_class_property_set_description(oc, "prealloc",
 490        "Preallocate memory");
 491    object_class_property_add(oc, "prealloc-threads", "int",
 492        host_memory_backend_get_prealloc_threads,
 493        host_memory_backend_set_prealloc_threads,
 494        NULL, NULL);
 495    object_class_property_set_description(oc, "prealloc-threads",
 496        "Number of CPU threads to use for prealloc");
 497    object_class_property_add_link(oc, "prealloc-context",
 498        TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
 499        object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
 500    object_class_property_set_description(oc, "prealloc-context",
 501        "Context to use for creating CPU threads for preallocation");
 502    object_class_property_add(oc, "size", "int",
 503        host_memory_backend_get_size,
 504        host_memory_backend_set_size,
 505        NULL, NULL);
 506    object_class_property_set_description(oc, "size",
 507        "Size of the memory region (ex: 500M)");
 508    object_class_property_add(oc, "host-nodes", "int",
 509        host_memory_backend_get_host_nodes,
 510        host_memory_backend_set_host_nodes,
 511        NULL, NULL);
 512    object_class_property_set_description(oc, "host-nodes",
 513        "Binds memory to the list of NUMA host nodes");
 514    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 515        &HostMemPolicy_lookup,
 516        host_memory_backend_get_policy,
 517        host_memory_backend_set_policy);
 518    object_class_property_set_description(oc, "policy",
 519        "Set the NUMA policy");
 520    object_class_property_add_bool(oc, "share",
 521        host_memory_backend_get_share, host_memory_backend_set_share);
 522    object_class_property_set_description(oc, "share",
 523        "Mark the memory as private to QEMU or shared");
 524#ifdef CONFIG_LINUX
 525    object_class_property_add_bool(oc, "reserve",
 526        host_memory_backend_get_reserve, host_memory_backend_set_reserve);
 527    object_class_property_set_description(oc, "reserve",
 528        "Reserve swap space (or huge pages) if applicable");
 529#endif /* CONFIG_LINUX */
 530    /*
 531     * Do not delete/rename option. This option must be considered stable
 532     * (as if it didn't have the 'x-' prefix including deprecation period) as
 533     * long as 4.0 and older machine types exists.
 534     * Option will be used by upper layers to override (disable) canonical path
 535     * for ramblock-id set by compat properties on old machine types ( <= 4.0),
 536     * to keep migration working when backend is used for main RAM with
 537     * -machine memory-backend= option (main RAM historically used prefix-less
 538     * ramblock-id).
 539     */
 540    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 541        host_memory_backend_get_use_canonical_path,
 542        host_memory_backend_set_use_canonical_path);
 543}
 544
 545static const TypeInfo host_memory_backend_info = {
 546    .name = TYPE_MEMORY_BACKEND,
 547    .parent = TYPE_OBJECT,
 548    .abstract = true,
 549    .class_size = sizeof(HostMemoryBackendClass),
 550    .class_init = host_memory_backend_class_init,
 551    .instance_size = sizeof(HostMemoryBackend),
 552    .instance_init = host_memory_backend_init,
 553    .instance_post_init = host_memory_backend_post_init,
 554    .interfaces = (InterfaceInfo[]) {
 555        { TYPE_USER_CREATABLE },
 556        { }
 557    }
 558};
 559
 560static void register_types(void)
 561{
 562    type_register_static(&host_memory_backend_info);
 563}
 564
 565type_init(register_types);
 566