qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "hw/boards.h"
  16#include "qapi/error.h"
  17#include "qapi/qapi-builtin-visit.h"
  18#include "qapi/visitor.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21#include "qemu/mmap-alloc.h"
  22#include "qemu/madvise.h"
  23
  24#ifdef CONFIG_NUMA
  25#include <numaif.h>
  26#include <numa.h>
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  28/*
  29 * HOST_MEM_POLICY_PREFERRED may either translate to MPOL_PREFERRED or
  30 * MPOL_PREFERRED_MANY, see comments further below.
  31 */
  32QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  33QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  34QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  35#endif
  36
  37char *
  38host_memory_backend_get_name(HostMemoryBackend *backend)
  39{
  40    if (!backend->use_canonical_path) {
  41        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
  42    }
  43
  44    return object_get_canonical_path(OBJECT(backend));
  45}
  46
  47static void
  48host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  49                             void *opaque, Error **errp)
  50{
  51    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  52    uint64_t value = backend->size;
  53
  54    visit_type_size(v, name, &value, errp);
  55}
  56
  57static void
  58host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  59                             void *opaque, Error **errp)
  60{
  61    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  62    uint64_t value;
  63
  64    if (host_memory_backend_mr_inited(backend)) {
  65        error_setg(errp, "cannot change property %s of %s ", name,
  66                   object_get_typename(obj));
  67        return;
  68    }
  69
  70    if (!visit_type_size(v, name, &value, errp)) {
  71        return;
  72    }
  73    if (!value) {
  74        error_setg(errp,
  75                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  76                   name, object_get_typename(obj), value);
  77        return;
  78    }
  79    backend->size = value;
  80}
  81
  82static void
  83host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  84                                   void *opaque, Error **errp)
  85{
  86    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  87    uint16List *host_nodes = NULL;
  88    uint16List **tail = &host_nodes;
  89    unsigned long value;
  90
  91    value = find_first_bit(backend->host_nodes, MAX_NODES);
  92    if (value == MAX_NODES) {
  93        goto ret;
  94    }
  95
  96    QAPI_LIST_APPEND(tail, value);
  97
  98    do {
  99        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
 100        if (value == MAX_NODES) {
 101            break;
 102        }
 103
 104        QAPI_LIST_APPEND(tail, value);
 105    } while (true);
 106
 107ret:
 108    visit_type_uint16List(v, name, &host_nodes, errp);
 109    qapi_free_uint16List(host_nodes);
 110}
 111
 112static void
 113host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 114                                   void *opaque, Error **errp)
 115{
 116#ifdef CONFIG_NUMA
 117    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 118    uint16List *l, *host_nodes = NULL;
 119
 120    visit_type_uint16List(v, name, &host_nodes, errp);
 121
 122    for (l = host_nodes; l; l = l->next) {
 123        if (l->value >= MAX_NODES) {
 124            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 125            goto out;
 126        }
 127    }
 128
 129    for (l = host_nodes; l; l = l->next) {
 130        bitmap_set(backend->host_nodes, l->value, 1);
 131    }
 132
 133out:
 134    qapi_free_uint16List(host_nodes);
 135#else
 136    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 137#endif
 138}
 139
 140static int
 141host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 142{
 143    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 144    return backend->policy;
 145}
 146
 147static void
 148host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 149{
 150    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 151    backend->policy = policy;
 152
 153#ifndef CONFIG_NUMA
 154    if (policy != HOST_MEM_POLICY_DEFAULT) {
 155        error_setg(errp, "NUMA policies are not supported by this QEMU");
 156    }
 157#endif
 158}
 159
 160static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 161{
 162    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 163
 164    return backend->merge;
 165}
 166
 167static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 168{
 169    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 170
 171    if (!host_memory_backend_mr_inited(backend)) {
 172        backend->merge = value;
 173        return;
 174    }
 175
 176    if (value != backend->merge) {
 177        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 178        uint64_t sz = memory_region_size(&backend->mr);
 179
 180        qemu_madvise(ptr, sz,
 181                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 182        backend->merge = value;
 183    }
 184}
 185
 186static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 187{
 188    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 189
 190    return backend->dump;
 191}
 192
 193static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 194{
 195    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 196
 197    if (!host_memory_backend_mr_inited(backend)) {
 198        backend->dump = value;
 199        return;
 200    }
 201
 202    if (value != backend->dump) {
 203        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 204        uint64_t sz = memory_region_size(&backend->mr);
 205
 206        qemu_madvise(ptr, sz,
 207                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 208        backend->dump = value;
 209    }
 210}
 211
 212static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 213{
 214    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 215
 216    return backend->prealloc;
 217}
 218
 219static void host_memory_backend_set_prealloc(Object *obj, bool value,
 220                                             Error **errp)
 221{
 222    Error *local_err = NULL;
 223    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 224
 225    if (!backend->reserve && value) {
 226        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 227        return;
 228    }
 229
 230    if (!host_memory_backend_mr_inited(backend)) {
 231        backend->prealloc = value;
 232        return;
 233    }
 234
 235    if (value && !backend->prealloc) {
 236        int fd = memory_region_get_fd(&backend->mr);
 237        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 238        uint64_t sz = memory_region_size(&backend->mr);
 239
 240        qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
 241                          backend->prealloc_context, &local_err);
 242        if (local_err) {
 243            error_propagate(errp, local_err);
 244            return;
 245        }
 246        backend->prealloc = true;
 247    }
 248}
 249
 250static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
 251    const char *name, void *opaque, Error **errp)
 252{
 253    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 254    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
 255}
 256
 257static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
 258    const char *name, void *opaque, Error **errp)
 259{
 260    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 261    uint32_t value;
 262
 263    if (!visit_type_uint32(v, name, &value, errp)) {
 264        return;
 265    }
 266    if (value <= 0) {
 267        error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
 268                   object_get_typename(obj), value);
 269        return;
 270    }
 271    backend->prealloc_threads = value;
 272}
 273
 274static void host_memory_backend_init(Object *obj)
 275{
 276    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 277    MachineState *machine = MACHINE(qdev_get_machine());
 278
 279    /* TODO: convert access to globals to compat properties */
 280    backend->merge = machine_mem_merge(machine);
 281    backend->dump = machine_dump_guest_core(machine);
 282    backend->reserve = true;
 283    backend->prealloc_threads = machine->smp.cpus;
 284}
 285
 286static void host_memory_backend_post_init(Object *obj)
 287{
 288    object_apply_compat_props(obj);
 289}
 290
 291bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 292{
 293    /*
 294     * NOTE: We forbid zero-length memory backend, so here zero means
 295     * "we haven't inited the backend memory region yet".
 296     */
 297    return memory_region_size(&backend->mr) != 0;
 298}
 299
 300MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 301{
 302    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 303}
 304
 305void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 306{
 307    backend->is_mapped = mapped;
 308}
 309
 310bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 311{
 312    return backend->is_mapped;
 313}
 314
 315size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 316{
 317    size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
 318    g_assert(pagesize >= qemu_real_host_page_size());
 319    return pagesize;
 320}
 321
 322static void
 323host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 324{
 325    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 326    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 327    Error *local_err = NULL;
 328    void *ptr;
 329    uint64_t sz;
 330
 331    if (bc->alloc) {
 332        bc->alloc(backend, &local_err);
 333        if (local_err) {
 334            goto out;
 335        }
 336
 337        ptr = memory_region_get_ram_ptr(&backend->mr);
 338        sz = memory_region_size(&backend->mr);
 339
 340        if (backend->merge) {
 341            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 342        }
 343        if (!backend->dump) {
 344            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 345        }
 346#ifdef CONFIG_NUMA
 347        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 348        /* lastbit == MAX_NODES means maxnode = 0 */
 349        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 350        /* ensure policy won't be ignored in case memory is preallocated
 351         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 352         * this doesn't catch hugepage case. */
 353        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 354        int mode = backend->policy;
 355
 356        /* check for invalid host-nodes and policies and give more verbose
 357         * error messages than mbind(). */
 358        if (maxnode && backend->policy == MPOL_DEFAULT) {
 359            error_setg(errp, "host-nodes must be empty for policy default,"
 360                       " or you should explicitly specify a policy other"
 361                       " than default");
 362            return;
 363        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 364            error_setg(errp, "host-nodes must be set for policy %s",
 365                       HostMemPolicy_str(backend->policy));
 366            return;
 367        }
 368
 369        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 370         * as argument to mbind() due to an old Linux bug (feature?) which
 371         * cuts off the last specified node. This means backend->host_nodes
 372         * must have MAX_NODES+1 bits available.
 373         */
 374        assert(sizeof(backend->host_nodes) >=
 375               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 376        assert(maxnode <= MAX_NODES);
 377
 378#ifdef HAVE_NUMA_HAS_PREFERRED_MANY
 379        if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
 380            /*
 381             * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
 382             * silently picks the first node.
 383             */
 384            mode = MPOL_PREFERRED_MANY;
 385        }
 386#endif
 387
 388        if (maxnode &&
 389            mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
 390            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 391                error_setg_errno(errp, errno,
 392                                 "cannot bind memory to host NUMA nodes");
 393                return;
 394            }
 395        }
 396#endif
 397        /* Preallocate memory after the NUMA policy has been instantiated.
 398         * This is necessary to guarantee memory is allocated with
 399         * specified NUMA policy in place.
 400         */
 401        if (backend->prealloc) {
 402            qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
 403                              backend->prealloc_threads,
 404                              backend->prealloc_context, &local_err);
 405            if (local_err) {
 406                goto out;
 407            }
 408        }
 409    }
 410out:
 411    error_propagate(errp, local_err);
 412}
 413
 414static bool
 415host_memory_backend_can_be_deleted(UserCreatable *uc)
 416{
 417    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 418        return false;
 419    } else {
 420        return true;
 421    }
 422}
 423
 424static bool host_memory_backend_get_share(Object *o, Error **errp)
 425{
 426    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 427
 428    return backend->share;
 429}
 430
 431static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 432{
 433    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 434
 435    if (host_memory_backend_mr_inited(backend)) {
 436        error_setg(errp, "cannot change property value");
 437        return;
 438    }
 439    backend->share = value;
 440}
 441
 442#ifdef CONFIG_LINUX
 443static bool host_memory_backend_get_reserve(Object *o, Error **errp)
 444{
 445    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 446
 447    return backend->reserve;
 448}
 449
 450static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
 451{
 452    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 453
 454    if (host_memory_backend_mr_inited(backend)) {
 455        error_setg(errp, "cannot change property value");
 456        return;
 457    }
 458    if (backend->prealloc && !value) {
 459        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 460        return;
 461    }
 462    backend->reserve = value;
 463}
 464#endif /* CONFIG_LINUX */
 465
 466static bool
 467host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 468{
 469    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 470
 471    return backend->use_canonical_path;
 472}
 473
 474static void
 475host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 476                                           Error **errp)
 477{
 478    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 479
 480    backend->use_canonical_path = value;
 481}
 482
 483static void
 484host_memory_backend_class_init(ObjectClass *oc, void *data)
 485{
 486    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 487
 488    ucc->complete = host_memory_backend_memory_complete;
 489    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 490
 491    object_class_property_add_bool(oc, "merge",
 492        host_memory_backend_get_merge,
 493        host_memory_backend_set_merge);
 494    object_class_property_set_description(oc, "merge",
 495        "Mark memory as mergeable");
 496    object_class_property_add_bool(oc, "dump",
 497        host_memory_backend_get_dump,
 498        host_memory_backend_set_dump);
 499    object_class_property_set_description(oc, "dump",
 500        "Set to 'off' to exclude from core dump");
 501    object_class_property_add_bool(oc, "prealloc",
 502        host_memory_backend_get_prealloc,
 503        host_memory_backend_set_prealloc);
 504    object_class_property_set_description(oc, "prealloc",
 505        "Preallocate memory");
 506    object_class_property_add(oc, "prealloc-threads", "int",
 507        host_memory_backend_get_prealloc_threads,
 508        host_memory_backend_set_prealloc_threads,
 509        NULL, NULL);
 510    object_class_property_set_description(oc, "prealloc-threads",
 511        "Number of CPU threads to use for prealloc");
 512    object_class_property_add_link(oc, "prealloc-context",
 513        TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
 514        object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
 515    object_class_property_set_description(oc, "prealloc-context",
 516        "Context to use for creating CPU threads for preallocation");
 517    object_class_property_add(oc, "size", "int",
 518        host_memory_backend_get_size,
 519        host_memory_backend_set_size,
 520        NULL, NULL);
 521    object_class_property_set_description(oc, "size",
 522        "Size of the memory region (ex: 500M)");
 523    object_class_property_add(oc, "host-nodes", "int",
 524        host_memory_backend_get_host_nodes,
 525        host_memory_backend_set_host_nodes,
 526        NULL, NULL);
 527    object_class_property_set_description(oc, "host-nodes",
 528        "Binds memory to the list of NUMA host nodes");
 529    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 530        &HostMemPolicy_lookup,
 531        host_memory_backend_get_policy,
 532        host_memory_backend_set_policy);
 533    object_class_property_set_description(oc, "policy",
 534        "Set the NUMA policy");
 535    object_class_property_add_bool(oc, "share",
 536        host_memory_backend_get_share, host_memory_backend_set_share);
 537    object_class_property_set_description(oc, "share",
 538        "Mark the memory as private to QEMU or shared");
 539#ifdef CONFIG_LINUX
 540    object_class_property_add_bool(oc, "reserve",
 541        host_memory_backend_get_reserve, host_memory_backend_set_reserve);
 542    object_class_property_set_description(oc, "reserve",
 543        "Reserve swap space (or huge pages) if applicable");
 544#endif /* CONFIG_LINUX */
 545    /*
 546     * Do not delete/rename option. This option must be considered stable
 547     * (as if it didn't have the 'x-' prefix including deprecation period) as
 548     * long as 4.0 and older machine types exists.
 549     * Option will be used by upper layers to override (disable) canonical path
 550     * for ramblock-id set by compat properties on old machine types ( <= 4.0),
 551     * to keep migration working when backend is used for main RAM with
 552     * -machine memory-backend= option (main RAM historically used prefix-less
 553     * ramblock-id).
 554     */
 555    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 556        host_memory_backend_get_use_canonical_path,
 557        host_memory_backend_set_use_canonical_path);
 558}
 559
 560static const TypeInfo host_memory_backend_info = {
 561    .name = TYPE_MEMORY_BACKEND,
 562    .parent = TYPE_OBJECT,
 563    .abstract = true,
 564    .class_size = sizeof(HostMemoryBackendClass),
 565    .class_init = host_memory_backend_class_init,
 566    .instance_size = sizeof(HostMemoryBackend),
 567    .instance_init = host_memory_backend_init,
 568    .instance_post_init = host_memory_backend_post_init,
 569    .interfaces = (InterfaceInfo[]) {
 570        { TYPE_USER_CREATABLE },
 571        { }
 572    }
 573};
 574
 575static void register_types(void)
 576{
 577    type_register_static(&host_memory_backend_info);
 578}
 579
 580type_init(register_types);
 581