qemu/backends/hostmem.c
/*
 * QEMU Host Memory Backend
 *
 * Copyright (C) 2013-2014 Red Hat Inc
 *
 * Authors:
 *   Igor Mammedov <imammedo@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/visitor.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
#include "qemu/madvise.h"

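/*
 * With NUMA support compiled in, the HostMemPolicy QAPI enum is expected to
 * match the MPOL_* constants from <numaif.h> one to one, so the policy value
 * can be handed straight to mbind(2) in host_memory_backend_memory_complete().
 * The build-time asserts below enforce that mapping.
 */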
#ifdef CONFIG_NUMA
#include <numaif.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif

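/*
 * Name used for the backing RAMBlock: either the full canonical QOM path or
 * just the final path component (the object id), depending on the
 * "x-use-canonical-path-for-ramblock-id" property registered below.
 */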
char *
host_memory_backend_get_name(HostMemoryBackend *backend)
{
    if (!backend->use_canonical_path) {
        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
    }

    return object_get_canonical_path(OBJECT(backend));
}

static void
host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value = backend->size;

    visit_type_size(v, name, &value, errp);
}

static void
host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value;

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property '%s' of %s", name,
                   object_get_typename(obj));
        return;
    }

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }
    if (!value) {
        error_setg(errp,
                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
                   name, object_get_typename(obj), value);
        return;
    }
    backend->size = value;
}

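/*
 * "host-nodes" is stored as a bitmap internally; the getter converts it into
 * a uint16List of node numbers so it can be visited as a regular QAPI list.
 */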
static void
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *host_nodes = NULL;
    uint16List **tail = &host_nodes;
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
    if (value == MAX_NODES) {
        goto ret;
    }

    QAPI_LIST_APPEND(tail, value);

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
        if (value == MAX_NODES) {
            break;
        }

        QAPI_LIST_APPEND(tail, value);
    } while (true);

ret:
    visit_type_uint16List(v, name, &host_nodes, errp);
    qapi_free_uint16List(host_nodes);
}

static void
host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
#ifdef CONFIG_NUMA
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *l, *host_nodes = NULL;

    if (!visit_type_uint16List(v, name, &host_nodes, errp)) {
        return;
    }

    for (l = host_nodes; l; l = l->next) {
        if (l->value >= MAX_NODES) {
            error_setg(errp, "Invalid host-nodes value: %d", l->value);
            goto out;
        }
    }

    for (l = host_nodes; l; l = l->next) {
        bitmap_set(backend->host_nodes, l->value, 1);
    }

out:
    qapi_free_uint16List(host_nodes);
#else
    error_setg(errp, "NUMA node binding is not supported by this QEMU");
#endif
}

static int
host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    return backend->policy;
}

static void
host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    backend->policy = policy;

#ifndef CONFIG_NUMA
    if (policy != HOST_MEM_POLICY_DEFAULT) {
        error_setg(errp, "NUMA policies are not supported by this QEMU");
    }
#endif
}

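/*
 * "merge" maps to madvise(MADV_MERGEABLE), i.e. it makes the backing pages
 * candidates for kernel samepage merging (KSM); the hint is applied
 * immediately if the memory region already exists, otherwise it is recorded
 * and applied at completion time.
 */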
static bool host_memory_backend_get_merge(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->merge;
}

static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }

    if (value != backend->merge) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
        backend->merge = value;
    }
}

static bool host_memory_backend_get_dump(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->dump;
}

static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }

    if (value != backend->dump) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
        backend->dump = value;
    }
}

static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->prealloc;
}

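/*
 * Turning "prealloc" on after the memory region has been initialized
 * allocates the pages right away via os_mem_prealloc(); before that the flag
 * is only recorded and acted upon in host_memory_backend_memory_complete().
 * prealloc=on is rejected in combination with reserve=off.
 */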
static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
{
    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!backend->reserve && value) {
        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
        return;
    }

    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }

    if (value && !backend->prealloc) {
        int fd = memory_region_get_fd(&backend->mr);
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        backend->prealloc = true;
    }
}

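/*
 * "prealloc-threads" is the number of threads os_mem_prealloc() may use to
 * touch the pages; it must be at least 1 and defaults to the number of vCPUs
 * (see host_memory_backend_init() below).
 */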
static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
    const char *name, void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
}

static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
    const char *name, void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint32_t value;

    if (!visit_type_uint32(v, name, &value, errp)) {
        return;
    }
    if (value == 0) {
        error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
                   object_get_typename(obj), value);
        return;
    }
    backend->prealloc_threads = value;
}

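/*
 * Instance defaults come from the machine-wide options (mem-merge,
 * dump-guest-core) and the configured vCPU count; each can be overridden per
 * backend via the properties registered in host_memory_backend_class_init().
 */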
static void host_memory_backend_init(Object *obj)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    MachineState *machine = MACHINE(qdev_get_machine());

    /* TODO: convert access to globals to compat properties */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->reserve = true;
    backend->prealloc_threads = machine->smp.cpus;
}

static void host_memory_backend_post_init(Object *obj)
{
    object_apply_compat_props(obj);
}

bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
{
    /*
     * NOTE: We forbid zero-length memory backends, so here zero means
     * "we haven't inited the backend memory region yet".
     */
    return memory_region_size(&backend->mr) != 0;
}

MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
{
    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
}

void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    backend->is_mapped = mapped;
}

bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    return backend->is_mapped;
}

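/*
 * On Linux the backing page size depends on the backend's "mem-path" (e.g. a
 * hugetlbfs mount); on other hosts the real host page size is used.
 */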
#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    Object *obj = OBJECT(memdev);
    char *path = object_property_get_str(obj, "mem-path", NULL);
    size_t pagesize = qemu_mempath_getpagesize(path);

    g_free(path);
    return pagesize;
}
#else
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    return qemu_real_host_page_size();
}
#endif

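/*
 * UserCreatable completion hook: the concrete backend's alloc() method maps
 * the memory, then the madvise hints (merge/dump), the NUMA policy (mbind)
 * and finally preallocation are applied, in that order, so that preallocated
 * pages land on the requested host nodes.
 */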
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);

        if (maxnode &&
            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
                  flags)) {
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            backend->prealloc_threads, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}

static bool
host_memory_backend_can_be_deleted(UserCreatable *uc)
{
    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
        return false;
    } else {
        return true;
    }
}

static bool host_memory_backend_get_share(Object *o, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    return backend->share;
}

static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    backend->share = value;
}

#ifdef CONFIG_LINUX
static bool host_memory_backend_get_reserve(Object *o, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    return backend->reserve;
}

static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    if (backend->prealloc && !value) {
        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
        return;
    }
    backend->reserve = value;
}
#endif /* CONFIG_LINUX */

static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->use_canonical_path;
}

static void
host_memory_backend_set_use_canonical_path(Object *obj, bool value,
                                           Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    backend->use_canonical_path = value;
}

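/*
 * Properties shared by all host memory backends.  Concrete types such as
 * memory-backend-ram or memory-backend-file expose them on the command line,
 * for example (illustrative only):
 *
 *   -object memory-backend-ram,id=mem0,size=4G,host-nodes=0,policy=bind,prealloc=on
 */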
static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable");
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump");
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory");
    object_class_property_add(oc, "prealloc-threads", "int",
        host_memory_backend_get_prealloc_threads,
        host_memory_backend_set_prealloc_threads,
        NULL, NULL);
    object_class_property_set_description(oc, "prealloc-threads",
        "Number of CPU threads to use for prealloc");
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)");
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes");
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy");
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared");
#ifdef CONFIG_LINUX
    object_class_property_add_bool(oc, "reserve",
        host_memory_backend_get_reserve, host_memory_backend_set_reserve);
    object_class_property_set_description(oc, "reserve",
        "Reserve swap space (or huge pages) if applicable");
#endif /* CONFIG_LINUX */
    /*
     * Do not delete or rename this option. It must be considered stable
     * (as if it did not have the 'x-' prefix, including the deprecation
     * period) for as long as 4.0 and older machine types exist.
     * Upper layers use it to override (disable) the canonical path for the
     * ramblock-id, as set by compat properties on old machine types (<= 4.0),
     * to keep migration working when the backend is used for main RAM with
     * the -machine memory-backend= option (main RAM historically used a
     * prefix-less ramblock-id).
     */
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path);
}

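/*
 * Abstract base type: usable backends (e.g. memory-backend-ram,
 * memory-backend-file, memory-backend-memfd) subclass it and provide the
 * HostMemoryBackendClass::alloc implementation invoked from
 * host_memory_backend_memory_complete() above.
 */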
static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);