qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "sysemu/sysemu.h"
  16#include "hw/boards.h"
  17#include "qapi/error.h"
  18#include "qapi/qapi-builtin-visit.h"
  19#include "qapi/visitor.h"
  20#include "qemu/config-file.h"
  21#include "qom/object_interfaces.h"
  22#include "qemu/mmap-alloc.h"
  23
  24#ifdef CONFIG_NUMA
  25#include <numaif.h>
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  29QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  30#endif
  31
  32char *
  33host_memory_backend_get_name(HostMemoryBackend *backend)
  34{
  35    if (!backend->use_canonical_path) {
  36        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
  37    }
  38
  39    return object_get_canonical_path(OBJECT(backend));
  40}
  41
  42static void
  43host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  44                             void *opaque, Error **errp)
  45{
  46    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  47    uint64_t value = backend->size;
  48
  49    visit_type_size(v, name, &value, errp);
  50}
  51
  52static void
  53host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  54                             void *opaque, Error **errp)
  55{
  56    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  57    uint64_t value;
  58
  59    if (host_memory_backend_mr_inited(backend)) {
  60        error_setg(errp, "cannot change property %s of %s ", name,
  61                   object_get_typename(obj));
  62        return;
  63    }
  64
  65    if (!visit_type_size(v, name, &value, errp)) {
  66        return;
  67    }
  68    if (!value) {
  69        error_setg(errp,
  70                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  71                   name, object_get_typename(obj), value);
  72        return;
  73    }
  74    backend->size = value;
  75}
  76
  77static void
  78host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  79                                   void *opaque, Error **errp)
  80{
  81    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  82    uint16List *host_nodes = NULL;
  83    uint16List **tail = &host_nodes;
  84    unsigned long value;
  85
  86    value = find_first_bit(backend->host_nodes, MAX_NODES);
  87    if (value == MAX_NODES) {
  88        goto ret;
  89    }
  90
  91    QAPI_LIST_APPEND(tail, value);
  92
  93    do {
  94        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  95        if (value == MAX_NODES) {
  96            break;
  97        }
  98
  99        QAPI_LIST_APPEND(tail, value);
 100    } while (true);
 101
 102ret:
 103    visit_type_uint16List(v, name, &host_nodes, errp);
 104    qapi_free_uint16List(host_nodes);
 105}
 106
 107static void
 108host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 109                                   void *opaque, Error **errp)
 110{
 111#ifdef CONFIG_NUMA
 112    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 113    uint16List *l, *host_nodes = NULL;
 114
 115    visit_type_uint16List(v, name, &host_nodes, errp);
 116
 117    for (l = host_nodes; l; l = l->next) {
 118        if (l->value >= MAX_NODES) {
 119            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 120            goto out;
 121        }
 122    }
 123
 124    for (l = host_nodes; l; l = l->next) {
 125        bitmap_set(backend->host_nodes, l->value, 1);
 126    }
 127
 128out:
 129    qapi_free_uint16List(host_nodes);
 130#else
 131    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 132#endif
 133}
 134
 135static int
 136host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 137{
 138    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 139    return backend->policy;
 140}
 141
 142static void
 143host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 144{
 145    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 146    backend->policy = policy;
 147
 148#ifndef CONFIG_NUMA
 149    if (policy != HOST_MEM_POLICY_DEFAULT) {
 150        error_setg(errp, "NUMA policies are not supported by this QEMU");
 151    }
 152#endif
 153}
 154
 155static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 156{
 157    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 158
 159    return backend->merge;
 160}
 161
 162static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 163{
 164    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 165
 166    if (!host_memory_backend_mr_inited(backend)) {
 167        backend->merge = value;
 168        return;
 169    }
 170
 171    if (value != backend->merge) {
 172        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 173        uint64_t sz = memory_region_size(&backend->mr);
 174
 175        qemu_madvise(ptr, sz,
 176                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 177        backend->merge = value;
 178    }
 179}
 180
 181static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 182{
 183    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 184
 185    return backend->dump;
 186}
 187
 188static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 189{
 190    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 191
 192    if (!host_memory_backend_mr_inited(backend)) {
 193        backend->dump = value;
 194        return;
 195    }
 196
 197    if (value != backend->dump) {
 198        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 199        uint64_t sz = memory_region_size(&backend->mr);
 200
 201        qemu_madvise(ptr, sz,
 202                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 203        backend->dump = value;
 204    }
 205}
 206
 207static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 208{
 209    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 210
 211    return backend->prealloc;
 212}
 213
 214static void host_memory_backend_set_prealloc(Object *obj, bool value,
 215                                             Error **errp)
 216{
 217    Error *local_err = NULL;
 218    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 219
 220    if (!host_memory_backend_mr_inited(backend)) {
 221        backend->prealloc = value;
 222        return;
 223    }
 224
 225    if (value && !backend->prealloc) {
 226        int fd = memory_region_get_fd(&backend->mr);
 227        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 228        uint64_t sz = memory_region_size(&backend->mr);
 229
 230        os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
 231        if (local_err) {
 232            error_propagate(errp, local_err);
 233            return;
 234        }
 235        backend->prealloc = true;
 236    }
 237}
 238
 239static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
 240    const char *name, void *opaque, Error **errp)
 241{
 242    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 243    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
 244}
 245
 246static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
 247    const char *name, void *opaque, Error **errp)
 248{
 249    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 250    uint32_t value;
 251
 252    if (!visit_type_uint32(v, name, &value, errp)) {
 253        return;
 254    }
 255    if (value <= 0) {
 256        error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
 257                   object_get_typename(obj), value);
 258        return;
 259    }
 260    backend->prealloc_threads = value;
 261}
 262
 263static void host_memory_backend_init(Object *obj)
 264{
 265    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 266    MachineState *machine = MACHINE(qdev_get_machine());
 267
 268    /* TODO: convert access to globals to compat properties */
 269    backend->merge = machine_mem_merge(machine);
 270    backend->dump = machine_dump_guest_core(machine);
 271    backend->prealloc_threads = 1;
 272}
 273
 274static void host_memory_backend_post_init(Object *obj)
 275{
 276    object_apply_compat_props(obj);
 277}
 278
 279bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 280{
 281    /*
 282     * NOTE: We forbid zero-length memory backend, so here zero means
 283     * "we haven't inited the backend memory region yet".
 284     */
 285    return memory_region_size(&backend->mr) != 0;
 286}
 287
 288MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 289{
 290    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 291}
 292
 293void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 294{
 295    backend->is_mapped = mapped;
 296}
 297
 298bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 299{
 300    return backend->is_mapped;
 301}
 302
 303#ifdef __linux__
 304size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 305{
 306    Object *obj = OBJECT(memdev);
 307    char *path = object_property_get_str(obj, "mem-path", NULL);
 308    size_t pagesize = qemu_mempath_getpagesize(path);
 309
 310    g_free(path);
 311    return pagesize;
 312}
 313#else
 314size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 315{
 316    return qemu_real_host_page_size;
 317}
 318#endif
 319
 320static void
 321host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 322{
 323    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 324    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 325    Error *local_err = NULL;
 326    void *ptr;
 327    uint64_t sz;
 328
 329    if (bc->alloc) {
 330        bc->alloc(backend, &local_err);
 331        if (local_err) {
 332            goto out;
 333        }
 334
 335        ptr = memory_region_get_ram_ptr(&backend->mr);
 336        sz = memory_region_size(&backend->mr);
 337
 338        if (backend->merge) {
 339            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 340        }
 341        if (!backend->dump) {
 342            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 343        }
 344#ifdef CONFIG_NUMA
 345        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 346        /* lastbit == MAX_NODES means maxnode = 0 */
 347        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 348        /* ensure policy won't be ignored in case memory is preallocated
 349         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 350         * this doesn't catch hugepage case. */
 351        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 352
 353        /* check for invalid host-nodes and policies and give more verbose
 354         * error messages than mbind(). */
 355        if (maxnode && backend->policy == MPOL_DEFAULT) {
 356            error_setg(errp, "host-nodes must be empty for policy default,"
 357                       " or you should explicitly specify a policy other"
 358                       " than default");
 359            return;
 360        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 361            error_setg(errp, "host-nodes must be set for policy %s",
 362                       HostMemPolicy_str(backend->policy));
 363            return;
 364        }
 365
 366        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 367         * as argument to mbind() due to an old Linux bug (feature?) which
 368         * cuts off the last specified node. This means backend->host_nodes
 369         * must have MAX_NODES+1 bits available.
 370         */
 371        assert(sizeof(backend->host_nodes) >=
 372               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 373        assert(maxnode <= MAX_NODES);
 374
 375        if (maxnode &&
 376            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
 377                  flags)) {
 378            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 379                error_setg_errno(errp, errno,
 380                                 "cannot bind memory to host NUMA nodes");
 381                return;
 382            }
 383        }
 384#endif
 385        /* Preallocate memory after the NUMA policy has been instantiated.
 386         * This is necessary to guarantee memory is allocated with
 387         * specified NUMA policy in place.
 388         */
 389        if (backend->prealloc) {
 390            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 391                            backend->prealloc_threads, &local_err);
 392            if (local_err) {
 393                goto out;
 394            }
 395        }
 396    }
 397out:
 398    error_propagate(errp, local_err);
 399}
 400
 401static bool
 402host_memory_backend_can_be_deleted(UserCreatable *uc)
 403{
 404    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 405        return false;
 406    } else {
 407        return true;
 408    }
 409}
 410
 411static bool host_memory_backend_get_share(Object *o, Error **errp)
 412{
 413    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 414
 415    return backend->share;
 416}
 417
 418static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 419{
 420    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 421
 422    if (host_memory_backend_mr_inited(backend)) {
 423        error_setg(errp, "cannot change property value");
 424        return;
 425    }
 426    backend->share = value;
 427}
 428
 429static bool
 430host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 431{
 432    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 433
 434    return backend->use_canonical_path;
 435}
 436
 437static void
 438host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 439                                           Error **errp)
 440{
 441    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 442
 443    backend->use_canonical_path = value;
 444}
 445
 446static void
 447host_memory_backend_class_init(ObjectClass *oc, void *data)
 448{
 449    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 450
 451    ucc->complete = host_memory_backend_memory_complete;
 452    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 453
 454    object_class_property_add_bool(oc, "merge",
 455        host_memory_backend_get_merge,
 456        host_memory_backend_set_merge);
 457    object_class_property_set_description(oc, "merge",
 458        "Mark memory as mergeable");
 459    object_class_property_add_bool(oc, "dump",
 460        host_memory_backend_get_dump,
 461        host_memory_backend_set_dump);
 462    object_class_property_set_description(oc, "dump",
 463        "Set to 'off' to exclude from core dump");
 464    object_class_property_add_bool(oc, "prealloc",
 465        host_memory_backend_get_prealloc,
 466        host_memory_backend_set_prealloc);
 467    object_class_property_set_description(oc, "prealloc",
 468        "Preallocate memory");
 469    object_class_property_add(oc, "prealloc-threads", "int",
 470        host_memory_backend_get_prealloc_threads,
 471        host_memory_backend_set_prealloc_threads,
 472        NULL, NULL);
 473    object_class_property_set_description(oc, "prealloc-threads",
 474        "Number of CPU threads to use for prealloc");
 475    object_class_property_add(oc, "size", "int",
 476        host_memory_backend_get_size,
 477        host_memory_backend_set_size,
 478        NULL, NULL);
 479    object_class_property_set_description(oc, "size",
 480        "Size of the memory region (ex: 500M)");
 481    object_class_property_add(oc, "host-nodes", "int",
 482        host_memory_backend_get_host_nodes,
 483        host_memory_backend_set_host_nodes,
 484        NULL, NULL);
 485    object_class_property_set_description(oc, "host-nodes",
 486        "Binds memory to the list of NUMA host nodes");
 487    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 488        &HostMemPolicy_lookup,
 489        host_memory_backend_get_policy,
 490        host_memory_backend_set_policy);
 491    object_class_property_set_description(oc, "policy",
 492        "Set the NUMA policy");
 493    object_class_property_add_bool(oc, "share",
 494        host_memory_backend_get_share, host_memory_backend_set_share);
 495    object_class_property_set_description(oc, "share",
 496        "Mark the memory as private to QEMU or shared");
 497    /*
 498     * Do not delete/rename option. This option must be considered stable
 499     * (as if it didn't have the 'x-' prefix including deprecation period) as
 500     * long as 4.0 and older machine types exists.
 501     * Option will be used by upper layers to override (disable) canonical path
 502     * for ramblock-id set by compat properties on old machine types ( <= 4.0),
 503     * to keep migration working when backend is used for main RAM with
 504     * -machine memory-backend= option (main RAM historically used prefix-less
 505     * ramblock-id).
 506     */
 507    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 508        host_memory_backend_get_use_canonical_path,
 509        host_memory_backend_set_use_canonical_path);
 510}
 511
 512static const TypeInfo host_memory_backend_info = {
 513    .name = TYPE_MEMORY_BACKEND,
 514    .parent = TYPE_OBJECT,
 515    .abstract = true,
 516    .class_size = sizeof(HostMemoryBackendClass),
 517    .class_init = host_memory_backend_class_init,
 518    .instance_size = sizeof(HostMemoryBackend),
 519    .instance_init = host_memory_backend_init,
 520    .instance_post_init = host_memory_backend_post_init,
 521    .interfaces = (InterfaceInfo[]) {
 522        { TYPE_USER_CREATABLE },
 523        { }
 524    }
 525};
 526
 527static void register_types(void)
 528{
 529    type_register_static(&host_memory_backend_info);
 530}
 531
 532type_init(register_types);
 533