qemu/backends/hostmem.c
<<
>>
Prefs
   1/*
   2 * QEMU Host Memory Backend
   3 *
   4 * Copyright (C) 2013-2014 Red Hat Inc
   5 *
   6 * Authors:
   7 *   Igor Mammedov <imammedo@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/hostmem.h"
  15#include "hw/boards.h"
  16#include "qapi/error.h"
  17#include "qapi/qapi-builtin-visit.h"
  18#include "qapi/visitor.h"
  19#include "qemu/config-file.h"
  20#include "qom/object_interfaces.h"
  21#include "qemu/mmap-alloc.h"
  22
  23#ifdef CONFIG_NUMA
  24#include <numaif.h>
  25QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  26QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  27QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  28QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  29#endif
  30
  31char *
  32host_memory_backend_get_name(HostMemoryBackend *backend)
  33{
  34    if (!backend->use_canonical_path) {
  35        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
  36    }
  37
  38    return object_get_canonical_path(OBJECT(backend));
  39}
  40
  41static void
  42host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  43                             void *opaque, Error **errp)
  44{
  45    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  46    uint64_t value = backend->size;
  47
  48    visit_type_size(v, name, &value, errp);
  49}
  50
  51static void
  52host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  53                             void *opaque, Error **errp)
  54{
  55    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  56    uint64_t value;
  57
  58    if (host_memory_backend_mr_inited(backend)) {
  59        error_setg(errp, "cannot change property %s of %s ", name,
  60                   object_get_typename(obj));
  61        return;
  62    }
  63
  64    if (!visit_type_size(v, name, &value, errp)) {
  65        return;
  66    }
  67    if (!value) {
  68        error_setg(errp,
  69                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
  70                   name, object_get_typename(obj), value);
  71        return;
  72    }
  73    backend->size = value;
  74}
  75
  76static void
  77host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  78                                   void *opaque, Error **errp)
  79{
  80    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  81    uint16List *host_nodes = NULL;
  82    uint16List **tail = &host_nodes;
  83    unsigned long value;
  84
  85    value = find_first_bit(backend->host_nodes, MAX_NODES);
  86    if (value == MAX_NODES) {
  87        goto ret;
  88    }
  89
  90    QAPI_LIST_APPEND(tail, value);
  91
  92    do {
  93        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  94        if (value == MAX_NODES) {
  95            break;
  96        }
  97
  98        QAPI_LIST_APPEND(tail, value);
  99    } while (true);
 100
 101ret:
 102    visit_type_uint16List(v, name, &host_nodes, errp);
 103    qapi_free_uint16List(host_nodes);
 104}
 105
 106static void
 107host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
 108                                   void *opaque, Error **errp)
 109{
 110#ifdef CONFIG_NUMA
 111    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 112    uint16List *l, *host_nodes = NULL;
 113
 114    visit_type_uint16List(v, name, &host_nodes, errp);
 115
 116    for (l = host_nodes; l; l = l->next) {
 117        if (l->value >= MAX_NODES) {
 118            error_setg(errp, "Invalid host-nodes value: %d", l->value);
 119            goto out;
 120        }
 121    }
 122
 123    for (l = host_nodes; l; l = l->next) {
 124        bitmap_set(backend->host_nodes, l->value, 1);
 125    }
 126
 127out:
 128    qapi_free_uint16List(host_nodes);
 129#else
 130    error_setg(errp, "NUMA node binding are not supported by this QEMU");
 131#endif
 132}
 133
 134static int
 135host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
 136{
 137    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 138    return backend->policy;
 139}
 140
 141static void
 142host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
 143{
 144    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 145    backend->policy = policy;
 146
 147#ifndef CONFIG_NUMA
 148    if (policy != HOST_MEM_POLICY_DEFAULT) {
 149        error_setg(errp, "NUMA policies are not supported by this QEMU");
 150    }
 151#endif
 152}
 153
 154static bool host_memory_backend_get_merge(Object *obj, Error **errp)
 155{
 156    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 157
 158    return backend->merge;
 159}
 160
 161static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 162{
 163    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 164
 165    if (!host_memory_backend_mr_inited(backend)) {
 166        backend->merge = value;
 167        return;
 168    }
 169
 170    if (value != backend->merge) {
 171        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 172        uint64_t sz = memory_region_size(&backend->mr);
 173
 174        qemu_madvise(ptr, sz,
 175                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
 176        backend->merge = value;
 177    }
 178}
 179
 180static bool host_memory_backend_get_dump(Object *obj, Error **errp)
 181{
 182    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 183
 184    return backend->dump;
 185}
 186
 187static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 188{
 189    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 190
 191    if (!host_memory_backend_mr_inited(backend)) {
 192        backend->dump = value;
 193        return;
 194    }
 195
 196    if (value != backend->dump) {
 197        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 198        uint64_t sz = memory_region_size(&backend->mr);
 199
 200        qemu_madvise(ptr, sz,
 201                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
 202        backend->dump = value;
 203    }
 204}
 205
 206static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 207{
 208    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 209
 210    return backend->prealloc;
 211}
 212
 213static void host_memory_backend_set_prealloc(Object *obj, bool value,
 214                                             Error **errp)
 215{
 216    Error *local_err = NULL;
 217    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 218
 219    if (!backend->reserve && value) {
 220        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 221        return;
 222    }
 223
 224    if (!host_memory_backend_mr_inited(backend)) {
 225        backend->prealloc = value;
 226        return;
 227    }
 228
 229    if (value && !backend->prealloc) {
 230        int fd = memory_region_get_fd(&backend->mr);
 231        void *ptr = memory_region_get_ram_ptr(&backend->mr);
 232        uint64_t sz = memory_region_size(&backend->mr);
 233
 234        os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
 235        if (local_err) {
 236            error_propagate(errp, local_err);
 237            return;
 238        }
 239        backend->prealloc = true;
 240    }
 241}
 242
 243static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
 244    const char *name, void *opaque, Error **errp)
 245{
 246    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 247    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
 248}
 249
 250static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
 251    const char *name, void *opaque, Error **errp)
 252{
 253    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 254    uint32_t value;
 255
 256    if (!visit_type_uint32(v, name, &value, errp)) {
 257        return;
 258    }
 259    if (value <= 0) {
 260        error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
 261                   object_get_typename(obj), value);
 262        return;
 263    }
 264    backend->prealloc_threads = value;
 265}
 266
 267static void host_memory_backend_init(Object *obj)
 268{
 269    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 270    MachineState *machine = MACHINE(qdev_get_machine());
 271
 272    /* TODO: convert access to globals to compat properties */
 273    backend->merge = machine_mem_merge(machine);
 274    backend->dump = machine_dump_guest_core(machine);
 275    backend->reserve = true;
 276    backend->prealloc_threads = 1;
 277}
 278
 279static void host_memory_backend_post_init(Object *obj)
 280{
 281    object_apply_compat_props(obj);
 282}
 283
 284bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
 285{
 286    /*
 287     * NOTE: We forbid zero-length memory backend, so here zero means
 288     * "we haven't inited the backend memory region yet".
 289     */
 290    return memory_region_size(&backend->mr) != 0;
 291}
 292
 293MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
 294{
 295    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 296}
 297
 298void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
 299{
 300    backend->is_mapped = mapped;
 301}
 302
 303bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
 304{
 305    return backend->is_mapped;
 306}
 307
 308#ifdef __linux__
 309size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 310{
 311    Object *obj = OBJECT(memdev);
 312    char *path = object_property_get_str(obj, "mem-path", NULL);
 313    size_t pagesize = qemu_mempath_getpagesize(path);
 314
 315    g_free(path);
 316    return pagesize;
 317}
 318#else
 319size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
 320{
 321    return qemu_real_host_page_size;
 322}
 323#endif
 324
 325static void
 326host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 327{
 328    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
 329    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 330    Error *local_err = NULL;
 331    void *ptr;
 332    uint64_t sz;
 333
 334    if (bc->alloc) {
 335        bc->alloc(backend, &local_err);
 336        if (local_err) {
 337            goto out;
 338        }
 339
 340        ptr = memory_region_get_ram_ptr(&backend->mr);
 341        sz = memory_region_size(&backend->mr);
 342
 343        if (backend->merge) {
 344            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
 345        }
 346        if (!backend->dump) {
 347            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
 348        }
 349#ifdef CONFIG_NUMA
 350        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
 351        /* lastbit == MAX_NODES means maxnode = 0 */
 352        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
 353        /* ensure policy won't be ignored in case memory is preallocated
 354         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
 355         * this doesn't catch hugepage case. */
 356        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
 357
 358        /* check for invalid host-nodes and policies and give more verbose
 359         * error messages than mbind(). */
 360        if (maxnode && backend->policy == MPOL_DEFAULT) {
 361            error_setg(errp, "host-nodes must be empty for policy default,"
 362                       " or you should explicitly specify a policy other"
 363                       " than default");
 364            return;
 365        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
 366            error_setg(errp, "host-nodes must be set for policy %s",
 367                       HostMemPolicy_str(backend->policy));
 368            return;
 369        }
 370
 371        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
 372         * as argument to mbind() due to an old Linux bug (feature?) which
 373         * cuts off the last specified node. This means backend->host_nodes
 374         * must have MAX_NODES+1 bits available.
 375         */
 376        assert(sizeof(backend->host_nodes) >=
 377               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
 378        assert(maxnode <= MAX_NODES);
 379
 380        if (maxnode &&
 381            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
 382                  flags)) {
 383            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 384                error_setg_errno(errp, errno,
 385                                 "cannot bind memory to host NUMA nodes");
 386                return;
 387            }
 388        }
 389#endif
 390        /* Preallocate memory after the NUMA policy has been instantiated.
 391         * This is necessary to guarantee memory is allocated with
 392         * specified NUMA policy in place.
 393         */
 394        if (backend->prealloc) {
 395            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
 396                            backend->prealloc_threads, &local_err);
 397            if (local_err) {
 398                goto out;
 399            }
 400        }
 401    }
 402out:
 403    error_propagate(errp, local_err);
 404}
 405
 406static bool
 407host_memory_backend_can_be_deleted(UserCreatable *uc)
 408{
 409    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
 410        return false;
 411    } else {
 412        return true;
 413    }
 414}
 415
 416static bool host_memory_backend_get_share(Object *o, Error **errp)
 417{
 418    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 419
 420    return backend->share;
 421}
 422
 423static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
 424{
 425    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 426
 427    if (host_memory_backend_mr_inited(backend)) {
 428        error_setg(errp, "cannot change property value");
 429        return;
 430    }
 431    backend->share = value;
 432}
 433
 434#ifdef CONFIG_LINUX
 435static bool host_memory_backend_get_reserve(Object *o, Error **errp)
 436{
 437    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 438
 439    return backend->reserve;
 440}
 441
 442static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
 443{
 444    HostMemoryBackend *backend = MEMORY_BACKEND(o);
 445
 446    if (host_memory_backend_mr_inited(backend)) {
 447        error_setg(errp, "cannot change property value");
 448        return;
 449    }
 450    if (backend->prealloc && !value) {
 451        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
 452        return;
 453    }
 454    backend->reserve = value;
 455}
 456#endif /* CONFIG_LINUX */
 457
 458static bool
 459host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
 460{
 461    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 462
 463    return backend->use_canonical_path;
 464}
 465
 466static void
 467host_memory_backend_set_use_canonical_path(Object *obj, bool value,
 468                                           Error **errp)
 469{
 470    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 471
 472    backend->use_canonical_path = value;
 473}
 474
 475static void
 476host_memory_backend_class_init(ObjectClass *oc, void *data)
 477{
 478    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
 479
 480    ucc->complete = host_memory_backend_memory_complete;
 481    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 482
 483    object_class_property_add_bool(oc, "merge",
 484        host_memory_backend_get_merge,
 485        host_memory_backend_set_merge);
 486    object_class_property_set_description(oc, "merge",
 487        "Mark memory as mergeable");
 488    object_class_property_add_bool(oc, "dump",
 489        host_memory_backend_get_dump,
 490        host_memory_backend_set_dump);
 491    object_class_property_set_description(oc, "dump",
 492        "Set to 'off' to exclude from core dump");
 493    object_class_property_add_bool(oc, "prealloc",
 494        host_memory_backend_get_prealloc,
 495        host_memory_backend_set_prealloc);
 496    object_class_property_set_description(oc, "prealloc",
 497        "Preallocate memory");
 498    object_class_property_add(oc, "prealloc-threads", "int",
 499        host_memory_backend_get_prealloc_threads,
 500        host_memory_backend_set_prealloc_threads,
 501        NULL, NULL);
 502    object_class_property_set_description(oc, "prealloc-threads",
 503        "Number of CPU threads to use for prealloc");
 504    object_class_property_add(oc, "size", "int",
 505        host_memory_backend_get_size,
 506        host_memory_backend_set_size,
 507        NULL, NULL);
 508    object_class_property_set_description(oc, "size",
 509        "Size of the memory region (ex: 500M)");
 510    object_class_property_add(oc, "host-nodes", "int",
 511        host_memory_backend_get_host_nodes,
 512        host_memory_backend_set_host_nodes,
 513        NULL, NULL);
 514    object_class_property_set_description(oc, "host-nodes",
 515        "Binds memory to the list of NUMA host nodes");
 516    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
 517        &HostMemPolicy_lookup,
 518        host_memory_backend_get_policy,
 519        host_memory_backend_set_policy);
 520    object_class_property_set_description(oc, "policy",
 521        "Set the NUMA policy");
 522    object_class_property_add_bool(oc, "share",
 523        host_memory_backend_get_share, host_memory_backend_set_share);
 524    object_class_property_set_description(oc, "share",
 525        "Mark the memory as private to QEMU or shared");
 526#ifdef CONFIG_LINUX
 527    object_class_property_add_bool(oc, "reserve",
 528        host_memory_backend_get_reserve, host_memory_backend_set_reserve);
 529    object_class_property_set_description(oc, "reserve",
 530        "Reserve swap space (or huge pages) if applicable");
 531#endif /* CONFIG_LINUX */
 532    /*
 533     * Do not delete/rename option. This option must be considered stable
 534     * (as if it didn't have the 'x-' prefix including deprecation period) as
 535     * long as 4.0 and older machine types exists.
 536     * Option will be used by upper layers to override (disable) canonical path
 537     * for ramblock-id set by compat properties on old machine types ( <= 4.0),
 538     * to keep migration working when backend is used for main RAM with
 539     * -machine memory-backend= option (main RAM historically used prefix-less
 540     * ramblock-id).
 541     */
 542    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
 543        host_memory_backend_get_use_canonical_path,
 544        host_memory_backend_set_use_canonical_path);
 545}
 546
 547static const TypeInfo host_memory_backend_info = {
 548    .name = TYPE_MEMORY_BACKEND,
 549    .parent = TYPE_OBJECT,
 550    .abstract = true,
 551    .class_size = sizeof(HostMemoryBackendClass),
 552    .class_init = host_memory_backend_class_init,
 553    .instance_size = sizeof(HostMemoryBackend),
 554    .instance_init = host_memory_backend_init,
 555    .instance_post_init = host_memory_backend_post_init,
 556    .interfaces = (InterfaceInfo[]) {
 557        { TYPE_USER_CREATABLE },
 558        { }
 559    }
 560};
 561
 562static void register_types(void)
 563{
 564    type_register_static(&host_memory_backend_info);
 565}
 566
 567type_init(register_types);
 568