qemu/backends/hostmem.c
/*
 * QEMU Host Memory Backend
 *
 * Copyright (C) 2013-2014 Red Hat Inc
 *
 * Authors:
 *   Igor Mammedov <imammedo@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/visitor.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"

#ifdef CONFIG_NUMA
#include <numaif.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif

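/*
 * Return the name to use for the backing RAM block: the plain QOM path
 * component by default, or the full canonical QOM path when the
 * x-use-canonical-path-for-ramblock-id property is enabled.
 */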
char *
host_memory_backend_get_name(HostMemoryBackend *backend)
{
    if (!backend->use_canonical_path) {
        return object_get_canonical_path_component(OBJECT(backend));
    }

    return object_get_canonical_path(OBJECT(backend));
}

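/*
 * "size" property: the backend's size in bytes. It can only be set while
 * the backing memory region has not been created yet, and it must be
 * non-zero.
 */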
static void
host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value = backend->size;

    visit_type_size(v, name, &value, errp);
}

static void
host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    Error *local_err = NULL;
    uint64_t value;

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(&local_err, "cannot change property %s of %s",
                   name, object_get_typename(obj));
        goto out;
    }

    visit_type_size(v, name, &value, &local_err);
    if (local_err) {
        goto out;
    }
    if (!value) {
        error_setg(&local_err,
                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
                   name, object_get_typename(obj), value);
        goto out;
    }
    backend->size = value;
out:
    error_propagate(errp, local_err);
}

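/*
 * "host-nodes" getter: convert the backend's host-node bitmap into a
 * uint16List so it can be reported back through the QAPI visitor.
 */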
static void
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *host_nodes = NULL;
    uint16List **node = &host_nodes;
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
    if (value == MAX_NODES) {
        goto ret;
    }

    *node = g_malloc0(sizeof(**node));
    (*node)->value = value;
    node = &(*node)->next;

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
        if (value == MAX_NODES) {
            break;
        }

        *node = g_malloc0(sizeof(**node));
        (*node)->value = value;
        node = &(*node)->next;
    } while (true);

ret:
    visit_type_uint16List(v, name, &host_nodes, errp);
}

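/*
 * "host-nodes" setter: validate that every requested node is below
 * MAX_NODES and record it in the backend's host-node bitmap. Only
 * available when QEMU is built with NUMA support.
 */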
static void
host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
#ifdef CONFIG_NUMA
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *l, *host_nodes = NULL;

    visit_type_uint16List(v, name, &host_nodes, errp);

    for (l = host_nodes; l; l = l->next) {
        if (l->value >= MAX_NODES) {
            error_setg(errp, "Invalid host-nodes value: %d", l->value);
            goto out;
        }
    }

    for (l = host_nodes; l; l = l->next) {
        bitmap_set(backend->host_nodes, l->value, 1);
    }

out:
    qapi_free_uint16List(host_nodes);
#else
    error_setg(errp, "NUMA node binding is not supported by this QEMU");
#endif
}

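/*
 * "policy" property: the HostMemPolicy applied to the backend's memory
 * (default, preferred, bind or interleave). Without NUMA support only
 * the default policy is accepted.
 */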
static int
host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    return backend->policy;
}

static void
host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    backend->policy = policy;

#ifndef CONFIG_NUMA
    if (policy != HOST_MEM_POLICY_DEFAULT) {
        error_setg(errp, "NUMA policies are not supported by this QEMU");
    }
#endif
}

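/*
 * "merge" property: whether the backend's pages may be merged by KSM.
 * If the memory region already exists, toggling the property applies
 * the corresponding madvise() immediately.
 */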
static bool host_memory_backend_get_merge(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->merge;
}

static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }

    if (value != backend->merge) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
        backend->merge = value;
    }
}

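/*
 * "dump" property: whether the backend's memory is included in core
 * dumps. As with "merge", changing it after the region exists applies
 * the matching madvise() right away.
 */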
static bool host_memory_backend_get_dump(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->dump;
}

static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }

    if (value != backend->dump) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
        backend->dump = value;
    }
}

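/*
 * "prealloc" property: whether the backend's memory is preallocated.
 * The global -mem-prealloc option forces preallocation on; if the
 * region already exists, enabling prealloc allocates its pages
 * immediately via os_mem_prealloc().
 */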
static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->prealloc || backend->force_prealloc;
}

static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
{
    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (backend->force_prealloc) {
        if (value) {
            error_setg(errp,
                       "remove -mem-prealloc to use the prealloc property");
            return;
        }
    }

    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }

    if (value && !backend->prealloc) {
        int fd = memory_region_get_fd(&backend->mr);
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        backend->prealloc = true;
    }
}

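/*
 * Instance init: seed merge, dump and prealloc from the machine-wide
 * defaults (mem-merge, dump-guest-core and -mem-prealloc).
 */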
static void host_memory_backend_init(Object *obj)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    MachineState *machine = MACHINE(qdev_get_machine());

    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->prealloc = mem_prealloc;
}

static void host_memory_backend_post_init(Object *obj)
{
    object_apply_compat_props(obj);
}

bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
{
    /*
     * NOTE: We forbid zero-length memory backend, so here zero means
     * "we haven't inited the backend memory region yet".
     */
    return memory_region_size(&backend->mr) != 0;
}

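/*
 * Return the backend's MemoryRegion, or NULL if it has not been
 * created yet.
 */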
MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
{
    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
}

void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    backend->is_mapped = mapped;
}

bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    return backend->is_mapped;
}

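/*
 * Page size of the memory backing the backend: on Linux it is derived
 * from the backend's mem-path (so hugetlbfs mappings report their huge
 * page size); elsewhere it is simply the host page size.
 */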
#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    Object *obj = OBJECT(memdev);
    char *path = object_property_get_str(obj, "mem-path", NULL);
    size_t pagesize = qemu_mempath_getpagesize(path);

    g_free(path);
    return pagesize;
}
#else
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    return getpagesize();
}
#endif

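/*
 * UserCreatable completion hook: let the concrete backend allocate its
 * memory, then apply the merge/dump madvise settings, bind the region
 * to the requested host NUMA nodes with mbind(), and finally
 * preallocate it if requested. Preallocation must happen after mbind()
 * so the pages are faulted in under the chosen NUMA policy.
 */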
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);
        if (mbind(ptr, sz, backend->policy,
                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            smp_cpus, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}

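/* A backend cannot be deleted while it is still mapped (i.e. in use). */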
static bool
host_memory_backend_can_be_deleted(UserCreatable *uc)
{
    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
        return false;
    } else {
        return true;
    }
}

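/*
 * "share" property: whether the backing memory is shared rather than
 * private to QEMU. It can only be changed before the memory region has
 * been created.
 */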
static bool host_memory_backend_get_share(Object *o, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    return backend->share;
}

static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    backend->share = value;
}

static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->use_canonical_path;
}

static void
host_memory_backend_set_use_canonical_path(Object *obj, bool value,
                                           Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    backend->use_canonical_path = value;
}

static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge, &error_abort);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable", &error_abort);
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump, &error_abort);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump", &error_abort);
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc, &error_abort);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory", &error_abort);
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)", &error_abort);
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes", &error_abort);
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy", &error_abort);
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share,
        &error_abort);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared", &error_abort);
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path, &error_abort);
}

static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);