qemu/hw/hyperv/hyperv.c
<<
>>
Prefs
   1/*
   2 * Hyper-V guest/hypervisor interaction
   3 *
   4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/main-loop.h"
  12#include "qemu/module.h"
  13#include "qapi/error.h"
  14#include "exec/address-spaces.h"
  15#include "sysemu/kvm.h"
  16#include "qemu/bitops.h"
  17#include "qemu/error-report.h"
  18#include "qemu/queue.h"
  19#include "qemu/rcu.h"
  20#include "qemu/rcu_queue.h"
  21#include "hw/hyperv/hyperv.h"
  22
  23typedef struct SynICState {
  24    DeviceState parent_obj;
  25
  26    CPUState *cs;
  27
  28    bool enabled;
  29    hwaddr msg_page_addr;
  30    hwaddr event_page_addr;
  31    MemoryRegion msg_page_mr;
  32    MemoryRegion event_page_mr;
  33    struct hyperv_message_page *msg_page;
  34    struct hyperv_event_flags_page *event_page;
  35} SynICState;
  36
  37#define TYPE_SYNIC "hyperv-synic"
  38#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
  39
  40static SynICState *get_synic(CPUState *cs)
  41{
  42    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
  43}
  44
  45static void synic_update(SynICState *synic, bool enable,
  46                         hwaddr msg_page_addr, hwaddr event_page_addr)
  47{
  48
  49    synic->enabled = enable;
  50    if (synic->msg_page_addr != msg_page_addr) {
  51        if (synic->msg_page_addr) {
  52            memory_region_del_subregion(get_system_memory(),
  53                                        &synic->msg_page_mr);
  54        }
  55        if (msg_page_addr) {
  56            memory_region_add_subregion(get_system_memory(), msg_page_addr,
  57                                        &synic->msg_page_mr);
  58        }
  59        synic->msg_page_addr = msg_page_addr;
  60    }
  61    if (synic->event_page_addr != event_page_addr) {
  62        if (synic->event_page_addr) {
  63            memory_region_del_subregion(get_system_memory(),
  64                                        &synic->event_page_mr);
  65        }
  66        if (event_page_addr) {
  67            memory_region_add_subregion(get_system_memory(), event_page_addr,
  68                                        &synic->event_page_mr);
  69        }
  70        synic->event_page_addr = event_page_addr;
  71    }
  72}
  73
  74void hyperv_synic_update(CPUState *cs, bool enable,
  75                         hwaddr msg_page_addr, hwaddr event_page_addr)
  76{
  77    SynICState *synic = get_synic(cs);
  78
  79    if (!synic) {
  80        return;
  81    }
  82
  83    synic_update(synic, enable, msg_page_addr, event_page_addr);
  84}
  85
  86static void synic_realize(DeviceState *dev, Error **errp)
  87{
  88    Object *obj = OBJECT(dev);
  89    SynICState *synic = SYNIC(dev);
  90    char *msgp_name, *eventp_name;
  91    uint32_t vp_index;
  92
  93    /* memory region names have to be globally unique */
  94    vp_index = hyperv_vp_index(synic->cs);
  95    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
  96    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
  97
  98    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
  99                           sizeof(*synic->msg_page), &error_abort);
 100    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
 101                           sizeof(*synic->event_page), &error_abort);
 102    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
 103    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
 104
 105    g_free(msgp_name);
 106    g_free(eventp_name);
 107}
 108static void synic_reset(DeviceState *dev)
 109{
 110    SynICState *synic = SYNIC(dev);
 111    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
 112    memset(synic->event_page, 0, sizeof(*synic->event_page));
 113    synic_update(synic, false, 0, 0);
 114}
 115
 116static void synic_class_init(ObjectClass *klass, void *data)
 117{
 118    DeviceClass *dc = DEVICE_CLASS(klass);
 119
 120    dc->realize = synic_realize;
 121    dc->reset = synic_reset;
 122    dc->user_creatable = false;
 123}
 124
 125void hyperv_synic_add(CPUState *cs)
 126{
 127    Object *obj;
 128    SynICState *synic;
 129
 130    obj = object_new(TYPE_SYNIC);
 131    synic = SYNIC(obj);
 132    synic->cs = cs;
 133    object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
 134    object_unref(obj);
 135    object_property_set_bool(obj, true, "realized", &error_abort);
 136}
 137
 138void hyperv_synic_reset(CPUState *cs)
 139{
 140    SynICState *synic = get_synic(cs);
 141
 142    if (synic) {
 143        device_reset(DEVICE(synic));
 144    }
 145}
 146
 147static const TypeInfo synic_type_info = {
 148    .name = TYPE_SYNIC,
 149    .parent = TYPE_DEVICE,
 150    .instance_size = sizeof(SynICState),
 151    .class_init = synic_class_init,
 152};
 153
 154static void synic_register_types(void)
 155{
 156    type_register_static(&synic_type_info);
 157}
 158
 159type_init(synic_register_types)
 160
 161/*
 162 * KVM has its own message producers (SynIC timers).  To guarantee
 163 * serialization with both KVM vcpu and the guest cpu, the messages are first
 164 * staged in an intermediate area and then posted to the SynIC message page in
 165 * the vcpu thread.
 166 */
 167typedef struct HvSintStagedMessage {
 168    /* message content staged by hyperv_post_msg */
 169    struct hyperv_message msg;
 170    /* callback + data (r/o) to complete the processing in a BH */
 171    HvSintMsgCb cb;
 172    void *cb_data;
 173    /* message posting status filled by cpu_post_msg */
 174    int status;
 175    /* passing the buck: */
 176    enum {
 177        /* initial state */
 178        HV_STAGED_MSG_FREE,
 179        /*
 180         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
 181         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
 182         */
 183        HV_STAGED_MSG_BUSY,
 184        /*
 185         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
 186         * notify the guest, records the status, marks the posting done (BUSY
 187         * -> POSTED), and schedules sint_msg_bh BH
 188         */
 189        HV_STAGED_MSG_POSTED,
 190        /*
 191         * sint_msg_bh (BH) verifies that the posting is done, runs the
 192         * callback, and starts over (POSTED -> FREE)
 193         */
 194    } state;
 195} HvSintStagedMessage;
 196
 197struct HvSintRoute {
 198    uint32_t sint;
 199    SynICState *synic;
 200    int gsi;
 201    EventNotifier sint_set_notifier;
 202    EventNotifier sint_ack_notifier;
 203
 204    HvSintStagedMessage *staged_msg;
 205
 206    unsigned refcount;
 207};
 208
 209static CPUState *hyperv_find_vcpu(uint32_t vp_index)
 210{
 211    CPUState *cs = qemu_get_cpu(vp_index);
 212    assert(hyperv_vp_index(cs) == vp_index);
 213    return cs;
 214}
 215
 216/*
 217 * BH to complete the processing of a staged message.
 218 */
 219static void sint_msg_bh(void *opaque)
 220{
 221    HvSintRoute *sint_route = opaque;
 222    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 223
 224    if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
 225        /* status nor ready yet (spurious ack from guest?), ignore */
 226        return;
 227    }
 228
 229    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
 230    staged_msg->status = 0;
 231
 232    /* staged message processing finished, ready to start over */
 233    atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
 234    /* drop the reference taken in hyperv_post_msg */
 235    hyperv_sint_route_unref(sint_route);
 236}
 237
 238/*
 239 * Worker to transfer the message from the staging area into the SynIC message
 240 * page in vcpu context.
 241 */
 242static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
 243{
 244    HvSintRoute *sint_route = data.host_ptr;
 245    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 246    SynICState *synic = sint_route->synic;
 247    struct hyperv_message *dst_msg;
 248    bool wait_for_sint_ack = false;
 249
 250    assert(staged_msg->state == HV_STAGED_MSG_BUSY);
 251
 252    if (!synic->enabled || !synic->msg_page_addr) {
 253        staged_msg->status = -ENXIO;
 254        goto posted;
 255    }
 256
 257    dst_msg = &synic->msg_page->slot[sint_route->sint];
 258
 259    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
 260        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
 261        staged_msg->status = -EAGAIN;
 262        wait_for_sint_ack = true;
 263    } else {
 264        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
 265        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
 266    }
 267
 268    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
 269
 270posted:
 271    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
 272    /*
 273     * Notify the msg originator of the progress made; if the slot was busy we
 274     * set msg_pending flag in it so it will be the guest who will do EOM and
 275     * trigger the notification from KVM via sint_ack_notifier
 276     */
 277    if (!wait_for_sint_ack) {
 278        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
 279                                sint_route);
 280    }
 281}
 282
 283/*
 284 * Post a Hyper-V message to the staging area, for delivery to guest in the
 285 * vcpu thread.
 286 */
 287int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
 288{
 289    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 290
 291    assert(staged_msg);
 292
 293    /* grab the staging area */
 294    if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
 295                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
 296        return -EAGAIN;
 297    }
 298
 299    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
 300
 301    /* hold a reference on sint_route until the callback is finished */
 302    hyperv_sint_route_ref(sint_route);
 303
 304    /* schedule message posting attempt in vcpu thread */
 305    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
 306                     RUN_ON_CPU_HOST_PTR(sint_route));
 307    return 0;
 308}
 309
 310static void sint_ack_handler(EventNotifier *notifier)
 311{
 312    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
 313                                           sint_ack_notifier);
 314    event_notifier_test_and_clear(notifier);
 315
 316    /*
 317     * the guest consumed the previous message so complete the current one with
 318     * -EAGAIN and let the msg originator retry
 319     */
 320    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
 321}
 322
 323/*
 324 * Set given event flag for a given sint on a given vcpu, and signal the sint.
 325 */
 326int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
 327{
 328    int ret;
 329    SynICState *synic = sint_route->synic;
 330    unsigned long *flags, set_mask;
 331    unsigned set_idx;
 332
 333    if (eventno > HV_EVENT_FLAGS_COUNT) {
 334        return -EINVAL;
 335    }
 336    if (!synic->enabled || !synic->event_page_addr) {
 337        return -ENXIO;
 338    }
 339
 340    set_idx = BIT_WORD(eventno);
 341    set_mask = BIT_MASK(eventno);
 342    flags = synic->event_page->slot[sint_route->sint].flags;
 343
 344    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
 345        memory_region_set_dirty(&synic->event_page_mr, 0,
 346                                sizeof(*synic->event_page));
 347        ret = hyperv_sint_route_set_sint(sint_route);
 348    } else {
 349        ret = 0;
 350    }
 351    return ret;
 352}
 353
 354HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
 355                                   HvSintMsgCb cb, void *cb_data)
 356{
 357    HvSintRoute *sint_route;
 358    EventNotifier *ack_notifier;
 359    int r, gsi;
 360    CPUState *cs;
 361    SynICState *synic;
 362
 363    cs = hyperv_find_vcpu(vp_index);
 364    if (!cs) {
 365        return NULL;
 366    }
 367
 368    synic = get_synic(cs);
 369    if (!synic) {
 370        return NULL;
 371    }
 372
 373    sint_route = g_new0(HvSintRoute, 1);
 374    r = event_notifier_init(&sint_route->sint_set_notifier, false);
 375    if (r) {
 376        goto err;
 377    }
 378
 379
 380    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
 381    if (ack_notifier) {
 382        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
 383        sint_route->staged_msg->cb = cb;
 384        sint_route->staged_msg->cb_data = cb_data;
 385
 386        r = event_notifier_init(ack_notifier, false);
 387        if (r) {
 388            goto err_sint_set_notifier;
 389        }
 390
 391        event_notifier_set_handler(ack_notifier, sint_ack_handler);
 392    }
 393
 394    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
 395    if (gsi < 0) {
 396        goto err_gsi;
 397    }
 398
 399    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
 400                                           &sint_route->sint_set_notifier,
 401                                           ack_notifier, gsi);
 402    if (r) {
 403        goto err_irqfd;
 404    }
 405    sint_route->gsi = gsi;
 406    sint_route->synic = synic;
 407    sint_route->sint = sint;
 408    sint_route->refcount = 1;
 409
 410    return sint_route;
 411
 412err_irqfd:
 413    kvm_irqchip_release_virq(kvm_state, gsi);
 414err_gsi:
 415    if (ack_notifier) {
 416        event_notifier_set_handler(ack_notifier, NULL);
 417        event_notifier_cleanup(ack_notifier);
 418        g_free(sint_route->staged_msg);
 419    }
 420err_sint_set_notifier:
 421    event_notifier_cleanup(&sint_route->sint_set_notifier);
 422err:
 423    g_free(sint_route);
 424
 425    return NULL;
 426}
 427
 428void hyperv_sint_route_ref(HvSintRoute *sint_route)
 429{
 430    sint_route->refcount++;
 431}
 432
 433void hyperv_sint_route_unref(HvSintRoute *sint_route)
 434{
 435    if (!sint_route) {
 436        return;
 437    }
 438
 439    assert(sint_route->refcount > 0);
 440
 441    if (--sint_route->refcount) {
 442        return;
 443    }
 444
 445    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
 446                                          &sint_route->sint_set_notifier,
 447                                          sint_route->gsi);
 448    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
 449    if (sint_route->staged_msg) {
 450        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
 451        event_notifier_cleanup(&sint_route->sint_ack_notifier);
 452        g_free(sint_route->staged_msg);
 453    }
 454    event_notifier_cleanup(&sint_route->sint_set_notifier);
 455    g_free(sint_route);
 456}
 457
 458int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
 459{
 460    return event_notifier_set(&sint_route->sint_set_notifier);
 461}
 462
 463typedef struct MsgHandler {
 464    struct rcu_head rcu;
 465    QLIST_ENTRY(MsgHandler) link;
 466    uint32_t conn_id;
 467    HvMsgHandler handler;
 468    void *data;
 469} MsgHandler;
 470
 471typedef struct EventFlagHandler {
 472    struct rcu_head rcu;
 473    QLIST_ENTRY(EventFlagHandler) link;
 474    uint32_t conn_id;
 475    EventNotifier *notifier;
 476} EventFlagHandler;
 477
 478static QLIST_HEAD(, MsgHandler) msg_handlers;
 479static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
 480static QemuMutex handlers_mutex;
 481
 482static void __attribute__((constructor)) hv_init(void)
 483{
 484    QLIST_INIT(&msg_handlers);
 485    QLIST_INIT(&event_flag_handlers);
 486    qemu_mutex_init(&handlers_mutex);
 487}
 488
 489int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
 490{
 491    int ret;
 492    MsgHandler *mh;
 493
 494    qemu_mutex_lock(&handlers_mutex);
 495    QLIST_FOREACH(mh, &msg_handlers, link) {
 496        if (mh->conn_id == conn_id) {
 497            if (handler) {
 498                ret = -EEXIST;
 499            } else {
 500                QLIST_REMOVE_RCU(mh, link);
 501                g_free_rcu(mh, rcu);
 502                ret = 0;
 503            }
 504            goto unlock;
 505        }
 506    }
 507
 508    if (handler) {
 509        mh = g_new(MsgHandler, 1);
 510        mh->conn_id = conn_id;
 511        mh->handler = handler;
 512        mh->data = data;
 513        QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
 514        ret = 0;
 515    } else {
 516        ret = -ENOENT;
 517    }
 518unlock:
 519    qemu_mutex_unlock(&handlers_mutex);
 520    return ret;
 521}
 522
 523uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
 524{
 525    uint16_t ret;
 526    hwaddr len;
 527    struct hyperv_post_message_input *msg;
 528    MsgHandler *mh;
 529
 530    if (fast) {
 531        return HV_STATUS_INVALID_HYPERCALL_CODE;
 532    }
 533    if (param & (__alignof__(*msg) - 1)) {
 534        return HV_STATUS_INVALID_ALIGNMENT;
 535    }
 536
 537    len = sizeof(*msg);
 538    msg = cpu_physical_memory_map(param, &len, 0);
 539    if (len < sizeof(*msg)) {
 540        ret = HV_STATUS_INSUFFICIENT_MEMORY;
 541        goto unmap;
 542    }
 543    if (msg->payload_size > sizeof(msg->payload)) {
 544        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 545        goto unmap;
 546    }
 547
 548    ret = HV_STATUS_INVALID_CONNECTION_ID;
 549    rcu_read_lock();
 550    QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
 551        if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
 552            ret = mh->handler(msg, mh->data);
 553            break;
 554        }
 555    }
 556    rcu_read_unlock();
 557
 558unmap:
 559    cpu_physical_memory_unmap(msg, len, 0, 0);
 560    return ret;
 561}
 562
 563static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
 564{
 565    int ret;
 566    EventFlagHandler *handler;
 567
 568    qemu_mutex_lock(&handlers_mutex);
 569    QLIST_FOREACH(handler, &event_flag_handlers, link) {
 570        if (handler->conn_id == conn_id) {
 571            if (notifier) {
 572                ret = -EEXIST;
 573            } else {
 574                QLIST_REMOVE_RCU(handler, link);
 575                g_free_rcu(handler, rcu);
 576                ret = 0;
 577            }
 578            goto unlock;
 579        }
 580    }
 581
 582    if (notifier) {
 583        handler = g_new(EventFlagHandler, 1);
 584        handler->conn_id = conn_id;
 585        handler->notifier = notifier;
 586        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
 587        ret = 0;
 588    } else {
 589        ret = -ENOENT;
 590    }
 591unlock:
 592    qemu_mutex_unlock(&handlers_mutex);
 593    return ret;
 594}
 595
 596static bool process_event_flags_userspace;
 597
 598int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
 599{
 600    if (!process_event_flags_userspace &&
 601        !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
 602        process_event_flags_userspace = true;
 603
 604        warn_report("Hyper-V event signaling is not supported by this kernel; "
 605                    "using slower userspace hypercall processing");
 606    }
 607
 608    if (!process_event_flags_userspace) {
 609        struct kvm_hyperv_eventfd hvevfd = {
 610            .conn_id = conn_id,
 611            .fd = notifier ? event_notifier_get_fd(notifier) : -1,
 612            .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
 613        };
 614
 615        return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
 616    }
 617    return set_event_flag_handler(conn_id, notifier);
 618}
 619
 620uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
 621{
 622    uint16_t ret;
 623    EventFlagHandler *handler;
 624
 625    if (unlikely(!fast)) {
 626        hwaddr addr = param;
 627
 628        if (addr & (__alignof__(addr) - 1)) {
 629            return HV_STATUS_INVALID_ALIGNMENT;
 630        }
 631
 632        param = ldq_phys(&address_space_memory, addr);
 633    }
 634
 635    /*
 636     * Per spec, bits 32-47 contain the extra "flag number".  However, we
 637     * have no use for it, and in all known usecases it is zero, so just
 638     * report lookup failure if it isn't.
 639     */
 640    if (param & 0xffff00000000ULL) {
 641        return HV_STATUS_INVALID_PORT_ID;
 642    }
 643    /* remaining bits are reserved-zero */
 644    if (param & ~HV_CONNECTION_ID_MASK) {
 645        return HV_STATUS_INVALID_HYPERCALL_INPUT;
 646    }
 647
 648    ret = HV_STATUS_INVALID_CONNECTION_ID;
 649    rcu_read_lock();
 650    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
 651        if (handler->conn_id == param) {
 652            event_notifier_set(handler->notifier);
 653            ret = 0;
 654            break;
 655        }
 656    }
 657    rcu_read_unlock();
 658    return ret;
 659}
 660