qemu/hw/hyperv/hyperv.c
/*
 * Hyper-V guest/hypervisor interaction
 *
 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "exec/address-spaces.h"
#include "sysemu/kvm.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qemu/queue.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "hw/hyperv/hyperv.h"

typedef struct SynICState {
    DeviceState parent_obj;

    CPUState *cs;

    bool enabled;
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
} SynICState;

#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)

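/* Fetch the "synic" child of the given cpu, or NULL if none has been added. */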
static SynICState *get_synic(CPUState *cs)
{
    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
}

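/*
 * Map or remap the message and event flag overlay pages at the requested
 * guest physical addresses; an address of 0 leaves the page unmapped.
 */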
static void synic_update(SynICState *synic, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    synic->enabled = enable;
    if (synic->msg_page_addr != msg_page_addr) {
        if (synic->msg_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->msg_page_mr);
        }
        if (msg_page_addr) {
            memory_region_add_subregion(get_system_memory(), msg_page_addr,
                                        &synic->msg_page_mr);
        }
        synic->msg_page_addr = msg_page_addr;
    }
    if (synic->event_page_addr != event_page_addr) {
        if (synic->event_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->event_page_mr);
        }
        if (event_page_addr) {
            memory_region_add_subregion(get_system_memory(), event_page_addr,
                                        &synic->event_page_mr);
        }
        synic->event_page_addr = event_page_addr;
    }
}

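/* Public entry point: a no-op for cpus that don't have a SynIC. */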
void hyperv_synic_update(CPUState *cs, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    SynICState *synic = get_synic(cs);

    if (!synic) {
        return;
    }

    synic_update(synic, enable, msg_page_addr, event_page_addr);
}

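/* Allocate the guest-visible RAM backing the message and event flag pages. */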
static void synic_realize(DeviceState *dev, Error **errp)
{
    Object *obj = OBJECT(dev);
    SynICState *synic = SYNIC(dev);
    char *msgp_name, *eventp_name;
    uint32_t vp_index;

    /* memory region names have to be globally unique */
    vp_index = hyperv_vp_index(synic->cs);
    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);

    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
                           sizeof(*synic->msg_page), &error_abort);
    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
                           sizeof(*synic->event_page), &error_abort);
    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);

    g_free(msgp_name);
    g_free(eventp_name);
}

static void synic_reset(DeviceState *dev)
{
    SynICState *synic = SYNIC(dev);
    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
    memset(synic->event_page, 0, sizeof(*synic->event_page));
    synic_update(synic, false, 0, 0);
}

static void synic_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = synic_realize;
    dc->reset = synic_reset;
    dc->user_creatable = false;
}

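/*
 * Create the SynIC for the given cpu; the "synic" child property keeps the
 * only long-term reference to the object.
 */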
void hyperv_synic_add(CPUState *cs)
{
    Object *obj;
    SynICState *synic;

    obj = object_new(TYPE_SYNIC);
    synic = SYNIC(obj);
    synic->cs = cs;
    object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
    object_unref(obj);
    object_property_set_bool(obj, true, "realized", &error_abort);
}

void hyperv_synic_reset(CPUState *cs)
{
    SynICState *synic = get_synic(cs);

    if (synic) {
        device_reset(DEVICE(synic));
    }
}

static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};

static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)

/*
 * KVM has its own message producers (SynIC timers).  To guarantee
 * serialization with both the KVM vcpu and the guest cpu, messages are first
 * staged in an intermediate area and then posted to the SynIC message page
 * from the vcpu thread.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* passing the buck: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy the staged msg into the
         * msg slot and notify the guest, records the status, marks the
         * posting done (BUSY -> POSTED), and schedules the sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;

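/*
 * A route from a synthetic interrupt source (SINT) on a vcpu to the KVM
 * irqchip, with an optional staged message attached for callers that post
 * messages through it.
 */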
struct HvSintRoute {
    uint32_t sint;
    SynICState *synic;
    int gsi;
    EventNotifier sint_set_notifier;
    EventNotifier sint_ack_notifier;

    HvSintStagedMessage *staged_msg;

    unsigned refcount;
};

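/*
 * vp_index is assumed to coincide with QEMU's cpu_index; the assert catches
 * a mismatch, and an out-of-range index yields NULL.
 */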
static CPUState *hyperv_find_vcpu(uint32_t vp_index)
{
    CPUState *cs = qemu_get_cpu(vp_index);
    if (!cs) {
        return NULL;
    }
    assert(hyperv_vp_index(cs) == vp_index);
    return cs;
}

/*
 * BH to complete the processing of a staged message.
 */
static void sint_msg_bh(void *opaque)
{
    HvSintRoute *sint_route = opaque;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
        /* status not ready yet (spurious ack from guest?), ignore */
        return;
    }

    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
    staged_msg->status = 0;

    /* staged message processing finished, ready to start over */
    atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
    /* drop the reference taken in hyperv_post_msg */
    hyperv_sint_route_unref(sint_route);
}

/*
 * Worker to transfer the message from the staging area into the SynIC message
 * page in vcpu context.
 */
static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
{
    HvSintRoute *sint_route = data.host_ptr;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
    SynICState *synic = sint_route->synic;
    struct hyperv_message *dst_msg;
    bool wait_for_sint_ack = false;

    assert(staged_msg->state == HV_STAGED_MSG_BUSY);

    if (!synic->enabled || !synic->msg_page_addr) {
        staged_msg->status = -ENXIO;
        goto posted;
    }

    dst_msg = &synic->msg_page->slot[sint_route->sint];

    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
        staged_msg->status = -EAGAIN;
        wait_for_sint_ack = true;
    } else {
        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
    }

    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));

posted:
    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
    /*
     * Notify the msg originator of the progress made; if the slot was busy we
     * set the msg_pending flag in it, so the notification will only come from
     * KVM via sint_ack_notifier once the guest does EOM
     */
    if (!wait_for_sint_ack) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
                                sint_route);
    }
}

/*
 * Post a Hyper-V message to the staging area, for delivery to guest in the
 * vcpu thread.
 */
int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
{
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    assert(staged_msg);

    /* grab the staging area */
    if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
        return -EAGAIN;
    }

    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));

    /* hold a reference on sint_route until the callback is finished */
    hyperv_sint_route_ref(sint_route);

    /* schedule message posting attempt in vcpu thread */
    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
                     RUN_ON_CPU_HOST_PTR(sint_route));
    return 0;
}

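/*
 * Runs in the main loop when KVM signals the ack notifier (irqfd resample),
 * i.e. the guest has performed EOM on the message slot.
 */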
static void sint_ack_handler(EventNotifier *notifier)
{
    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
                                           sint_ack_notifier);
    event_notifier_test_and_clear(notifier);

    /*
     * the guest consumed the previous message, so complete the current one
     * (its status was set to -EAGAIN by cpu_post_msg) and let the msg
     * originator retry
     */
    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
}

/*
 * Set given event flag for a given sint on a given vcpu, and signal the sint.
 */
int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
{
    int ret;
    SynICState *synic = sint_route->synic;
    unsigned long *flags, set_mask;
    unsigned set_idx;

    /* valid event flag numbers are 0 .. HV_EVENT_FLAGS_COUNT - 1 */
    if (eventno >= HV_EVENT_FLAGS_COUNT) {
        return -EINVAL;
    }
    if (!synic->enabled || !synic->event_page_addr) {
        return -ENXIO;
    }

    set_idx = BIT_WORD(eventno);
    set_mask = BIT_MASK(eventno);
    flags = synic->event_page->slot[sint_route->sint].flags;

    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
        memory_region_set_dirty(&synic->event_page_mr, 0,
                                sizeof(*synic->event_page));
        ret = hyperv_sint_route_set_sint(sint_route);
    } else {
        ret = 0;
    }
    return ret;
}

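/*
 * Build a SINT route for (vp_index, sint): allocate the notifier pair, wire
 * it to the KVM irqchip as an irqfd (with resampling if a callback is given),
 * and return it with refcount 1, or NULL on failure.
 *
 * A typical (hypothetical) caller would do:
 *
 *     sint_route = hyperv_sint_route_new(vp_index, sint, my_cb, my_data);
 *     ...
 *     hyperv_post_msg(sint_route, &msg);
 */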
HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
                                   HvSintMsgCb cb, void *cb_data)
{
    HvSintRoute *sint_route;
    EventNotifier *ack_notifier;
    int r, gsi;
    CPUState *cs;
    SynICState *synic;

    cs = hyperv_find_vcpu(vp_index);
    if (!cs) {
        return NULL;
    }

    synic = get_synic(cs);
    if (!synic) {
        return NULL;
    }

    sint_route = g_new0(HvSintRoute, 1);
    r = event_notifier_init(&sint_route->sint_set_notifier, false);
    if (r) {
        goto err;
    }

    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
    if (ack_notifier) {
        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
        sint_route->staged_msg->cb = cb;
        sint_route->staged_msg->cb_data = cb_data;

        r = event_notifier_init(ack_notifier, false);
        if (r) {
            goto err_sint_set_notifier;
        }

        event_notifier_set_handler(ack_notifier, sint_ack_handler);
    }

    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
    if (gsi < 0) {
        goto err_gsi;
    }

    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
                                           &sint_route->sint_set_notifier,
                                           ack_notifier, gsi);
    if (r) {
        goto err_irqfd;
    }
    sint_route->gsi = gsi;
    sint_route->synic = synic;
    sint_route->sint = sint;
    sint_route->refcount = 1;

    return sint_route;

err_irqfd:
    kvm_irqchip_release_virq(kvm_state, gsi);
err_gsi:
    if (ack_notifier) {
        event_notifier_set_handler(ack_notifier, NULL);
        event_notifier_cleanup(ack_notifier);
        g_free(sint_route->staged_msg);
    }
err_sint_set_notifier:
    event_notifier_cleanup(&sint_route->sint_set_notifier);
err:
    g_free(sint_route);

    return NULL;
}

void hyperv_sint_route_ref(HvSintRoute *sint_route)
{
    sint_route->refcount++;
}

void hyperv_sint_route_unref(HvSintRoute *sint_route)
{
    if (!sint_route) {
        return;
    }

    assert(sint_route->refcount > 0);

    if (--sint_route->refcount) {
        return;
    }

    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
                                          &sint_route->sint_set_notifier,
                                          sint_route->gsi);
    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
    if (sint_route->staged_msg) {
        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
        event_notifier_cleanup(&sint_route->sint_ack_notifier);
        g_free(sint_route->staged_msg);
    }
    event_notifier_cleanup(&sint_route->sint_set_notifier);
    g_free(sint_route);
}

int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
{
    return event_notifier_set(&sint_route->sint_set_notifier);
}

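/*
 * Userspace handlers for the post-message and signal-event hypercalls, keyed
 * by connection id.  The lists are walked under RCU and modified under
 * handlers_mutex.
 */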
typedef struct MsgHandler {
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    uint32_t conn_id;
    HvMsgHandler handler;
    void *data;
} MsgHandler;

typedef struct EventFlagHandler {
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    uint32_t conn_id;
    EventNotifier *notifier;
} EventFlagHandler;

static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;

static void __attribute__((constructor)) hv_init(void)
{
    QLIST_INIT(&msg_handlers);
    QLIST_INIT(&event_flag_handlers);
    qemu_mutex_init(&handlers_mutex);
}

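/*
 * Register (handler != NULL) or unregister (handler == NULL) the message
 * handler for the given connection id.
 */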
int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
{
    int ret;
    MsgHandler *mh;

    qemu_mutex_lock(&handlers_mutex);
    QLIST_FOREACH(mh, &msg_handlers, link) {
        if (mh->conn_id == conn_id) {
            if (handler) {
                ret = -EEXIST;
            } else {
                QLIST_REMOVE_RCU(mh, link);
                g_free_rcu(mh, rcu);
                ret = 0;
            }
            goto unlock;
        }
    }

    if (handler) {
        mh = g_new(MsgHandler, 1);
        mh->conn_id = conn_id;
        mh->handler = handler;
        mh->data = data;
        QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
        ret = 0;
    } else {
        ret = -ENOENT;
    }
unlock:
    qemu_mutex_unlock(&handlers_mutex);
    return ret;
}

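/*
 * HvPostMessage hypercall: validate the input page and dispatch the message
 * to the handler registered for its connection id.
 */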
uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
{
    uint16_t ret;
    hwaddr len;
    struct hyperv_post_message_input *msg;
    MsgHandler *mh;

    if (fast) {
        return HV_STATUS_INVALID_HYPERCALL_CODE;
    }
    if (param & (__alignof__(*msg) - 1)) {
        return HV_STATUS_INVALID_ALIGNMENT;
    }

    len = sizeof(*msg);
    msg = cpu_physical_memory_map(param, &len, 0);
    if (len < sizeof(*msg)) {
        ret = HV_STATUS_INSUFFICIENT_MEMORY;
        goto unmap;
    }
    if (msg->payload_size > sizeof(msg->payload)) {
        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
        goto unmap;
    }

    ret = HV_STATUS_INVALID_CONNECTION_ID;
    rcu_read_lock();
    QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
        if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
            ret = mh->handler(msg, mh->data);
            break;
        }
    }
    rcu_read_unlock();

unmap:
    cpu_physical_memory_unmap(msg, len, 0, 0);
    return ret;
}

static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
    int ret;
    EventFlagHandler *handler;

    qemu_mutex_lock(&handlers_mutex);
    QLIST_FOREACH(handler, &event_flag_handlers, link) {
        if (handler->conn_id == conn_id) {
            if (notifier) {
                ret = -EEXIST;
            } else {
                QLIST_REMOVE_RCU(handler, link);
                g_free_rcu(handler, rcu);
                ret = 0;
            }
            goto unlock;
        }
    }

    if (notifier) {
        handler = g_new(EventFlagHandler, 1);
        handler->conn_id = conn_id;
        handler->notifier = notifier;
        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
        ret = 0;
    } else {
        ret = -ENOENT;
    }
unlock:
    qemu_mutex_unlock(&handlers_mutex);
    return ret;
}

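/*
 * Set once (with a warning) if the kernel lacks KVM_CAP_HYPERV_EVENTFD and
 * signal-event hypercalls must be handled in userspace instead.
 */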
static bool process_event_flags_userspace;

int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
    if (!process_event_flags_userspace &&
        !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
        process_event_flags_userspace = true;

        warn_report("Hyper-V event signaling is not supported by this kernel; "
                    "using slower userspace hypercall processing");
    }

    if (!process_event_flags_userspace) {
        struct kvm_hyperv_eventfd hvevfd = {
            .conn_id = conn_id,
            .fd = notifier ? event_notifier_get_fd(notifier) : -1,
            .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
        };

        return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
    }
    return set_event_flag_handler(conn_id, notifier);
}

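/*
 * HvSignalEvent hypercall: look up the connection id and fire the event
 * notifier registered for it.
 */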
uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
{
    uint16_t ret;
    EventFlagHandler *handler;

    if (unlikely(!fast)) {
        hwaddr addr = param;

        if (addr & (__alignof__(addr) - 1)) {
            return HV_STATUS_INVALID_ALIGNMENT;
        }

        param = ldq_phys(&address_space_memory, addr);
    }

    /*
     * Per spec, bits 32-47 contain the extra "flag number".  However, we
     * have no use for it, and in all known use cases it is zero, so just
     * report lookup failure if it isn't.
     */
    if (param & 0xffff00000000ULL) {
        return HV_STATUS_INVALID_PORT_ID;
    }
    /* remaining bits are reserved-zero */
    if (param & ~HV_CONNECTION_ID_MASK) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    ret = HV_STATUS_INVALID_CONNECTION_ID;
    rcu_read_lock();
    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
        if (handler->conn_id == param) {
            event_notifier_set(handler->notifier);
            ret = 0;
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}