qemu/hw/hyperv/hyperv.c
<<
>>
Prefs
   1/*
   2 * Hyper-V guest/hypervisor interaction
   3 *
   4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/main-loop.h"
  12#include "qemu/module.h"
  13#include "qapi/error.h"
  14#include "exec/address-spaces.h"
  15#include "sysemu/kvm.h"
  16#include "qemu/bitops.h"
  17#include "qemu/error-report.h"
  18#include "qemu/lockable.h"
  19#include "qemu/queue.h"
  20#include "qemu/rcu.h"
  21#include "qemu/rcu_queue.h"
  22#include "hw/hyperv/hyperv.h"
  23#include "qom/object.h"
  24
/*
 * State of one synthetic interrupt controller (SynIC) device; each instance
 * is attached to a single vcpu.
 */
struct SynICState {
    DeviceState parent_obj;

    /* vcpu this SynIC belongs to; assigned in hyperv_synic_add() */
    CPUState *cs;

    /* last "enable" value passed to synic_update() */
    bool enabled;
    /*
     * Addresses in system memory where the page regions are currently mapped;
     * 0 means the respective region is not mapped.
     */
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    /* RAM regions backing the two pages, created in synic_realize() */
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    /* host pointers to the RAM of the regions above */
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
};
  38
/* QOM type name under which the SynIC device is registered */
#define TYPE_SYNIC "hyperv-synic"
/* presumably what provides the SYNIC() cast used throughout this file */
OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)
  41
/* becomes true once hyperv_synic_add() has created a SynIC */
static bool synic_enabled;

/*
 * Tell whether a SynIC has been added to any vcpu so far.
 */
bool hyperv_is_synic_enabled(void)
{
    return synic_enabled;
}
  48
  49static SynICState *get_synic(CPUState *cs)
  50{
  51    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
  52}
  53
  54static void synic_update(SynICState *synic, bool enable,
  55                         hwaddr msg_page_addr, hwaddr event_page_addr)
  56{
  57
  58    synic->enabled = enable;
  59    if (synic->msg_page_addr != msg_page_addr) {
  60        if (synic->msg_page_addr) {
  61            memory_region_del_subregion(get_system_memory(),
  62                                        &synic->msg_page_mr);
  63        }
  64        if (msg_page_addr) {
  65            memory_region_add_subregion(get_system_memory(), msg_page_addr,
  66                                        &synic->msg_page_mr);
  67        }
  68        synic->msg_page_addr = msg_page_addr;
  69    }
  70    if (synic->event_page_addr != event_page_addr) {
  71        if (synic->event_page_addr) {
  72            memory_region_del_subregion(get_system_memory(),
  73                                        &synic->event_page_mr);
  74        }
  75        if (event_page_addr) {
  76            memory_region_add_subregion(get_system_memory(), event_page_addr,
  77                                        &synic->event_page_mr);
  78        }
  79        synic->event_page_addr = event_page_addr;
  80    }
  81}
  82
  83void hyperv_synic_update(CPUState *cs, bool enable,
  84                         hwaddr msg_page_addr, hwaddr event_page_addr)
  85{
  86    SynICState *synic = get_synic(cs);
  87
  88    if (!synic) {
  89        return;
  90    }
  91
  92    synic_update(synic, enable, msg_page_addr, event_page_addr);
  93}
  94
  95static void synic_realize(DeviceState *dev, Error **errp)
  96{
  97    Object *obj = OBJECT(dev);
  98    SynICState *synic = SYNIC(dev);
  99    char *msgp_name, *eventp_name;
 100    uint32_t vp_index;
 101
 102    /* memory region names have to be globally unique */
 103    vp_index = hyperv_vp_index(synic->cs);
 104    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
 105    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
 106
 107    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
 108                           sizeof(*synic->msg_page), &error_abort);
 109    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
 110                           sizeof(*synic->event_page), &error_abort);
 111    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
 112    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
 113
 114    g_free(msgp_name);
 115    g_free(eventp_name);
 116}
 117static void synic_reset(DeviceState *dev)
 118{
 119    SynICState *synic = SYNIC(dev);
 120    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
 121    memset(synic->event_page, 0, sizeof(*synic->event_page));
 122    synic_update(synic, false, 0, 0);
 123}
 124
 125static void synic_class_init(ObjectClass *klass, void *data)
 126{
 127    DeviceClass *dc = DEVICE_CLASS(klass);
 128
 129    dc->realize = synic_realize;
 130    dc->reset = synic_reset;
 131    dc->user_creatable = false;
 132}
 133
 134void hyperv_synic_add(CPUState *cs)
 135{
 136    Object *obj;
 137    SynICState *synic;
 138
 139    obj = object_new(TYPE_SYNIC);
 140    synic = SYNIC(obj);
 141    synic->cs = cs;
 142    object_property_add_child(OBJECT(cs), "synic", obj);
 143    object_unref(obj);
 144    qdev_realize(DEVICE(obj), NULL, &error_abort);
 145    synic_enabled = true;
 146}
 147
 148void hyperv_synic_reset(CPUState *cs)
 149{
 150    SynICState *synic = get_synic(cs);
 151
 152    if (synic) {
 153        device_legacy_reset(DEVICE(synic));
 154    }
 155}
 156
/* QOM type description of the SynIC: a plain device carrying SynICState */
static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};
 163
/* Register the SynIC type described by synic_type_info. */
static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)
 170
 171/*
 172 * KVM has its own message producers (SynIC timers).  To guarantee
 173 * serialization with both KVM vcpu and the guest cpu, the messages are first
 174 * staged in an intermediate area and then posted to the SynIC message page in
 175 * the vcpu thread.
 176 */
/*
 * Staging area for a single in-flight message of one SINT route; ownership
 * moves between the poster, the vcpu worker and the BH as tracked by @state.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /*
     * message posting status filled by cpu_post_msg: -ENXIO, -EAGAIN or the
     * result of hyperv_sint_route_set_sint(); cleared to 0 by sint_msg_bh
     */
    int status;
    /* passing the buck: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
         * notify the guest, records the status, marks the posting done (BUSY
         * -> POSTED), and schedules sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;
 206
/*
 * A reference-counted route for signaling one SINT of one SynIC, optionally
 * with a staging area for posting messages through it.
 */
struct HvSintRoute {
    /* SINT number; also used as the slot index in the SynIC pages */
    uint32_t sint;
    /* SynIC this route delivers to */
    SynICState *synic;
    /* GSI obtained from kvm_irqchip_add_hv_sint_route() */
    int gsi;
    /* set via hyperv_sint_route_set_sint() to raise the SINT */
    EventNotifier sint_set_notifier;
    /* handled by sint_ack_handler; only set up when a callback was given */
    EventNotifier sint_ack_notifier;

    /* message staging area; NULL when the route was created without callback */
    HvSintStagedMessage *staged_msg;

    /* starts at 1; the route is torn down when it drops to 0 */
    unsigned refcount;
};
 218
 219static CPUState *hyperv_find_vcpu(uint32_t vp_index)
 220{
 221    CPUState *cs = qemu_get_cpu(vp_index);
 222    assert(hyperv_vp_index(cs) == vp_index);
 223    return cs;
 224}
 225
 226/*
 227 * BH to complete the processing of a staged message.
 228 */
 229static void sint_msg_bh(void *opaque)
 230{
 231    HvSintRoute *sint_route = opaque;
 232    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 233
 234    if (qatomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
 235        /* status nor ready yet (spurious ack from guest?), ignore */
 236        return;
 237    }
 238
 239    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
 240    staged_msg->status = 0;
 241
 242    /* staged message processing finished, ready to start over */
 243    qatomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
 244    /* drop the reference taken in hyperv_post_msg */
 245    hyperv_sint_route_unref(sint_route);
 246}
 247
 248/*
 249 * Worker to transfer the message from the staging area into the SynIC message
 250 * page in vcpu context.
 251 */
 252static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
 253{
 254    HvSintRoute *sint_route = data.host_ptr;
 255    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 256    SynICState *synic = sint_route->synic;
 257    struct hyperv_message *dst_msg;
 258    bool wait_for_sint_ack = false;
 259
 260    assert(staged_msg->state == HV_STAGED_MSG_BUSY);
 261
 262    if (!synic->enabled || !synic->msg_page_addr) {
 263        staged_msg->status = -ENXIO;
 264        goto posted;
 265    }
 266
 267    dst_msg = &synic->msg_page->slot[sint_route->sint];
 268
 269    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
 270        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
 271        staged_msg->status = -EAGAIN;
 272        wait_for_sint_ack = true;
 273    } else {
 274        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
 275        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
 276    }
 277
 278    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
 279
 280posted:
 281    qatomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
 282    /*
 283     * Notify the msg originator of the progress made; if the slot was busy we
 284     * set msg_pending flag in it so it will be the guest who will do EOM and
 285     * trigger the notification from KVM via sint_ack_notifier
 286     */
 287    if (!wait_for_sint_ack) {
 288        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
 289                                sint_route);
 290    }
 291}
 292
 293/*
 294 * Post a Hyper-V message to the staging area, for delivery to guest in the
 295 * vcpu thread.
 296 */
 297int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
 298{
 299    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
 300
 301    assert(staged_msg);
 302
 303    /* grab the staging area */
 304    if (qatomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
 305                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
 306        return -EAGAIN;
 307    }
 308
 309    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
 310
 311    /* hold a reference on sint_route until the callback is finished */
 312    hyperv_sint_route_ref(sint_route);
 313
 314    /* schedule message posting attempt in vcpu thread */
 315    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
 316                     RUN_ON_CPU_HOST_PTR(sint_route));
 317    return 0;
 318}
 319
 320static void sint_ack_handler(EventNotifier *notifier)
 321{
 322    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
 323                                           sint_ack_notifier);
 324    event_notifier_test_and_clear(notifier);
 325
 326    /*
 327     * the guest consumed the previous message so complete the current one with
 328     * -EAGAIN and let the msg originator retry
 329     */
 330    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
 331}
 332
 333/*
 334 * Set given event flag for a given sint on a given vcpu, and signal the sint.
 335 */
 336int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
 337{
 338    int ret;
 339    SynICState *synic = sint_route->synic;
 340    unsigned long *flags, set_mask;
 341    unsigned set_idx;
 342
 343    if (eventno > HV_EVENT_FLAGS_COUNT) {
 344        return -EINVAL;
 345    }
 346    if (!synic->enabled || !synic->event_page_addr) {
 347        return -ENXIO;
 348    }
 349
 350    set_idx = BIT_WORD(eventno);
 351    set_mask = BIT_MASK(eventno);
 352    flags = synic->event_page->slot[sint_route->sint].flags;
 353
 354    if ((qatomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
 355        memory_region_set_dirty(&synic->event_page_mr, 0,
 356                                sizeof(*synic->event_page));
 357        ret = hyperv_sint_route_set_sint(sint_route);
 358    } else {
 359        ret = 0;
 360    }
 361    return ret;
 362}
 363
 364HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
 365                                   HvSintMsgCb cb, void *cb_data)
 366{
 367    HvSintRoute *sint_route;
 368    EventNotifier *ack_notifier;
 369    int r, gsi;
 370    CPUState *cs;
 371    SynICState *synic;
 372
 373    cs = hyperv_find_vcpu(vp_index);
 374    if (!cs) {
 375        return NULL;
 376    }
 377
 378    synic = get_synic(cs);
 379    if (!synic) {
 380        return NULL;
 381    }
 382
 383    sint_route = g_new0(HvSintRoute, 1);
 384    r = event_notifier_init(&sint_route->sint_set_notifier, false);
 385    if (r) {
 386        goto err;
 387    }
 388
 389
 390    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
 391    if (ack_notifier) {
 392        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
 393        sint_route->staged_msg->cb = cb;
 394        sint_route->staged_msg->cb_data = cb_data;
 395
 396        r = event_notifier_init(ack_notifier, false);
 397        if (r) {
 398            goto err_sint_set_notifier;
 399        }
 400
 401        event_notifier_set_handler(ack_notifier, sint_ack_handler);
 402    }
 403
 404    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
 405    if (gsi < 0) {
 406        goto err_gsi;
 407    }
 408
 409    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
 410                                           &sint_route->sint_set_notifier,
 411                                           ack_notifier, gsi);
 412    if (r) {
 413        goto err_irqfd;
 414    }
 415    sint_route->gsi = gsi;
 416    sint_route->synic = synic;
 417    sint_route->sint = sint;
 418    sint_route->refcount = 1;
 419
 420    return sint_route;
 421
 422err_irqfd:
 423    kvm_irqchip_release_virq(kvm_state, gsi);
 424err_gsi:
 425    if (ack_notifier) {
 426        event_notifier_set_handler(ack_notifier, NULL);
 427        event_notifier_cleanup(ack_notifier);
 428        g_free(sint_route->staged_msg);
 429    }
 430err_sint_set_notifier:
 431    event_notifier_cleanup(&sint_route->sint_set_notifier);
 432err:
 433    g_free(sint_route);
 434
 435    return NULL;
 436}
 437
 438void hyperv_sint_route_ref(HvSintRoute *sint_route)
 439{
 440    sint_route->refcount++;
 441}
 442
 443void hyperv_sint_route_unref(HvSintRoute *sint_route)
 444{
 445    if (!sint_route) {
 446        return;
 447    }
 448
 449    assert(sint_route->refcount > 0);
 450
 451    if (--sint_route->refcount) {
 452        return;
 453    }
 454
 455    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
 456                                          &sint_route->sint_set_notifier,
 457                                          sint_route->gsi);
 458    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
 459    if (sint_route->staged_msg) {
 460        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
 461        event_notifier_cleanup(&sint_route->sint_ack_notifier);
 462        g_free(sint_route->staged_msg);
 463    }
 464    event_notifier_cleanup(&sint_route->sint_set_notifier);
 465    g_free(sint_route);
 466}
 467
 468int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
 469{
 470    return event_notifier_set(&sint_route->sint_set_notifier);
 471}
 472
/* Entry of msg_handlers: the handler of messages posted to one connection */
typedef struct MsgHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    /* connection id this handler is registered for */
    uint32_t conn_id;
    /* callback invoked from hyperv_hcall_post_message(), and its argument */
    HvMsgHandler handler;
    void *data;
} MsgHandler;
 480
/* Entry of event_flag_handlers: the notifier to set for one connection */
typedef struct EventFlagHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    /* connection id this entry is registered for */
    uint32_t conn_id;
    /* set from hyperv_hcall_signal_event() when the connection is signaled */
    EventNotifier *notifier;
} EventFlagHandler;
 487
/*
 * Registered handlers.  The lists are modified with handlers_mutex held and
 * traversed under the RCU read lock by the hypercall handlers.
 */
static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;

/* Constructor: initialize the handler lists and the mutex guarding them. */
static void __attribute__((constructor)) hv_init(void)
{
    QLIST_INIT(&msg_handlers);
    QLIST_INIT(&event_flag_handlers);
    qemu_mutex_init(&handlers_mutex);
}
 498
 499int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
 500{
 501    int ret;
 502    MsgHandler *mh;
 503
 504    QEMU_LOCK_GUARD(&handlers_mutex);
 505    QLIST_FOREACH(mh, &msg_handlers, link) {
 506        if (mh->conn_id == conn_id) {
 507            if (handler) {
 508                ret = -EEXIST;
 509            } else {
 510                QLIST_REMOVE_RCU(mh, link);
 511                g_free_rcu(mh, rcu);
 512                ret = 0;
 513            }
 514            return ret;
 515        }
 516    }
 517
 518    if (handler) {
 519        mh = g_new(MsgHandler, 1);
 520        mh->conn_id = conn_id;
 521        mh->handler = handler;
 522        mh->data = data;
 523        QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
 524        ret = 0;
 525    } else {
 526        ret = -ENOENT;
 527    }
 528
 529    return ret;
 530}
 531
 532uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
 533{
 534    uint16_t ret;
 535    hwaddr len;
 536    struct hyperv_post_message_input *msg;
 537    MsgHandler *mh;
 538
 539    if (fast) {
 540        return HV_STATUS_INVALID_HYPERCALL_CODE;
 541    }
 542    if (param & (__alignof__(*msg) - 1)) {
 543        return HV_STATUS_INVALID_ALIGNMENT;
 544    }
 545
 546    len = sizeof(*msg);
 547    msg = cpu_physical_memory_map(param, &len, 0);
 548    if (len < sizeof(*msg)) {
 549        ret = HV_STATUS_INSUFFICIENT_MEMORY;
 550        goto unmap;
 551    }
 552    if (msg->payload_size > sizeof(msg->payload)) {
 553        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 554        goto unmap;
 555    }
 556
 557    ret = HV_STATUS_INVALID_CONNECTION_ID;
 558    WITH_RCU_READ_LOCK_GUARD() {
 559        QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
 560            if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
 561                ret = mh->handler(msg, mh->data);
 562                break;
 563            }
 564        }
 565    }
 566
 567unmap:
 568    cpu_physical_memory_unmap(msg, len, 0, 0);
 569    return ret;
 570}
 571
 572static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
 573{
 574    int ret;
 575    EventFlagHandler *handler;
 576
 577    QEMU_LOCK_GUARD(&handlers_mutex);
 578    QLIST_FOREACH(handler, &event_flag_handlers, link) {
 579        if (handler->conn_id == conn_id) {
 580            if (notifier) {
 581                ret = -EEXIST;
 582            } else {
 583                QLIST_REMOVE_RCU(handler, link);
 584                g_free_rcu(handler, rcu);
 585                ret = 0;
 586            }
 587            return ret;
 588        }
 589    }
 590
 591    if (notifier) {
 592        handler = g_new(EventFlagHandler, 1);
 593        handler->conn_id = conn_id;
 594        handler->notifier = notifier;
 595        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
 596        ret = 0;
 597    } else {
 598        ret = -ENOENT;
 599    }
 600
 601    return ret;
 602}
 603
 604static bool process_event_flags_userspace;
 605
 606int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
 607{
 608    if (!process_event_flags_userspace &&
 609        !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
 610        process_event_flags_userspace = true;
 611
 612        warn_report("Hyper-V event signaling is not supported by this kernel; "
 613                    "using slower userspace hypercall processing");
 614    }
 615
 616    if (!process_event_flags_userspace) {
 617        struct kvm_hyperv_eventfd hvevfd = {
 618            .conn_id = conn_id,
 619            .fd = notifier ? event_notifier_get_fd(notifier) : -1,
 620            .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
 621        };
 622
 623        return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
 624    }
 625    return set_event_flag_handler(conn_id, notifier);
 626}
 627
 628uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
 629{
 630    EventFlagHandler *handler;
 631
 632    if (unlikely(!fast)) {
 633        hwaddr addr = param;
 634
 635        if (addr & (__alignof__(addr) - 1)) {
 636            return HV_STATUS_INVALID_ALIGNMENT;
 637        }
 638
 639        param = ldq_phys(&address_space_memory, addr);
 640    }
 641
 642    /*
 643     * Per spec, bits 32-47 contain the extra "flag number".  However, we
 644     * have no use for it, and in all known usecases it is zero, so just
 645     * report lookup failure if it isn't.
 646     */
 647    if (param & 0xffff00000000ULL) {
 648        return HV_STATUS_INVALID_PORT_ID;
 649    }
 650    /* remaining bits are reserved-zero */
 651    if (param & ~HV_CONNECTION_ID_MASK) {
 652        return HV_STATUS_INVALID_HYPERCALL_INPUT;
 653    }
 654
 655    RCU_READ_LOCK_GUARD();
 656    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
 657        if (handler->conn_id == param) {
 658            event_notifier_set(handler->notifier);
 659            return 0;
 660        }
 661    }
 662    return HV_STATUS_INVALID_CONNECTION_ID;
 663}
 664