linux/arch/x86/kvm/hyperv.c
   1/*
   2 * KVM Microsoft Hyper-V emulation
   3 *
   4 * derived from arch/x86/kvm/x86.c
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 * Copyright (C) 2008 Qumranet, Inc.
   8 * Copyright IBM Corporation, 2008
   9 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  10 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
  11 *
  12 * Authors:
  13 *   Avi Kivity   <avi@qumranet.com>
  14 *   Yaniv Kamay  <yaniv@qumranet.com>
  15 *   Amit Shah    <amit.shah@qumranet.com>
  16 *   Ben-Ami Yassour <benami@il.ibm.com>
  17 *   Andrey Smetanin <asmetanin@virtuozzo.com>
  18 *
  19 * This work is licensed under the terms of the GNU GPL, version 2.  See
  20 * the COPYING file in the top-level directory.
  21 *
  22 */
  23
  24#include "x86.h"
  25#include "lapic.h"
  26#include "ioapic.h"
  27#include "hyperv.h"
  28
  29#include <linux/kvm_host.h>
  30#include <linux/highmem.h>
  31#include <linux/sched/cputime.h>
  32#include <linux/eventfd.h>
  33
  34#include <asm/apicdef.h>
  35#include <trace/events/kvm.h>
  36
  37#include "trace.h"
  38
  39static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
  40{
  41        return atomic64_read(&synic->sint[sint]);
  42}
  43
  44static inline int synic_get_sint_vector(u64 sint_value)
  45{
  46        if (sint_value & HV_SYNIC_SINT_MASKED)
  47                return -1;
  48        return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
  49}
  50
  51static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
  52                                      int vector)
  53{
  54        int i;
  55
  56        for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  57                if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  58                        return true;
  59        }
  60        return false;
  61}
  62
  63static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
  64                                     int vector)
  65{
  66        int i;
  67        u64 sint_value;
  68
  69        for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  70                sint_value = synic_read_sint(synic, i);
  71                if (synic_get_sint_vector(sint_value) == vector &&
  72                    sint_value & HV_SYNIC_SINT_AUTO_EOI)
  73                        return true;
  74        }
  75        return false;
  76}
  77
  78static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
  79                                int vector)
  80{
  81        if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
  82                return;
  83
  84        if (synic_has_vector_connected(synic, vector))
  85                __set_bit(vector, synic->vec_bitmap);
  86        else
  87                __clear_bit(vector, synic->vec_bitmap);
  88
  89        if (synic_has_vector_auto_eoi(synic, vector))
  90                __set_bit(vector, synic->auto_eoi_bitmap);
  91        else
  92                __clear_bit(vector, synic->auto_eoi_bitmap);
  93}
  94
  95static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
  96                          u64 data, bool host)
  97{
  98        int vector, old_vector;
  99        bool masked;
 100
 101        vector = data & HV_SYNIC_SINT_VECTOR_MASK;
 102        masked = data & HV_SYNIC_SINT_MASKED;
 103
 104        /*
 105         * Valid vectors are 16-255; however, nested Hyper-V attempts to write
 106         * the default '0x10000' value on boot, and this should not #GP. We
 107         * also need to allow zero-initializing the register from the host.
 108         */
 109        if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
 110                return 1;
 111        /*
 112         * The guest may configure multiple SINTs to use the same vector, so
 113         * we maintain a bitmap of vectors handled by the SynIC, and a
 114         * bitmap of vectors with auto-EOI behavior.  The bitmaps are
 115         * updated here and queried atomically on fast paths.
 116         */
 117        old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
 118
 119        atomic64_set(&synic->sint[sint], data);
 120
 121        synic_update_vector(synic, old_vector);
 122
 123        synic_update_vector(synic, vector);
 124
 125        /* Load SynIC vectors into EOI exit bitmap */
 126        kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
 127        return 0;
 128}
 129
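/*
 * Look up a vCPU by its Hyper-V VP index: try the vCPU whose KVM index
 * equals vpidx first (the common case), then fall back to a linear
 * scan over all vCPUs.
 */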
 130static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
 131{
 132        struct kvm_vcpu *vcpu = NULL;
 133        int i;
 134
 135        if (vpidx < KVM_MAX_VCPUS)
 136                vcpu = kvm_get_vcpu(kvm, vpidx);
 137        if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
 138                return vcpu;
 139        kvm_for_each_vcpu(i, vcpu, kvm)
 140                if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
 141                        return vcpu;
 142        return NULL;
 143}
 144
 145static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 146{
 147        struct kvm_vcpu *vcpu;
 148        struct kvm_vcpu_hv_synic *synic;
 149
 150        vcpu = get_vcpu_by_vpidx(kvm, vpidx);
 151        if (!vcpu)
 152                return NULL;
 153        synic = vcpu_to_synic(vcpu);
 154        return (synic->active) ? synic : NULL;
 155}
 156
 157static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
 158                                        u32 sint)
 159{
 160        struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 161        struct page *page;
 162        gpa_t gpa;
 163        struct hv_message *msg;
 164        struct hv_message_page *msg_page;
 165
 166        gpa = synic->msg_page & PAGE_MASK;
 167        page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
 168        if (is_error_page(page)) {
 169                vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
 170                         gpa);
 171                return;
 172        }
 173        msg_page = kmap_atomic(page);
 174
 175        msg = &msg_page->sint_message[sint];
 176        msg->header.message_flags.msg_pending = 0;
 177
 178        kunmap_atomic(msg_page);
 179        kvm_release_page_dirty(page);
 180        kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
 181}
 182
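/*
 * Called once the interrupt delivered through a SINT has been acked:
 * clear the msg_pending flag in this SINT's message-page slot, re-queue
 * any synthetic timers that still have a message pending for this SINT,
 * and notify the GSI (if any) routed to it.
 */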
 183static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
 184{
 185        struct kvm *kvm = vcpu->kvm;
 186        struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
 187        struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
 188        struct kvm_vcpu_hv_stimer *stimer;
 189        int gsi, idx, stimers_pending;
 190
 191        trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
 192
 193        if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
 194                synic_clear_sint_msg_pending(synic, sint);
 195
 196        /* Try to deliver pending Hyper-V SynIC timer messages */
 197        stimers_pending = 0;
 198        for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
 199                stimer = &hv_vcpu->stimer[idx];
 200                if (stimer->msg_pending &&
 201                    (stimer->config & HV_STIMER_ENABLE) &&
 202                    HV_STIMER_SINT(stimer->config) == sint) {
 203                        set_bit(stimer->index,
 204                                hv_vcpu->stimer_pending_bitmap);
 205                        stimers_pending++;
 206                }
 207        }
 208        if (stimers_pending)
 209                kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
 210
 211        idx = srcu_read_lock(&kvm->irq_srcu);
 212        gsi = atomic_read(&synic->sint_to_gsi[sint]);
 213        if (gsi != -1)
 214                kvm_notify_acked_gsi(kvm, gsi);
 215        srcu_read_unlock(&kvm->irq_srcu, idx);
 216}
 217
 218static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
 219{
 220        struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 221        struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
 222
 223        hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
 224        hv_vcpu->exit.u.synic.msr = msr;
 225        hv_vcpu->exit.u.synic.control = synic->control;
 226        hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
 227        hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
 228
 229        kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
 230}
 231
 232static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 233                         u32 msr, u64 data, bool host)
 234{
 235        struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 236        int ret;
 237
 238        if (!synic->active)
 239                return 1;
 240
 241        trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
 242
 243        ret = 0;
 244        switch (msr) {
 245        case HV_X64_MSR_SCONTROL:
 246                synic->control = data;
 247                if (!host)
 248                        synic_exit(synic, msr);
 249                break;
 250        case HV_X64_MSR_SVERSION:
 251                if (!host) {
 252                        ret = 1;
 253                        break;
 254                }
 255                synic->version = data;
 256                break;
 257        case HV_X64_MSR_SIEFP:
 258                if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
 259                    !synic->dont_zero_synic_pages)
 260                        if (kvm_clear_guest(vcpu->kvm,
 261                                            data & PAGE_MASK, PAGE_SIZE)) {
 262                                ret = 1;
 263                                break;
 264                        }
 265                synic->evt_page = data;
 266                if (!host)
 267                        synic_exit(synic, msr);
 268                break;
 269        case HV_X64_MSR_SIMP:
 270                if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
 271                    !synic->dont_zero_synic_pages)
 272                        if (kvm_clear_guest(vcpu->kvm,
 273                                            data & PAGE_MASK, PAGE_SIZE)) {
 274                                ret = 1;
 275                                break;
 276                        }
 277                synic->msg_page = data;
 278                if (!host)
 279                        synic_exit(synic, msr);
 280                break;
 281        case HV_X64_MSR_EOM: {
 282                int i;
 283
 284                for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
 285                        kvm_hv_notify_acked_sint(vcpu, i);
 286                break;
 287        }
 288        case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
 289                ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
 290                break;
 291        default:
 292                ret = 1;
 293                break;
 294        }
 295        return ret;
 296}
 297
 298static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
 299{
 300        int ret;
 301
 302        if (!synic->active)
 303                return 1;
 304
 305        ret = 0;
 306        switch (msr) {
 307        case HV_X64_MSR_SCONTROL:
 308                *pdata = synic->control;
 309                break;
 310        case HV_X64_MSR_SVERSION:
 311                *pdata = synic->version;
 312                break;
 313        case HV_X64_MSR_SIEFP:
 314                *pdata = synic->evt_page;
 315                break;
 316        case HV_X64_MSR_SIMP:
 317                *pdata = synic->msg_page;
 318                break;
 319        case HV_X64_MSR_EOM:
 320                *pdata = 0;
 321                break;
 322        case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
 323                *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
 324                break;
 325        default:
 326                ret = 1;
 327                break;
 328        }
 329        return ret;
 330}
 331
 332static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
 333{
 334        struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 335        struct kvm_lapic_irq irq;
 336        int ret, vector;
 337
 338        if (sint >= ARRAY_SIZE(synic->sint))
 339                return -EINVAL;
 340
 341        vector = synic_get_sint_vector(synic_read_sint(synic, sint));
 342        if (vector < 0)
 343                return -ENOENT;
 344
 345        memset(&irq, 0, sizeof(irq));
 346        irq.shorthand = APIC_DEST_SELF;
 347        irq.dest_mode = APIC_DEST_PHYSICAL;
 348        irq.delivery_mode = APIC_DM_FIXED;
 349        irq.vector = vector;
 350        irq.level = 1;
 351
 352        ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
 353        trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
 354        return ret;
 355}
 356
 357int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 358{
 359        struct kvm_vcpu_hv_synic *synic;
 360
 361        synic = synic_get(kvm, vpidx);
 362        if (!synic)
 363                return -EINVAL;
 364
 365        return synic_set_irq(synic, sint);
 366}
 367
 368void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
 369{
 370        struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
 371        int i;
 372
 373        trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
 374
 375        for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
 376                if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
 377                        kvm_hv_notify_acked_sint(vcpu, i);
 378}
 379
 380static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 381{
 382        struct kvm_vcpu_hv_synic *synic;
 383
 384        synic = synic_get(kvm, vpidx);
 385        if (!synic)
 386                return -EINVAL;
 387
 388        if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
 389                return -EINVAL;
 390
 391        atomic_set(&synic->sint_to_gsi[sint], gsi);
 392        return 0;
 393}
 394
 395void kvm_hv_irq_routing_update(struct kvm *kvm)
 396{
 397        struct kvm_irq_routing_table *irq_rt;
 398        struct kvm_kernel_irq_routing_entry *e;
 399        u32 gsi;
 400
 401        irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
 402                                        lockdep_is_held(&kvm->irq_lock));
 403
 404        for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
 405                hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
 406                        if (e->type == KVM_IRQ_ROUTING_HV_SINT)
 407                                kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
 408                                                    e->hv_sint.sint, gsi);
 409                }
 410        }
 411}
 412
 413static void synic_init(struct kvm_vcpu_hv_synic *synic)
 414{
 415        int i;
 416
 417        memset(synic, 0, sizeof(*synic));
 418        synic->version = HV_SYNIC_VERSION_1;
 419        for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
 420                atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
 421                atomic_set(&synic->sint_to_gsi[i], -1);
 422        }
 423}
 424
 425static u64 get_time_ref_counter(struct kvm *kvm)
 426{
 427        struct kvm_hv *hv = &kvm->arch.hyperv;
 428        struct kvm_vcpu *vcpu;
 429        u64 tsc;
 430
 431        /*
 432         * If the guest has not set up the TSC page or the clock isn't
 433         * stable, fall back to get_kvmclock_ns().
 434         */
 435        if (!hv->tsc_ref.tsc_sequence)
 436                return div_u64(get_kvmclock_ns(kvm), 100);
 437
 438        vcpu = kvm_get_vcpu(kvm, 0);
 439        tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 440        return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
 441                + hv->tsc_ref.tsc_offset;
 442}
 443
 444static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
 445                                bool vcpu_kick)
 446{
 447        struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
 448
 449        set_bit(stimer->index,
 450                vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
 451        kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
 452        if (vcpu_kick)
 453                kvm_vcpu_kick(vcpu);
 454}
 455
 456static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
 457{
 458        struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
 459
 460        trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
 461                                    stimer->index);
 462
 463        hrtimer_cancel(&stimer->timer);
 464        clear_bit(stimer->index,
 465                  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
 466        stimer->msg_pending = false;
 467        stimer->exp_time = 0;
 468}
 469
 470static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
 471{
 472        struct kvm_vcpu_hv_stimer *stimer;
 473
 474        stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
 475        trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
 476                                     stimer->index);
 477        stimer_mark_pending(stimer, true);
 478
 479        return HRTIMER_NORESTART;
 480}
 481
 482/*
 483 * stimer_start() assumptions:
 484 * a) stimer->count is not equal to 0
 485 * b) stimer->config has HV_STIMER_ENABLE flag
 486 */
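/*
 * Illustrative arithmetic for the periodic re-arm below, with made-up
 * numbers: count == 250, exp_time == 400, and a late processing at
 * time_now == 1000 gives remainder = (1000 - 400) % 250 = 100 and
 * exp_time = 1000 + (250 - 100) = 1150, keeping the next expiration
 * aligned to the original phase (400 + 3 * 250).  All values are in
 * 100ns units, matching get_time_ref_counter().
 */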
 487static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
 488{
 489        u64 time_now;
 490        ktime_t ktime_now;
 491
 492        time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
 493        ktime_now = ktime_get();
 494
 495        if (stimer->config & HV_STIMER_PERIODIC) {
 496                if (stimer->exp_time) {
 497                        if (time_now >= stimer->exp_time) {
 498                                u64 remainder;
 499
 500                                div64_u64_rem(time_now - stimer->exp_time,
 501                                              stimer->count, &remainder);
 502                                stimer->exp_time =
 503                                        time_now + (stimer->count - remainder);
 504                        }
 505                } else
 506                        stimer->exp_time = time_now + stimer->count;
 507
 508                trace_kvm_hv_stimer_start_periodic(
 509                                        stimer_to_vcpu(stimer)->vcpu_id,
 510                                        stimer->index,
 511                                        time_now, stimer->exp_time);
 512
 513                hrtimer_start(&stimer->timer,
 514                              ktime_add_ns(ktime_now,
 515                                           100 * (stimer->exp_time - time_now)),
 516                              HRTIMER_MODE_ABS);
 517                return 0;
 518        }
 519        stimer->exp_time = stimer->count;
 520        if (time_now >= stimer->count) {
 521                /*
 522                 * Expire timer according to Hypervisor Top-Level Functional
 523                 * specification v4(15.3.1):
 524                 * "If a one shot is enabled and the specified count is in
 525                 * the past, it will expire immediately."
 526                 */
 527                stimer_mark_pending(stimer, false);
 528                return 0;
 529        }
 530
 531        trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
 532                                           stimer->index,
 533                                           time_now, stimer->count);
 534
 535        hrtimer_start(&stimer->timer,
 536                      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
 537                      HRTIMER_MODE_ABS);
 538        return 0;
 539}
 540
 541static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
 542                             bool host)
 543{
 544        trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
 545                                       stimer->index, config, host);
 546
 547        stimer_cleanup(stimer);
 548        if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
 549                config &= ~HV_STIMER_ENABLE;
 550        stimer->config = config;
 551        stimer_mark_pending(stimer, false);
 552        return 0;
 553}
 554
 555static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
 556                            bool host)
 557{
 558        trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
 559                                      stimer->index, count, host);
 560
 561        stimer_cleanup(stimer);
 562        stimer->count = count;
 563        if (stimer->count == 0)
 564                stimer->config &= ~HV_STIMER_ENABLE;
 565        else if (stimer->config & HV_STIMER_AUTOENABLE)
 566                stimer->config |= HV_STIMER_ENABLE;
 567        stimer_mark_pending(stimer, false);
 568        return 0;
 569}
 570
 571static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
 572{
 573        *pconfig = stimer->config;
 574        return 0;
 575}
 576
 577static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
 578{
 579        *pcount = stimer->count;
 580        return 0;
 581}
 582
 583static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
 584                             struct hv_message *src_msg)
 585{
 586        struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 587        struct page *page;
 588        gpa_t gpa;
 589        struct hv_message *dst_msg;
 590        int r;
 591        struct hv_message_page *msg_page;
 592
 593        if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
 594                return -ENOENT;
 595
 596        gpa = synic->msg_page & PAGE_MASK;
 597        page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
 598        if (is_error_page(page))
 599                return -EFAULT;
 600
 601        msg_page = kmap_atomic(page);
 602        dst_msg = &msg_page->sint_message[sint];
 603        if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
 604                         src_msg->header.message_type) != HVMSG_NONE) {
 605                dst_msg->header.message_flags.msg_pending = 1;
 606                r = -EAGAIN;
 607        } else {
 608                memcpy(&dst_msg->u.payload, &src_msg->u.payload,
 609                       src_msg->header.payload_size);
 610                dst_msg->header.message_type = src_msg->header.message_type;
 611                dst_msg->header.payload_size = src_msg->header.payload_size;
 612                r = synic_set_irq(synic, sint);
 613                if (r >= 1)
 614                        r = 0;
 615                else if (r == 0)
 616                        r = -EFAULT;
 617        }
 618        kunmap_atomic(msg_page);
 619        kvm_release_page_dirty(page);
 620        kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
 621        return r;
 622}
 623
 624static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
 625{
 626        struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
 627        struct hv_message *msg = &stimer->msg;
 628        struct hv_timer_message_payload *payload =
 629                        (struct hv_timer_message_payload *)&msg->u.payload;
 630
 631        payload->expiration_time = stimer->exp_time;
 632        payload->delivery_time = get_time_ref_counter(vcpu->kvm);
 633        return synic_deliver_msg(vcpu_to_synic(vcpu),
 634                                 HV_STIMER_SINT(stimer->config), msg);
 635}
 636
 637static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
 638{
 639        int r;
 640
 641        stimer->msg_pending = true;
 642        r = stimer_send_msg(stimer);
 643        trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
 644                                       stimer->index, r);
 645        if (!r) {
 646                stimer->msg_pending = false;
 647                if (!(stimer->config & HV_STIMER_PERIODIC))
 648                        stimer->config &= ~HV_STIMER_ENABLE;
 649        }
 650}
 651
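/*
 * Process the synthetic timers marked pending on this vCPU: for each
 * enabled timer whose deadline has passed, deliver the expiration
 * message; afterwards restart it if it is still enabled with a non-zero
 * count and no message left pending, otherwise clean it up.  Timers
 * that are not enabled simply have their pending bit cleared.
 */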
 652void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
 653{
 654        struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
 655        struct kvm_vcpu_hv_stimer *stimer;
 656        u64 time_now, exp_time;
 657        int i;
 658
 659        for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 660                if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
 661                        stimer = &hv_vcpu->stimer[i];
 662                        if (stimer->config & HV_STIMER_ENABLE) {
 663                                exp_time = stimer->exp_time;
 664
 665                                if (exp_time) {
 666                                        time_now =
 667                                                get_time_ref_counter(vcpu->kvm);
 668                                        if (time_now >= exp_time)
 669                                                stimer_expiration(stimer);
 670                                }
 671
 672                                if ((stimer->config & HV_STIMER_ENABLE) &&
 673                                    stimer->count) {
 674                                        if (!stimer->msg_pending)
 675                                                stimer_start(stimer);
 676                                } else
 677                                        stimer_cleanup(stimer);
 678                        }
 679                }
 680}
 681
 682void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
 683{
 684        struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
 685        int i;
 686
 687        for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 688                stimer_cleanup(&hv_vcpu->stimer[i]);
 689}
 690
 691static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
 692{
 693        struct hv_message *msg = &stimer->msg;
 694        struct hv_timer_message_payload *payload =
 695                        (struct hv_timer_message_payload *)&msg->u.payload;
 696
 697        memset(&msg->header, 0, sizeof(msg->header));
 698        msg->header.message_type = HVMSG_TIMER_EXPIRED;
 699        msg->header.payload_size = sizeof(*payload);
 700
 701        payload->timer_index = stimer->index;
 702        payload->expiration_time = 0;
 703        payload->delivery_time = 0;
 704}
 705
 706static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
 707{
 708        memset(stimer, 0, sizeof(*stimer));
 709        stimer->index = timer_index;
 710        hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 711        stimer->timer.function = stimer_timer_callback;
 712        stimer_prepare_msg(stimer);
 713}
 714
 715void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 716{
 717        struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
 718        int i;
 719
 720        synic_init(&hv_vcpu->synic);
 721
 722        bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 723        for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 724                stimer_init(&hv_vcpu->stimer[i], i);
 725}
 726
 727void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
 728{
 729        struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
 730
 731        hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
 732}
 733
 734int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 735{
 736        struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
 737
 738        /*
 739         * Hyper-V SynIC auto-EOI SINTs are not
 740         * compatible with APICv, so deactivate APICv.
 741         */
 742        kvm_vcpu_deactivate_apicv(vcpu);
 743        synic->active = true;
 744        synic->dont_zero_synic_pages = dont_zero_synic_pages;
 745        return 0;
 746}
 747
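/*
 * Partition-wide MSRs live in struct kvm_hv and are shared by all
 * vCPUs; kvm_hv_set_msr_common()/kvm_hv_get_msr_common() serialize
 * access to them with hv_lock.  All other Hyper-V MSRs are per-vCPU
 * state in struct kvm_vcpu_hv and are handled without that lock.
 */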
 748static bool kvm_hv_msr_partition_wide(u32 msr)
 749{
 750        bool r = false;
 751
 752        switch (msr) {
 753        case HV_X64_MSR_GUEST_OS_ID:
 754        case HV_X64_MSR_HYPERCALL:
 755        case HV_X64_MSR_REFERENCE_TSC:
 756        case HV_X64_MSR_TIME_REF_COUNT:
 757        case HV_X64_MSR_CRASH_CTL:
 758        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 759        case HV_X64_MSR_RESET:
 760        case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
 761        case HV_X64_MSR_TSC_EMULATION_CONTROL:
 762        case HV_X64_MSR_TSC_EMULATION_STATUS:
 763                r = true;
 764                break;
 765        }
 766
 767        return r;
 768}
 769
 770static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
 771                                     u32 index, u64 *pdata)
 772{
 773        struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
 774
 775        if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
 776                return -EINVAL;
 777
 778        *pdata = hv->hv_crash_param[index];
 779        return 0;
 780}
 781
 782static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
 783{
 784        struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
 785
 786        *pdata = hv->hv_crash_ctl;
 787        return 0;
 788}
 789
 790static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
 791{
 792        struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
 793
 794        if (host)
 795                hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
 796
 797        if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
 798
 799                vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
 800                          hv->hv_crash_param[0],
 801                          hv->hv_crash_param[1],
 802                          hv->hv_crash_param[2],
 803                          hv->hv_crash_param[3],
 804                          hv->hv_crash_param[4]);
 805
 806                /* Send notification about crash to user space */
 807                kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
 808        }
 809
 810        return 0;
 811}
 812
 813static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
 814                                     u32 index, u64 data)
 815{
 816        struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
 817
 818        if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
 819                return -EINVAL;
 820
 821        hv->hv_crash_param[index] = data;
 822        return 0;
 823}
 824
 825/*
 826 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 827 * between them is possible:
 828 *
 829 * kvmclock formula:
 830 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 831 *           + system_time
 832 *
 833 * Hyper-V formula:
 834 *    nsec/100 = ticks * scale / 2^64 + offset
 835 *
 836 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 837 * By dividing the kvmclock formula by 100 and equating what's left we get:
 838 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 839 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
 840 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
 841 *
 842 * Now expand the kvmclock formula and divide by 100:
 843 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 844 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 845 *           + system_time
 846 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 847 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 848 *               + system_time / 100
 849 *
 850 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 851 *    nsec/100 = ticks * scale / 2^64
 852 *               - tsc_timestamp * scale / 2^64
 853 *               + system_time / 100
 854 *
 855 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 856 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 857 *
 858 * These two equivalencies are implemented in this function.
 859 */
 860static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
 861                                        HV_REFERENCE_TSC_PAGE *tsc_ref)
 862{
 863        u64 max_mul;
 864
 865        if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
 866                return false;
 867
 868        /*
 869         * check if scale would overflow, if so we use the time ref counter
 870         *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
 871         *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
 872         *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
 873         */
 874        max_mul = 100ull << (32 - hv_clock->tsc_shift);
 875        if (hv_clock->tsc_to_system_mul >= max_mul)
 876                return false;
 877
 878        /*
 879         * Otherwise compute the scale and offset according to the formulas
 880         * derived above.
 881         */
 882        tsc_ref->tsc_scale =
 883                mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
 884                                hv_clock->tsc_to_system_mul,
 885                                100);
 886
 887        tsc_ref->tsc_offset = hv_clock->system_time;
 888        do_div(tsc_ref->tsc_offset, 100);
 889        tsc_ref->tsc_offset -=
 890                mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
 891        return true;
 892}
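/*
 * A quick sanity check of the formulas above with made-up kvmclock
 * parameters: for a 1 GHz guest TSC (1 tick == 1 ns), one possible
 * encoding is tsc_shift = 1, tsc_to_system_mul = 2^31 (so that
 * nsec = ticks * 2^31 * 2^(1-32) = ticks).  The overflow check passes
 * (2^31 < 100 * 2^31) and tsc_scale = 2^33 * 2^31 / 100 = 2^64 / 100,
 * so the Hyper-V read "ticks * scale / 2^64" yields ticks / 100, i.e.
 * the elapsed time in 100ns units, as expected.
 */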
 893
 894void kvm_hv_setup_tsc_page(struct kvm *kvm,
 895                           struct pvclock_vcpu_time_info *hv_clock)
 896{
 897        struct kvm_hv *hv = &kvm->arch.hyperv;
 898        u32 tsc_seq;
 899        u64 gfn;
 900
 901        BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
 902        BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
 903
 904        if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
 905                return;
 906
 907        mutex_lock(&kvm->arch.hyperv.hv_lock);
 908        if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
 909                goto out_unlock;
 910
 911        gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 912        /*
 913         * Because the TSC parameters only vary when there is a
 914         * change in the master clock, do not bother with caching.
 915         */
 916        if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
 917                                    &tsc_seq, sizeof(tsc_seq))))
 918                goto out_unlock;
 919
 920        /*
 921         * While we're computing and writing the parameters, force the
 922         * guest to use the time reference count MSR.
 923         */
 924        hv->tsc_ref.tsc_sequence = 0;
 925        if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
 926                            &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
 927                goto out_unlock;
 928
 929        if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
 930                goto out_unlock;
 931
 932        /* Ensure sequence is zero before writing the rest of the struct.  */
 933        smp_wmb();
 934        if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
 935                goto out_unlock;
 936
 937        /*
 938         * Now switch to the TSC page mechanism by writing the sequence.
 939         */
 940        tsc_seq++;
 941        if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
 942                tsc_seq = 1;
 943
 944        /* Write the struct entirely before the non-zero sequence.  */
 945        smp_wmb();
 946
 947        hv->tsc_ref.tsc_sequence = tsc_seq;
 948        kvm_write_guest(kvm, gfn_to_gpa(gfn),
 949                        &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
 950out_unlock:
 951        mutex_unlock(&kvm->arch.hyperv.hv_lock);
 952}
 953
 954static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 955                             bool host)
 956{
 957        struct kvm *kvm = vcpu->kvm;
 958        struct kvm_hv *hv = &kvm->arch.hyperv;
 959
 960        switch (msr) {
 961        case HV_X64_MSR_GUEST_OS_ID:
 962                hv->hv_guest_os_id = data;
 963                /* setting guest os id to zero disables hypercall page */
 964                if (!hv->hv_guest_os_id)
 965                        hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
 966                break;
 967        case HV_X64_MSR_HYPERCALL: {
 968                u64 gfn;
 969                unsigned long addr;
 970                u8 instructions[4];
 971
 972                /* if guest os id is not set, the hypercall should remain disabled */
 973                if (!hv->hv_guest_os_id)
 974                        break;
 975                if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
 976                        hv->hv_hypercall = data;
 977                        break;
 978                }
 979                gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
 980                addr = gfn_to_hva(kvm, gfn);
 981                if (kvm_is_error_hva(addr))
 982                        return 1;
 983                kvm_x86_ops->patch_hypercall(vcpu, instructions);
 984                ((unsigned char *)instructions)[3] = 0xc3; /* ret */
 985                if (__copy_to_user((void __user *)addr, instructions, 4))
 986                        return 1;
 987                hv->hv_hypercall = data;
 988                mark_page_dirty(kvm, gfn);
 989                break;
 990        }
 991        case HV_X64_MSR_REFERENCE_TSC:
 992                hv->hv_tsc_page = data;
 993                if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
 994                        kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 995                break;
 996        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 997                return kvm_hv_msr_set_crash_data(vcpu,
 998                                                 msr - HV_X64_MSR_CRASH_P0,
 999                                                 data);
1000        case HV_X64_MSR_CRASH_CTL:
1001                return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
1002        case HV_X64_MSR_RESET:
1003                if (data == 1) {
1004                        vcpu_debug(vcpu, "hyper-v reset requested\n");
1005                        kvm_make_request(KVM_REQ_HV_RESET, vcpu);
1006                }
1007                break;
1008        case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1009                hv->hv_reenlightenment_control = data;
1010                break;
1011        case HV_X64_MSR_TSC_EMULATION_CONTROL:
1012                hv->hv_tsc_emulation_control = data;
1013                break;
1014        case HV_X64_MSR_TSC_EMULATION_STATUS:
1015                hv->hv_tsc_emulation_status = data;
1016                break;
1017        default:
1018                vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1019                            msr, data);
1020                return 1;
1021        }
1022        return 0;
1023}
1024
1025/* Calculate cpu time spent by current task in 100ns units */
1026static u64 current_task_runtime_100ns(void)
1027{
1028        u64 utime, stime;
1029
1030        task_cputime_adjusted(current, &utime, &stime);
1031
1032        return div_u64(utime + stime, 100);
1033}
1034
1035static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1036{
1037        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
1038
1039        switch (msr) {
1040        case HV_X64_MSR_VP_INDEX:
1041                if (!host)
1042                        return 1;
1043                hv->vp_index = (u32)data;
1044                break;
1045        case HV_X64_MSR_VP_ASSIST_PAGE: {
1046                u64 gfn;
1047                unsigned long addr;
1048
1049                if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
1050                        hv->hv_vapic = data;
1051                        if (kvm_lapic_enable_pv_eoi(vcpu, 0))
1052                                return 1;
1053                        break;
1054                }
1055                gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
1056                addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
1057                if (kvm_is_error_hva(addr))
1058                        return 1;
1059                if (__clear_user((void __user *)addr, PAGE_SIZE))
1060                        return 1;
1061                hv->hv_vapic = data;
1062                kvm_vcpu_mark_page_dirty(vcpu, gfn);
1063                if (kvm_lapic_enable_pv_eoi(vcpu,
1064                                            gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
1065                        return 1;
1066                break;
1067        }
1068        case HV_X64_MSR_EOI:
1069                return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1070        case HV_X64_MSR_ICR:
1071                return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1072        case HV_X64_MSR_TPR:
1073                return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1074        case HV_X64_MSR_VP_RUNTIME:
1075                if (!host)
1076                        return 1;
1077                hv->runtime_offset = data - current_task_runtime_100ns();
1078                break;
1079        case HV_X64_MSR_SCONTROL:
1080        case HV_X64_MSR_SVERSION:
1081        case HV_X64_MSR_SIEFP:
1082        case HV_X64_MSR_SIMP:
1083        case HV_X64_MSR_EOM:
1084        case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1085                return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
1086        case HV_X64_MSR_STIMER0_CONFIG:
1087        case HV_X64_MSR_STIMER1_CONFIG:
1088        case HV_X64_MSR_STIMER2_CONFIG:
1089        case HV_X64_MSR_STIMER3_CONFIG: {
1090                int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1091
1092                return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
1093                                         data, host);
1094        }
1095        case HV_X64_MSR_STIMER0_COUNT:
1096        case HV_X64_MSR_STIMER1_COUNT:
1097        case HV_X64_MSR_STIMER2_COUNT:
1098        case HV_X64_MSR_STIMER3_COUNT: {
1099                int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1100
1101                return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
1102                                        data, host);
1103        }
1104        default:
1105                vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1106                            msr, data);
1107                return 1;
1108        }
1109
1110        return 0;
1111}
1112
1113static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1114{
1115        u64 data = 0;
1116        struct kvm *kvm = vcpu->kvm;
1117        struct kvm_hv *hv = &kvm->arch.hyperv;
1118
1119        switch (msr) {
1120        case HV_X64_MSR_GUEST_OS_ID:
1121                data = hv->hv_guest_os_id;
1122                break;
1123        case HV_X64_MSR_HYPERCALL:
1124                data = hv->hv_hypercall;
1125                break;
1126        case HV_X64_MSR_TIME_REF_COUNT:
1127                data = get_time_ref_counter(kvm);
1128                break;
1129        case HV_X64_MSR_REFERENCE_TSC:
1130                data = hv->hv_tsc_page;
1131                break;
1132        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1133                return kvm_hv_msr_get_crash_data(vcpu,
1134                                                 msr - HV_X64_MSR_CRASH_P0,
1135                                                 pdata);
1136        case HV_X64_MSR_CRASH_CTL:
1137                return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
1138        case HV_X64_MSR_RESET:
1139                data = 0;
1140                break;
1141        case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1142                data = hv->hv_reenlightenment_control;
1143                break;
1144        case HV_X64_MSR_TSC_EMULATION_CONTROL:
1145                data = hv->hv_tsc_emulation_control;
1146                break;
1147        case HV_X64_MSR_TSC_EMULATION_STATUS:
1148                data = hv->hv_tsc_emulation_status;
1149                break;
1150        default:
1151                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1152                return 1;
1153        }
1154
1155        *pdata = data;
1156        return 0;
1157}
1158
1159static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1160{
1161        u64 data = 0;
1162        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
1163
1164        switch (msr) {
1165        case HV_X64_MSR_VP_INDEX:
1166                data = hv->vp_index;
1167                break;
1168        case HV_X64_MSR_EOI:
1169                return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1170        case HV_X64_MSR_ICR:
1171                return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1172        case HV_X64_MSR_TPR:
1173                return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1174        case HV_X64_MSR_VP_ASSIST_PAGE:
1175                data = hv->hv_vapic;
1176                break;
1177        case HV_X64_MSR_VP_RUNTIME:
1178                data = current_task_runtime_100ns() + hv->runtime_offset;
1179                break;
1180        case HV_X64_MSR_SCONTROL:
1181        case HV_X64_MSR_SVERSION:
1182        case HV_X64_MSR_SIEFP:
1183        case HV_X64_MSR_SIMP:
1184        case HV_X64_MSR_EOM:
1185        case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1186                return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
1187        case HV_X64_MSR_STIMER0_CONFIG:
1188        case HV_X64_MSR_STIMER1_CONFIG:
1189        case HV_X64_MSR_STIMER2_CONFIG:
1190        case HV_X64_MSR_STIMER3_CONFIG: {
1191                int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1192
1193                return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
1194                                         pdata);
1195        }
1196        case HV_X64_MSR_STIMER0_COUNT:
1197        case HV_X64_MSR_STIMER1_COUNT:
1198        case HV_X64_MSR_STIMER2_COUNT:
1199        case HV_X64_MSR_STIMER3_COUNT: {
1200                int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1201
1202                return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
1203                                        pdata);
1204        }
1205        case HV_X64_MSR_TSC_FREQUENCY:
1206                data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
1207                break;
1208        case HV_X64_MSR_APIC_FREQUENCY:
1209                data = APIC_BUS_FREQUENCY;
1210                break;
1211        default:
1212                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1213                return 1;
1214        }
1215        *pdata = data;
1216        return 0;
1217}
1218
1219int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1220{
1221        if (kvm_hv_msr_partition_wide(msr)) {
1222                int r;
1223
1224                mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
1225                r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
1226                mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
1227                return r;
1228        } else
1229                return kvm_hv_set_msr(vcpu, msr, data, host);
1230}
1231
1232int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1233{
1234        if (kvm_hv_msr_partition_wide(msr)) {
1235                int r;
1236
1237                mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
1238                r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
1239                mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
1240                return r;
1241        } else
1242                return kvm_hv_get_msr(vcpu, msr, pdata);
1243}
1244
1245static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
1246{
1247        int i = 0, j;
1248
1249        if (!(valid_bank_mask & BIT_ULL(bank_no)))
1250                return -1;
1251
1252        for (j = 0; j < bank_no; j++)
1253                if (valid_bank_mask & BIT_ULL(j))
1254                        i++;
1255
1256        return i;
1257}
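/*
 * For example, with valid_bank_mask == 0b1010 (banks 1 and 3 valid),
 * get_sparse_bank_no(mask, 3) returns 1 because bank 3 is the second
 * valid bank, while get_sparse_bank_no(mask, 2) returns -1 since bank 2
 * is not part of the sparse set.
 */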
1258
1259static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
1260                            u16 rep_cnt, bool ex)
1261{
1262        struct kvm *kvm = current_vcpu->kvm;
1263        struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
1264        struct hv_tlb_flush_ex flush_ex;
1265        struct hv_tlb_flush flush;
1266        struct kvm_vcpu *vcpu;
1267        unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
1268        unsigned long valid_bank_mask = 0;
1269        u64 sparse_banks[64];
1270        int sparse_banks_len, i;
1271        bool all_cpus;
1272
1273        if (!ex) {
1274                if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
1275                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1276
1277                trace_kvm_hv_flush_tlb(flush.processor_mask,
1278                                       flush.address_space, flush.flags);
1279
1280                sparse_banks[0] = flush.processor_mask;
1281                all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
1282        } else {
1283                if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
1284                                            sizeof(flush_ex))))
1285                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1286
1287                trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
1288                                          flush_ex.hv_vp_set.format,
1289                                          flush_ex.address_space,
1290                                          flush_ex.flags);
1291
1292                valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
1293                all_cpus = flush_ex.hv_vp_set.format !=
1294                        HV_GENERIC_SET_SPARSE_4K;
1295
1296                sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
1297                        sizeof(sparse_banks[0]);
1298
1299                if (!sparse_banks_len && !all_cpus)
1300                        goto ret_success;
1301
1302                if (!all_cpus &&
1303                    kvm_read_guest(kvm,
1304                                   ingpa + offsetof(struct hv_tlb_flush_ex,
1305                                                    hv_vp_set.bank_contents),
1306                                   sparse_banks,
1307                                   sparse_banks_len))
1308                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1309        }
1310
1311        cpumask_clear(&hv_current->tlb_lush);
1312
1313        kvm_for_each_vcpu(i, vcpu, kvm) {
1314                struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
1315                int bank = hv->vp_index / 64, sbank = 0;
1316
1317                if (!all_cpus) {
1318                        /* Banks >= 64 can't be represented */
1319                        if (bank >= 64)
1320                                continue;
1321
1322                        /* Non-ex hypercalls can only address first 64 vCPUs */
1323                        if (!ex && bank)
1324                                continue;
1325
1326                        if (ex) {
1327                                /*
1328                                 * Check if the bank of this vCPU is in the
1329                                 * sparse set and get the sparse bank number.
1330                                 */
1331                                sbank = get_sparse_bank_no(valid_bank_mask,
1332                                                           bank);
1333
1334                                if (sbank < 0)
1335                                        continue;
1336                        }
1337
1338                        if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
1339                                continue;
1340                }
1341
1342                /*
1343                 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
1344                 * can't analyze it here, flush TLB regardless of the specified
1345                 * address space.
1346                 */
1347                __set_bit(i, vcpu_bitmap);
1348        }
1349
1350        kvm_make_vcpus_request_mask(kvm,
1351                                    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
1352                                    vcpu_bitmap, &hv_current->tlb_lush);
1353
1354ret_success:
1355        /* We always do a full TLB flush, so set rep_done = rep_cnt. */
1356        return (u64)HV_STATUS_SUCCESS |
1357                ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
1358}
1359
1360bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1361{
1362        return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
1363}
1364
1365static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
1366{
1367        bool longmode;
1368
1369        longmode = is_64_bit_mode(vcpu);
1370        if (longmode)
1371                kvm_register_write(vcpu, VCPU_REGS_RAX, result);
1372        else {
1373                kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
1374                kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
1375        }
1376}
1377
1378static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
1379{
1380        kvm_hv_hypercall_set_result(vcpu, result);
1381        ++vcpu->stat.hypercalls;
1382        return kvm_skip_emulated_instruction(vcpu);
1383}
1384
1385static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
1386{
1387        return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
1388}
1389
1390static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
1391{
1392        struct eventfd_ctx *eventfd;
1393
1394        if (unlikely(!fast)) {
1395                int ret;
1396                gpa_t gpa = param;
1397
1398                if ((gpa & (__alignof__(param) - 1)) ||
1399                    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
1400                        return HV_STATUS_INVALID_ALIGNMENT;
1401
1402                ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
1403                if (ret < 0)
1404                        return HV_STATUS_INVALID_ALIGNMENT;
1405        }
1406
1407        /*
1408         * Per spec, bits 32-47 contain the extra "flag number".  However, we
1409         * have no use for it, and in all known use cases it is zero, so just
1410         * report lookup failure if it isn't.
1411         */
1412        if (param & 0xffff00000000ULL)
1413                return HV_STATUS_INVALID_PORT_ID;
1414        /* remaining bits are reserved-zero */
1415        if (param & ~KVM_HYPERV_CONN_ID_MASK)
1416                return HV_STATUS_INVALID_HYPERCALL_INPUT;
1417
1418        /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
1419        rcu_read_lock();
1420        eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
1421        rcu_read_unlock();
1422        if (!eventfd)
1423                return HV_STATUS_INVALID_PORT_ID;
1424
1425        eventfd_signal(eventfd, 1);
1426        return HV_STATUS_SUCCESS;
1427}
1428
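/*
 * Hyper-V hypercall entry point.  The input value and the input/output
 * GPAs come from the register ABI visible below: RCX, RDX and R8 for
 * 64-bit guests, or the EDX:EAX, EBX:ECX and EDI:ESI pairs for 32-bit
 * guests.  The call code, fast bit, rep count and rep start index are
 * then extracted from the input value.
 */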
1429int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1430{
1431        u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
1432        uint16_t code, rep_idx, rep_cnt;
1433        bool fast, longmode, rep;
1434
1435        /*
1436         * A hypercall generates #UD when executed from non-zero CPL
1437         * or in real mode, per the Hyper-V spec.
1438         */
1439        if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
1440                kvm_queue_exception(vcpu, UD_VECTOR);
1441                return 1;
1442        }
1443
1444        longmode = is_64_bit_mode(vcpu);
1445
1446        if (!longmode) {
1447                param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
1448                        (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
1449                ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
1450                        (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
1451                outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
1452                        (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
1453        }
1454#ifdef CONFIG_X86_64
1455        else {
1456                param = kvm_register_read(vcpu, VCPU_REGS_RCX);
1457                ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
1458                outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
1459        }
1460#endif
1461
1462        code = param & 0xffff;
1463        fast = !!(param & HV_HYPERCALL_FAST_BIT);
1464        rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
1465        rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
1466        rep = !!(rep_cnt || rep_idx);
1467
1468        trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
1469
1470        switch (code) {
1471        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
1472                if (unlikely(rep)) {
1473                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1474                        break;
1475                }
1476                kvm_vcpu_on_spin(vcpu, true);
1477                break;
1478        case HVCALL_SIGNAL_EVENT:
1479                if (unlikely(rep)) {
1480                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1481                        break;
1482                }
1483                ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
1484                if (ret != HV_STATUS_INVALID_PORT_ID)
1485                        break;
1486                /* maybe userspace knows this conn_id: fall through */
1487        case HVCALL_POST_MESSAGE:
1488                /* don't bother userspace if it has no way to handle it */
1489                if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
1490                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1491                        break;
1492                }
1493                vcpu->run->exit_reason = KVM_EXIT_HYPERV;
1494                vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
1495                vcpu->run->hyperv.u.hcall.input = param;
1496                vcpu->run->hyperv.u.hcall.params[0] = ingpa;
1497                vcpu->run->hyperv.u.hcall.params[1] = outgpa;
1498                vcpu->arch.complete_userspace_io =
1499                                kvm_hv_hypercall_complete_userspace;
1500                return 0;
1501        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
1502                if (unlikely(fast || !rep_cnt || rep_idx)) {
1503                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1504                        break;
1505                }
1506                ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1507                break;
1508        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
1509                if (unlikely(fast || rep)) {
1510                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1511                        break;
1512                }
1513                ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1514                break;
1515        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
1516                if (unlikely(fast || !rep_cnt || rep_idx)) {
1517                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1518                        break;
1519                }
1520                ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1521                break;
1522        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
1523                if (unlikely(fast || rep)) {
1524                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1525                        break;
1526                }
1527                ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1528                break;
1529        default:
1530                ret = HV_STATUS_INVALID_HYPERCALL_CODE;
1531                break;
1532        }
1533
1534        return kvm_hv_hypercall_complete(vcpu, ret);
1535}
1536
1537void kvm_hv_init_vm(struct kvm *kvm)
1538{
1539        mutex_init(&kvm->arch.hyperv.hv_lock);
1540        idr_init(&kvm->arch.hyperv.conn_to_evt);
1541}
1542
1543void kvm_hv_destroy_vm(struct kvm *kvm)
1544{
1545        struct eventfd_ctx *eventfd;
1546        int i;
1547
1548        idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
1549                eventfd_ctx_put(eventfd);
1550        idr_destroy(&kvm->arch.hyperv.conn_to_evt);
1551}
1552
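/*
 * Bind an eventfd to a connection id for HVCALL_SIGNAL_EVENT.  The
 * single-slot idr_alloc() range [conn_id, conn_id + 1) means -ENOSPC
 * really indicates "this conn_id is already taken", hence the remap
 * to -EEXIST.
 */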
1553static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
1554{
1555        struct kvm_hv *hv = &kvm->arch.hyperv;
1556        struct eventfd_ctx *eventfd;
1557        int ret;
1558
1559        eventfd = eventfd_ctx_fdget(fd);
1560        if (IS_ERR(eventfd))
1561                return PTR_ERR(eventfd);
1562
1563        mutex_lock(&hv->hv_lock);
1564        ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
1565                        GFP_KERNEL);
1566        mutex_unlock(&hv->hv_lock);
1567
1568        if (ret >= 0)
1569                return 0;
1570
1571        if (ret == -ENOSPC)
1572                ret = -EEXIST;
1573        eventfd_ctx_put(eventfd);
1574        return ret;
1575}
1576
1577static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
1578{
1579        struct kvm_hv *hv = &kvm->arch.hyperv;
1580        struct eventfd_ctx *eventfd;
1581
1582        mutex_lock(&hv->hv_lock);
1583        eventfd = idr_remove(&hv->conn_to_evt, conn_id);
1584        mutex_unlock(&hv->hv_lock);
1585
1586        if (!eventfd)
1587                return -ENOENT;
1588
1589        synchronize_srcu(&kvm->srcu);
1590        eventfd_ctx_put(eventfd);
1591        return 0;
1592}
1593
1594int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
1595{
1596        if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
1597            (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
1598                return -EINVAL;
1599
1600        if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
1601                return kvm_hv_eventfd_deassign(kvm, args->conn_id);
1602        return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
1603}
1604