linux/virt/kvm/arm/arch_timer.c
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/cpu.h>
#include <linux/of_irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
static unsigned int host_vtimer_irq;

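/*
 * On vcpu put, invalidate the cached "active clear" state: once the vcpu
 * has been scheduled out, the hardware cannot be trusted to still be in
 * that state on the next vcpu load (see the caching discussion in
 * kvm_timer_flush_hwstate()).
 */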
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
        vcpu->arch.timer_cpu.active_cleared_last = false;
}

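/* Read the current physical counter through the host's timecounter. */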
static cycle_t kvm_phys_timer_read(void)
{
        return timecounter->cc->read(timecounter->cc);
}

static bool timer_is_armed(struct arch_timer_cpu *timer)
{
        return timer->armed;
}

/* timer_arm: as in "arm the timer", not as in ARM the company */
static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
{
        timer->armed = true;
        hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
                      HRTIMER_MODE_ABS);
}

static void timer_disarm(struct arch_timer_cpu *timer)
{
        if (timer_is_armed(timer)) {
                hrtimer_cancel(&timer->timer);
                cancel_work_sync(&timer->expired);
                timer->armed = false;
        }
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;

        /*
         * We disable the timer in the world switch and let it be
         * handled by kvm_timer_sync_hwstate(). Getting a timer
         * interrupt at this point is a sure sign of some major
         * breakage.
         */
        pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
        return IRQ_HANDLED;
}

/*
 * Work function for handling the backup timer that we schedule when a vcpu is
 * no longer running, but had a timer programmed to fire in the future.
 */
static void kvm_timer_inject_irq_work(struct work_struct *work)
{
        struct kvm_vcpu *vcpu;

        vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
        vcpu->arch.timer_cpu.armed = false;

        WARN_ON(!kvm_timer_should_fire(vcpu));

        /*
         * If the vcpu is blocked we want to wake it up so that it will see
         * the timer has expired when entering the guest.
         */
        kvm_vcpu_kick(vcpu);
}

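/*
 * Return the number of nanoseconds until CNTV_CVAL is reached, as seen
 * by the guest: the guest's virtual counter is the physical counter
 * minus the per-VM CNTVOFF.  Returns 0 if the compare value has already
 * been reached.
 */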
static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
{
        cycle_t cval, now;

        cval = vcpu->arch.timer_cpu.cntv_cval;
        now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

        if (now < cval) {
                u64 ns;

                ns = cyclecounter_cyc2ns(timecounter->cc,
                                         cval - now,
                                         timecounter->mask,
                                         &timecounter->frac);
                return ns;
        }

        return 0;
}

static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
        struct arch_timer_cpu *timer;
        struct kvm_vcpu *vcpu;
        u64 ns;

        timer = container_of(hrt, struct arch_timer_cpu, timer);
        vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

        /*
         * Check that the timer has really expired from the guest's
         * PoV (NTP on the host may have forced it to expire
         * early). If we should have slept longer, restart it.
         */
        ns = kvm_timer_compute_delta(vcpu);
        if (unlikely(ns)) {
                hrtimer_forward_now(hrt, ns_to_ktime(ns));
                return HRTIMER_RESTART;
        }

        queue_work(wqueue, &timer->expired);
        return HRTIMER_NORESTART;
}

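/*
 * The timer output can only be asserted when the timer is enabled and
 * its output is not masked in CNTV_CTL.
 */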
static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
                (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
}

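/*
 * Returns true if, from the guest's point of view, the timer condition
 * holds: the timer can fire and the compare value has been reached.
 */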
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        cycle_t cval, now;

        if (!kvm_timer_irq_can_fire(vcpu))
                return false;

        cval = timer->cntv_cval;
        now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

        return cval <= now;
}

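/*
 * Update the emulated timer output line towards the vgic: record the new
 * level and inject it as a mapped (hardware-linked) interrupt.
 */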
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
{
        int ret;
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(!vgic_initialized(vcpu->kvm));

        timer->active_cleared_last = false;
        timer->irq.level = new_level;
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
                                   timer->irq.level);
        ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
                                         timer->map,
                                         timer->irq.level);
        WARN_ON(ret);
}

/*
 * Check if there was a change in the timer state (should we raise or lower
 * the line level to the GIC).
 */
static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        /*
         * If userspace modified the timer registers via SET_ONE_REG before
         * the vgic was initialized, we mustn't set the timer->irq.level value
         * because the guest would never see the interrupt.  Instead wait
         * until we call this function from kvm_timer_flush_hwstate.
         */
        if (!vgic_initialized(vcpu->kvm))
                return -ENODEV;

        if (kvm_timer_should_fire(vcpu) != timer->irq.level)
                kvm_timer_update_irq(vcpu, !timer->irq.level);

        return 0;
}

/*
 * Schedule the background timer before calling kvm_vcpu_block, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(timer_is_armed(timer));

        /*
         * No need to schedule a background timer if the guest timer has
         * already expired, because kvm_vcpu_block will return before putting
         * the thread to sleep.
         */
        if (kvm_timer_should_fire(vcpu))
                return;

        /*
         * If the timer is not capable of raising interrupts (disabled or
         * masked), then there's no more work for us to do.
         */
        if (!kvm_timer_irq_can_fire(vcpu))
                return;

        /* The timer has not yet expired, schedule a background timer */
        timer_arm(timer, kvm_timer_compute_delta(vcpu));
}

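/* Cancel the background timer; called after returning from kvm_vcpu_block. */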
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        timer_disarm(timer);
}

/**
 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the virtual timer has expired while we were running in the host,
 * and inject an interrupt if that was the case.
 */
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        bool phys_active;
        int ret;

        if (kvm_timer_update_state(vcpu))
                return;

        /*
         * If we enter the guest with the virtual input level to the VGIC
         * asserted, then we have already told the VGIC what we need to, and
         * we don't need to exit from the guest until the guest deactivates
         * the already injected interrupt, so therefore we should set the
         * hardware active state to prevent unnecessary exits from the guest.
         *
         * Also, if we enter the guest with the virtual timer interrupt active,
         * then it must be active on the physical distributor, because we set
         * the HW bit and the guest must be able to deactivate the virtual and
         * physical interrupt at the same time.
         *
         * Conversely, if the virtual input level is deasserted and the virtual
         * interrupt is not active, then always clear the hardware active state
         * to ensure that hardware interrupts from the timer trigger a guest
         * exit.
         */
        if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
                phys_active = true;
        else
                phys_active = false;

        /*
         * We want to avoid hitting the (re)distributor as much as
         * possible, as this is a potentially expensive MMIO access
         * (not to mention locks in the irq layer), and a solution for
         * this is to cache the "active" state in memory.
         *
         * Things to consider: we cannot cache an "active set" state,
         * because the HW can change this behind our back (it becomes
         * "clear" in the HW). We must then restrict the caching to
         * the "clear" state.
         *
         * The cache is invalidated on:
         * - vcpu put, indicating that the HW cannot be trusted to be
         *   in a sane state on the next vcpu load,
         * - any change in the interrupt state
         *
         * Usage conditions:
         * - cached value is "active clear"
         * - value to be programmed is "active clear"
         */
        if (timer->active_cleared_last && !phys_active)
                return;

        ret = irq_set_irqchip_state(timer->map->irq,
                                    IRQCHIP_STATE_ACTIVE,
                                    phys_active);
        WARN_ON(ret);

        timer->active_cleared_last = !phys_active;
}

/**
 * kvm_timer_sync_hwstate - sync timer state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the virtual timer has expired while we were running in the guest,
 * and inject an interrupt if that was the case.
 */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(timer_is_armed(timer));

        /*
         * The guest could have modified the timer registers or the timer
         * could have expired, update the timer state.
         */
        kvm_timer_update_state(vcpu);
}

int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                         const struct kvm_irq_level *irq)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct irq_phys_map *map;

        /*
         * The vcpu timer irq number cannot be determined in
         * kvm_timer_vcpu_init() because it is called much before
         * kvm_vcpu_set_target(). To handle this, we determine
         * vcpu timer irq number when the vcpu is reset.
         */
        timer->irq.irq = irq->irq;

        /*
         * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
         * and to 0 for ARMv7.  We provide an implementation that always
         * resets the timer to be disabled and unmasked and is compliant with
         * the ARMv7 architecture.
         */
        timer->cntv_ctl = 0;
        kvm_timer_update_state(vcpu);

        /*
         * Tell the VGIC that the virtual interrupt is tied to a
         * physical interrupt. We do that once per VCPU.
         */
        map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
        if (WARN_ON(IS_ERR(map)))
                return PTR_ERR(map);

        timer->map = map;
        return 0;
}

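/*
 * One-time per-vcpu setup: initialize the background hrtimer and the
 * work item that kicks the vcpu when that timer expires.
 */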
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
        hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        timer->timer.function = kvm_timer_expire;
}

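/* Enable the host's per-cpu virtual timer interrupt on the calling CPU. */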
static void kvm_timer_init_interrupt(void *info)
{
        enable_percpu_irq(host_vtimer_irq, 0);
}

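/*
 * Userspace access to the guest timer registers.  Note that the virtual
 * counter is not stored directly; instead the per-VM offset is kept such
 * that:
 *
 *     guest CNTVCT = physical counter - CNTVOFF
 *
 * so a write to KVM_REG_ARM_TIMER_CNT updates CNTVOFF, and a read
 * reconstructs the guest counter by subtracting it again.
 */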
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
                timer->cntv_ctl = value;
                break;
        case KVM_REG_ARM_TIMER_CNT:
                vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
                break;
        case KVM_REG_ARM_TIMER_CVAL:
                timer->cntv_cval = value;
                break;
        default:
                return -1;
        }

        kvm_timer_update_state(vcpu);
        return 0;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
                return timer->cntv_ctl;
        case KVM_REG_ARM_TIMER_CNT:
                return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
        case KVM_REG_ARM_TIMER_CVAL:
                return timer->cntv_cval;
        }
        return (u64)-1;
}

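/*
 * CPU hotplug notifier: enable the vtimer interrupt on CPUs coming up
 * and disable it on CPUs going down.
 */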
static int kvm_timer_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *cpu)
{
        switch (action) {
        case CPU_STARTING:
        case CPU_STARTING_FROZEN:
                kvm_timer_init_interrupt(NULL);
                break;
        case CPU_DYING:
        case CPU_DYING_FROZEN:
                disable_percpu_irq(host_vtimer_irq);
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block kvm_timer_cpu_nb = {
        .notifier_call = kvm_timer_cpu_notify,
};

static const struct of_device_id arch_timer_of_match[] = {
        { .compatible   = "arm,armv7-timer",    },
        { .compatible   = "arm,armv8-timer",    },
        {},
};

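/*
 * Global init: grab the arch timer timecounter, look up the virtual
 * timer PPI in the device tree, request it as a per-cpu interrupt,
 * register the CPU hotplug notifier, and create the background-timer
 * workqueue.
 */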
int kvm_timer_hyp_init(void)
{
        struct device_node *np;
        unsigned int ppi;
        int err;

        timecounter = arch_timer_get_timecounter();
        if (!timecounter)
                return -ENODEV;

        np = of_find_matching_node(NULL, arch_timer_of_match);
        if (!np) {
                kvm_err("kvm_arch_timer: can't find DT node\n");
                return -ENODEV;
        }

        ppi = irq_of_parse_and_map(np, 2);
        if (!ppi) {
                kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
                err = -EINVAL;
                goto out;
        }

        err = request_percpu_irq(ppi, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
                kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
                        ppi, err);
                goto out;
        }

        host_vtimer_irq = ppi;

        err = __register_cpu_notifier(&kvm_timer_cpu_nb);
        if (err) {
                kvm_err("Cannot register timer CPU notifier\n");
                goto out_free;
        }

        wqueue = create_singlethread_workqueue("kvm_arch_timer");
        if (!wqueue) {
                err = -ENOMEM;
                goto out_free;
        }

        kvm_info("%s IRQ%d\n", np->name, ppi);
        on_each_cpu(kvm_timer_init_interrupt, NULL, 1);

        goto out;
out_free:
        free_percpu_irq(ppi, kvm_get_running_vcpus());
out:
        of_node_put(np);
        return err;
}

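/*
 * Per-vcpu teardown: cancel any pending background timer and drop the
 * virtual/physical interrupt mapping established at reset time.
 */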
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        timer_disarm(timer);
        if (timer->map)
                kvm_vgic_unmap_phys_irq(vcpu, timer->map);
}

void kvm_timer_enable(struct kvm *kvm)
{
        if (kvm->arch.timer.enabled)
                return;

        /*
         * There is a potential race here between VCPUs starting for the first
         * time, which may be enabling the timer multiple times.  That doesn't
         * hurt though, because we're just setting a variable to the same
         * value it already had.  The important thing is that all VCPUs have
         * the enabled variable set, before entering the guest, if the arch
         * timers are enabled.
         */
        if (timecounter && wqueue)
                kvm->arch.timer.enabled = 1;
}

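/*
 * Per-VM init: snapshot the physical counter as CNTVOFF so that the
 * guest's virtual counter starts at zero when the VM is created.
 */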
void kvm_timer_init(struct kvm *kvm)
{
        kvm->arch.timer.cntvoff = kvm_phys_timer_read();
}