linux/drivers/clocksource/hyperv_timer.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Clocksource driver for the synthetic counter and timers
   5 * provided by the Hyper-V hypervisor to guest VMs, as described
   6 * in the Hyper-V Top Level Functional Spec (TLFS). This driver
   7 * is instruction set architecture independent.
   8 *
   9 * Copyright (C) 2019, Microsoft, Inc.
  10 *
  11 * Author:  Michael Kelley <mikelley@microsoft.com>
  12 */
  13
  14#include <linux/percpu.h>
  15#include <linux/cpumask.h>
  16#include <linux/clockchips.h>
  17#include <linux/clocksource.h>
  18#include <linux/sched_clock.h>
  19#include <linux/mm.h>
  20#include <linux/cpuhotplug.h>
  21#include <clocksource/hyperv_timer.h>
  22#include <asm/hyperv-tlfs.h>
  23#include <asm/mshyperv.h>
  24
  25static struct clock_event_device __percpu *hv_clock_event;
  26static u64 hv_sched_clock_offset __ro_after_init;
  27
  28/*
  29 * If false, we're using the old mechanism for stimer0 interrupts
  30 * where it sends a VMbus message when it expires. The old
  31 * mechanism is used when running on older versions of Hyper-V
  32 * that don't support Direct Mode. While Hyper-V provides
  33 * four stimer's per CPU, Linux uses only stimer0.
  34 *
  35 * Because Direct Mode does not require processing a VMbus
  36 * message, stimer interrupts can be enabled earlier in the
  37 * process of booting a CPU, and consistent with when timer
  38 * interrupts are enabled for other clocksource drivers.
  39 * However, for legacy versions of Hyper-V when Direct Mode
  40 * is not enabled, setting up stimer interrupts must be
  41 * delayed until VMbus is initialized and can process the
  42 * interrupt message.
  43 */
  44static bool direct_mode_enabled;
  45
  46static int stimer0_irq;
  47static int stimer0_vector;
  48static int stimer0_message_sint;
  49
  50/*
  51 * ISR for when stimer0 is operating in Direct Mode.  Direct Mode
  52 * does not use VMbus or any VMbus messages, so process here and not
  53 * in the VMbus driver code.
  54 */
  55void hv_stimer0_isr(void)
  56{
  57        struct clock_event_device *ce;
  58
  59        ce = this_cpu_ptr(hv_clock_event);
  60        ce->event_handler(ce);
  61}
  62EXPORT_SYMBOL_GPL(hv_stimer0_isr);
  63
  64static int hv_ce_set_next_event(unsigned long delta,
  65                                struct clock_event_device *evt)
  66{
  67        u64 current_tick;
  68
  69        current_tick = hv_read_reference_counter();
  70        current_tick += delta;
  71        hv_init_timer(0, current_tick);
  72        return 0;
  73}
  74
  75static int hv_ce_shutdown(struct clock_event_device *evt)
  76{
  77        hv_init_timer(0, 0);
  78        hv_init_timer_config(0, 0);
  79        if (direct_mode_enabled)
  80                hv_disable_stimer0_percpu_irq(stimer0_irq);
  81
  82        return 0;
  83}
  84
  85static int hv_ce_set_oneshot(struct clock_event_device *evt)
  86{
  87        union hv_stimer_config timer_cfg;
  88
  89        timer_cfg.as_uint64 = 0;
  90        timer_cfg.enable = 1;
  91        timer_cfg.auto_enable = 1;
  92        if (direct_mode_enabled) {
  93                /*
  94                 * When it expires, the timer will directly interrupt
  95                 * on the specified hardware vector/IRQ.
  96                 */
  97                timer_cfg.direct_mode = 1;
  98                timer_cfg.apic_vector = stimer0_vector;
  99                hv_enable_stimer0_percpu_irq(stimer0_irq);
 100        } else {
 101                /*
 102                 * When it expires, the timer will generate a VMbus message,
 103                 * to be handled by the normal VMbus interrupt handler.
 104                 */
 105                timer_cfg.direct_mode = 0;
 106                timer_cfg.sintx = stimer0_message_sint;
 107        }
 108        hv_init_timer_config(0, timer_cfg.as_uint64);
 109        return 0;
 110}
 111
 112/*
 113 * hv_stimer_init - Per-cpu initialization of the clockevent
 114 */
 115static int hv_stimer_init(unsigned int cpu)
 116{
 117        struct clock_event_device *ce;
 118
 119        if (!hv_clock_event)
 120                return 0;
 121
 122        ce = per_cpu_ptr(hv_clock_event, cpu);
 123        ce->name = "Hyper-V clockevent";
 124        ce->features = CLOCK_EVT_FEAT_ONESHOT;
 125        ce->cpumask = cpumask_of(cpu);
 126        ce->rating = 1000;
 127        ce->set_state_shutdown = hv_ce_shutdown;
 128        ce->set_state_oneshot = hv_ce_set_oneshot;
 129        ce->set_next_event = hv_ce_set_next_event;
 130
 131        clockevents_config_and_register(ce,
 132                                        HV_CLOCK_HZ,
 133                                        HV_MIN_DELTA_TICKS,
 134                                        HV_MAX_MAX_DELTA_TICKS);
 135        return 0;
 136}
 137
 138/*
 139 * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
 140 */
 141int hv_stimer_cleanup(unsigned int cpu)
 142{
 143        struct clock_event_device *ce;
 144
 145        if (!hv_clock_event)
 146                return 0;
 147
 148        /*
 149         * In the legacy case where Direct Mode is not enabled
 150         * (which can only be on x86/64), stimer cleanup happens
 151         * relatively early in the CPU offlining process. We
 152         * must unbind the stimer-based clockevent device so
 153         * that the LAPIC timer can take over until clockevents
 154         * are no longer needed in the offlining process. Note
 155         * that clockevents_unbind_device() eventually calls
 156         * hv_ce_shutdown().
 157         *
 158         * The unbind should not be done when Direct Mode is
 159         * enabled because we may be on an architecture where
 160         * there are no other clockevent devices to fallback to.
 161         */
 162        ce = per_cpu_ptr(hv_clock_event, cpu);
 163        if (direct_mode_enabled)
 164                hv_ce_shutdown(ce);
 165        else
 166                clockevents_unbind_device(ce, cpu);
 167
 168        return 0;
 169}
 170EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
 171
 172/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
 173int hv_stimer_alloc(void)
 174{
 175        int ret = 0;
 176
 177        /*
 178         * Synthetic timers are always available except on old versions of
 179         * Hyper-V on x86.  In that case, return as error as Linux will use a
 180         * clockevent based on emulated LAPIC timer hardware.
 181         */
 182        if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
 183                return -EINVAL;
 184
 185        hv_clock_event = alloc_percpu(struct clock_event_device);
 186        if (!hv_clock_event)
 187                return -ENOMEM;
 188
 189        direct_mode_enabled = ms_hyperv.misc_features &
 190                        HV_STIMER_DIRECT_MODE_AVAILABLE;
 191        if (direct_mode_enabled) {
 192                ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
 193                                hv_stimer0_isr);
 194                if (ret)
 195                        goto free_percpu;
 196
 197                /*
 198                 * Since we are in Direct Mode, stimer initialization
 199                 * can be done now with a CPUHP value in the same range
 200                 * as other clockevent devices.
 201                 */
 202                ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
 203                                "clockevents/hyperv/stimer:starting",
 204                                hv_stimer_init, hv_stimer_cleanup);
 205                if (ret < 0)
 206                        goto free_stimer0_irq;
 207        }
 208        return ret;
 209
 210free_stimer0_irq:
 211        hv_remove_stimer0_irq(stimer0_irq);
 212        stimer0_irq = 0;
 213free_percpu:
 214        free_percpu(hv_clock_event);
 215        hv_clock_event = NULL;
 216        return ret;
 217}
 218EXPORT_SYMBOL_GPL(hv_stimer_alloc);
 219
 220/*
 221 * hv_stimer_legacy_init -- Called from the VMbus driver to handle
 222 * the case when Direct Mode is not enabled, and the stimer
 223 * must be initialized late in the CPU onlining process.
 224 *
 225 */
 226void hv_stimer_legacy_init(unsigned int cpu, int sint)
 227{
 228        if (direct_mode_enabled)
 229                return;
 230
 231        /*
 232         * This function gets called by each vCPU, so setting the
 233         * global stimer_message_sint value each time is conceptually
 234         * not ideal, but the value passed in is always the same and
 235         * it avoids introducing yet another interface into this
 236         * clocksource driver just to set the sint in the legacy case.
 237         */
 238        stimer0_message_sint = sint;
 239        (void)hv_stimer_init(cpu);
 240}
 241EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
 242
 243/*
 244 * hv_stimer_legacy_cleanup -- Called from the VMbus driver to
 245 * handle the case when Direct Mode is not enabled, and the
 246 * stimer must be cleaned up early in the CPU offlining
 247 * process.
 248 */
 249void hv_stimer_legacy_cleanup(unsigned int cpu)
 250{
 251        if (direct_mode_enabled)
 252                return;
 253        (void)hv_stimer_cleanup(cpu);
 254}
 255EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
 256
 257
 258/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
 259void hv_stimer_free(void)
 260{
 261        if (!hv_clock_event)
 262                return;
 263
 264        if (direct_mode_enabled) {
 265                cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
 266                hv_remove_stimer0_irq(stimer0_irq);
 267                stimer0_irq = 0;
 268        }
 269        free_percpu(hv_clock_event);
 270        hv_clock_event = NULL;
 271}
 272EXPORT_SYMBOL_GPL(hv_stimer_free);
 273
 274/*
 275 * Do a global cleanup of clockevents for the cases of kexec and
 276 * vmbus exit
 277 */
 278void hv_stimer_global_cleanup(void)
 279{
 280        int     cpu;
 281
 282        /*
 283         * hv_stime_legacy_cleanup() will stop the stimer if Direct
 284         * Mode is not enabled, and fallback to the LAPIC timer.
 285         */
 286        for_each_present_cpu(cpu) {
 287                hv_stimer_legacy_cleanup(cpu);
 288        }
 289
 290        /*
 291         * If Direct Mode is enabled, the cpuhp teardown callback
 292         * (hv_stimer_cleanup) will be run on all CPUs to stop the
 293         * stimers.
 294         */
 295        hv_stimer_free();
 296}
 297EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
 298
 299/*
 300 * Code and definitions for the Hyper-V clocksources.  Two
 301 * clocksources are defined: one that reads the Hyper-V defined MSR, and
 302 * the other that uses the TSC reference page feature as defined in the
 303 * TLFS.  The MSR version is for compatibility with old versions of
 304 * Hyper-V and 32-bit x86.  The TSC reference page version is preferred.
 305 *
 306 * The Hyper-V clocksource ratings of 250 are chosen to be below the
 307 * TSC clocksource rating of 300.  In configurations where Hyper-V offers
 308 * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource
 309 * is available and preferred.  With the higher rating, it will be the
 310 * default.  On older hardware and Hyper-V versions, the TSC is marked
 311 * "unstable", so no TSC clocksource is created and the selected Hyper-V
 312 * clocksource will be the default.
 313 */
 314
 315u64 (*hv_read_reference_counter)(void);
 316EXPORT_SYMBOL_GPL(hv_read_reference_counter);
 317
 318static union {
 319        struct ms_hyperv_tsc_page page;
 320        u8 reserved[PAGE_SIZE];
 321} tsc_pg __aligned(PAGE_SIZE);
 322
 323struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 324{
 325        return &tsc_pg.page;
 326}
 327EXPORT_SYMBOL_GPL(hv_get_tsc_page);
 328
 329static u64 notrace read_hv_clock_tsc(void)
 330{
 331        u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());
 332
 333        if (current_tick == U64_MAX)
 334                hv_get_time_ref_count(current_tick);
 335
 336        return current_tick;
 337}
 338
 339static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
 340{
 341        return read_hv_clock_tsc();
 342}
 343
 344static u64 read_hv_sched_clock_tsc(void)
 345{
 346        return (read_hv_clock_tsc() - hv_sched_clock_offset) *
 347                (NSEC_PER_SEC / HV_CLOCK_HZ);
 348}
 349
 350static void suspend_hv_clock_tsc(struct clocksource *arg)
 351{
 352        u64 tsc_msr;
 353
 354        /* Disable the TSC page */
 355        hv_get_reference_tsc(tsc_msr);
 356        tsc_msr &= ~BIT_ULL(0);
 357        hv_set_reference_tsc(tsc_msr);
 358}
 359
 360
 361static void resume_hv_clock_tsc(struct clocksource *arg)
 362{
 363        phys_addr_t phys_addr = virt_to_phys(&tsc_pg);
 364        u64 tsc_msr;
 365
 366        /* Re-enable the TSC page */
 367        hv_get_reference_tsc(tsc_msr);
 368        tsc_msr &= GENMASK_ULL(11, 0);
 369        tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
 370        hv_set_reference_tsc(tsc_msr);
 371}
 372
 373static int hv_cs_enable(struct clocksource *cs)
 374{
 375        hv_enable_vdso_clocksource();
 376        return 0;
 377}
 378
 379static struct clocksource hyperv_cs_tsc = {
 380        .name   = "hyperv_clocksource_tsc_page",
 381        .rating = 250,
 382        .read   = read_hv_clock_tsc_cs,
 383        .mask   = CLOCKSOURCE_MASK(64),
 384        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 385        .suspend= suspend_hv_clock_tsc,
 386        .resume = resume_hv_clock_tsc,
 387        .enable = hv_cs_enable,
 388};
 389
 390static u64 notrace read_hv_clock_msr(void)
 391{
 392        u64 current_tick;
 393        /*
 394         * Read the partition counter to get the current tick count. This count
 395         * is set to 0 when the partition is created and is incremented in
 396         * 100 nanosecond units.
 397         */
 398        hv_get_time_ref_count(current_tick);
 399        return current_tick;
 400}
 401
 402static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
 403{
 404        return read_hv_clock_msr();
 405}
 406
 407static u64 read_hv_sched_clock_msr(void)
 408{
 409        return (read_hv_clock_msr() - hv_sched_clock_offset) *
 410                (NSEC_PER_SEC / HV_CLOCK_HZ);
 411}
 412
 413static struct clocksource hyperv_cs_msr = {
 414        .name   = "hyperv_clocksource_msr",
 415        .rating = 250,
 416        .read   = read_hv_clock_msr_cs,
 417        .mask   = CLOCKSOURCE_MASK(64),
 418        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 419};
 420
 421static bool __init hv_init_tsc_clocksource(void)
 422{
 423        u64             tsc_msr;
 424        phys_addr_t     phys_addr;
 425
 426        if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
 427                return false;
 428
 429        hv_read_reference_counter = read_hv_clock_tsc;
 430        phys_addr = virt_to_phys(hv_get_tsc_page());
 431
 432        /*
 433         * The Hyper-V TLFS specifies to preserve the value of reserved
 434         * bits in registers. So read the existing value, preserve the
 435         * low order 12 bits, and add in the guest physical address
 436         * (which already has at least the low 12 bits set to zero since
 437         * it is page aligned). Also set the "enable" bit, which is bit 0.
 438         */
 439        hv_get_reference_tsc(tsc_msr);
 440        tsc_msr &= GENMASK_ULL(11, 0);
 441        tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
 442        hv_set_reference_tsc(tsc_msr);
 443
 444        hv_set_clocksource_vdso(hyperv_cs_tsc);
 445        clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 446
 447        hv_sched_clock_offset = hv_read_reference_counter();
 448        hv_setup_sched_clock(read_hv_sched_clock_tsc);
 449
 450        return true;
 451}
 452
 453void __init hv_init_clocksource(void)
 454{
 455        /*
 456         * Try to set up the TSC page clocksource. If it succeeds, we're
 457         * done. Otherwise, set up the MSR clocksoruce.  At least one of
 458         * these will always be available except on very old versions of
 459         * Hyper-V on x86.  In that case we won't have a Hyper-V
 460         * clocksource, but Linux will still run with a clocksource based
 461         * on the emulated PIT or LAPIC timer.
 462         */
 463        if (hv_init_tsc_clocksource())
 464                return;
 465
 466        if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
 467                return;
 468
 469        hv_read_reference_counter = read_hv_clock_msr;
 470        clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
 471
 472        hv_sched_clock_offset = hv_read_reference_counter();
 473        hv_setup_sched_clock(read_hv_sched_clock_msr);
 474}
 475EXPORT_SYMBOL_GPL(hv_init_clocksource);
 476