linux/arch/x86/xen/time.c
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
#include <linux/timekeeper_internal.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP      100000
#define NS_PER_TICK     (1000000000LL / HZ)
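
/*
 * For illustration: NS_PER_TICK depends on the kernel's configured HZ.
 * Assuming HZ=250 (one common configuration, not something this file
 * mandates):
 *
 *      NS_PER_TICK = 1000000000 / 250 = 4000000 ns (4 ms)
 *
 * so do_stolen_accounting() below accounts one tick of stolen time per
 * 4 ms of accumulated runnable/offline time.
 */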

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);

static void do_stolen_accounting(void)
{
        struct vcpu_runstate_info state;
        struct vcpu_runstate_info *snap;
        s64 runnable, offline, stolen;
        cputime_t ticks;

        xen_get_runstate_snapshot(&state);

        WARN_ON(state.state != RUNSTATE_running);

        snap = this_cpu_ptr(&xen_runstate_snapshot);

        /* work out how much time the VCPU has not been running
           (i.e. runnable but preempted, or offline) */
        runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
        offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

        *snap = state;

        /* Add the appropriate number of ticks of stolen time,
           including any left-overs from last time. */
        stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);

        if (stolen < 0)
                stolen = 0;

        ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
        __this_cpu_write(xen_residual_stolen, stolen);
        account_steal_ticks(ticks);
}
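
/*
 * Worked example of the residual handling above (values illustrative):
 * if 9.5 ticks' worth of stolen time has accumulated since the last
 * snapshot, i.e. stolen = 9 * NS_PER_TICK + NS_PER_TICK / 2, then
 * iter_div_u64_rem() returns ticks = 9 and leaves the remainder
 * NS_PER_TICK / 2 in stolen.  That remainder is written back to
 * xen_residual_stolen and added in on the next call, so sub-tick
 * amounts of stolen time are never lost to rounding.
 */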

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
        struct pvclock_vcpu_time_info *info =
                &HYPERVISOR_shared_info->vcpu_info[0].time;

        return pvclock_tsc_khz(info);
}

cycle_t xen_clocksource_read(void)
{
        struct pvclock_vcpu_time_info *src;
        cycle_t ret;

        preempt_disable_notrace();
        src = &__this_cpu_read(xen_vcpu)->time;
        ret = pvclock_clocksource_read(src);
        preempt_enable_notrace();
        return ret;
}
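
/*
 * For reference, pvclock_clocksource_read() derives the time from the
 * guest TSC using the scaling parameters Xen publishes in the per-vcpu
 * time info, retrying while the hypervisor is mid-update.  A simplified
 * sketch of that protocol (the real implementation lives in
 * arch/x86/kernel/pvclock.c and differs in detail):
 *
 *      do {
 *              version = src->version;
 *              rmb();
 *              delta = rdtsc() - src->tsc_timestamp;
 *              ret = src->system_time +
 *                      pvclock_scale_delta(delta, src->tsc_to_system_mul,
 *                                          src->tsc_shift);
 *              rmb();
 *      } while ((src->version & 1) || (version != src->version));
 */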

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
        return xen_clocksource_read();
}

static void xen_read_wallclock(struct timespec *ts)
{
        struct shared_info *s = HYPERVISOR_shared_info;
        struct pvclock_wall_clock *wall_clock = &(s->wc);
        struct pvclock_vcpu_time_info *vcpu_time;

        vcpu_time = &get_cpu_var(xen_vcpu)->time;
        pvclock_read_wallclock(wall_clock, vcpu_time, ts);
        put_cpu_var(xen_vcpu);
}

static void xen_get_wallclock(struct timespec *now)
{
        xen_read_wallclock(now);
}

/*
 * Guests other than Dom0 cannot set the hardware RTC, so setting the
 * wallclock via this hook always fails; Dom0 keeps the native
 * set_wallclock (see xen_init_time_ops() below).
 */
static int xen_set_wallclock(const struct timespec *now)
{
        return -1;
}

static int xen_pvclock_gtod_notify(struct notifier_block *nb,
                                   unsigned long was_set, void *priv)
{
        /* Protected by the calling core code serialization */
        static struct timespec64 next_sync;

        struct xen_platform_op op;
        struct timespec64 now;
        struct timekeeper *tk = priv;
        static bool settime64_supported = true;
        int ret;

        now.tv_sec = tk->xtime_sec;
        now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

        /*
         * We only take the expensive HV call when the clock was set
         * or when the 11-minute RTC synchronization period has elapsed.
         */
        if (!was_set && timespec64_compare(&now, &next_sync) < 0)
                return NOTIFY_OK;

again:
        if (settime64_supported) {
                op.cmd = XENPF_settime64;
                op.u.settime64.mbz = 0;
                op.u.settime64.secs = now.tv_sec;
                op.u.settime64.nsecs = now.tv_nsec;
                op.u.settime64.system_time = xen_clocksource_read();
        } else {
                op.cmd = XENPF_settime32;
                op.u.settime32.secs = now.tv_sec;
                op.u.settime32.nsecs = now.tv_nsec;
                op.u.settime32.system_time = xen_clocksource_read();
        }

        ret = HYPERVISOR_platform_op(&op);

        if (ret == -ENOSYS && settime64_supported) {
                settime64_supported = false;
                goto again;
        }
        if (ret < 0)
                return NOTIFY_BAD;

        /*
         * Move the next drift compensation time 11 minutes
         * ahead. That's emulating the sync_cmos_clock() update for
         * the hardware RTC.
         */
        next_sync = now;
        next_sync.tv_sec += 11 * 60;

        return NOTIFY_OK;
}
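
/*
 * Example of the rate limiting above: if the wall clock is set at
 * t = 1000 s, next_sync becomes 1660 s.  Until the timekeeper passes
 * that point, notifier calls with was_set == 0 return early without a
 * hypercall; afterwards one XENPF_settime* call is made and next_sync
 * moves another 11 minutes out, mirroring sync_cmos_clock()'s cadence
 * for the hardware RTC.
 */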

static struct notifier_block xen_pvclock_gtod_notifier = {
        .notifier_call = xen_pvclock_gtod_notify,
};

static struct clocksource xen_clocksource __read_mostly = {
        .name = "xen",
        .rating = 400,
        .read = xen_clocksource_get_cycles,
        .mask = ~0,
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, the same
   event channel also carries a 100Hz tick, which is delivered while
   the vcpu is running.  We don't care about or use this tick, but it
   will cause the core time code to think the timer fired too soon,
   and will end up resetting it each time.  It could be filtered, but
   doing so has complications when the ktime clocksource is not yet
   the xen clocksource (i.e., at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/


/*
  Get a hypervisor absolute time.  In theory we could maintain an
  offset between the kernel's time and the hypervisor's time, and
  apply that to the kernel's absolute timeout.  Unfortunately the
  hypervisor and kernel times can drift even if the kernel is using
  the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
        return xen_clocksource_read() + delta;
}
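
/*
 * For example, a 1 ms timeout (delta = 1000000) requested when
 * xen_clocksource_read() returns 5000000000 yields an absolute deadline
 * of 5001000000 ns on the hypervisor's timebase.  The conversion is
 * redone on every call rather than cached as an offset, for the drift
 * reason described above.
 */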

static int xen_timerop_shutdown(struct clock_event_device *evt)
{
        /* cancel timeout */
        HYPERVISOR_set_timer_op(0);

        return 0;
}

static int xen_timerop_set_next_event(unsigned long delta,
                                      struct clock_event_device *evt)
{
        WARN_ON(!clockevent_state_oneshot(evt));

        if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
                BUG();

        /* We may have missed the deadline, but there's no real way of
           knowing for sure.  If the event was in the past, then we'll
           get an immediate interrupt. */

        return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
        .name                   = "xen",
        .features               = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns           = 0xffffffff,
        .min_delta_ns           = TIMER_SLOP,

        .mult                   = 1,
        .shift                  = 0,
        .rating                 = 500,

        .set_state_shutdown     = xen_timerop_shutdown,
        .set_next_event         = xen_timerop_set_next_event,
};
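
/*
 * Note on .mult = 1, .shift = 0: the clockevents core converts a
 * nanosecond delta into device cycles as (delta_ns * mult) >> shift,
 * so this identity scaling lets the hypervisor be programmed directly
 * in nanoseconds.  .max_delta_ns = 0xffffffff therefore caps a single
 * programmed event at roughly 4.29 seconds.
 */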

static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
        int cpu = smp_processor_id();

        if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
            HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                BUG();

        return 0;
}

static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
        int cpu = smp_processor_id();

        if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                BUG();

        return 0;
}

static int xen_vcpuop_set_next_event(unsigned long delta,
                                     struct clock_event_device *evt)
{
        int cpu = smp_processor_id();
        struct vcpu_set_singleshot_timer single;
        int ret;

        WARN_ON(!clockevent_state_oneshot(evt));

        single.timeout_abs_ns = get_abs_timeout(delta);
        single.flags = VCPU_SSHOTTMR_future;

        ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

        BUG_ON(ret != 0 && ret != -ETIME);

        return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
        .name = "xen",
        .features = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns = 0xffffffff,
        .min_delta_ns = TIMER_SLOP,

        .mult = 1,
        .shift = 0,
        .rating = 500,

        .set_state_shutdown = xen_vcpuop_shutdown,
        .set_state_oneshot = xen_vcpuop_set_oneshot,
        .set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
        &xen_timerop_clockevent;

struct xen_clock_event_device {
        struct clock_event_device evt;
        char name[16];
};
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
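
/*
 * Each CPU's copy of xen_clock_events starts out as a replica of the
 * initializer above, so every CPU begins with .evt.irq = -1, the
 * "no IRQ bound" state that xen_setup_timer() and xen_teardown_timer()
 * below test for.
 */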

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
        struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
        irqreturn_t ret;

        ret = IRQ_NONE;
        if (evt->event_handler) {
                evt->event_handler(evt);
                ret = IRQ_HANDLED;
        }

        do_stolen_accounting();

        return ret;
}

void xen_teardown_timer(int cpu)
{
        struct clock_event_device *evt;
        BUG_ON(cpu == 0);
        evt = &per_cpu(xen_clock_events, cpu).evt;

        if (evt->irq >= 0) {
                unbind_from_irqhandler(evt->irq, NULL);
                evt->irq = -1;
        }
}

void xen_setup_timer(int cpu)
{
        struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
        struct clock_event_device *evt = &xevt->evt;
        int irq;

        WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
        if (evt->irq >= 0)
                xen_teardown_timer(cpu);

        printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

        snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

        irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
                                      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
                                      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
                                      xevt->name, NULL);
        (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

        memcpy(evt, xen_clockevent, sizeof(*evt));

        evt->cpumask = cpumask_of(cpu);
        evt->irq = irq;
}


void xen_setup_cpu_clockevents(void)
{
        clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}

void xen_timer_resume(void)
{
        int cpu;

        pvclock_resume();

        if (xen_clockevent != &xen_vcpuop_clockevent)
                return;

        for_each_online_cpu(cpu) {
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
        }
}

static const struct pv_time_ops xen_time_ops __initconst = {
        .sched_clock = xen_clocksource_read,
};

static void __init xen_time_init(void)
{
        int cpu = smp_processor_id();
        struct timespec tp;

        /* As Dom0 is never migrated, there is no penalty for using the TSC there */
        if (xen_initial_domain())
                xen_clocksource.rating = 275;

        clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

        if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
                /* Successfully turned off 100Hz tick, so we have the
                   vcpuop-based timer interface */
                printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
                xen_clockevent = &xen_vcpuop_clockevent;
        }

        /* Set initial system time with full resolution */
        xen_read_wallclock(&tp);
        do_settimeofday(&tp);

        setup_force_cpu_cap(X86_FEATURE_TSC);

        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();

        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}

void __init xen_init_time_ops(void)
{
        pv_time_ops = xen_time_ops;

        x86_init.timers.timer_init = xen_time_init;
        x86_init.timers.setup_percpu_clockev = x86_init_noop;
        x86_cpuinit.setup_percpu_clockev = x86_init_noop;

        x86_platform.calibrate_tsc = xen_tsc_khz;
        x86_platform.get_wallclock = xen_get_wallclock;
        /* Dom0 uses the native method to set the hardware RTC. */
        if (!xen_initial_domain())
                x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
        int cpu = smp_processor_id();
        xen_setup_runstate_info(cpu);
        /*
         * xen_setup_timer(cpu) is not called here - snprintf is bad in
         * atomic context.  Instead it is done in xen_hvm_cpu_notify
         * (which gets called by smp_init during early bootup and also
         * during CPU hotplug events).
         */
        xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
        /* The vector callback is needed, otherwise we cannot receive
         * interrupts on cpus > 0, and at this point we don't know how
         * many cpus are available */
        if (!xen_have_vector_callback)
                return;
        if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
                printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
                                 "disabling pv timer\n");
                return;
        }

        pv_time_ops = xen_time_ops;
        x86_init.timers.setup_percpu_clockev = xen_time_init;
        x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

        x86_platform.calibrate_tsc = xen_tsc_khz;
        x86_platform.get_wallclock = xen_get_wallclock;
        x86_platform.set_wallclock = xen_set_wallclock;
}
#endif