linux/arch/s390/kernel/vtime.c
/*
 *    Virtual cpu timer based timer functions.
 *
 *    Copyright IBM Corp. 2004, 2012
 *    Author(s): Jan Glauber <jan.glauber@de.ibm.com>
 */

#include <linux/kernel_stat.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/smp.h>
#include "entry.h"

static void virt_timer_expire(void);

DEFINE_PER_CPU(struct s390_idle_data, s390_idle);

static LIST_HEAD(virt_timer_list);
static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
static atomic64_t virt_timer_elapsed;

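/*
 * Per-cpu state for SMT (multi-threading) utilization scaling.  mt_cycles
 * holds the last snapshot of the MT-diagnostic cycle counters, and
 * mt_scaling_mult/mt_scaling_div form the fraction that is applied to raw
 * user and system time to produce the scaled values.  mt_scaling_jiffies
 * records when the factor was last recomputed and limits updates to at
 * most once per jiffy.
 */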
static DEFINE_PER_CPU(u64, mt_cycles[8]);
static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_jiffies);

static inline u64 get_vtimer(void)
{
        u64 timer;

        asm volatile("stpt %0" : "=m" (timer));
        return timer;
}

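/*
 * Program a new expiry value into the cpu timer.  The time that has run
 * off the timer since the last update is charged to system_timer, and
 * last_update_timer is reset so that later accounting starts from the new
 * expiry value.  stpt and spt are issued back to back so that almost no
 * cycles are lost between reading the old value and setting the new one.
 */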
static inline void set_vtimer(u64 expires)
{
        u64 timer;

        asm volatile(
                "       stpt    %0\n"   /* Store current cpu timer value */
                "       spt     %1"     /* Set new value imm. afterwards */
                : "=m" (timer) : "m" (expires));
        S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
        S390_lowcore.last_update_timer = expires;
}

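/*
 * Add the elapsed cpu time to the global virtual timer base.  Returns
 * nonzero when the earliest queued virtual timer (virt_timer_current)
 * has expired and virt_timer_expire() should be run by the caller.
 */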
static inline int virt_timer_forward(u64 elapsed)
{
        BUG_ON(!irqs_disabled());

        if (list_empty(&virt_timer_list))
                return 0;
        elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
        return elapsed >= atomic64_read(&virt_timer_current);
}

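/*
 * Recalculate the MT scaling factor from the core's MT-diagnostic cycle
 * counters.  With delta[i] denoting the delta of counter set i (cycles
 * seen while i + 1 threads of the core were active), the loop builds
 *
 *      mult = (smp_cpu_mtid + 1)! * sum_i(delta[i] / (i + 1))
 *      div  = (smp_cpu_mtid + 1)! * sum_i(delta[i])
 *
 * using integer arithmetic only, so that mult / div is the average of
 * 1 / (number of active threads) weighted by delta[i].  This is the
 * factor later applied to user and system time to get the scaled values.
 */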
static void update_mt_scaling(void)
{
        u64 cycles_new[8], *cycles_old;
        u64 delta, fac, mult, div;
        int i;

        stcctm5(smp_cpu_mtid + 1, cycles_new);
        cycles_old = __get_cpu_var(mt_cycles);
        fac = 1;
        mult = div = 0;
        for (i = 0; i <= smp_cpu_mtid; i++) {
                delta = cycles_new[i] - cycles_old[i];
                div += delta;
                mult *= i + 1;
                mult += delta * fac;
                fac *= i + 1;
        }
        div *= fac;
        if (div > 0) {
                /* Update scaling factor */
                __this_cpu_write(mt_scaling_mult, mult);
                __this_cpu_write(mt_scaling_div, div);
                memcpy(cycles_old, cycles_new,
                       sizeof(u64) * (smp_cpu_mtid + 1));
        }
        __this_cpu_write(mt_scaling_jiffies, jiffies_64);
}

/*
 * Update process times based on virtual cpu times stored by entry.S
 * to the lowcore fields user_timer, system_timer & steal_timer.
 */
static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
{
        struct thread_info *ti = task_thread_info(tsk);
        u64 timer, clock, user, system, steal;
        u64 user_scaled, system_scaled;

        timer = S390_lowcore.last_update_timer;
        clock = S390_lowcore.last_update_clock;
        asm volatile(
                "       stpt    %0\n"   /* Store current cpu timer value */
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
                "       stckf   %1"     /* Store current tod clock value */
#else
                "       stck    %1"     /* Store current tod clock value */
#endif
                : "=m" (S390_lowcore.last_update_timer),
                  "=m" (S390_lowcore.last_update_clock));
        S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
        S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;

        /* Update MT utilization calculation */
        if (smp_cpu_mtid &&
            time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
                update_mt_scaling();

        user = S390_lowcore.user_timer - ti->user_timer;
        S390_lowcore.steal_timer -= user;
        ti->user_timer = S390_lowcore.user_timer;

        system = S390_lowcore.system_timer - ti->system_timer;
        S390_lowcore.steal_timer -= system;
        ti->system_timer = S390_lowcore.system_timer;

        user_scaled = user;
        system_scaled = system;
        /* Do MT utilization scaling */
        if (smp_cpu_mtid) {
                u64 mult = __get_cpu_var(mt_scaling_mult);
                u64 div = __get_cpu_var(mt_scaling_div);

                user_scaled = (user_scaled * mult) / div;
                system_scaled = (system_scaled * mult) / div;
        }
        account_user_time(tsk, user, user_scaled);
        account_system_time(tsk, hardirq_offset, system, system_scaled);

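        /*
         * Whatever is left of the accumulated TOD-clock delta after user
         * and system time have been subtracted above is time during which
         * the virtual cpu was not backed by a physical cpu, so account it
         * as steal time.
         */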
        steal = S390_lowcore.steal_timer;
        if ((s64) steal > 0) {
                S390_lowcore.steal_timer = 0;
                account_steal_time(steal);
        }

        return virt_timer_forward(user + system);
}

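/*
 * Close the accounting for the outgoing task and switch the per-task
 * baselines: the accumulated lowcore values are saved in prev's
 * thread_info, and current's saved values are loaded into the lowcore so
 * that future deltas are charged to the incoming task.
 */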
void vtime_task_switch(struct task_struct *prev)
{
        struct thread_info *ti;

        do_account_vtime(prev, 0);
        ti = task_thread_info(prev);
        ti->user_timer = S390_lowcore.user_timer;
        ti->system_timer = S390_lowcore.system_timer;
        ti = task_thread_info(current);
        S390_lowcore.user_timer = ti->user_timer;
        S390_lowcore.system_timer = ti->system_timer;
}

/*
 * In s390, accounting pending user time also implies
 * accounting system time in order to correctly compute
 * steal time.
 */
void vtime_account_user(struct task_struct *tsk)
{
        if (do_account_vtime(tsk, HARDIRQ_OFFSET))
                virt_timer_expire();
}

/*
 * Update process times based on virtual cpu times stored by entry.S
 * to the lowcore fields user_timer, system_timer & steal_timer.
 */
void vtime_account_irq_enter(struct task_struct *tsk)
{
        struct thread_info *ti = task_thread_info(tsk);
        u64 timer, system, system_scaled;

        WARN_ON_ONCE(!irqs_disabled());

        timer = S390_lowcore.last_update_timer;
        S390_lowcore.last_update_timer = get_vtimer();
        S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;

        /* Update MT utilization calculation */
        if (smp_cpu_mtid &&
            time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
                update_mt_scaling();

        system = S390_lowcore.system_timer - ti->system_timer;
        S390_lowcore.steal_timer -= system;
        ti->system_timer = S390_lowcore.system_timer;
        system_scaled = system;
        /* Do MT utilization scaling */
        if (smp_cpu_mtid) {
                u64 mult = __get_cpu_var(mt_scaling_mult);
                u64 div = __get_cpu_var(mt_scaling_div);

                system_scaled = (system_scaled * mult) / div;
        }
        account_system_time(tsk, 0, system, system_scaled);

        virt_timer_forward(system);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);

void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);

void __kprobes vtime_stop_cpu(void)
{
        struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
        unsigned long long idle_time;
        unsigned long psw_mask;

        trace_hardirqs_on();

        /* Wait for external, I/O or machine check interrupt. */
        psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        idle->nohz_delay = 0;

        /* Call the assembler magic in entry.S */
        psw_idle(idle, psw_mask);

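        /*
         * idle->sequence is used like a seqcount: it is odd while the
         * idle statistics below are being updated, so that the lockless
         * reader in s390_get_idle_time() can retry until it sees a
         * consistent snapshot.
         */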
        /* Account time spent with enabled wait psw loaded as idle time. */
        idle->sequence++;
        smp_wmb();
        idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
        idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
        idle->idle_time += idle_time;
        idle->idle_count++;
        account_idle_time(idle_time);
        smp_wmb();
        idle->sequence++;
}

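/*
 * Return the length of the current idle period of the given cpu (using
 * the exit timestamp if the cpu has already been woken up), or 0 if the
 * cpu is not idle.  The idle data is sampled without a lock; the sequence
 * counter is re-read until a consistent enter/exit pair has been seen.
 */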
cputime64_t s390_get_idle_time(int cpu)
{
        struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
        unsigned long long now, idle_enter, idle_exit;
        unsigned int sequence;

        do {
                now = get_tod_clock();
                sequence = ACCESS_ONCE(idle->sequence);
                idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
                idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
        } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
        return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
}

/*
 * Sorted add to a list. List is linear searched until first bigger
 * element is found.
 */
static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
{
        struct vtimer_list *tmp;

        list_for_each_entry(tmp, head, entry) {
                if (tmp->expires > timer->expires) {
                        list_add_tail(&timer->entry, &tmp->entry);
                        return;
                }
        }
        list_add_tail(&timer->entry, head);
}

/*
 * Handler for expired virtual CPU timer.
 */
static void virt_timer_expire(void)
{
        struct vtimer_list *timer, *tmp;
        unsigned long elapsed;
        LIST_HEAD(cb_list);

        /* walk timer list, fire all expired timers */
        spin_lock(&virt_timer_lock);
        elapsed = atomic64_read(&virt_timer_elapsed);
        list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
                if (timer->expires < elapsed)
                        /* move expired timer to the callback queue */
                        list_move_tail(&timer->entry, &cb_list);
                else
                        timer->expires -= elapsed;
        }
        if (!list_empty(&virt_timer_list)) {
                timer = list_first_entry(&virt_timer_list,
                                         struct vtimer_list, entry);
                atomic64_set(&virt_timer_current, timer->expires);
        }
        atomic64_sub(elapsed, &virt_timer_elapsed);
        spin_unlock(&virt_timer_lock);

        /* Do callbacks and recharge periodic timers */
        list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
                list_del_init(&timer->entry);
                timer->function(timer->data);
                if (timer->interval) {
                        /* Recharge interval timer */
                        timer->expires = timer->interval +
                                atomic64_read(&virt_timer_elapsed);
                        spin_lock(&virt_timer_lock);
                        list_add_sorted(timer, &virt_timer_list);
                        spin_unlock(&virt_timer_lock);
                }
        }
}

void init_virt_timer(struct vtimer_list *timer)
{
        timer->function = NULL;
        INIT_LIST_HEAD(&timer->entry);
}
EXPORT_SYMBOL(init_virt_timer);

static inline int vtimer_pending(struct vtimer_list *timer)
{
        return !list_empty(&timer->entry);
}

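/*
 * Queue a timer on the list of virtual timers.  Expiry values of queued
 * timers are compared against virt_timer_elapsed, so for a non-empty list
 * the new timer's expiry is rebased by the currently accumulated elapsed
 * time before it is inserted.  virt_timer_current always holds the expiry
 * of the earliest queued timer, which virt_timer_forward() checks.
 */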
static void internal_add_vtimer(struct vtimer_list *timer)
{
        if (list_empty(&virt_timer_list)) {
                /* First timer, just program it. */
                atomic64_set(&virt_timer_current, timer->expires);
                atomic64_set(&virt_timer_elapsed, 0);
                list_add(&timer->entry, &virt_timer_list);
        } else {
                /* Update timer against current base. */
                timer->expires += atomic64_read(&virt_timer_elapsed);
                if (likely((s64) timer->expires <
                           (s64) atomic64_read(&virt_timer_current)))
                        /* The new timer expires before the current timer. */
                        atomic64_set(&virt_timer_current, timer->expires);
                /* Insert new timer into the list. */
                list_add_sorted(timer, &virt_timer_list);
        }
}

static void __add_vtimer(struct vtimer_list *timer, int periodic)
{
        unsigned long flags;

        timer->interval = periodic ? timer->expires : 0;
        spin_lock_irqsave(&virt_timer_lock, flags);
        internal_add_vtimer(timer);
        spin_unlock_irqrestore(&virt_timer_lock, flags);
}

/*
 * add_virt_timer - add a one-shot virtual CPU timer
 */
void add_virt_timer(struct vtimer_list *timer)
{
        __add_vtimer(timer, 0);
}
EXPORT_SYMBOL(add_virt_timer);

/*
 * add_virt_timer_periodic - add a periodic virtual CPU timer
 */
void add_virt_timer_periodic(struct vtimer_list *timer)
{
        __add_vtimer(timer, 1);
}
EXPORT_SYMBOL(add_virt_timer_periodic);
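
/*
 * Usage sketch (hypothetical caller, not part of this file).  It only
 * relies on the struct vtimer_list fields used in this file (function,
 * data, expires, interval) and on the callback receiving the data value
 * as its argument; expires/interval are given in CPU-timer units:
 *
 *      static void my_vtimer_fn(unsigned long data);
 *
 *      static struct vtimer_list my_vtimer;
 *
 *      init_virt_timer(&my_vtimer);
 *      my_vtimer.function = my_vtimer_fn;
 *      my_vtimer.data = 0;
 *      my_vtimer.expires = expiry_in_cpu_timer_units;
 *      add_virt_timer_periodic(&my_vtimer);
 *
 * For a one-shot timer, use add_virt_timer() instead.
 */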

static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
{
        unsigned long flags;
        int rc;

        BUG_ON(!timer->function);

        if (timer->expires == expires && vtimer_pending(timer))
                return 1;
        spin_lock_irqsave(&virt_timer_lock, flags);
        rc = vtimer_pending(timer);
        if (rc)
                list_del_init(&timer->entry);
        timer->interval = periodic ? expires : 0;
        timer->expires = expires;
        internal_add_vtimer(timer);
        spin_unlock_irqrestore(&virt_timer_lock, flags);
        return rc;
}

/*
 * returns whether it has modified a pending timer (1) or not (0)
 */
int mod_virt_timer(struct vtimer_list *timer, u64 expires)
{
        return __mod_vtimer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_virt_timer);

/*
 * returns whether it has modified a pending timer (1) or not (0)
 */
int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
{
        return __mod_vtimer(timer, expires, 1);
}
EXPORT_SYMBOL(mod_virt_timer_periodic);

/*
 * Delete a virtual timer.
 *
 * returns whether the deleted timer was pending (1) or not (0)
 */
int del_virt_timer(struct vtimer_list *timer)
{
        unsigned long flags;

        if (!vtimer_pending(timer))
                return 0;
        spin_lock_irqsave(&virt_timer_lock, flags);
        list_del_init(&timer->entry);
        spin_unlock_irqrestore(&virt_timer_lock, flags);
        return 1;
}
EXPORT_SYMBOL(del_virt_timer);

/*
 * Start the virtual CPU timer on the current CPU.
 */
void init_cpu_vtimer(void)
{
        /* set initial cpu timer */
        set_vtimer(VTIMER_MAX_SLICE);
        /* Setup initial MT scaling values */
        if (smp_cpu_mtid) {
                __this_cpu_write(mt_scaling_jiffies, jiffies);
                __this_cpu_write(mt_scaling_mult, 1);
                __this_cpu_write(mt_scaling_div, 1);
                stcctm5(smp_cpu_mtid + 1, __get_cpu_var(mt_cycles));
        }
}

static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
                            void *hcpu)
{
        struct s390_idle_data *idle;
        long cpu = (long) hcpu;

        idle = &per_cpu(s390_idle, cpu);
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DYING:
                idle->nohz_delay = 0;
        default:
                break;
        }
        return NOTIFY_OK;
}

void __init vtime_init(void)
{
        /* Enable cpu timer interrupts on the boot cpu. */
        init_cpu_vtimer();
        cpu_notifier(s390_nohz_notify, 0);
}