linux/kernel/sched/cputime.c
   1#include <linux/export.h>
   2#include <linux/sched.h>
   3#include <linux/tsacct_kern.h>
   4#include <linux/kernel_stat.h>
   5#include <linux/static_key.h>
   6#include <linux/context_tracking.h>
   7#include "sched.h"
   8#ifdef CONFIG_PARAVIRT
   9#include <asm/paravirt.h>
  10#endif
  11
  12
  13#ifdef CONFIG_IRQ_TIME_ACCOUNTING
  14
  15/*
   16 * There are no locks covering the percpu hardirq/softirq time.
   17 * They are only modified in vtime_account, on the corresponding CPU,
   18 * with interrupts disabled, so writes are safe.
   19 * They are read and saved off onto struct rq in update_rq_clock().
   20 * This may result in another CPU reading this CPU's irq time and racing
   21 * with irq/vtime_account on this CPU. We would get either the old or the
   22 * new value, with the side effect of accounting a slice of irq time to the
   23 * wrong task when an irq is in progress while we read rq->clock. That is a
   24 * worthy compromise in place of taking locks on each irq in account_system_time.
  25 */
  26DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
  27
  28static int sched_clock_irqtime;
  29
  30void enable_sched_clock_irqtime(void)
  31{
  32        sched_clock_irqtime = 1;
  33}
  34
  35void disable_sched_clock_irqtime(void)
  36{
  37        sched_clock_irqtime = 0;
  38}
  39
  40/*
  41 * Called before incrementing preempt_count on {soft,}irq_enter
  42 * and before decrementing preempt_count on {soft,}irq_exit.
  43 */
  44void irqtime_account_irq(struct task_struct *curr)
  45{
  46        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
  47        s64 delta;
  48        int cpu;
  49
  50        if (!sched_clock_irqtime)
  51                return;
  52
  53        cpu = smp_processor_id();
  54        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
  55        irqtime->irq_start_time += delta;
  56
  57        u64_stats_update_begin(&irqtime->sync);
  58        /*
  59         * We do not account for softirq time from ksoftirqd here.
   60         * We want to continue accounting softirq time to the ksoftirqd
   61         * thread in that case, so as not to confuse the scheduler with a
   62         * special task that does not consume any time but still wants to run.
  63         */
  64        if (hardirq_count())
  65                irqtime->hardirq_time += delta;
  66        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
  67                irqtime->softirq_time += delta;
  68
  69        u64_stats_update_end(&irqtime->sync);
  70}
  71EXPORT_SYMBOL_GPL(irqtime_account_irq);
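/*
 * Illustrative reader-side sketch (not part of this file): how the per-CPU
 * irq time written above can be read from another CPU under the u64_stats
 * seqcount, roughly along the lines of the irq_time_read() helper in
 * kernel/sched/sched.h. Shown only to make the lockless write/read pairing
 * described in the comment above explicit; the function name here is made up.
 */
static inline u64 example_irq_time_read(int cpu)
{
	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
	unsigned int seq;
	u64 total;

	do {
		/* Retry if the writer updated the counters meanwhile */
		seq = u64_stats_fetch_begin(&irqtime->sync);
		total = irqtime->softirq_time + irqtime->hardirq_time;
	} while (u64_stats_fetch_retry(&irqtime->sync, seq));

	return total;
}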
  72
  73static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
  74{
  75        u64 *cpustat = kcpustat_this_cpu->cpustat;
  76        cputime_t irq_cputime;
  77
  78        irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
  79        irq_cputime = min(irq_cputime, maxtime);
  80        cpustat[idx] += irq_cputime;
  81
  82        return irq_cputime;
  83}
  84
  85static cputime_t irqtime_account_hi_update(cputime_t maxtime)
  86{
  87        return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
  88                                      CPUTIME_IRQ, maxtime);
  89}
  90
  91static cputime_t irqtime_account_si_update(cputime_t maxtime)
  92{
  93        return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
  94                                      CPUTIME_SOFTIRQ, maxtime);
  95}
  96
  97#else /* CONFIG_IRQ_TIME_ACCOUNTING */
  98
  99#define sched_clock_irqtime     (0)
 100
 101static cputime_t irqtime_account_hi_update(cputime_t dummy)
 102{
 103        return 0;
 104}
 105
 106static cputime_t irqtime_account_si_update(cputime_t dummy)
 107{
 108        return 0;
 109}
 110
 111#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 112
 113static inline void task_group_account_field(struct task_struct *p, int index,
 114                                            u64 tmp)
 115{
 116        /*
 117         * Since all updates are sure to touch the root cgroup, we
  118         * go ahead and touch it first. If the root cgroup
 119         * is the only cgroup, then nothing else should be necessary.
 120         *
 121         */
 122        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);
 123
 124        cpuacct_account_field(p, index, tmp);
 125}
 126
 127/*
 128 * Account user cpu time to a process.
 129 * @p: the process that the cpu time gets accounted to
 130 * @cputime: the cpu time spent in user space since the last update
 131 */
 132void account_user_time(struct task_struct *p, cputime_t cputime)
 133{
 134        int index;
 135
 136        /* Add user time to process. */
 137        p->utime += cputime;
 138        account_group_user_time(p, cputime);
 139
 140        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 141
 142        /* Add user time to cpustat. */
 143        task_group_account_field(p, index, (__force u64) cputime);
 144
 145        /* Account for user time used */
 146        acct_account_cputime(p);
 147}
 148
 149/*
 150 * Account guest cpu time to a process.
 151 * @p: the process that the cpu time gets accounted to
 152 * @cputime: the cpu time spent in virtual machine since the last update
 153 */
 154static void account_guest_time(struct task_struct *p, cputime_t cputime)
 155{
 156        u64 *cpustat = kcpustat_this_cpu->cpustat;
 157
 158        /* Add guest time to process. */
 159        p->utime += cputime;
 160        account_group_user_time(p, cputime);
 161        p->gtime += cputime;
 162
 163        /* Add guest time to cpustat. */
 164        if (task_nice(p) > 0) {
 165                cpustat[CPUTIME_NICE] += (__force u64) cputime;
 166                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
 167        } else {
 168                cpustat[CPUTIME_USER] += (__force u64) cputime;
 169                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
 170        }
 171}
 172
 173/*
 174 * Account system cpu time to a process and desired cpustat field
 175 * @p: the process that the cpu time gets accounted to
 176 * @cputime: the cpu time spent in kernel space since the last update
 177 * @index: pointer to cpustat field that has to be updated
 178 */
 179static inline
 180void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
 181{
 182        /* Add system time to process. */
 183        p->stime += cputime;
 184        account_group_system_time(p, cputime);
 185
 186        /* Add system time to cpustat. */
 187        task_group_account_field(p, index, (__force u64) cputime);
 188
 189        /* Account for system time used */
 190        acct_account_cputime(p);
 191}
 192
 193/*
 194 * Account system cpu time to a process.
 195 * @p: the process that the cpu time gets accounted to
 196 * @hardirq_offset: the offset to subtract from hardirq_count()
 197 * @cputime: the cpu time spent in kernel space since the last update
 198 */
 199void account_system_time(struct task_struct *p, int hardirq_offset,
 200                         cputime_t cputime)
 201{
 202        int index;
 203
 204        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
 205                account_guest_time(p, cputime);
 206                return;
 207        }
 208
 209        if (hardirq_count() - hardirq_offset)
 210                index = CPUTIME_IRQ;
 211        else if (in_serving_softirq())
 212                index = CPUTIME_SOFTIRQ;
 213        else
 214                index = CPUTIME_SYSTEM;
 215
 216        __account_system_time(p, cputime, index);
 217}
 218
 219/*
 220 * Account for involuntary wait time.
 221 * @cputime: the cpu time spent in involuntary wait
 222 */
 223void account_steal_time(cputime_t cputime)
 224{
 225        u64 *cpustat = kcpustat_this_cpu->cpustat;
 226
 227        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
 228}
 229
 230/*
 231 * Account for idle time.
 232 * @cputime: the cpu time spent in idle wait
 233 */
 234void account_idle_time(cputime_t cputime)
 235{
 236        u64 *cpustat = kcpustat_this_cpu->cpustat;
 237        struct rq *rq = this_rq();
 238
 239        if (atomic_read(&rq->nr_iowait) > 0)
 240                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
 241        else
 242                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 243}
 244
 245/*
  246 * When a guest is interrupted for a long time, missed clock ticks
  247 * are not redelivered later. Because of that, this function may on
  248 * occasion account more time than the calling functions think has elapsed.
 249 */
 250static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 251{
 252#ifdef CONFIG_PARAVIRT
 253        if (static_key_false(&paravirt_steal_enabled)) {
 254                cputime_t steal_cputime;
 255                u64 steal;
 256
 257                steal = paravirt_steal_clock(smp_processor_id());
 258                steal -= this_rq()->prev_steal_time;
 259
 260                steal_cputime = min(nsecs_to_cputime(steal), maxtime);
 261                account_steal_time(steal_cputime);
 262                this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 263
 264                return steal_cputime;
 265        }
 266#endif
 267        return 0;
 268}
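/*
 * Worked example (illustrative numbers, not from this file): if the
 * hypervisor reports that paravirt_steal_clock() advanced by 7 ms since
 * prev_steal_time but the caller only has maxtime = 4 ms of elapsed time
 * to distribute, only 4 ms is accounted as steal now; prev_steal_time is
 * advanced by those 4 ms, so the remaining 3 ms is picked up by a later
 * call. This is how the clamp to maxtime keeps steal from exceeding the
 * time the caller thinks has elapsed.
 */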
 269
 270/*
 271 * Account how much elapsed time was spent in steal, irq, or softirq time.
 272 */
 273static inline cputime_t account_other_time(cputime_t max)
 274{
 275        cputime_t accounted;
 276
 277        /* Shall be converted to a lockdep-enabled lightweight check */
 278        WARN_ON_ONCE(!irqs_disabled());
 279
 280        accounted = steal_account_process_time(max);
 281
 282        if (accounted < max)
 283                accounted += irqtime_account_hi_update(max - accounted);
 284
 285        if (accounted < max)
 286                accounted += irqtime_account_si_update(max - accounted);
 287
 288        return accounted;
 289}
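/*
 * Worked example (illustrative numbers): with max = 10 ticks of elapsed
 * time, steal_account_process_time() might consume 4 ticks, leaving 6 as
 * the budget for irqtime_account_hi_update(), which might consume another
 * 5, leaving 1 tick for irqtime_account_si_update(). The running
 * "accounted < max" checks guarantee the sum handed back never exceeds
 * max, so callers can safely subtract it from the time they are about to
 * account elsewhere.
 */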
 290
 291#ifdef CONFIG_64BIT
 292static inline u64 read_sum_exec_runtime(struct task_struct *t)
 293{
 294        return t->se.sum_exec_runtime;
 295}
 296#else
 297static u64 read_sum_exec_runtime(struct task_struct *t)
 298{
 299        u64 ns;
 300        struct rq_flags rf;
 301        struct rq *rq;
 302
 303        rq = task_rq_lock(t, &rf);
 304        ns = t->se.sum_exec_runtime;
 305        task_rq_unlock(rq, t, &rf);
 306
 307        return ns;
 308}
 309#endif
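/*
 * A note on the #ifdef split above: on 64-bit, t->se.sum_exec_runtime can
 * be read with a single naturally-atomic load. On 32-bit, a 64-bit counter
 * could be torn across two loads while the scheduler updates it, so the
 * task's rq lock is taken to read a consistent value.
 */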
 310
 311/*
 312 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 313 * tasks (sum on group iteration) belonging to @tsk's group.
 314 */
 315void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 316{
 317        struct signal_struct *sig = tsk->signal;
 318        cputime_t utime, stime;
 319        struct task_struct *t;
 320        unsigned int seq, nextseq;
 321        unsigned long flags;
 322
 323        /*
 324         * Update current task runtime to account pending time since last
 325         * scheduler action or thread_group_cputime() call. This thread group
 326         * might have other running tasks on different CPUs, but updating
  327         * their runtime can affect syscall performance, so we skip accounting
  328         * those pending times and rely only on values updated on tick or
 329         * other scheduler action.
 330         */
 331        if (same_thread_group(current, tsk))
 332                (void) task_sched_runtime(current);
 333
 334        rcu_read_lock();
 335        /* Attempt a lockless read on the first round. */
 336        nextseq = 0;
 337        do {
 338                seq = nextseq;
 339                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 340                times->utime = sig->utime;
 341                times->stime = sig->stime;
 342                times->sum_exec_runtime = sig->sum_sched_runtime;
 343
 344                for_each_thread(tsk, t) {
 345                        task_cputime(t, &utime, &stime);
 346                        times->utime += utime;
 347                        times->stime += stime;
 348                        times->sum_exec_runtime += read_sum_exec_runtime(t);
 349                }
 350                /* If lockless access failed, take the lock. */
 351                nextseq = 1;
 352        } while (need_seqretry(&sig->stats_lock, seq));
 353        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
 354        rcu_read_unlock();
 355}
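/*
 * Minimal sketch of the "lockless first pass, locked second pass" seqlock
 * idiom used above, with a made-up stats structure purely for illustration
 * (assumes <linux/seqlock.h> and that the lock has been initialized). On
 * the first iteration nextseq = 0 (even), so read_seqbegin_or_lock_irqsave()
 * only takes a read snapshot; if a writer raced with us, need_seqretry()
 * forces a second pass with nextseq = 1 (odd), which acquires the lock for
 * a stable read.
 */
struct example_stats {
	seqlock_t lock;
	u64 a, b;
};

static void example_stats_read(struct example_stats *s, u64 *a, u64 *b)
{
	unsigned int seq, nextseq = 0;	/* attempt a lockless read first */
	unsigned long flags;

	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&s->lock, &seq);
		*a = s->a;
		*b = s->b;
		/* if the lockless read raced with a writer, retry locked */
		nextseq = 1;
	} while (need_seqretry(&s->lock, seq));
	done_seqretry_irqrestore(&s->lock, seq, flags);
}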
 356
 357#ifdef CONFIG_IRQ_TIME_ACCOUNTING
 358/*
 359 * Account a tick to a process and cpustat
 360 * @p: the process that the cpu time gets accounted to
  361 * @user_tick: whether the tick is from userspace
 362 * @rq: the pointer to rq
 363 *
 364 * Tick demultiplexing follows the order
 365 * - pending hardirq update
 366 * - pending softirq update
 367 * - user_time
 368 * - idle_time
 369 * - system time
 370 *   - check for guest_time
 371 *   - else account as system_time
 372 *
  373 * The check for hardirq is done for both system and user time as there is
  374 * no timer going off while we are in a hardirq and hence we may never get an
  375 * opportunity to update it solely in system time.
  376 * p->stime and friends are only updated on system time and not on irq/softirq
  377 * time, as those do not count in task exec_runtime any more.
 378 */
 379static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 380                                         struct rq *rq, int ticks)
 381{
 382        u64 cputime = (__force u64) cputime_one_jiffy * ticks;
 383        cputime_t other;
 384
 385        /*
 386         * When returning from idle, many ticks can get accounted at
 387         * once, including some ticks of steal, irq, and softirq time.
 388         * Subtract those ticks from the amount of time accounted to
 389         * idle, or potentially user or system time. Due to rounding,
 390         * other time can exceed ticks occasionally.
 391         */
 392        other = account_other_time(ULONG_MAX);
 393        if (other >= cputime)
 394                return;
 395        cputime -= other;
 396
 397        if (this_cpu_ksoftirqd() == p) {
 398                /*
  399                 * ksoftirqd time does not get accounted in cpu_softirq_time.
 400                 * So, we have to handle it separately here.
 401                 * Also, p->stime needs to be updated for ksoftirqd.
 402                 */
 403                __account_system_time(p, cputime, CPUTIME_SOFTIRQ);
 404        } else if (user_tick) {
 405                account_user_time(p, cputime);
 406        } else if (p == rq->idle) {
 407                account_idle_time(cputime);
 408        } else if (p->flags & PF_VCPU) { /* System time or guest time */
 409                account_guest_time(p, cputime);
 410        } else {
 411                __account_system_time(p, cputime, CPUTIME_SYSTEM);
 412        }
 413}
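/*
 * Worked example (illustrative numbers): returning from a long idle period
 * with ticks = 4, account_other_time() may report, say, 1.5 ticks worth of
 * time it has just charged to the steal/irq/softirq cpustat fields. The
 * remaining 2.5 ticks are then charged to idle, user, guest or system time
 * according to the demultiplexing order described above. If that other
 * time covers all 4 ticks, nothing extra is charged here.
 */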
 414
 415static void irqtime_account_idle_ticks(int ticks)
 416{
 417        struct rq *rq = this_rq();
 418
 419        irqtime_account_process_tick(current, 0, rq, ticks);
 420}
 421#else /* CONFIG_IRQ_TIME_ACCOUNTING */
 422static inline void irqtime_account_idle_ticks(int ticks) {}
 423static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 424                                                struct rq *rq, int nr_ticks) {}
 425#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 426
 427/*
 428 * Use precise platform statistics if available:
 429 */
 430#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 431
 432#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 433void vtime_common_task_switch(struct task_struct *prev)
 434{
 435        if (is_idle_task(prev))
 436                vtime_account_idle(prev);
 437        else
 438                vtime_account_system(prev);
 439
 440#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 441        vtime_account_user(prev);
 442#endif
 443        arch_vtime_task_switch(prev);
 444}
 445#endif
 446
 447#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 448
 449
 450#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 451/*
 452 * Archs that account the whole time spent in the idle task
 453 * (outside irq) as idle time can rely on this and just implement
 454 * vtime_account_system() and vtime_account_idle(). Archs that
 455 * have other meaning of the idle time (s390 only includes the
 456 * time spent by the CPU when it's in low power mode) must override
 457 * vtime_account().
 458 */
 459#ifndef __ARCH_HAS_VTIME_ACCOUNT
 460void vtime_account_irq_enter(struct task_struct *tsk)
 461{
 462        if (!in_interrupt() && is_idle_task(tsk))
 463                vtime_account_idle(tsk);
 464        else
 465                vtime_account_system(tsk);
 466}
 467EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 468#endif /* __ARCH_HAS_VTIME_ACCOUNT */
 469
 470void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 471{
 472        *ut = p->utime;
 473        *st = p->stime;
 474}
 475EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 476
 477void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 478{
 479        struct task_cputime cputime;
 480
 481        thread_group_cputime(p, &cputime);
 482
 483        *ut = cputime.utime;
 484        *st = cputime.stime;
 485}
 486#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 487/*
 488 * Account a single tick of cpu time.
 489 * @p: the process that the cpu time gets accounted to
 490 * @user_tick: indicates if the tick is a user or a system tick
 491 */
 492void account_process_tick(struct task_struct *p, int user_tick)
 493{
 494        cputime_t cputime, steal;
 495        struct rq *rq = this_rq();
 496
 497        if (vtime_accounting_cpu_enabled())
 498                return;
 499
 500        if (sched_clock_irqtime) {
 501                irqtime_account_process_tick(p, user_tick, rq, 1);
 502                return;
 503        }
 504
 505        cputime = cputime_one_jiffy;
 506        steal = steal_account_process_time(ULONG_MAX);
 507
 508        if (steal >= cputime)
 509                return;
 510
 511        cputime -= steal;
 512
 513        if (user_tick)
 514                account_user_time(p, cputime);
 515        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 516                account_system_time(p, HARDIRQ_OFFSET, cputime);
 517        else
 518                account_idle_time(cputime);
 519}
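/*
 * Worked example (illustrative numbers): with HZ = 100 a tick is worth
 * 10 ms. If steal_account_process_time() reports 4 ms of steal for this
 * tick, only the remaining 6 ms is charged to the task (as user, system
 * or idle time, as appropriate); if steal covers the whole 10 ms or more,
 * the tick goes entirely to steal time and nothing is charged to the task.
 */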
 520
 521/*
 522 * Account multiple ticks of idle time.
  523 * @ticks: number of ticks spent idle
 524 */
 525void account_idle_ticks(unsigned long ticks)
 526{
 527        cputime_t cputime, steal;
 528
 529        if (sched_clock_irqtime) {
 530                irqtime_account_idle_ticks(ticks);
 531                return;
 532        }
 533
 534        cputime = jiffies_to_cputime(ticks);
 535        steal = steal_account_process_time(ULONG_MAX);
 536
 537        if (steal >= cputime)
 538                return;
 539
 540        cputime -= steal;
 541        account_idle_time(cputime);
 542}
 543
 544/*
 545 * Perform (stime * rtime) / total, but avoid multiplication overflow by
  546 * losing precision when the numbers are big.
 547 */
 548static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 549{
 550        u64 scaled;
 551
 552        for (;;) {
 553                /* Make sure "rtime" is the bigger of stime/rtime */
 554                if (stime > rtime)
 555                        swap(rtime, stime);
 556
 557                /* Make sure 'total' fits in 32 bits */
 558                if (total >> 32)
 559                        goto drop_precision;
 560
 561                /* Does rtime (and thus stime) fit in 32 bits? */
 562                if (!(rtime >> 32))
 563                        break;
 564
 565                /* Can we just balance rtime/stime rather than dropping bits? */
 566                if (stime >> 31)
 567                        goto drop_precision;
 568
 569                /* We can grow stime and shrink rtime and try to make them both fit */
 570                stime <<= 1;
 571                rtime >>= 1;
 572                continue;
 573
 574drop_precision:
 575                /* We drop from rtime, it has more bits than stime */
 576                rtime >>= 1;
 577                total >>= 1;
 578        }
 579
 580        /*
 581         * Make sure gcc understands that this is a 32x32->64 multiply,
 582         * followed by a 64/32->64 divide.
 583         */
 584        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
 585        return (__force cputime_t) scaled;
 586}
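/*
 * Worked example (illustrative numbers): with stime = 100, utime = 200
 * (so total = 300) and rtime = 600 from the scheduler, scale_stime()
 * returns 600 * 100 / 300 = 200. cputime_adjust() below then derives
 * utime = rtime - stime = 400, preserving the observed 1:2 ratio while
 * making the two values sum exactly to rtime.
 */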
 587
 588/*
 589 * Adjust tick based cputime random precision against scheduler runtime
 590 * accounting.
 591 *
  592 * Tick based cputime accounting depends on whether the random scheduling
  593 * timeslices of a task happen to be interrupted by the timer.  Depending on
  594 * these circumstances, the number of these interrupts may overestimate or
  595 * underestimate the real user and system cputime, matching it only with
  596 * variable precision.
 597 *
 598 * Fix this by scaling these tick based values against the total runtime
 599 * accounted by the CFS scheduler.
 600 *
 601 * This code provides the following guarantees:
 602 *
 603 *   stime + utime == rtime
 604 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 605 *
 606 * Assuming that rtime_i+1 >= rtime_i.
 607 */
 608static void cputime_adjust(struct task_cputime *curr,
 609                           struct prev_cputime *prev,
 610                           cputime_t *ut, cputime_t *st)
 611{
 612        cputime_t rtime, stime, utime;
 613        unsigned long flags;
 614
 615        /* Serialize concurrent callers such that we can honour our guarantees */
 616        raw_spin_lock_irqsave(&prev->lock, flags);
 617        rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 618
 619        /*
 620         * This is possible under two circumstances:
 621         *  - rtime isn't monotonic after all (a bug);
 622         *  - we got reordered by the lock.
 623         *
 624         * In both cases this acts as a filter such that the rest of the code
 625         * can assume it is monotonic regardless of anything else.
 626         */
 627        if (prev->stime + prev->utime >= rtime)
 628                goto out;
 629
 630        stime = curr->stime;
 631        utime = curr->utime;
 632
 633        /*
  634         * If stime is zero, assume all runtime is userspace; if utime is zero,
  635         * assume it is all system time. Once a task gets some ticks, the
  636         * monotonicity code at 'update' will ensure convergence to the observed ratio.
 637         */
 638        if (stime == 0) {
 639                utime = rtime;
 640                goto update;
 641        }
 642
 643        if (utime == 0) {
 644                stime = rtime;
 645                goto update;
 646        }
 647
 648        stime = scale_stime((__force u64)stime, (__force u64)rtime,
 649                            (__force u64)(stime + utime));
 650
 651update:
 652        /*
 653         * Make sure stime doesn't go backwards; this preserves monotonicity
 654         * for utime because rtime is monotonic.
 655         *
 656         *  utime_i+1 = rtime_i+1 - stime_i
 657         *            = rtime_i+1 - (rtime_i - utime_i)
 658         *            = (rtime_i+1 - rtime_i) + utime_i
 659         *            >= utime_i
 660         */
 661        if (stime < prev->stime)
 662                stime = prev->stime;
 663        utime = rtime - stime;
 664
 665        /*
 666         * Make sure utime doesn't go backwards; this still preserves
 667         * monotonicity for stime, analogous argument to above.
 668         */
 669        if (utime < prev->utime) {
 670                utime = prev->utime;
 671                stime = rtime - utime;
 672        }
 673
 674        prev->stime = stime;
 675        prev->utime = utime;
 676out:
 677        *ut = prev->utime;
 678        *st = prev->stime;
 679        raw_spin_unlock_irqrestore(&prev->lock, flags);
 680}
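/*
 * Worked example of the monotonicity clamps above (illustrative numbers):
 * suppose a previous call reported prev->stime = 50 and prev->utime = 100,
 * and a later sample with rtime = 160 scales to stime = 45. Since
 * 45 < prev->stime, stime is held at 50 and utime becomes 160 - 50 = 110,
 * so neither value ever appears to run backwards between two reads while
 * their sum still equals rtime.
 */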
 681
 682void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 683{
 684        struct task_cputime cputime = {
 685                .sum_exec_runtime = p->se.sum_exec_runtime,
 686        };
 687
 688        task_cputime(p, &cputime.utime, &cputime.stime);
 689        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 690}
 691EXPORT_SYMBOL_GPL(task_cputime_adjusted);
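/*
 * Hypothetical caller sketch (for illustration only, not part of this
 * file): how a procfs-style consumer might report a task's split times
 * using the adjusted values rather than the raw p->utime/p->stime ticks.
 * The function name is made up; it assumes the usual printk and
 * cputime_to_nsecs() helpers.
 */
static void example_report_task_times(struct task_struct *p)
{
	cputime_t ut, st;

	task_cputime_adjusted(p, &ut, &st);
	pr_info("comm=%s utime=%llu stime=%llu (nsecs)\n", p->comm,
		(unsigned long long)cputime_to_nsecs(ut),
		(unsigned long long)cputime_to_nsecs(st));
}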
 692
 693void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 694{
 695        struct task_cputime cputime;
 696
 697        thread_group_cputime(p, &cputime);
 698        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 699}
 700#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 701
 702#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 703static cputime_t vtime_delta(struct task_struct *tsk)
 704{
 705        unsigned long now = READ_ONCE(jiffies);
 706
 707        if (time_before(now, (unsigned long)tsk->vtime_snap))
 708                return 0;
 709
 710        return jiffies_to_cputime(now - tsk->vtime_snap);
 711}
 712
 713static cputime_t get_vtime_delta(struct task_struct *tsk)
 714{
 715        unsigned long now = READ_ONCE(jiffies);
 716        cputime_t delta, other;
 717
 718        /*
 719         * Unlike tick based timing, vtime based timing never has lost
  720         * ticks, and has no need for steal time accounting to make up for
 721         * lost ticks. Vtime accounts a rounded version of actual
 722         * elapsed time. Limit account_other_time to prevent rounding
 723         * errors from causing elapsed vtime to go negative.
 724         */
 725        delta = jiffies_to_cputime(now - tsk->vtime_snap);
 726        other = account_other_time(delta);
 727        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 728        tsk->vtime_snap = now;
 729
 730        return delta - other;
 731}
 732
 733static void __vtime_account_system(struct task_struct *tsk)
 734{
 735        cputime_t delta_cpu = get_vtime_delta(tsk);
 736
 737        account_system_time(tsk, irq_count(), delta_cpu);
 738}
 739
 740void vtime_account_system(struct task_struct *tsk)
 741{
 742        if (!vtime_delta(tsk))
 743                return;
 744
 745        write_seqcount_begin(&tsk->vtime_seqcount);
 746        __vtime_account_system(tsk);
 747        write_seqcount_end(&tsk->vtime_seqcount);
 748}
 749
 750void vtime_account_user(struct task_struct *tsk)
 751{
 752        cputime_t delta_cpu;
 753
 754        write_seqcount_begin(&tsk->vtime_seqcount);
 755        tsk->vtime_snap_whence = VTIME_SYS;
 756        if (vtime_delta(tsk)) {
 757                delta_cpu = get_vtime_delta(tsk);
 758                account_user_time(tsk, delta_cpu);
 759        }
 760        write_seqcount_end(&tsk->vtime_seqcount);
 761}
 762
 763void vtime_user_enter(struct task_struct *tsk)
 764{
 765        write_seqcount_begin(&tsk->vtime_seqcount);
 766        if (vtime_delta(tsk))
 767                __vtime_account_system(tsk);
 768        tsk->vtime_snap_whence = VTIME_USER;
 769        write_seqcount_end(&tsk->vtime_seqcount);
 770}
 771
 772void vtime_guest_enter(struct task_struct *tsk)
 773{
 774        /*
  775         * The flags must be updated inside the seqcount write section,
  776         * together with the vtime_snap flush and update.
  777         * That enforces the right ordering and update-sequence
  778         * synchronization against the reader (task_gtime()),
  779         * which can thus safely catch up with a tickless delta.
 780         */
 781        write_seqcount_begin(&tsk->vtime_seqcount);
 782        if (vtime_delta(tsk))
 783                __vtime_account_system(tsk);
 784        current->flags |= PF_VCPU;
 785        write_seqcount_end(&tsk->vtime_seqcount);
 786}
 787EXPORT_SYMBOL_GPL(vtime_guest_enter);
 788
 789void vtime_guest_exit(struct task_struct *tsk)
 790{
 791        write_seqcount_begin(&tsk->vtime_seqcount);
 792        __vtime_account_system(tsk);
 793        current->flags &= ~PF_VCPU;
 794        write_seqcount_end(&tsk->vtime_seqcount);
 795}
 796EXPORT_SYMBOL_GPL(vtime_guest_exit);
 797
 798void vtime_account_idle(struct task_struct *tsk)
 799{
 800        cputime_t delta_cpu = get_vtime_delta(tsk);
 801
 802        account_idle_time(delta_cpu);
 803}
 804
 805void arch_vtime_task_switch(struct task_struct *prev)
 806{
 807        write_seqcount_begin(&prev->vtime_seqcount);
 808        prev->vtime_snap_whence = VTIME_INACTIVE;
 809        write_seqcount_end(&prev->vtime_seqcount);
 810
 811        write_seqcount_begin(&current->vtime_seqcount);
 812        current->vtime_snap_whence = VTIME_SYS;
 813        current->vtime_snap = jiffies;
 814        write_seqcount_end(&current->vtime_seqcount);
 815}
 816
 817void vtime_init_idle(struct task_struct *t, int cpu)
 818{
 819        unsigned long flags;
 820
 821        local_irq_save(flags);
 822        write_seqcount_begin(&t->vtime_seqcount);
 823        t->vtime_snap_whence = VTIME_SYS;
 824        t->vtime_snap = jiffies;
 825        write_seqcount_end(&t->vtime_seqcount);
 826        local_irq_restore(flags);
 827}
 828
 829cputime_t task_gtime(struct task_struct *t)
 830{
 831        unsigned int seq;
 832        cputime_t gtime;
 833
 834        if (!vtime_accounting_enabled())
 835                return t->gtime;
 836
 837        do {
 838                seq = read_seqcount_begin(&t->vtime_seqcount);
 839
 840                gtime = t->gtime;
 841                if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
 842                        gtime += vtime_delta(t);
 843
 844        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 845
 846        return gtime;
 847}
 848
 849/*
 850 * Fetch cputime raw values from fields of task_struct and
 851 * add up the pending nohz execution time since the last
 852 * cputime snapshot.
 853 */
 854void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 855{
 856        cputime_t delta;
 857        unsigned int seq;
 858
 859        if (!vtime_accounting_enabled()) {
 860                *utime = t->utime;
 861                *stime = t->stime;
 862                return;
 863        }
 864
 865        do {
 866                seq = read_seqcount_begin(&t->vtime_seqcount);
 867
 868                *utime = t->utime;
 869                *stime = t->stime;
 870
 871                /* Task is sleeping, nothing to add */
 872                if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
 873                        continue;
 874
 875                delta = vtime_delta(t);
 876
 877                /*
 878                 * Task runs either in user or kernel space, add pending nohz time to
 879                 * the right place.
 880                 */
 881                if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
 882                        *utime += delta;
 883                else if (t->vtime_snap_whence == VTIME_SYS)
 884                        *stime += delta;
 885        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 886}
 887#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 888