/* linux/kernel/sched/cputime.c */
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in irqtime_account_irq(), on the corresponding CPU
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and racing
 * with irqtime_account_irq() on this CPU. We would then read either the
 * old or the new value, with the side effect of accounting a slice of irq
 * time to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthwhile compromise compared to taking a lock on every irq
 * in account_system_time().
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
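
/*
 * Illustrative sketch (added for clarity, not part of the original file):
 * on !CONFIG_64BIT a reader of cpu_hardirq_time/cpu_softirq_time is
 * expected to pair with irq_time_seq so it never observes a torn 64-bit
 * value. The in-tree helpers for this (irq_time_write_begin/end() and
 * irq_time_read()) live in kernel/sched/sched.h; the helper name below is
 * hypothetical and only shows the read protocol.
 */
#if 0 /* example only, never compiled */
static u64 example_irq_time_read(int cpu)
{
        unsigned int seq;
        u64 irq_time;

        do {
                seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
                irq_time = per_cpu(cpu_softirq_time, cpu) +
                           per_cpu(cpu_hardirq_time, cpu);
        } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

        return irq_time;
}
#endif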

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);

        irq_time_write_begin();
        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd
         * thread in that case, so as not to confuse the scheduler with a
         * special task that does not consume any time but still wants to run.
         */
        if (hardirq_count())
                __this_cpu_add(cpu_hardirq_time, delta);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                __this_cpu_add(cpu_softirq_time, delta);

        irq_time_write_end();
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
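
/*
 * Call-site note (added for clarity, not part of the original file): the
 * generic irq paths are expected to reach irqtime_account_irq() through
 * account_irq_enter_time()/account_irq_exit_time() in <linux/hardirq.h>,
 * i.e. just before preempt_count is raised on irq entry and just before it
 * is lowered on irq exit, so hardirq_count()/in_serving_softirq() above
 * still describe the context in which the just-measured delta was spent.
 */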

static cputime_t irqtime_account_hi_update(cputime_t maxtime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        cputime_t irq_cputime;

        local_irq_save(flags);
        irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
                      cpustat[CPUTIME_IRQ];
        irq_cputime = min(irq_cputime, maxtime);
        cpustat[CPUTIME_IRQ] += irq_cputime;
        local_irq_restore(flags);
        return irq_cputime;
}

static cputime_t irqtime_account_si_update(cputime_t maxtime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        cputime_t softirq_cputime;

        local_irq_save(flags);
        softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
                          cpustat[CPUTIME_SOFTIRQ];
        softirq_cputime = min(softirq_cputime, maxtime);
        cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
        local_irq_restore(flags);
        return softirq_cputime;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

static cputime_t irqtime_account_hi_update(cputime_t dummy)
{
        return 0;
}

static cputime_t irqtime_account_si_update(cputime_t dummy)
{
        return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
                       cputime_t cputime_scaled)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);

        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
                               cputime_t cputime_scaled)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (task_nice(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
                        cputime_t cputime_scaled, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        p->stimescaled += cputime_scaled;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime, cputime_t cputime_scaled)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime, cputime_scaled);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                cputime_t steal_cputime;
                u64 steal;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                steal_cputime = min(nsecs_to_cputime(steal), maxtime);
                account_steal_time(steal_cputime);
                this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

                return steal_cputime;
        }
#endif
        return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline cputime_t account_other_time(cputime_t max)
{
        cputime_t accounted;

        accounted = steal_account_process_time(max);

        if (accounted < max)
                accounted += irqtime_account_hi_update(max - accounted);

        if (accounted < max)
                accounted += irqtime_account_si_update(max - accounted);

        return accounted;
}
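
/*
 * Worked example (added for illustration, numbers are hypothetical): with
 * max = 10 ticks of elapsed time, 4 ticks of pending steal time, 3 ticks
 * of pending hardirq time and 5 ticks of pending softirq time,
 * account_other_time() accounts 4 steal + 3 irq + min(5, 10 - 7) = 3
 * softirq ticks and returns 10, leaving the caller nothing to hand out as
 * user, system or idle time for this interval.
 */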

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        cputime_t utime, stime;
        struct task_struct *t;
        unsigned int seq, nextseq;
        unsigned long flags;

        rcu_read_lock();
        /* Attempt a lockless read on the first round. */
        nextseq = 0;
        do {
                seq = nextseq;
                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
                times->utime = sig->utime;
                times->stime = sig->stime;
                times->sum_exec_runtime = sig->sum_sched_runtime;

                for_each_thread(tsk, t) {
                        task_cputime(t, &utime, &stime);
                        times->utime += utime;
                        times->stime += stime;
                        times->sum_exec_runtime += task_sched_runtime(t);
                }
                /* If lockless access failed, take the lock. */
                nextseq = 1;
        } while (need_seqretry(&sig->stats_lock, seq));
        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time because no
 * timer interrupt fires while we are in a hardirq, so we might never get
 * another opportunity to update it purely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those no longer count towards the task's exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq, int ticks)
{
        u64 cputime = (__force u64) cputime_one_jiffy * ticks;
        cputime_t scaled, other;

        /*
         * When returning from idle, many ticks can get accounted at
         * once, including some ticks of steal, irq, and softirq time.
         * Subtract those ticks from the amount of time accounted to
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
        other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;
        cputime -= other;
        scaled = cputime_to_scaled(cputime);

        if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time,
                 * so we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime, scaled);
        } else if (p == rq->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime, scaled);
        } else {
                __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        struct rq *rq = this_rq();

        irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        vtime_account_user(prev);
#endif
        arch_vtime_task_switch(prev);
}
#endif

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * assign a different meaning to idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
        if (!in_interrupt() && is_idle_task(tsk))
                vtime_account_idle(tsk);
        else
                vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t cputime, scaled, steal;
        struct rq *rq = this_rq();

        if (vtime_accounting_cpu_enabled())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq, 1);
                return;
        }

        cputime = cputime_one_jiffy;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        scaled = cputime_to_scaled(cputime);

        if (user_tick)
                account_user_time(p, cputime, scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
        else
                account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks spent idle
 */
void account_idle_ticks(unsigned long ticks)
{
        cputime_t cputime, steal;

        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        cputime = jiffies_to_cputime(ticks);
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
        u64 scaled;

        for (;;) {
                /* Make sure "rtime" is the bigger of stime/rtime */
                if (stime > rtime)
                        swap(rtime, stime);

                /* Make sure 'total' fits in 32 bits */
                if (total >> 32)
                        goto drop_precision;

                /* Does rtime (and thus stime) fit in 32 bits? */
                if (!(rtime >> 32))
                        break;

                /* Can we just balance rtime/stime rather than dropping bits? */
                if (stime >> 31)
                        goto drop_precision;

                /* We can grow stime and shrink rtime and try to make them both fit */
                stime <<= 1;
                rtime >>= 1;
                continue;

drop_precision:
                /* We drop from rtime, it has more bits than stime */
                rtime >>= 1;
                total >>= 1;
        }

        /*
         * Make sure gcc understands that this is a 32x32->64 multiply,
         * followed by a 64/32->64 divide.
         */
        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
        return (__force cputime_t) scaled;
}
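
/*
 * Worked example (added for illustration, numbers are hypothetical): a task
 * observed utime = 6 and stime = 2 ticks, but CFS says rtime = 40 cputime
 * units really elapsed. Then scale_stime(2, 40, 8) == 2 * 40 / 8 == 10, and
 * cputime_adjust() below hands out stime = 10, utime = 30, preserving both
 * stime + utime == rtime and the observed 2:6 ratio. When rtime or total do
 * not fit in 32 bits, the loop above halves rtime and total (or doubles
 * stime while halving rtime) until the 32x32->64 multiply is safe, trading
 * a little precision for overflow safety.
 */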

/*
 * Adjust tick based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on whether the random scheduling
 * timeslices of a task happen to be interrupted by the timer or not.
 * Depending on these circumstances, the tick counts may over- or
 * under-estimate the real user and system cputime, i.e. they match them
 * only with variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
static void cputime_adjust(struct task_cputime *curr,
                           struct prev_cputime *prev,
                           cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, stime, utime;
        unsigned long flags;

        /* Serialize concurrent callers such that we can honour our guarantees */
        raw_spin_lock_irqsave(&prev->lock, flags);
        rtime = nsecs_to_cputime(curr->sum_exec_runtime);

        /*
         * This is possible under two circumstances:
         *  - rtime isn't monotonic after all (a bug);
         *  - we got reordered by the lock.
         *
         * In both cases this acts as a filter such that the rest of the code
         * can assume it is monotonic regardless of anything else.
         */
        if (prev->stime + prev->utime >= rtime)
                goto out;

        stime = curr->stime;
        utime = curr->utime;

        /*
         * If stime is 0, assume all runtime is userspace; if utime is 0,
         * assume it is all system time. Once a task gets some ticks, the
         * monotonicity code at 'update' will ensure things converge to the
         * observed ratio.
         */
        if (stime == 0) {
                utime = rtime;
                goto update;
        }

        if (utime == 0) {
                stime = rtime;
                goto update;
        }

        stime = scale_stime((__force u64)stime, (__force u64)rtime,
                            (__force u64)(stime + utime));

update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
         *
         *  utime_i+1 = rtime_i+1 - stime_i
         *            = rtime_i+1 - (rtime_i - utime_i)
         *            = (rtime_i+1 - rtime_i) + utime_i
         *            >= utime_i
         */
        if (stime < prev->stime)
                stime = prev->stime;
        utime = rtime - stime;

        /*
         * Make sure utime doesn't go backwards; this still preserves
         * monotonicity for stime, analogous argument to above.
         */
        if (utime < prev->utime) {
                utime = prev->utime;
                stime = rtime - utime;
        }

        prev->stime = stime;
        prev->utime = utime;
out:
        *ut = prev->utime;
        *st = prev->stime;
        raw_spin_unlock_irqrestore(&prev->lock, flags);
}
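
/*
 * Worked example (added for illustration, numbers are hypothetical): say a
 * previous call published prev->stime = 10, prev->utime = 30 (rtime was 40).
 * On the next call the tick counts have shifted so that curr->stime = 2 and
 * curr->utime = 7 while rtime = 41. scale_stime() yields 2 * 41 / 9 = 9,
 * which is below prev->stime, so the clamp at 'update' keeps stime = 10 and
 * utime = 41 - 10 = 31: both values still move forward and
 * stime + utime == rtime holds.
 */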

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        task_cputime(p, &cputime.utime, &cputime.stime);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static cputime_t vtime_delta(struct task_struct *tsk)
{
        unsigned long now = READ_ONCE(jiffies);

        if (time_before(now, (unsigned long)tsk->vtime_snap))
                return 0;

        return jiffies_to_cputime(now - tsk->vtime_snap);
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
        unsigned long now = READ_ONCE(jiffies);
        cputime_t delta, other;

        /*
         * Unlike tick based timing, vtime based timing never has lost
         * ticks, and there is no need for steal time accounting to make
         * up for them. Vtime accounts a rounded version of the actual
         * elapsed time. Limit account_other_time to prevent rounding
         * errors from causing elapsed vtime to go negative.
         */
        delta = jiffies_to_cputime(now - tsk->vtime_snap);
        other = account_other_time(delta);
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
        tsk->vtime_snap = now;

        return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk)
{
        cputime_t delta_cpu = get_vtime_delta(tsk);

        account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_system(struct task_struct *tsk)
{
        if (!vtime_delta(tsk))
                return;

        write_seqcount_begin(&tsk->vtime_seqcount);
        __vtime_account_system(tsk);
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_account_user(struct task_struct *tsk)
{
        cputime_t delta_cpu;

        write_seqcount_begin(&tsk->vtime_seqcount);
        tsk->vtime_snap_whence = VTIME_SYS;
        if (vtime_delta(tsk)) {
                delta_cpu = get_vtime_delta(tsk);
                account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
        }
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
        write_seqcount_begin(&tsk->vtime_seqcount);
        if (vtime_delta(tsk))
                __vtime_account_system(tsk);
        tsk->vtime_snap_whence = VTIME_USER;
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
        /*
         * The flags must be updated under the lock, together with
         * the vtime_snap flush and update.
         * That enforces the right ordering and update sequence
         * synchronization against the reader (task_gtime()),
         * which can thus safely catch up with a tickless delta.
         */
        write_seqcount_begin(&tsk->vtime_seqcount);
        if (vtime_delta(tsk))
                __vtime_account_system(tsk);
        current->flags |= PF_VCPU;
        write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
        write_seqcount_begin(&tsk->vtime_seqcount);
        __vtime_account_system(tsk);
        current->flags &= ~PF_VCPU;
        write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
        cputime_t delta_cpu = get_vtime_delta(tsk);

        account_idle_time(delta_cpu);
}

void arch_vtime_task_switch(struct task_struct *prev)
{
        write_seqcount_begin(&prev->vtime_seqcount);
        prev->vtime_snap_whence = VTIME_INACTIVE;
        write_seqcount_end(&prev->vtime_seqcount);

        write_seqcount_begin(&current->vtime_seqcount);
        current->vtime_snap_whence = VTIME_SYS;
        current->vtime_snap = jiffies;
        write_seqcount_end(&current->vtime_seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        write_seqcount_begin(&t->vtime_seqcount);
        t->vtime_snap_whence = VTIME_SYS;
        t->vtime_snap = jiffies;
        write_seqcount_end(&t->vtime_seqcount);
        local_irq_restore(flags);
}

cputime_t task_gtime(struct task_struct *t)
{
        unsigned int seq;
        cputime_t gtime;

        if (!vtime_accounting_enabled())
                return t->gtime;

        do {
                seq = read_seqcount_begin(&t->vtime_seqcount);

                gtime = t->gtime;
                if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
                        gtime += vtime_delta(t);

        } while (read_seqcount_retry(&t->vtime_seqcount, seq));

        return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
static void
fetch_task_cputime(struct task_struct *t,
                   cputime_t *u_dst, cputime_t *s_dst,
                   cputime_t *u_src, cputime_t *s_src,
                   cputime_t *udelta, cputime_t *sdelta)
{
        unsigned int seq;
        unsigned long long delta;

        do {
                *udelta = 0;
                *sdelta = 0;

                seq = read_seqcount_begin(&t->vtime_seqcount);

                if (u_dst)
                        *u_dst = *u_src;
                if (s_dst)
                        *s_dst = *s_src;

                /* Task is sleeping, nothing to add */
                if (t->vtime_snap_whence == VTIME_INACTIVE ||
                    is_idle_task(t))
                        continue;

                delta = vtime_delta(t);

                /*
                 * Task runs either in user or kernel space, add pending nohz time to
                 * the right place.
                 */
                if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
                        *udelta = delta;
                } else {
                        if (t->vtime_snap_whence == VTIME_SYS)
                                *sdelta = delta;
                }
        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
}


void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
        cputime_t udelta, sdelta;

        if (!vtime_accounting_enabled()) {
                if (utime)
                        *utime = t->utime;
                if (stime)
                        *stime = t->stime;
                return;
        }

        fetch_task_cputime(t, utime, stime, &t->utime,
                           &t->stime, &udelta, &sdelta);
        if (utime)
                *utime += udelta;
        if (stime)
                *stime += sdelta;
}

void task_cputime_scaled(struct task_struct *t,
                         cputime_t *utimescaled, cputime_t *stimescaled)
{
        cputime_t udelta, sdelta;

        if (!vtime_accounting_enabled()) {
                if (utimescaled)
                        *utimescaled = t->utimescaled;
                if (stimescaled)
                        *stimescaled = t->stimescaled;
                return;
        }

        fetch_task_cputime(t, utimescaled, stimescaled,
                           &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
        if (utimescaled)
                *utimescaled += cputime_to_scaled(udelta);
        if (stimescaled)
                *stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
