linux/kernel/sched/cputime.c
   1#include <linux/export.h>
   2#include <linux/sched.h>
   3#include <linux/tsacct_kern.h>
   4#include <linux/kernel_stat.h>
   5#include <linux/static_key.h>
   6#include <linux/context_tracking.h>
   7#include "sched.h"
   8#ifdef CONFIG_PARAVIRT
   9#include <asm/paravirt.h>
  10#endif
  11
  12
  13#ifdef CONFIG_IRQ_TIME_ACCOUNTING
  14
  15/*
  16 * There are no locks covering percpu hardirq/softirq time.
  17 * They are only modified in irqtime_account_irq(), on the corresponding
  18 * CPU with interrupts disabled, so writes are safe.
  19 * They are read and saved off onto struct rq in update_rq_clock().
  20 * This means another CPU may read this CPU's irq time and race with
  21 * irqtime_account_irq() on this CPU. We would get either the old or the new
  22 * value, with the side effect of accounting a slice of irq time to the wrong
  23 * task when an irq is in progress while we read rq->clock. That is a worthy
  24 * compromise compared to taking locks on each irq in account_system_time.
  25 */
  26DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
  27
  28static int sched_clock_irqtime;
  29
  30void enable_sched_clock_irqtime(void)
  31{
  32        sched_clock_irqtime = 1;
  33}
  34
  35void disable_sched_clock_irqtime(void)
  36{
  37        sched_clock_irqtime = 0;
  38}
  39
  40/*
  41 * Called before incrementing preempt_count on {soft,}irq_enter
  42 * and before decrementing preempt_count on {soft,}irq_exit.
  43 */
  44void irqtime_account_irq(struct task_struct *curr)
  45{
  46        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
  47        s64 delta;
  48        int cpu;
  49
  50        if (!sched_clock_irqtime)
  51                return;
  52
  53        cpu = smp_processor_id();
  54        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
  55        irqtime->irq_start_time += delta;
  56
  57        u64_stats_update_begin(&irqtime->sync);
  58        /*
  59         * We do not account for softirq time from ksoftirqd here.
  60         * We want to keep accounting that softirq time to the ksoftirqd
  61         * thread itself, so as not to confuse the scheduler with a special
  62         * task that does not consume any time but still wants to run.
  63         */
  64        if (hardirq_count())
  65                irqtime->hardirq_time += delta;
  66        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
  67                irqtime->softirq_time += delta;
  68
  69        u64_stats_update_end(&irqtime->sync);
  70}
  71EXPORT_SYMBOL_GPL(irqtime_account_irq);
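
/*
 * Illustrative, assumed example (the function name is made up): a reader on
 * another CPU pairs with the u64_stats_update_begin()/end() writer section
 * above roughly like this, summing both counters against a consistent
 * snapshot. The scheduler's own reader of these counters lives in "sched.h".
 */
static u64 __maybe_unused irqtime_read_sketch(int cpu)
{
        struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
        unsigned int seq;
        u64 total;

        do {
                /* Snapshot the writer sequence count */
                seq = u64_stats_fetch_begin(&irqtime->sync);
                total = irqtime->softirq_time + irqtime->hardirq_time;
                /* Retry if a writer updated the counters meanwhile */
        } while (u64_stats_fetch_retry(&irqtime->sync, seq));

        return total;
}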
  72
  73static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
  74{
  75        u64 *cpustat = kcpustat_this_cpu->cpustat;
  76        cputime_t irq_cputime;
  77
  78        irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
  79        irq_cputime = min(irq_cputime, maxtime);
  80        cpustat[idx] += irq_cputime;
  81
  82        return irq_cputime;
  83}
  84
  85static cputime_t irqtime_account_hi_update(cputime_t maxtime)
  86{
  87        return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
  88                                      CPUTIME_IRQ, maxtime);
  89}
  90
  91static cputime_t irqtime_account_si_update(cputime_t maxtime)
  92{
  93        return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
  94                                      CPUTIME_SOFTIRQ, maxtime);
  95}
  96
  97#else /* CONFIG_IRQ_TIME_ACCOUNTING */
  98
  99#define sched_clock_irqtime     (0)
 100
 101static cputime_t irqtime_account_hi_update(cputime_t dummy)
 102{
 103        return 0;
 104}
 105
 106static cputime_t irqtime_account_si_update(cputime_t dummy)
 107{
 108        return 0;
 109}
 110
 111#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 112
 113static inline void task_group_account_field(struct task_struct *p, int index,
 114                                            u64 tmp)
 115{
 116        /*
 117         * Since all updates are sure to touch the root cgroup, we go
 118         * ahead and touch it first. If the root cgroup is the only
 119         * cgroup, then nothing else should be necessary.
 120         *
 121         */
 122        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);
 123
 124        cpuacct_account_field(p, index, tmp);
 125}
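
/*
 * Illustrative, assumed example (the function name is made up): the per-CPU
 * counters updated above are what a consumer such as /proc/stat sums per CPU,
 * along these lines (simplified, not the actual fs/proc/stat.c code).
 */
static u64 __maybe_unused cpustat_user_nice_sketch(int cpu)
{
        u64 *cpustat = kcpustat_cpu(cpu).cpustat;

        /* Combined user + nice time accounted on this CPU so far */
        return cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
}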
 126
 127/*
 128 * Account user cpu time to a process.
 129 * @p: the process that the cpu time gets accounted to
 130 * @cputime: the cpu time spent in user space since the last update
 131 * @cputime_scaled: cputime scaled by cpu frequency
 132 */
 133void account_user_time(struct task_struct *p, cputime_t cputime,
 134                       cputime_t cputime_scaled)
 135{
 136        int index;
 137
 138        /* Add user time to process. */
 139        p->utime += cputime;
 140        p->utimescaled += cputime_scaled;
 141        account_group_user_time(p, cputime);
 142
 143        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 144
 145        /* Add user time to cpustat. */
 146        task_group_account_field(p, index, (__force u64) cputime);
 147
 148        /* Account for user time used */
 149        acct_account_cputime(p);
 150}
 151
 152/*
 153 * Account guest cpu time to a process.
 154 * @p: the process that the cpu time gets accounted to
 155 * @cputime: the cpu time spent in virtual machine since the last update
 156 * @cputime_scaled: cputime scaled by cpu frequency
 157 */
 158static void account_guest_time(struct task_struct *p, cputime_t cputime,
 159                               cputime_t cputime_scaled)
 160{
 161        u64 *cpustat = kcpustat_this_cpu->cpustat;
 162
 163        /* Add guest time to process. */
 164        p->utime += cputime;
 165        p->utimescaled += cputime_scaled;
 166        account_group_user_time(p, cputime);
 167        p->gtime += cputime;
 168
 169        /* Add guest time to cpustat. */
 170        if (task_nice(p) > 0) {
 171                cpustat[CPUTIME_NICE] += (__force u64) cputime;
 172                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
 173        } else {
 174                cpustat[CPUTIME_USER] += (__force u64) cputime;
 175                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
 176        }
 177}
 178
 179/*
 180 * Account system cpu time to a process and desired cpustat field
 181 * @p: the process that the cpu time gets accounted to
 182 * @cputime: the cpu time spent in kernel space since the last update
 183 * @cputime_scaled: cputime scaled by cpu frequency
 184 * @index: index of the cpustat field that has to be updated
 185 */
 186static inline
 187void __account_system_time(struct task_struct *p, cputime_t cputime,
 188                        cputime_t cputime_scaled, int index)
 189{
 190        /* Add system time to process. */
 191        p->stime += cputime;
 192        p->stimescaled += cputime_scaled;
 193        account_group_system_time(p, cputime);
 194
 195        /* Add system time to cpustat. */
 196        task_group_account_field(p, index, (__force u64) cputime);
 197
 198        /* Account for system time used */
 199        acct_account_cputime(p);
 200}
 201
 202/*
 203 * Account system cpu time to a process.
 204 * @p: the process that the cpu time gets accounted to
 205 * @hardirq_offset: the offset to subtract from hardirq_count()
 206 * @cputime: the cpu time spent in kernel space since the last update
 207 * @cputime_scaled: cputime scaled by cpu frequency
 208 */
 209void account_system_time(struct task_struct *p, int hardirq_offset,
 210                         cputime_t cputime, cputime_t cputime_scaled)
 211{
 212        int index;
 213
 214        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
 215                account_guest_time(p, cputime, cputime_scaled);
 216                return;
 217        }
 218
 219        if (hardirq_count() - hardirq_offset)
 220                index = CPUTIME_IRQ;
 221        else if (in_serving_softirq())
 222                index = CPUTIME_SOFTIRQ;
 223        else
 224                index = CPUTIME_SYSTEM;
 225
 226        __account_system_time(p, cputime, cputime_scaled, index);
 227}
 228
 229/*
 230 * Account for involuntary wait time.
 231 * @cputime: the cpu time spent in involuntary wait
 232 */
 233void account_steal_time(cputime_t cputime)
 234{
 235        u64 *cpustat = kcpustat_this_cpu->cpustat;
 236
 237        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
 238}
 239
 240/*
 241 * Account for idle time.
 242 * @cputime: the cpu time spent in idle wait
 243 */
 244void account_idle_time(cputime_t cputime)
 245{
 246        u64 *cpustat = kcpustat_this_cpu->cpustat;
 247        struct rq *rq = this_rq();
 248
 249        if (atomic_read(&rq->nr_iowait) > 0)
 250                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
 251        else
 252                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 253}
 254
 255/*
 256 * When a guest is interrupted for a long time, missed clock ticks are
 257 * not redelivered later. Because of that, this function may occasionally
 258 * account more time than the calling function thinks has elapsed.
 259 */
 260static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 261{
 262#ifdef CONFIG_PARAVIRT
 263        if (static_key_false(&paravirt_steal_enabled)) {
 264                cputime_t steal_cputime;
 265                u64 steal;
 266
 267                steal = paravirt_steal_clock(smp_processor_id());
 268                steal -= this_rq()->prev_steal_time;
 269
 270                steal_cputime = min(nsecs_to_cputime(steal), maxtime);
 271                account_steal_time(steal_cputime);
 272                this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 273
 274                return steal_cputime;
 275        }
 276#endif
 277        return 0;
 278}
 279
 280/*
 281 * Account how much elapsed time was spent in steal, irq, or softirq time.
 282 */
 283static inline cputime_t account_other_time(cputime_t max)
 284{
 285        cputime_t accounted;
 286
 287        /* Shall be converted to a lockdep-enabled lightweight check */
 288        WARN_ON_ONCE(!irqs_disabled());
 289
 290        accounted = steal_account_process_time(max);
 291
 292        if (accounted < max)
 293                accounted += irqtime_account_hi_update(max - accounted);
 294
 295        if (accounted < max)
 296                accounted += irqtime_account_si_update(max - accounted);
 297
 298        return accounted;
 299}
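
/*
 * Worked example (illustrative): if the caller passes max = 4 units and this
 * CPU saw 3 units of steal plus 2 units of hardirq since the last update,
 * steal_account_process_time() consumes 3, irqtime_account_hi_update() is
 * clamped to the remaining 1 and softirq gets nothing, so the caller never
 * accounts more "other" time than the elapsed time it observed.
 */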
 300
 301#ifdef CONFIG_64BIT
 302static inline u64 read_sum_exec_runtime(struct task_struct *t)
 303{
 304        return t->se.sum_exec_runtime;
 305}
 306#else
 307static u64 read_sum_exec_runtime(struct task_struct *t)
 308{
 309        u64 ns;
 310        struct rq_flags rf;
 311        struct rq *rq;
 312
 313        rq = task_rq_lock(t, &rf);
 314        ns = t->se.sum_exec_runtime;
 315        task_rq_unlock(rq, t, &rf);
 316
 317        return ns;
 318}
 319#endif
 320
 321/*
 322 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 323 * tasks (sum on group iteration) belonging to @tsk's group.
 324 */
 325void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 326{
 327        struct signal_struct *sig = tsk->signal;
 328        cputime_t utime, stime;
 329        struct task_struct *t;
 330        unsigned int seq, nextseq;
 331        unsigned long flags;
 332
 333        /*
 334         * Update current task runtime to account pending time since last
 335         * scheduler action or thread_group_cputime() call. This thread group
 336         * might have other running tasks on different CPUs, but updating
 337         * their runtime can affect syscall performance, so we skip accounting
 338         * those pending times and rely only on values updated on tick or
 339         * other scheduler action.
 340         */
 341        if (same_thread_group(current, tsk))
 342                (void) task_sched_runtime(current);
 343
 344        rcu_read_lock();
 345        /* Attempt a lockless read on the first round. */
 346        nextseq = 0;
 347        do {
 348                seq = nextseq;
 349                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 350                times->utime = sig->utime;
 351                times->stime = sig->stime;
 352                times->sum_exec_runtime = sig->sum_sched_runtime;
 353
 354                for_each_thread(tsk, t) {
 355                        task_cputime(t, &utime, &stime);
 356                        times->utime += utime;
 357                        times->stime += stime;
 358                        times->sum_exec_runtime += read_sum_exec_runtime(t);
 359                }
 360                /* If lockless access failed, take the lock. */
 361                nextseq = 1;
 362        } while (need_seqretry(&sig->stats_lock, seq));
 363        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
 364        rcu_read_unlock();
 365}
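
/*
 * Illustrative, assumed usage (the function name is made up): a caller such
 * as getrusage()-style reporting consumes the accumulated group totals along
 * these lines.
 */
static void __maybe_unused thread_group_runtime_sketch(struct task_struct *tsk)
{
        struct task_cputime times;

        thread_group_cputime(tsk, &times);
        pr_debug("group utime=%llu stime=%llu rtime=%llu\n",
                 (unsigned long long)times.utime,
                 (unsigned long long)times.stime,
                 (unsigned long long)times.sum_exec_runtime);
}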
 366
 367#ifdef CONFIG_IRQ_TIME_ACCOUNTING
 368/*
 369 * Account a tick to a process and cpustat
 370 * @p: the process that the cpu time gets accounted to
 371 * @user_tick: is the tick from userspace
 372 * @rq: the pointer to rq
 373 *
 374 * Tick demultiplexing follows the order
 375 * - pending hardirq update
 376 * - pending softirq update
 377 * - user_time
 378 * - idle_time
 379 * - system time
 380 *   - check for guest_time
 381 *   - else account as system_time
 382 *
 383 * The check for hardirq is done for both system and user time, as no
 384 * timer fires while we are in a hardirq and hence we might never get the
 385 * opportunity to update it solely on a system tick.
 386 * p->stime and friends are only updated on system time and not on irq or
 387 * softirq time, as those no longer count toward task exec_runtime.
 388 */
 389static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 390                                         struct rq *rq, int ticks)
 391{
 392        u64 cputime = (__force u64) cputime_one_jiffy * ticks;
 393        cputime_t scaled, other;
 394
 395        /*
 396         * When returning from idle, many ticks can get accounted at
 397         * once, including some ticks of steal, irq, and softirq time.
 398         * Subtract those ticks from the amount of time accounted to
 399         * idle, or potentially user or system time. Due to rounding,
 400         * other time can exceed ticks occasionally.
 401         */
 402        other = account_other_time(ULONG_MAX);
 403        if (other >= cputime)
 404                return;
 405        cputime -= other;
 406        scaled = cputime_to_scaled(cputime);
 407
 408        if (this_cpu_ksoftirqd() == p) {
 409                /*
 410                 * ksoftirqd time does not get accounted in cpu_softirq_time,
 411                 * so we have to handle it separately here.
 412                 * Also, p->stime needs to be updated for ksoftirqd.
 413                 */
 414                __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
 415        } else if (user_tick) {
 416                account_user_time(p, cputime, scaled);
 417        } else if (p == rq->idle) {
 418                account_idle_time(cputime);
 419        } else if (p->flags & PF_VCPU) { /* System time or guest time */
 420                account_guest_time(p, cputime, scaled);
 421        } else {
 422                __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
 423        }
 424}
 425
 426static void irqtime_account_idle_ticks(int ticks)
 427{
 428        struct rq *rq = this_rq();
 429
 430        irqtime_account_process_tick(current, 0, rq, ticks);
 431}
 432#else /* CONFIG_IRQ_TIME_ACCOUNTING */
 433static inline void irqtime_account_idle_ticks(int ticks) {}
 434static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 435                                                struct rq *rq, int nr_ticks) {}
 436#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 437
 438/*
 439 * Use precise platform statistics if available:
 440 */
 441#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 442
 443#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 444void vtime_common_task_switch(struct task_struct *prev)
 445{
 446        if (is_idle_task(prev))
 447                vtime_account_idle(prev);
 448        else
 449                vtime_account_system(prev);
 450
 451#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 452        vtime_account_user(prev);
 453#endif
 454        arch_vtime_task_switch(prev);
 455}
 456#endif
 457
 458#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 459
 460
 461#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 462/*
 463 * Archs that account the whole time spent in the idle task
 464 * (outside irq) as idle time can rely on this and just implement
 465 * vtime_account_system() and vtime_account_idle(). Archs that
 466 * assign a different meaning to idle time (s390, for instance, only
 467 * counts the time the CPU spends in low power mode) must override
 468 * vtime_account().
 469 */
 470#ifndef __ARCH_HAS_VTIME_ACCOUNT
 471void vtime_account_irq_enter(struct task_struct *tsk)
 472{
 473        if (!in_interrupt() && is_idle_task(tsk))
 474                vtime_account_idle(tsk);
 475        else
 476                vtime_account_system(tsk);
 477}
 478EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 479#endif /* __ARCH_HAS_VTIME_ACCOUNT */
 480
 481void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 482{
 483        *ut = p->utime;
 484        *st = p->stime;
 485}
 486EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 487
 488void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 489{
 490        struct task_cputime cputime;
 491
 492        thread_group_cputime(p, &cputime);
 493
 494        *ut = cputime.utime;
 495        *st = cputime.stime;
 496}
 497#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 498/*
 499 * Account a single tick of cpu time.
 500 * @p: the process that the cpu time gets accounted to
 501 * @user_tick: indicates if the tick is a user or a system tick
 502 */
 503void account_process_tick(struct task_struct *p, int user_tick)
 504{
 505        cputime_t cputime, scaled, steal;
 506        struct rq *rq = this_rq();
 507
 508        if (vtime_accounting_cpu_enabled())
 509                return;
 510
 511        if (sched_clock_irqtime) {
 512                irqtime_account_process_tick(p, user_tick, rq, 1);
 513                return;
 514        }
 515
 516        cputime = cputime_one_jiffy;
 517        steal = steal_account_process_time(ULONG_MAX);
 518
 519        if (steal >= cputime)
 520                return;
 521
 522        cputime -= steal;
 523        scaled = cputime_to_scaled(cputime);
 524
 525        if (user_tick)
 526                account_user_time(p, cputime, scaled);
 527        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 528                account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
 529        else
 530                account_idle_time(cputime);
 531}
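
/*
 * Worked example (illustrative): with HZ=1000 one tick is worth
 * cputime_one_jiffy (1ms). If steal_account_process_time() finds at least
 * that much stolen time, the whole tick is attributed to steal and nothing
 * is charged to the task; otherwise only the remainder left after steal is
 * charged as user, system or idle time.
 */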
 532
 533/*
 534 * Account multiple ticks of idle time.
 535 * @ticks: number of ticks to account as idle time
 536 */
 537void account_idle_ticks(unsigned long ticks)
 538{
 539        cputime_t cputime, steal;
 540
 541        if (sched_clock_irqtime) {
 542                irqtime_account_idle_ticks(ticks);
 543                return;
 544        }
 545
 546        cputime = jiffies_to_cputime(ticks);
 547        steal = steal_account_process_time(ULONG_MAX);
 548
 549        if (steal >= cputime)
 550                return;
 551
 552        cputime -= steal;
 553        account_idle_time(cputime);
 554}
 555
 556/*
 557 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 558 * losing precision when the numbers are big.
 559 */
 560static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 561{
 562        u64 scaled;
 563
 564        for (;;) {
 565                /* Make sure "rtime" is the bigger of stime/rtime */
 566                if (stime > rtime)
 567                        swap(rtime, stime);
 568
 569                /* Make sure 'total' fits in 32 bits */
 570                if (total >> 32)
 571                        goto drop_precision;
 572
 573                /* Does rtime (and thus stime) fit in 32 bits? */
 574                if (!(rtime >> 32))
 575                        break;
 576
 577                /* Can we just balance rtime/stime rather than dropping bits? */
 578                if (stime >> 31)
 579                        goto drop_precision;
 580
 581                /* We can grow stime and shrink rtime and try to make them both fit */
 582                stime <<= 1;
 583                rtime >>= 1;
 584                continue;
 585
 586drop_precision:
 587                /* We drop from rtime, it has more bits than stime */
 588                rtime >>= 1;
 589                total >>= 1;
 590        }
 591
 592        /*
 593         * Make sure gcc understands that this is a 32x32->64 multiply,
 594         * followed by a 64/32->64 divide.
 595         */
 596        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
 597        return (__force cputime_t) scaled;
 598}
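
/*
 * Worked example (illustrative): with stime = 1s and utime = 3s of tick based
 * time but rtime = 8s of sum_exec_runtime, total = 4s and scale_stime()
 * returns 8 * 1/4 = 2s. cputime_adjust() below then sets
 * utime = rtime - stime = 6s, keeping stime + utime == rtime while preserving
 * the observed 1:3 ratio between system and user time.
 */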
 599
 600/*
 601 * Adjust the imprecise tick based cputime against the precise scheduler
 602 * runtime accounting.
 603 *
 604 * Tick based cputime accounting depends on whether the random scheduling
 605 * timeslices of a task happen to be interrupted by the timer or not.
 606 * Depending on these circumstances, the number of these interrupts may
 607 * overestimate or underestimate the time actually spent, matching the real
 608 * user and system cputime only with variable precision.
 609 *
 610 * Fix this by scaling these tick based values against the total runtime
 611 * accounted by the CFS scheduler.
 612 *
 613 * This code provides the following guarantees:
 614 *
 615 *   stime + utime == rtime
 616 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 617 *
 618 * Assuming that rtime_i+1 >= rtime_i.
 619 */
 620static void cputime_adjust(struct task_cputime *curr,
 621                           struct prev_cputime *prev,
 622                           cputime_t *ut, cputime_t *st)
 623{
 624        cputime_t rtime, stime, utime;
 625        unsigned long flags;
 626
 627        /* Serialize concurrent callers such that we can honour our guarantees */
 628        raw_spin_lock_irqsave(&prev->lock, flags);
 629        rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 630
 631        /*
 632         * This is possible under two circumstances:
 633         *  - rtime isn't monotonic after all (a bug);
 634         *  - we got reordered by the lock.
 635         *
 636         * In both cases this acts as a filter such that the rest of the code
 637         * can assume it is monotonic regardless of anything else.
 638         */
 639        if (prev->stime + prev->utime >= rtime)
 640                goto out;
 641
 642        stime = curr->stime;
 643        utime = curr->utime;
 644
 645        /*
 646         * If stime is 0, assume all runtime is userspace; if utime is 0,
 647         * assume it is all system time. Once a task gets some ticks, the
 648         * monotonicity code at 'update' will converge to the observed ratio.
 649         */
 650        if (stime == 0) {
 651                utime = rtime;
 652                goto update;
 653        }
 654
 655        if (utime == 0) {
 656                stime = rtime;
 657                goto update;
 658        }
 659
 660        stime = scale_stime((__force u64)stime, (__force u64)rtime,
 661                            (__force u64)(stime + utime));
 662
 663update:
 664        /*
 665         * Make sure stime doesn't go backwards; this preserves monotonicity
 666         * for utime because rtime is monotonic.
 667         *
 668         *  utime_i+1 = rtime_i+1 - stime_i
 669         *            = rtime_i+1 - (rtime_i - utime_i)
 670         *            = (rtime_i+1 - rtime_i) + utime_i
 671         *            >= utime_i
 672         */
 673        if (stime < prev->stime)
 674                stime = prev->stime;
 675        utime = rtime - stime;
 676
 677        /*
 678         * Make sure utime doesn't go backwards; this still preserves
 679         * monotonicity for stime, analogous argument to above.
 680         */
 681        if (utime < prev->utime) {
 682                utime = prev->utime;
 683                stime = rtime - utime;
 684        }
 685
 686        prev->stime = stime;
 687        prev->utime = utime;
 688out:
 689        *ut = prev->utime;
 690        *st = prev->stime;
 691        raw_spin_unlock_irqrestore(&prev->lock, flags);
 692}
 693
 694void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 695{
 696        struct task_cputime cputime = {
 697                .sum_exec_runtime = p->se.sum_exec_runtime,
 698        };
 699
 700        task_cputime(p, &cputime.utime, &cputime.stime);
 701        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 702}
 703EXPORT_SYMBOL_GPL(task_cputime_adjusted);
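
/*
 * Illustrative, assumed usage (the function name is made up): the kind of
 * procfs-style call site that consumes the adjusted values.
 */
static void __maybe_unused report_task_times_sketch(struct task_struct *p)
{
        cputime_t ut, st;

        task_cputime_adjusted(p, &ut, &st);
        pr_debug("%s: utime=%lu stime=%lu\n", p->comm,
                 (unsigned long)cputime_to_clock_t(ut),
                 (unsigned long)cputime_to_clock_t(st));
}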
 704
 705void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 706{
 707        struct task_cputime cputime;
 708
 709        thread_group_cputime(p, &cputime);
 710        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 711}
 712#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 713
 714#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 715static cputime_t vtime_delta(struct task_struct *tsk)
 716{
 717        unsigned long now = READ_ONCE(jiffies);
 718
 719        if (time_before(now, (unsigned long)tsk->vtime_snap))
 720                return 0;
 721
 722        return jiffies_to_cputime(now - tsk->vtime_snap);
 723}
 724
 725static cputime_t get_vtime_delta(struct task_struct *tsk)
 726{
 727        unsigned long now = READ_ONCE(jiffies);
 728        cputime_t delta, other;
 729
 730        /*
 731         * Unlike tick based timing, vtime based timing never has lost
 732         * ticks and does not need steal time accounting to make up for
 733         * them. Vtime accounts a rounded version of the actual elapsed
 734         * time. Limit account_other_time to prevent rounding errors
 735         * from causing the elapsed vtime to go negative.
 736         */
 737        delta = jiffies_to_cputime(now - tsk->vtime_snap);
 738        other = account_other_time(delta);
 739        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 740        tsk->vtime_snap = now;
 741
 742        return delta - other;
 743}
 744
 745static void __vtime_account_system(struct task_struct *tsk)
 746{
 747        cputime_t delta_cpu = get_vtime_delta(tsk);
 748
 749        account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
 750}
 751
 752void vtime_account_system(struct task_struct *tsk)
 753{
 754        if (!vtime_delta(tsk))
 755                return;
 756
 757        write_seqcount_begin(&tsk->vtime_seqcount);
 758        __vtime_account_system(tsk);
 759        write_seqcount_end(&tsk->vtime_seqcount);
 760}
 761
 762void vtime_account_user(struct task_struct *tsk)
 763{
 764        cputime_t delta_cpu;
 765
 766        write_seqcount_begin(&tsk->vtime_seqcount);
 767        tsk->vtime_snap_whence = VTIME_SYS;
 768        if (vtime_delta(tsk)) {
 769                delta_cpu = get_vtime_delta(tsk);
 770                account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
 771        }
 772        write_seqcount_end(&tsk->vtime_seqcount);
 773}
 774
 775void vtime_user_enter(struct task_struct *tsk)
 776{
 777        write_seqcount_begin(&tsk->vtime_seqcount);
 778        if (vtime_delta(tsk))
 779                __vtime_account_system(tsk);
 780        tsk->vtime_snap_whence = VTIME_USER;
 781        write_seqcount_end(&tsk->vtime_seqcount);
 782}
 783
 784void vtime_guest_enter(struct task_struct *tsk)
 785{
 786        /*
 787         * The flags must be updated under the seqcount, together
 788         * with the vtime_snap flush and update.
 789         * That enforces the right ordering and update sequence
 790         * synchronization against the reader (task_gtime()),
 791         * which can thus safely catch up with a tickless delta.
 792         */
 793        write_seqcount_begin(&tsk->vtime_seqcount);
 794        if (vtime_delta(tsk))
 795                __vtime_account_system(tsk);
 796        current->flags |= PF_VCPU;
 797        write_seqcount_end(&tsk->vtime_seqcount);
 798}
 799EXPORT_SYMBOL_GPL(vtime_guest_enter);
 800
 801void vtime_guest_exit(struct task_struct *tsk)
 802{
 803        write_seqcount_begin(&tsk->vtime_seqcount);
 804        __vtime_account_system(tsk);
 805        current->flags &= ~PF_VCPU;
 806        write_seqcount_end(&tsk->vtime_seqcount);
 807}
 808EXPORT_SYMBOL_GPL(vtime_guest_exit);
 809
 810void vtime_account_idle(struct task_struct *tsk)
 811{
 812        cputime_t delta_cpu = get_vtime_delta(tsk);
 813
 814        account_idle_time(delta_cpu);
 815}
 816
 817void arch_vtime_task_switch(struct task_struct *prev)
 818{
 819        write_seqcount_begin(&prev->vtime_seqcount);
 820        prev->vtime_snap_whence = VTIME_INACTIVE;
 821        write_seqcount_end(&prev->vtime_seqcount);
 822
 823        write_seqcount_begin(&current->vtime_seqcount);
 824        current->vtime_snap_whence = VTIME_SYS;
 825        current->vtime_snap = jiffies;
 826        write_seqcount_end(&current->vtime_seqcount);
 827}
 828
 829void vtime_init_idle(struct task_struct *t, int cpu)
 830{
 831        unsigned long flags;
 832
 833        local_irq_save(flags);
 834        write_seqcount_begin(&t->vtime_seqcount);
 835        t->vtime_snap_whence = VTIME_SYS;
 836        t->vtime_snap = jiffies;
 837        write_seqcount_end(&t->vtime_seqcount);
 838        local_irq_restore(flags);
 839}
 840
 841cputime_t task_gtime(struct task_struct *t)
 842{
 843        unsigned int seq;
 844        cputime_t gtime;
 845
 846        if (!vtime_accounting_enabled())
 847                return t->gtime;
 848
 849        do {
 850                seq = read_seqcount_begin(&t->vtime_seqcount);
 851
 852                gtime = t->gtime;
 853                if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
 854                        gtime += vtime_delta(t);
 855
 856        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 857
 858        return gtime;
 859}
 860
 861/*
 862 * Fetch cputime raw values from fields of task_struct and
 863 * add up the pending nohz execution time since the last
 864 * cputime snapshot.
 865 */
 866static void
 867fetch_task_cputime(struct task_struct *t,
 868                   cputime_t *u_dst, cputime_t *s_dst,
 869                   cputime_t *u_src, cputime_t *s_src,
 870                   cputime_t *udelta, cputime_t *sdelta)
 871{
 872        unsigned int seq;
 873        unsigned long long delta;
 874
 875        do {
 876                *udelta = 0;
 877                *sdelta = 0;
 878
 879                seq = read_seqcount_begin(&t->vtime_seqcount);
 880
 881                if (u_dst)
 882                        *u_dst = *u_src;
 883                if (s_dst)
 884                        *s_dst = *s_src;
 885
 886                /* Task is sleeping, nothing to add */
 887                if (t->vtime_snap_whence == VTIME_INACTIVE ||
 888                    is_idle_task(t))
 889                        continue;
 890
 891                delta = vtime_delta(t);
 892
 893                /*
 894                 * Task runs either in user or kernel space, add pending nohz time to
 895                 * the right place.
 896                 */
 897                if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
 898                        *udelta = delta;
 899                } else {
 900                        if (t->vtime_snap_whence == VTIME_SYS)
 901                                *sdelta = delta;
 902                }
 903        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 904}
 905
 906
 907void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 908{
 909        cputime_t udelta, sdelta;
 910
 911        if (!vtime_accounting_enabled()) {
 912                if (utime)
 913                        *utime = t->utime;
 914                if (stime)
 915                        *stime = t->stime;
 916                return;
 917        }
 918
 919        fetch_task_cputime(t, utime, stime, &t->utime,
 920                           &t->stime, &udelta, &sdelta);
 921        if (utime)
 922                *utime += udelta;
 923        if (stime)
 924                *stime += sdelta;
 925}
 926
 927void task_cputime_scaled(struct task_struct *t,
 928                         cputime_t *utimescaled, cputime_t *stimescaled)
 929{
 930        cputime_t udelta, sdelta;
 931
 932        if (!vtime_accounting_enabled()) {
 933                if (utimescaled)
 934                        *utimescaled = t->utimescaled;
 935                if (stimescaled)
 936                        *stimescaled = t->stimescaled;
 937                return;
 938        }
 939
 940        fetch_task_cputime(t, utimescaled, stimescaled,
 941                           &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
 942        if (utimescaled)
 943                *utimescaled += cputime_to_scaled(udelta);
 944        if (stimescaled)
 945                *stimescaled += cputime_to_scaled(sdelta);
 946}
 947#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 948