linux/kernel/sched/cpuacct.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * CPU accounting code for task groups.
   5 *
   6 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
   7 * (balbir@in.ibm.com).
   8 */
   9
  10/* Time spent by the tasks of the CPU accounting group executing in ... */
  11enum cpuacct_stat_index {
  12        CPUACCT_STAT_USER,      /* ... user mode */
  13        CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
  14
  15        CPUACCT_STAT_NSTATS,
  16};
  17
  18static const char * const cpuacct_stat_desc[] = {
  19        [CPUACCT_STAT_USER] = "user",
  20        [CPUACCT_STAT_SYSTEM] = "system",
  21};
  22
  23/* track CPU usage of a group of tasks and its child groups */
  24struct cpuacct {
  25        struct cgroup_subsys_state      css;
  26        /* cpuusage holds pointer to a u64-type object on every CPU */
  27        u64 __percpu    *cpuusage;
  28        struct kernel_cpustat __percpu  *cpustat;
  29};
  30
  31static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
  32{
  33        return css ? container_of(css, struct cpuacct, css) : NULL;
  34}
  35
  36/* Return CPU accounting group to which this task belongs */
  37static inline struct cpuacct *task_ca(struct task_struct *tsk)
  38{
  39        return css_ca(task_css(tsk, cpuacct_cgrp_id));
  40}
  41
  42static inline struct cpuacct *parent_ca(struct cpuacct *ca)
  43{
  44        return css_ca(ca->css.parent);
  45}
  46
  47static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
  48static struct cpuacct root_cpuacct = {
  49        .cpustat        = &kernel_cpustat,
  50        .cpuusage       = &root_cpuacct_cpuusage,
  51};
  52
  53/* Create a new CPU accounting group */
  54static struct cgroup_subsys_state *
  55cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
  56{
  57        struct cpuacct *ca;
  58
  59        if (!parent_css)
  60                return &root_cpuacct.css;
  61
  62        ca = kzalloc(sizeof(*ca), GFP_KERNEL);
  63        if (!ca)
  64                goto out;
  65
  66        ca->cpuusage = alloc_percpu(u64);
  67        if (!ca->cpuusage)
  68                goto out_free_ca;
  69
  70        ca->cpustat = alloc_percpu(struct kernel_cpustat);
  71        if (!ca->cpustat)
  72                goto out_free_cpuusage;
  73
  74        return &ca->css;
  75
  76out_free_cpuusage:
  77        free_percpu(ca->cpuusage);
  78out_free_ca:
  79        kfree(ca);
  80out:
  81        return ERR_PTR(-ENOMEM);
  82}
  83
  84/* Destroy an existing CPU accounting group */
  85static void cpuacct_css_free(struct cgroup_subsys_state *css)
  86{
  87        struct cpuacct *ca = css_ca(css);
  88
  89        free_percpu(ca->cpustat);
  90        free_percpu(ca->cpuusage);
  91        kfree(ca);
  92}
  93
  94static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
  95                                 enum cpuacct_stat_index index)
  96{
  97        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
  98        u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
  99        u64 data;
 100
 101        /*
 102         * We allow index == CPUACCT_STAT_NSTATS here to read
 103         * the sum of usages.
 104         */
 105        if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
 106                return 0;
 107
 108#ifndef CONFIG_64BIT
 109        /*
 110         * Take rq->lock to make 64-bit read safe on 32-bit platforms.
 111         */
 112        raw_spin_rq_lock_irq(cpu_rq(cpu));
 113#endif
 114
 115        switch (index) {
 116        case CPUACCT_STAT_USER:
 117                data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
 118                break;
 119        case CPUACCT_STAT_SYSTEM:
 120                data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
 121                        cpustat[CPUTIME_SOFTIRQ];
 122                break;
 123        case CPUACCT_STAT_NSTATS:
 124                data = *cpuusage;
 125                break;
 126        }
 127
 128#ifndef CONFIG_64BIT
 129        raw_spin_rq_unlock_irq(cpu_rq(cpu));
 130#endif
 131
 132        return data;
 133}
 134
 135static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
 136{
 137        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 138        u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
 139
 140        /* Don't allow to reset global kernel_cpustat */
 141        if (ca == &root_cpuacct)
 142                return;
 143
 144#ifndef CONFIG_64BIT
 145        /*
 146         * Take rq->lock to make 64-bit write safe on 32-bit platforms.
 147         */
 148        raw_spin_rq_lock_irq(cpu_rq(cpu));
 149#endif
 150        *cpuusage = 0;
 151        cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
 152        cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
 153        cpustat[CPUTIME_SOFTIRQ] = 0;
 154
 155#ifndef CONFIG_64BIT
 156        raw_spin_rq_unlock_irq(cpu_rq(cpu));
 157#endif
 158}
 159
 160/* Return total CPU usage (in nanoseconds) of a group */
 161static u64 __cpuusage_read(struct cgroup_subsys_state *css,
 162                           enum cpuacct_stat_index index)
 163{
 164        struct cpuacct *ca = css_ca(css);
 165        u64 totalcpuusage = 0;
 166        int i;
 167
 168        for_each_possible_cpu(i)
 169                totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
 170
 171        return totalcpuusage;
 172}
 173
 174static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
 175                              struct cftype *cft)
 176{
 177        return __cpuusage_read(css, CPUACCT_STAT_USER);
 178}
 179
 180static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
 181                             struct cftype *cft)
 182{
 183        return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
 184}
 185
 186static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
 187{
 188        return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
 189}
 190
 191static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
 192                          u64 val)
 193{
 194        struct cpuacct *ca = css_ca(css);
 195        int cpu;
 196
 197        /*
 198         * Only allow '0' here to do a reset.
 199         */
 200        if (val)
 201                return -EINVAL;
 202
 203        for_each_possible_cpu(cpu)
 204                cpuacct_cpuusage_write(ca, cpu);
 205
 206        return 0;
 207}
 208
 209static int __cpuacct_percpu_seq_show(struct seq_file *m,
 210                                     enum cpuacct_stat_index index)
 211{
 212        struct cpuacct *ca = css_ca(seq_css(m));
 213        u64 percpu;
 214        int i;
 215
 216        for_each_possible_cpu(i) {
 217                percpu = cpuacct_cpuusage_read(ca, i, index);
 218                seq_printf(m, "%llu ", (unsigned long long) percpu);
 219        }
 220        seq_printf(m, "\n");
 221        return 0;
 222}
 223
 224static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
 225{
 226        return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
 227}
 228
 229static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
 230{
 231        return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
 232}
 233
 234static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
 235{
 236        return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
 237}
 238
 239static int cpuacct_all_seq_show(struct seq_file *m, void *V)
 240{
 241        struct cpuacct *ca = css_ca(seq_css(m));
 242        int index;
 243        int cpu;
 244
 245        seq_puts(m, "cpu");
 246        for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
 247                seq_printf(m, " %s", cpuacct_stat_desc[index]);
 248        seq_puts(m, "\n");
 249
 250        for_each_possible_cpu(cpu) {
 251                seq_printf(m, "%d", cpu);
 252                for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
 253                        seq_printf(m, " %llu",
 254                                   cpuacct_cpuusage_read(ca, cpu, index));
 255                seq_puts(m, "\n");
 256        }
 257        return 0;
 258}
 259
 260static int cpuacct_stats_show(struct seq_file *sf, void *v)
 261{
 262        struct cpuacct *ca = css_ca(seq_css(sf));
 263        struct task_cputime cputime;
 264        u64 val[CPUACCT_STAT_NSTATS];
 265        int cpu;
 266        int stat;
 267
 268        memset(&cputime, 0, sizeof(cputime));
 269        for_each_possible_cpu(cpu) {
 270                u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
 271
 272                cputime.utime += cpustat[CPUTIME_USER];
 273                cputime.utime += cpustat[CPUTIME_NICE];
 274                cputime.stime += cpustat[CPUTIME_SYSTEM];
 275                cputime.stime += cpustat[CPUTIME_IRQ];
 276                cputime.stime += cpustat[CPUTIME_SOFTIRQ];
 277
 278                cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
 279        }
 280
 281        cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
 282                &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
 283
 284        for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
 285                seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
 286                        nsec_to_clock_t(val[stat]));
 287        }
 288
 289        return 0;
 290}
 291
 292static struct cftype files[] = {
 293        {
 294                .name = "usage",
 295                .read_u64 = cpuusage_read,
 296                .write_u64 = cpuusage_write,
 297        },
 298        {
 299                .name = "usage_user",
 300                .read_u64 = cpuusage_user_read,
 301        },
 302        {
 303                .name = "usage_sys",
 304                .read_u64 = cpuusage_sys_read,
 305        },
 306        {
 307                .name = "usage_percpu",
 308                .seq_show = cpuacct_percpu_seq_show,
 309        },
 310        {
 311                .name = "usage_percpu_user",
 312                .seq_show = cpuacct_percpu_user_seq_show,
 313        },
 314        {
 315                .name = "usage_percpu_sys",
 316                .seq_show = cpuacct_percpu_sys_seq_show,
 317        },
 318        {
 319                .name = "usage_all",
 320                .seq_show = cpuacct_all_seq_show,
 321        },
 322        {
 323                .name = "stat",
 324                .seq_show = cpuacct_stats_show,
 325        },
 326        { }     /* terminate */
 327};
 328
 329/*
 330 * charge this task's execution time to its accounting group.
 331 *
 332 * called with rq->lock held.
 333 */
 334void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 335{
 336        unsigned int cpu = task_cpu(tsk);
 337        struct cpuacct *ca;
 338
 339        lockdep_assert_rq_held(cpu_rq(cpu));
 340
 341        for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
 342                *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
 343}
 344
 345/*
 346 * Add user/system time to cpuacct.
 347 *
 348 * Note: it's the caller that updates the account of the root cgroup.
 349 */
 350void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 351{
 352        struct cpuacct *ca;
 353
 354        for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
 355                __this_cpu_add(ca->cpustat->cpustat[index], val);
 356}
 357
 358struct cgroup_subsys cpuacct_cgrp_subsys = {
 359        .css_alloc      = cpuacct_css_alloc,
 360        .css_free       = cpuacct_css_free,
 361        .legacy_cftypes = files,
 362        .early_init     = true,
 363};
 364