linux/kernel/cgroup/stat.c
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

static DEFINE_MUTEX(cgroup_stat_mutex);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);

static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->cpu_stat, cpu);
}

/**
 * cgroup_cpu_stat_updated - keep track of updated cpu_stat
 * @cgrp: target cgroup
 * @cpu: cpu on which cpu_stat was updated
 *
 * @cgrp's cpu_stat on @cpu was updated.  Put it on the parent's matching
 * cpu_stat->updated_children list.  See the comment on top of
 * cgroup_cpu_stat definition for details.
 */
static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
	struct cgroup *parent;
	unsigned long flags;

	/*
	 * Speculative already-on-list test.  This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	for (parent = cgroup_parent(cgrp); parent;
	     cgrp = parent, parent = cgroup_parent(cgrp)) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);

		/*
		 * Both additions and removals are bottom-up.  If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (cstat->updated_next)
			break;

		cstat->updated_next = pcstat->updated_children;
		pcstat->updated_children = cgrp;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
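
/*
 * Illustrative sketch of the resulting per-cpu lists (the hierarchy
 * R -> A -> B and the update on B are made-up examples, not kernel data).
 * After an accounting call on B runs cgroup_cpu_stat_updated(B, cpu):
 *
 *	R.updated_children == A		A.updated_next == R
 *	A.updated_children == B		B.updated_next == A
 *	B.updated_children == B		(self-pointing: no updated children)
 *
 * Each list is terminated by its owner rather than by NULL, which is why
 * a NULL ->updated_next in the test above means "not on any list".
 */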

/**
 * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated cpu_stat tree on @cpu from @root.  %NULL @pos starts
 * the traversal and %NULL return indicates the end.  During traversal,
 * each returned cgroup is unlinked from the tree.  Must be called with the
 * matching cgroup_cpu_stat_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
						  struct cgroup *root, int cpu)
{
	struct cgroup_cpu_stat *cstat;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're going to walk down to the first leaf and visit/remove it.
	 * We can pick any unvisited node as the starting point.
	 */
	if (!pos)
		pos = root;
	else
		pos = cgroup_parent(pos);

	/* walk down to the first leaf */
	while (true) {
		cstat = cgroup_cpu_stat(pos, cpu);
		if (cstat->updated_children == pos)
			break;
		pos = cstat->updated_children;
	}

	/*
	 * Unlink @pos from the tree.  As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases.  The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent && cstat->updated_next) {
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
		struct cgroup_cpu_stat *ncstat;
		struct cgroup **nextp;

		nextp = &pcstat->updated_children;
		while (true) {
			ncstat = cgroup_cpu_stat(*nextp, cpu);
			if (*nextp == pos)
				break;

			WARN_ON_ONCE(*nextp == parent);
			nextp = &ncstat->updated_next;
		}

		*nextp = cstat->updated_next;
		cstat->updated_next = NULL;
	}

	return pos;
}
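
/*
 * Typical consumer (see cgroup_stat_flush_locked() below), shown here as
 * an illustrative sketch only:
 *
 *	struct cgroup *pos = NULL;
 *
 *	raw_spin_lock_irq(cpu_lock);
 *	while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
 *		cgroup_cpu_stat_flush_one(pos, cpu);
 *	raw_spin_unlock_irq(cpu_lock);
 *
 * Because a child is always returned before its parent, flushing @pos can
 * safely push its delta into the parent's ->pending_stat; any parent
 * inside the walked subtree is flushed later in the same walk.
 */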

static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
				   struct cgroup_stat *src_stat)
{
	dst_stat->cputime.utime += src_stat->cputime.utime;
	dst_stat->cputime.stime += src_stat->cputime.stime;
	dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
}

static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
	struct task_cputime *last_cputime = &cstat->last_cputime;
	struct task_cputime cputime;
	struct cgroup_stat delta;
	unsigned seq;

	lockdep_assert_held(&cgroup_stat_mutex);

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&cstat->sync);
		cputime = cstat->cputime;
	} while (__u64_stats_fetch_retry(&cstat->sync, seq));

	/* accumulate the deltas to propagate */
	delta.cputime.utime = cputime.utime - last_cputime->utime;
	delta.cputime.stime = cputime.stime - last_cputime->stime;
	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
					 last_cputime->sum_exec_runtime;
	*last_cputime = cputime;

	/* transfer the pending stat into delta */
	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));

	/* propagate delta into the global stat and the parent's pending */
	cgroup_stat_accumulate(&cgrp->stat, &delta);
	if (parent)
		cgroup_stat_accumulate(&parent->pending_stat, &delta);
}
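
/*
 * Worked example with made-up numbers: if @cgrp accumulated 5ms of utime
 * on this cpu since the last flush and already carried 2ms in
 * ->pending_stat (pushed up by children flushed earlier in the same walk),
 * the flush above adds 7ms to @cgrp->stat.cputime.utime and 7ms to the
 * parent's ->pending_stat, which in turn is folded into the parent's
 * ->stat when the parent itself is flushed.
 */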

/* see cgroup_stat_flush() */
static void cgroup_stat_flush_locked(struct cgroup *cgrp)
{
	int cpu;

	lockdep_assert_held(&cgroup_stat_mutex);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
		struct cgroup *pos = NULL;

		raw_spin_lock_irq(cpu_lock);
		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
			cgroup_cpu_stat_flush_one(pos, cpu);
		raw_spin_unlock_irq(cpu_lock);
	}
}

/**
 * cgroup_stat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards.  After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 */
void cgroup_stat_flush(struct cgroup *cgrp)
{
	mutex_lock(&cgroup_stat_mutex);
	cgroup_stat_flush_locked(cgrp);
	mutex_unlock(&cgroup_stat_mutex);
}
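
/*
 * Readers that need current numbers are expected to flush first, e.g.
 * (illustrative sketch only):
 *
 *	cgroup_stat_flush(cgrp);
 *	usage = cgrp->stat.cputime.sum_exec_runtime;
 *
 * cgroup_stat_show_cputime() below instead takes cgroup_stat_mutex itself
 * and uses cgroup_stat_flush_locked() so that the flush and the read of
 * ->stat happen in the same critical section.
 */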

static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
{
	struct cgroup_cpu_stat *cstat;

	cstat = get_cpu_ptr(cgrp->cpu_stat);
	u64_stats_update_begin(&cstat->sync);
	return cstat;
}

static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
					struct cgroup_cpu_stat *cstat)
{
	u64_stats_update_end(&cstat->sync);
	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(cstat);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_cpu_stat *cstat;

	cstat = cgroup_cpu_stat_account_begin(cgrp);
	cstat->cputime.sum_exec_runtime += delta_exec;
	cgroup_cpu_stat_account_end(cgrp, cstat);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_cpu_stat *cstat;

	cstat = cgroup_cpu_stat_account_begin(cgrp);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		cstat->cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		cstat->cputime.stime += delta_exec;
		break;
	default:
		break;
	}

	cgroup_cpu_stat_account_end(cgrp, cstat);
}
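
/*
 * Illustrative calls, assuming the caller has already resolved the task's
 * cgroup (in-tree, the __-less cgroup_account_cputime*() wrappers invoked
 * from the scheduler paths are the usual entry points):
 *
 *	__cgroup_account_cputime(cgrp, delta_exec);
 *	__cgroup_account_cputime_field(cgrp, CPUTIME_USER, delta_exec);
 *
 * Both run on the local cpu with preemption disabled by get_cpu_ptr() in
 * cgroup_cpu_stat_account_begin(), so the plain additions above only need
 * the u64_stats sequence to stay readable on 32bit.
 */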

void cgroup_stat_show_cputime(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;

	if (!cgroup_parent(cgrp))
		return;

	mutex_lock(&cgroup_stat_mutex);

	cgroup_stat_flush_locked(cgrp);

	usage = cgrp->stat.cputime.sum_exec_runtime;
	cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
		       &utime, &stime);

	mutex_unlock(&cgroup_stat_mutex);

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);
}
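
/*
 * Example of the resulting output, with made-up values (the nanosecond
 * counters are converted to microseconds by the do_div() calls above):
 *
 *	usage_usec 103845
 *	user_usec 61925
 *	system_usec 41920
 */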

int cgroup_stat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has cpu_stat preallocated */
	if (!cgrp->cpu_stat) {
		cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
		if (!cgrp->cpu_stat)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);

		cstat->updated_children = cgrp;
		u64_stats_init(&cstat->sync);
	}

	prev_cputime_init(&cgrp->stat.prev_cputime);

	return 0;
}

void cgroup_stat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_stat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);

		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
		    WARN_ON_ONCE(cstat->updated_next))
			return;
	}

	free_percpu(cgrp->cpu_stat);
	cgrp->cpu_stat = NULL;
}

void __init cgroup_stat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));

	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
}
 339