linux/kernel/sched/debug.c
   1/*
   2 * kernel/sched/debug.c
   3 *
   4 * Print the CFS rbtree and other debugging details
   5 *
   6 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 */
  12#include "sched.h"
  13
  14static DEFINE_SPINLOCK(sched_debug_lock);
  15
  16/*
  17 * This allows printing both to /proc/sched_debug and
  18 * to the console
  19 */
  20#define SEQ_printf(m, x...)                     \
  21 do {                                           \
  22        if (m)                                  \
  23                seq_printf(m, x);               \
  24        else                                    \
  25                pr_cont(x);                     \
  26 } while (0)
  27
  28/*
  29 * Ease the printing of nsec fields:
  30 */
  31static long long nsec_high(unsigned long long nsec)
  32{
  33        if ((long long)nsec < 0) {
  34                nsec = -nsec;
  35                do_div(nsec, 1000000);
  36                return -nsec;
  37        }
  38        do_div(nsec, 1000000);
  39
  40        return nsec;
  41}
  42
  43static unsigned long nsec_low(unsigned long long nsec)
  44{
  45        if ((long long)nsec < 0)
  46                nsec = -nsec;
  47
  48        return do_div(nsec, 1000000);
  49}
  50
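     /*
      * SPLIT_NS() expands into two arguments - the value in whole
      * milliseconds and the nanosecond remainder - matching the
      * "%Ld.%06ld" formats used throughout this file.
      */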
  51#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
  52
  53#define SCHED_FEAT(name, enabled)       \
  54        #name ,
  55
  56static const char * const sched_feat_names[] = {
  57#include "features.h"
  58};
  59
  60#undef SCHED_FEAT
  61
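     /*
      * Back end of the "sched_features" debugfs file: print every scheduler
      * feature, prefixing the currently disabled ones with "NO_".
      */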
  62static int sched_feat_show(struct seq_file *m, void *v)
  63{
  64        int i;
  65
  66        for (i = 0; i < __SCHED_FEAT_NR; i++) {
  67                if (!(sysctl_sched_features & (1UL << i)))
  68                        seq_puts(m, "NO_");
  69                seq_printf(m, "%s ", sched_feat_names[i]);
  70        }
  71        seq_puts(m, "\n");
  72
  73        return 0;
  74}
  75
  76#ifdef HAVE_JUMP_LABEL
  77
  78#define jump_label_key__true  STATIC_KEY_INIT_TRUE
  79#define jump_label_key__false STATIC_KEY_INIT_FALSE
  80
  81#define SCHED_FEAT(name, enabled)       \
  82        jump_label_key__##enabled ,
  83
  84struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
  85#include "features.h"
  86};
  87
  88#undef SCHED_FEAT
  89
  90static void sched_feat_disable(int i)
  91{
  92        static_key_disable(&sched_feat_keys[i]);
  93}
  94
  95static void sched_feat_enable(int i)
  96{
  97        static_key_enable(&sched_feat_keys[i]);
  98}
  99#else
  100static void sched_feat_disable(int i) { }
  101static void sched_feat_enable(int i) { }
 102#endif /* HAVE_JUMP_LABEL */
 103
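     /*
      * Look up @cmp (optionally prefixed with "NO_") in sched_feat_names[]
      * and set or clear the matching bit in sysctl_sched_features together
      * with its static key.  Returns __SCHED_FEAT_NR if no feature matched.
      */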
 104static int sched_feat_set(char *cmp)
 105{
 106        int i;
 107        int neg = 0;
 108
 109        if (strncmp(cmp, "NO_", 3) == 0) {
 110                neg = 1;
 111                cmp += 3;
 112        }
 113
 114        for (i = 0; i < __SCHED_FEAT_NR; i++) {
 115                if (strcmp(cmp, sched_feat_names[i]) == 0) {
 116                        if (neg) {
 117                                sysctl_sched_features &= ~(1UL << i);
 118                                sched_feat_disable(i);
 119                        } else {
 120                                sysctl_sched_features |= (1UL << i);
 121                                sched_feat_enable(i);
 122                        }
 123                        break;
 124                }
 125        }
 126
 127        return i;
 128}
 129
 130static ssize_t
 131sched_feat_write(struct file *filp, const char __user *ubuf,
 132                size_t cnt, loff_t *ppos)
 133{
 134        char buf[64];
 135        char *cmp;
 136        int i;
 137        struct inode *inode;
 138
 139        if (cnt > 63)
 140                cnt = 63;
 141
 142        if (copy_from_user(&buf, ubuf, cnt))
 143                return -EFAULT;
 144
 145        buf[cnt] = 0;
 146        cmp = strstrip(buf);
 147
 148        /* Ensure the static_key remains in a consistent state */
 149        inode = file_inode(filp);
 150        inode_lock(inode);
 151        i = sched_feat_set(cmp);
 152        inode_unlock(inode);
 153        if (i == __SCHED_FEAT_NR)
 154                return -EINVAL;
 155
 156        *ppos += cnt;
 157
 158        return cnt;
 159}
 160
 161static int sched_feat_open(struct inode *inode, struct file *filp)
 162{
 163        return single_open(filp, sched_feat_show, NULL);
 164}
 165
 166static const struct file_operations sched_feat_fops = {
 167        .open           = sched_feat_open,
 168        .write          = sched_feat_write,
 169        .read           = seq_read,
 170        .llseek         = seq_lseek,
 171        .release        = single_release,
 172};
 173
 174__read_mostly bool sched_debug_enabled;
 175
 176static __init int sched_init_debug(void)
 177{
 178        debugfs_create_file("sched_features", 0644, NULL, NULL,
 179                        &sched_feat_fops);
 180
 181        debugfs_create_bool("sched_debug", 0644, NULL,
 182                        &sched_debug_enabled);
 183
 184        return 0;
 185}
 186late_initcall(sched_init_debug);
 187
 188#ifdef CONFIG_SMP
 189
 190#ifdef CONFIG_SYSCTL
 191
 192static struct ctl_table sd_ctl_dir[] = {
 193        {
 194                .procname       = "sched_domain",
 195                .mode           = 0555,
 196        },
 197        {}
 198};
 199
 200static struct ctl_table sd_ctl_root[] = {
 201        {
 202                .procname       = "kernel",
 203                .mode           = 0555,
 204                .child          = sd_ctl_dir,
 205        },
 206        {}
 207};
 208
 209static struct ctl_table *sd_alloc_ctl_entry(int n)
 210{
 211        struct ctl_table *entry =
 212                kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
 213
 214        return entry;
 215}
 216
 217static void sd_free_ctl_entry(struct ctl_table **tablep)
 218{
 219        struct ctl_table *entry;
 220
 221        /*
 222         * In the intermediate directories, both the child directory and
 223         * procname are dynamically allocated and could fail but the mode
 224         * will always be set. In the lowest directory the names are
 225         * static strings and all have proc handlers.
 226         */
 227        for (entry = *tablep; entry->mode; entry++) {
 228                if (entry->child)
 229                        sd_free_ctl_entry(&entry->child);
 230                if (entry->proc_handler == NULL)
 231                        kfree(entry->procname);
 232        }
 233
 234        kfree(*tablep);
 235        *tablep = NULL;
 236}
 237
 238static int min_load_idx = 0;
 239static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 240
 241static void
 242set_table_entry(struct ctl_table *entry,
 243                const char *procname, void *data, int maxlen,
 244                umode_t mode, proc_handler *proc_handler,
 245                bool load_idx)
 246{
 247        entry->procname = procname;
 248        entry->data = data;
 249        entry->maxlen = maxlen;
 250        entry->mode = mode;
 251        entry->proc_handler = proc_handler;
 252
 253        if (load_idx) {
 254                entry->extra1 = &min_load_idx;
 255                entry->extra2 = &max_load_idx;
 256        }
 257}
 258
 259static struct ctl_table *
 260sd_alloc_ctl_domain_table(struct sched_domain *sd)
 261{
 262        struct ctl_table *table = sd_alloc_ctl_entry(14);
 263
 264        if (table == NULL)
 265                return NULL;
 266
 267        set_table_entry(&table[0] , "min_interval",        &sd->min_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
 268        set_table_entry(&table[1] , "max_interval",        &sd->max_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
 269        set_table_entry(&table[2] , "busy_idx",            &sd->busy_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
 270        set_table_entry(&table[3] , "idle_idx",            &sd->idle_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
 271        set_table_entry(&table[4] , "newidle_idx",         &sd->newidle_idx,         sizeof(int) , 0644, proc_dointvec_minmax,   true );
 272        set_table_entry(&table[5] , "wake_idx",            &sd->wake_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
 273        set_table_entry(&table[6] , "forkexec_idx",        &sd->forkexec_idx,        sizeof(int) , 0644, proc_dointvec_minmax,   true );
 274        set_table_entry(&table[7] , "busy_factor",         &sd->busy_factor,         sizeof(int) , 0644, proc_dointvec_minmax,   false);
 275        set_table_entry(&table[8] , "imbalance_pct",       &sd->imbalance_pct,       sizeof(int) , 0644, proc_dointvec_minmax,   false);
 276        set_table_entry(&table[9] , "cache_nice_tries",    &sd->cache_nice_tries,    sizeof(int) , 0644, proc_dointvec_minmax,   false);
 277        set_table_entry(&table[10], "flags",               &sd->flags,               sizeof(int) , 0644, proc_dointvec_minmax,   false);
 278        set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
 279        set_table_entry(&table[12], "name",                sd->name,            CORENAME_MAX_SIZE, 0444, proc_dostring,          false);
 280        /* &table[13] is terminator */
 281
 282        return table;
 283}
 284
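     /*
      * Build one "domainN" sysctl directory per sched_domain attached to
      * @cpu; the extra table slot stays zeroed and terminates the table.
      */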
 285static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 286{
 287        struct ctl_table *entry, *table;
 288        struct sched_domain *sd;
 289        int domain_num = 0, i;
 290        char buf[32];
 291
 292        for_each_domain(cpu, sd)
 293                domain_num++;
 294        entry = table = sd_alloc_ctl_entry(domain_num + 1);
 295        if (table == NULL)
 296                return NULL;
 297
 298        i = 0;
 299        for_each_domain(cpu, sd) {
 300                snprintf(buf, 32, "domain%d", i);
 301                entry->procname = kstrdup(buf, GFP_KERNEL);
 302                entry->mode = 0555;
 303                entry->child = sd_alloc_ctl_domain_table(sd);
 304                entry++;
 305                i++;
 306        }
 307        return table;
 308}
 309
 310static cpumask_var_t            sd_sysctl_cpus;
 311static struct ctl_table_header  *sd_sysctl_header;
 312
 313void register_sched_domain_sysctl(void)
 314{
 315        static struct ctl_table *cpu_entries;
 316        static struct ctl_table **cpu_idx;
 317        char buf[32];
 318        int i;
 319
 320        if (!cpu_entries) {
 321                cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
 322                if (!cpu_entries)
 323                        return;
 324
 325                WARN_ON(sd_ctl_dir[0].child);
 326                sd_ctl_dir[0].child = cpu_entries;
 327        }
 328
 329        if (!cpu_idx) {
 330                struct ctl_table *e = cpu_entries;
 331
 332                cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
 333                if (!cpu_idx)
 334                        return;
 335
 336                /* deal with sparse possible map */
 337                for_each_possible_cpu(i) {
 338                        cpu_idx[i] = e;
 339                        e++;
 340                }
 341        }
 342
 343        if (!cpumask_available(sd_sysctl_cpus)) {
 344                if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
 345                        return;
 346
  347                /* init to cpu_possible_mask so there are no holes in @cpu_entries */
 348                cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
 349        }
 350
 351        for_each_cpu(i, sd_sysctl_cpus) {
 352                struct ctl_table *e = cpu_idx[i];
 353
 354                if (e->child)
 355                        sd_free_ctl_entry(&e->child);
 356
 357                if (!e->procname) {
 358                        snprintf(buf, 32, "cpu%d", i);
 359                        e->procname = kstrdup(buf, GFP_KERNEL);
 360                }
 361                e->mode = 0555;
 362                e->child = sd_alloc_ctl_cpu_table(i);
 363
 364                __cpumask_clear_cpu(i, sd_sysctl_cpus);
 365        }
 366
 367        WARN_ON(sd_sysctl_header);
 368        sd_sysctl_header = register_sysctl_table(sd_ctl_root);
 369}
 370
 371void dirty_sched_domain_sysctl(int cpu)
 372{
 373        if (cpumask_available(sd_sysctl_cpus))
 374                __cpumask_set_cpu(cpu, sd_sysctl_cpus);
 375}
 376
 377/* may be called multiple times per register */
 378void unregister_sched_domain_sysctl(void)
 379{
 380        unregister_sysctl_table(sd_sysctl_header);
 381        sd_sysctl_header = NULL;
 382}
 383#endif /* CONFIG_SYSCTL */
 384#endif /* CONFIG_SMP */
 385
 386#ifdef CONFIG_FAIR_GROUP_SCHED
 387static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 388{
 389        struct sched_entity *se = tg->se[cpu];
 390
 391#define P(F)            SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)F)
 392#define P_SCHEDSTAT(F)  SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)schedstat_val(F))
 393#define PN(F)           SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
 394#define PN_SCHEDSTAT(F) SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
 395
 396        if (!se)
 397                return;
 398
 399        PN(se->exec_start);
 400        PN(se->vruntime);
 401        PN(se->sum_exec_runtime);
 402
 403        if (schedstat_enabled()) {
 404                PN_SCHEDSTAT(se->statistics.wait_start);
 405                PN_SCHEDSTAT(se->statistics.sleep_start);
 406                PN_SCHEDSTAT(se->statistics.block_start);
 407                PN_SCHEDSTAT(se->statistics.sleep_max);
 408                PN_SCHEDSTAT(se->statistics.block_max);
 409                PN_SCHEDSTAT(se->statistics.exec_max);
 410                PN_SCHEDSTAT(se->statistics.slice_max);
 411                PN_SCHEDSTAT(se->statistics.wait_max);
 412                PN_SCHEDSTAT(se->statistics.wait_sum);
 413                P_SCHEDSTAT(se->statistics.wait_count);
 414        }
 415
 416        P(se->load.weight);
 417        P(se->runnable_weight);
 418#ifdef CONFIG_SMP
 419        P(se->avg.load_avg);
 420        P(se->avg.util_avg);
 421        P(se->avg.runnable_load_avg);
 422#endif
 423
 424#undef PN_SCHEDSTAT
 425#undef PN
 426#undef P_SCHEDSTAT
 427#undef P
 428}
 429#endif
 430
 431#ifdef CONFIG_CGROUP_SCHED
 432static char group_path[PATH_MAX];
 433
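     /*
      * Format the cgroup path of @tg into the static group_path[] buffer;
      * autogroups are handled by autogroup_path() and get their synthetic
      * /autogroup-<id> name instead.
      */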
 434static char *task_group_path(struct task_group *tg)
 435{
 436        if (autogroup_path(tg, group_path, PATH_MAX))
 437                return group_path;
 438
 439        cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
 440
 441        return group_path;
 442}
 443#endif
 444
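     /*
      * Print one row of the "runnable tasks" table for @p: state, comm,
      * PID, vruntime, context switches, priority and schedstat times.
      */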
 445static void
 446print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 447{
 448        if (rq->curr == p)
 449                SEQ_printf(m, ">R");
 450        else
 451                SEQ_printf(m, " %c", task_state_to_char(p));
 452
 453        SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
 454                p->comm, task_pid_nr(p),
 455                SPLIT_NS(p->se.vruntime),
 456                (long long)(p->nvcsw + p->nivcsw),
 457                p->prio);
 458
 459        SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 460                SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
 461                SPLIT_NS(p->se.sum_exec_runtime),
 462                SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
 463
 464#ifdef CONFIG_NUMA_BALANCING
 465        SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 466#endif
 467#ifdef CONFIG_CGROUP_SCHED
 468        SEQ_printf(m, " %s", task_group_path(task_group(p)));
 469#endif
 470
 471        SEQ_printf(m, "\n");
 472}
 473
 474static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 475{
 476        struct task_struct *g, *p;
 477
 478        SEQ_printf(m, "\n");
 479        SEQ_printf(m, "runnable tasks:\n");
 480        SEQ_printf(m, " S           task   PID         tree-key  switches  prio"
 481                   "     wait-time             sum-exec        sum-sleep\n");
 482        SEQ_printf(m, "-------------------------------------------------------"
 483                   "----------------------------------------------------\n");
 484
 485        rcu_read_lock();
 486        for_each_process_thread(g, p) {
 487                if (task_cpu(p) != rq_cpu)
 488                        continue;
 489
 490                print_task(m, rq, p);
 491        }
 492        rcu_read_unlock();
 493}
 494
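     /*
      * Dump the state of one cfs_rq: vruntime spread, nr_running, load and
      * the PELT averages, plus bandwidth throttling and group statistics
      * where the corresponding config options are enabled.
      */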
 495void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 496{
 497        s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
 498                spread, rq0_min_vruntime, spread0;
 499        struct rq *rq = cpu_rq(cpu);
 500        struct sched_entity *last;
 501        unsigned long flags;
 502
 503#ifdef CONFIG_FAIR_GROUP_SCHED
 504        SEQ_printf(m, "\n");
 505        SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
 506#else
 507        SEQ_printf(m, "\n");
 508        SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
 509#endif
 510        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 511                        SPLIT_NS(cfs_rq->exec_clock));
 512
 513        raw_spin_lock_irqsave(&rq->lock, flags);
 514        if (rb_first_cached(&cfs_rq->tasks_timeline))
 515                MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
 516        last = __pick_last_entity(cfs_rq);
 517        if (last)
 518                max_vruntime = last->vruntime;
 519        min_vruntime = cfs_rq->min_vruntime;
 520        rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
 521        raw_spin_unlock_irqrestore(&rq->lock, flags);
 522        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
 523                        SPLIT_NS(MIN_vruntime));
 524        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
 525                        SPLIT_NS(min_vruntime));
 526        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
 527                        SPLIT_NS(max_vruntime));
 528        spread = max_vruntime - MIN_vruntime;
 529        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
 530                        SPLIT_NS(spread));
 531        spread0 = min_vruntime - rq0_min_vruntime;
 532        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
 533                        SPLIT_NS(spread0));
 534        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 535                        cfs_rq->nr_spread_over);
 536        SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
 537        SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 538#ifdef CONFIG_SMP
 539        SEQ_printf(m, "  .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight);
 540        SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
 541                        cfs_rq->avg.load_avg);
 542        SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
 543                        cfs_rq->avg.runnable_load_avg);
 544        SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
 545                        cfs_rq->avg.util_avg);
 546        SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
 547                        cfs_rq->avg.util_est.enqueued);
 548        SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
 549                        cfs_rq->removed.load_avg);
 550        SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
 551                        cfs_rq->removed.util_avg);
 552        SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_sum",
 553                        cfs_rq->removed.runnable_sum);
 554#ifdef CONFIG_FAIR_GROUP_SCHED
 555        SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
 556                        cfs_rq->tg_load_avg_contrib);
 557        SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
 558                        atomic_long_read(&cfs_rq->tg->load_avg));
 559#endif
 560#endif
 561#ifdef CONFIG_CFS_BANDWIDTH
 562        SEQ_printf(m, "  .%-30s: %d\n", "throttled",
 563                        cfs_rq->throttled);
 564        SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
 565                        cfs_rq->throttle_count);
 566#endif
 567
 568#ifdef CONFIG_FAIR_GROUP_SCHED
 569        print_cfs_group_stats(m, cpu, cfs_rq->tg);
 570#endif
 571}
 572
 573void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 574{
 575#ifdef CONFIG_RT_GROUP_SCHED
 576        SEQ_printf(m, "\n");
 577        SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
 578#else
 579        SEQ_printf(m, "\n");
 580        SEQ_printf(m, "rt_rq[%d]:\n", cpu);
 581#endif
 582
 583#define P(x) \
 584        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
 585#define PU(x) \
 586        SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 587#define PN(x) \
 588        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 589
 590        PU(rt_nr_running);
 591#ifdef CONFIG_SMP
 592        PU(rt_nr_migratory);
 593#endif
 594        P(rt_throttled);
 595        PN(rt_time);
 596        PN(rt_runtime);
 597
 598#undef PN
 599#undef PU
 600#undef P
 601}
 602
 603void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 604{
 605        struct dl_bw *dl_bw;
 606
 607        SEQ_printf(m, "\n");
 608        SEQ_printf(m, "dl_rq[%d]:\n", cpu);
 609
 610#define PU(x) \
 611        SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
 612
 613        PU(dl_nr_running);
 614#ifdef CONFIG_SMP
 615        PU(dl_nr_migratory);
 616        dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 617#else
 618        dl_bw = &dl_rq->dl_bw;
 619#endif
 620        SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 621        SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
 622
 623#undef PU
 624}
 625
 626extern __read_mostly int sched_clock_running;
 627
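     /*
      * Dump the state of one runqueue: global counters, cpu_load[], the
      * schedstat counters and the per-class (CFS, RT, deadline) details.
      */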
 628static void print_cpu(struct seq_file *m, int cpu)
 629{
 630        struct rq *rq = cpu_rq(cpu);
 631        unsigned long flags;
 632
 633#ifdef CONFIG_X86
 634        {
 635                unsigned int freq = cpu_khz ? : 1;
 636
 637                SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
 638                           cpu, freq / 1000, (freq % 1000));
 639        }
 640#else
 641        SEQ_printf(m, "cpu#%d\n", cpu);
 642#endif
 643
 644#define P(x)                                                            \
 645do {                                                                    \
 646        if (sizeof(rq->x) == 4)                                         \
 647                SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
 648        else                                                            \
 649                SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
 650} while (0)
 651
 652#define PN(x) \
 653        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
 654
 655        P(nr_running);
 656        SEQ_printf(m, "  .%-30s: %lu\n", "load",
 657                   rq->load.weight);
 658        P(nr_switches);
 659        P(nr_load_updates);
 660        P(nr_uninterruptible);
 661        PN(next_balance);
 662        SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
 663        PN(clock);
 664        PN(clock_task);
 665        P(cpu_load[0]);
 666        P(cpu_load[1]);
 667        P(cpu_load[2]);
 668        P(cpu_load[3]);
 669        P(cpu_load[4]);
 670#undef P
 671#undef PN
 672
 673#ifdef CONFIG_SMP
 674#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 675        P64(avg_idle);
 676        P64(max_idle_balance_cost);
 677#undef P64
 678#endif
 679
 680#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
 681        if (schedstat_enabled()) {
 682                P(yld_count);
 683                P(sched_count);
 684                P(sched_goidle);
 685                P(ttwu_count);
 686                P(ttwu_local);
 687        }
 688#undef P
 689
 690        spin_lock_irqsave(&sched_debug_lock, flags);
 691        print_cfs_stats(m, cpu);
 692        print_rt_stats(m, cpu);
 693        print_dl_stats(m, cpu);
 694
 695        print_rq(m, rq, cpu);
 696        spin_unlock_irqrestore(&sched_debug_lock, flags);
 697        SEQ_printf(m, "\n");
 698}
 699
 700static const char *sched_tunable_scaling_names[] = {
 701        "none",
  702        "logarithmic",
 703        "linear"
 704};
 705
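     /*
      * Print the version banner, the clock values (ktime, sched_clock,
      * cpu_clock, jiffies) and the sysctl_sched_* tunables.
      */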
 706static void sched_debug_header(struct seq_file *m)
 707{
 708        u64 ktime, sched_clk, cpu_clk;
 709        unsigned long flags;
 710
 711        local_irq_save(flags);
 712        ktime = ktime_to_ns(ktime_get());
 713        sched_clk = sched_clock();
 714        cpu_clk = local_clock();
 715        local_irq_restore(flags);
 716
 717        SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
 718                init_utsname()->release,
 719                (int)strcspn(init_utsname()->version, " "),
 720                init_utsname()->version);
 721
 722#define P(x) \
 723        SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
 724#define PN(x) \
 725        SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 726        PN(ktime);
 727        PN(sched_clk);
 728        PN(cpu_clk);
 729        P(jiffies);
 730#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 731        P(sched_clock_stable());
 732#endif
 733#undef PN
 734#undef P
 735
 736        SEQ_printf(m, "\n");
 737        SEQ_printf(m, "sysctl_sched\n");
 738
 739#define P(x) \
 740        SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 741#define PN(x) \
 742        SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 743        PN(sysctl_sched_latency);
 744        PN(sysctl_sched_min_granularity);
 745        PN(sysctl_sched_wakeup_granularity);
 746        P(sysctl_sched_child_runs_first);
 747        P(sysctl_sched_features);
 748#undef PN
 749#undef P
 750
 751        SEQ_printf(m, "  .%-40s: %d (%s)\n",
 752                "sysctl_sched_tunable_scaling",
 753                sysctl_sched_tunable_scaling,
 754                sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
 755        SEQ_printf(m, "\n");
 756}
 757
 758static int sched_debug_show(struct seq_file *m, void *v)
 759{
 760        int cpu = (unsigned long)(v - 2);
 761
 762        if (cpu != -1)
 763                print_cpu(m, cpu);
 764        else
 765                sched_debug_header(m);
 766
 767        return 0;
 768}
 769
 770void sysrq_sched_debug_show(void)
 771{
 772        int cpu;
 773
 774        sched_debug_header(NULL);
 775        for_each_online_cpu(cpu)
 776                print_cpu(NULL, cpu);
 777
 778}
 779
 780/*
  781 * This iterator needs some explanation.
 782 * It returns 1 for the header position.
 783 * This means 2 is CPU 0.
 784 * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
 785 * to use cpumask_* to iterate over the CPUs.
 786 */
 787static void *sched_debug_start(struct seq_file *file, loff_t *offset)
 788{
 789        unsigned long n = *offset;
 790
 791        if (n == 0)
 792                return (void *) 1;
 793
 794        n--;
 795
 796        if (n > 0)
 797                n = cpumask_next(n - 1, cpu_online_mask);
 798        else
 799                n = cpumask_first(cpu_online_mask);
 800
 801        *offset = n + 1;
 802
 803        if (n < nr_cpu_ids)
 804                return (void *)(unsigned long)(n + 2);
 805
 806        return NULL;
 807}
 808
 809static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
 810{
 811        (*offset)++;
 812        return sched_debug_start(file, offset);
 813}
 814
 815static void sched_debug_stop(struct seq_file *file, void *data)
 816{
 817}
 818
 819static const struct seq_operations sched_debug_sops = {
 820        .start          = sched_debug_start,
 821        .next           = sched_debug_next,
 822        .stop           = sched_debug_stop,
 823        .show           = sched_debug_show,
 824};
 825
 826static int sched_debug_release(struct inode *inode, struct file *file)
 827{
 828        seq_release(inode, file);
 829
 830        return 0;
 831}
 832
 833static int sched_debug_open(struct inode *inode, struct file *filp)
 834{
 835        int ret = 0;
 836
 837        ret = seq_open(filp, &sched_debug_sops);
 838
 839        return ret;
 840}
 841
 842static const struct file_operations sched_debug_fops = {
 843        .open           = sched_debug_open,
 844        .read           = seq_read,
 845        .llseek         = seq_lseek,
 846        .release        = sched_debug_release,
 847};
 848
 849static int __init init_sched_debug_procfs(void)
 850{
 851        struct proc_dir_entry *pe;
 852
 853        pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
 854        if (!pe)
 855                return -ENOMEM;
 856        return 0;
 857}
 858
 859__initcall(init_sched_debug_procfs);
 860
 861#define __P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)F)
 862#define   P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)p->F)
 863#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 864#define   PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 865
 866
 867#ifdef CONFIG_NUMA_BALANCING
 868void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
 869                unsigned long tpf, unsigned long gsf, unsigned long gpf)
 870{
 871        SEQ_printf(m, "numa_faults node=%d ", node);
 872        SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
 873        SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
 874}
 875#endif
 876
 877
 878static void sched_show_numa(struct task_struct *p, struct seq_file *m)
 879{
 880#ifdef CONFIG_NUMA_BALANCING
 881        struct mempolicy *pol;
 882
 883        if (p->mm)
 884                P(mm->numa_scan_seq);
 885
 886        task_lock(p);
 887        pol = p->mempolicy;
 888        if (pol && !(pol->flags & MPOL_F_MORON))
 889                pol = NULL;
 890        mpol_get(pol);
 891        task_unlock(p);
 892
 893        P(numa_pages_migrated);
 894        P(numa_preferred_nid);
 895        P(total_numa_faults);
 896        SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
 897                        task_node(p), task_numa_group_id(p));
 898        show_numa_stats(p, m);
 899        mpol_put(pol);
 900#endif
 901}
 902
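     /*
      * Back end of /proc/<pid>/sched: dump the per-task scheduler state,
      * including the full schedstat counters when schedstats are enabled.
      */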
 903void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 904                                                  struct seq_file *m)
 905{
 906        unsigned long nr_switches;
 907
 908        SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
 909                                                get_nr_threads(p));
 910        SEQ_printf(m,
 911                "---------------------------------------------------------"
 912                "----------\n");
 913#define __P(F) \
 914        SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
 915#define P(F) \
 916        SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
 917#define P_SCHEDSTAT(F) \
 918        SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
 919#define __PN(F) \
 920        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 921#define PN(F) \
 922        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 923#define PN_SCHEDSTAT(F) \
 924        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
 925
 926        PN(se.exec_start);
 927        PN(se.vruntime);
 928        PN(se.sum_exec_runtime);
 929
 930        nr_switches = p->nvcsw + p->nivcsw;
 931
 932        P(se.nr_migrations);
 933
 934        if (schedstat_enabled()) {
 935                u64 avg_atom, avg_per_cpu;
 936
 937                PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
 938                PN_SCHEDSTAT(se.statistics.wait_start);
 939                PN_SCHEDSTAT(se.statistics.sleep_start);
 940                PN_SCHEDSTAT(se.statistics.block_start);
 941                PN_SCHEDSTAT(se.statistics.sleep_max);
 942                PN_SCHEDSTAT(se.statistics.block_max);
 943                PN_SCHEDSTAT(se.statistics.exec_max);
 944                PN_SCHEDSTAT(se.statistics.slice_max);
 945                PN_SCHEDSTAT(se.statistics.wait_max);
 946                PN_SCHEDSTAT(se.statistics.wait_sum);
 947                P_SCHEDSTAT(se.statistics.wait_count);
 948                PN_SCHEDSTAT(se.statistics.iowait_sum);
 949                P_SCHEDSTAT(se.statistics.iowait_count);
 950                P_SCHEDSTAT(se.statistics.nr_migrations_cold);
 951                P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
 952                P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
 953                P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
 954                P_SCHEDSTAT(se.statistics.nr_forced_migrations);
 955                P_SCHEDSTAT(se.statistics.nr_wakeups);
 956                P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
 957                P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
 958                P_SCHEDSTAT(se.statistics.nr_wakeups_local);
 959                P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
 960                P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
 961                P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
 962                P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
 963                P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
 964
 965                avg_atom = p->se.sum_exec_runtime;
 966                if (nr_switches)
 967                        avg_atom = div64_ul(avg_atom, nr_switches);
 968                else
 969                        avg_atom = -1LL;
 970
 971                avg_per_cpu = p->se.sum_exec_runtime;
 972                if (p->se.nr_migrations) {
 973                        avg_per_cpu = div64_u64(avg_per_cpu,
 974                                                p->se.nr_migrations);
 975                } else {
 976                        avg_per_cpu = -1LL;
 977                }
 978
 979                __PN(avg_atom);
 980                __PN(avg_per_cpu);
 981        }
 982
 983        __P(nr_switches);
 984        SEQ_printf(m, "%-45s:%21Ld\n",
 985                   "nr_voluntary_switches", (long long)p->nvcsw);
 986        SEQ_printf(m, "%-45s:%21Ld\n",
 987                   "nr_involuntary_switches", (long long)p->nivcsw);
 988
 989        P(se.load.weight);
 990        P(se.runnable_weight);
 991#ifdef CONFIG_SMP
 992        P(se.avg.load_sum);
 993        P(se.avg.runnable_load_sum);
 994        P(se.avg.util_sum);
 995        P(se.avg.load_avg);
 996        P(se.avg.runnable_load_avg);
 997        P(se.avg.util_avg);
 998        P(se.avg.last_update_time);
 999        P(se.avg.util_est.ewma);
1000        P(se.avg.util_est.enqueued);
1001#endif
1002        P(policy);
1003        P(prio);
1004        if (p->policy == SCHED_DEADLINE) {
1005                P(dl.runtime);
1006                P(dl.deadline);
1007        }
1008#undef PN_SCHEDSTAT
1009#undef PN
1010#undef __PN
1011#undef P_SCHEDSTAT
1012#undef P
1013#undef __P
1014
1015        {
1016                unsigned int this_cpu = raw_smp_processor_id();
1017                u64 t0, t1;
1018
1019                t0 = cpu_clock(this_cpu);
1020                t1 = cpu_clock(this_cpu);
1021                SEQ_printf(m, "%-45s:%21Ld\n",
1022                           "clock-delta", (long long)(t1-t0));
1023        }
1024
1025        sched_show_numa(p, m);
1026}
1027
1028void proc_sched_set_task(struct task_struct *p)
1029{
1030#ifdef CONFIG_SCHEDSTATS
1031        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
1032#endif
1033}
1034