linux/tools/power/x86/turbostat/turbostat.c
<<
>>
Prefs
   1/*
   2 * turbostat -- show CPU frequency and C-state residency
   3 * on modern Intel turbo-capable processors.
   4 *
   5 * Copyright (c) 2013 Intel Corporation.
   6 * Len Brown <len.brown@intel.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify it
   9 * under the terms and conditions of the GNU General Public License,
  10 * version 2, as published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope it will be useful, but WITHOUT
  13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15 * more details.
  16 *
  17 * You should have received a copy of the GNU General Public License along with
  18 * this program; if not, write to the Free Software Foundation, Inc.,
  19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  20 */
  21
  22#define _GNU_SOURCE
  23#include MSRHEADER
  24#include <stdarg.h>
  25#include <stdio.h>
  26#include <err.h>
  27#include <unistd.h>
  28#include <sys/types.h>
  29#include <sys/wait.h>
  30#include <sys/stat.h>
  31#include <sys/resource.h>
  32#include <fcntl.h>
  33#include <signal.h>
  34#include <sys/time.h>
  35#include <stdlib.h>
  36#include <dirent.h>
  37#include <string.h>
  38#include <ctype.h>
  39#include <sched.h>
  40#include <cpuid.h>
  41
  42char *proc_stat = "/proc/stat";
  43unsigned int interval_sec = 5;  /* set with -i interval_sec */
  44unsigned int verbose;           /* set with -v */
  45unsigned int rapl_verbose;      /* set with -R */
  46unsigned int rapl_joules;       /* set with -J */
  47unsigned int thermal_verbose;   /* set with -T */
  48unsigned int summary_only;      /* set with -S */
  49unsigned int dump_only;         /* set with -s */
  50unsigned int skip_c0;
  51unsigned int skip_c1;
  52unsigned int do_nhm_cstates;
  53unsigned int do_snb_cstates;
  54unsigned int do_c8_c9_c10;
  55unsigned int do_slm_cstates;
  56unsigned int use_c1_residency_msr;
  57unsigned int has_aperf;
  58unsigned int has_epb;
  59unsigned int units = 1000000;   /* MHz etc */
  60unsigned int genuine_intel;
  61unsigned int has_invariant_tsc;
  62unsigned int do_nehalem_platform_info;
  63unsigned int do_nehalem_turbo_ratio_limit;
  64unsigned int do_ivt_turbo_ratio_limit;
  65unsigned int extra_msr_offset32;
  66unsigned int extra_msr_offset64;
  67unsigned int extra_delta_offset32;
  68unsigned int extra_delta_offset64;
  69int do_smi;
  70double bclk;
  71unsigned int show_pkg;
  72unsigned int show_core;
  73unsigned int show_cpu;
  74unsigned int show_pkg_only;
  75unsigned int show_core_only;
  76char *output_buffer, *outp;
  77unsigned int do_rapl;
  78unsigned int do_dts;
  79unsigned int do_ptm;
  80unsigned int tcc_activation_temp;
  81unsigned int tcc_activation_temp_override;
  82double rapl_power_units, rapl_energy_units, rapl_time_units;
  83double rapl_joule_counter_range;
  84
  85#define RAPL_PKG                (1 << 0)
  86                                        /* 0x610 MSR_PKG_POWER_LIMIT */
  87                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
  88#define RAPL_PKG_PERF_STATUS    (1 << 1)
  89                                        /* 0x613 MSR_PKG_PERF_STATUS */
  90#define RAPL_PKG_POWER_INFO     (1 << 2)
  91                                        /* 0x614 MSR_PKG_POWER_INFO */
  92
  93#define RAPL_DRAM               (1 << 3)
  94                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
  95                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
  96                                        /* 0x61c MSR_DRAM_POWER_INFO */
  97#define RAPL_DRAM_PERF_STATUS   (1 << 4)
  98                                        /* 0x61b MSR_DRAM_PERF_STATUS */
  99
 100#define RAPL_CORES              (1 << 5)
 101                                        /* 0x638 MSR_PP0_POWER_LIMIT */
 102                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
 103#define RAPL_CORE_POLICY        (1 << 6)
 104                                        /* 0x63a MSR_PP0_POLICY */
 105
 106
 107#define RAPL_GFX                (1 << 7)
 108                                        /* 0x640 MSR_PP1_POWER_LIMIT */
 109                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
 110                                        /* 0x642 MSR_PP1_POLICY */
 111#define TJMAX_DEFAULT   100
 112
 113#define MAX(a, b) ((a) > (b) ? (a) : (b))
 114
 115int aperf_mperf_unstable;
 116int backwards_count;
 117char *progname;
 118
 119cpu_set_t *cpu_present_set, *cpu_affinity_set;
 120size_t cpu_present_setsize, cpu_affinity_setsize;
 121
 122struct thread_data {
 123        unsigned long long tsc;
 124        unsigned long long aperf;
 125        unsigned long long mperf;
 126        unsigned long long c1;
 127        unsigned long long extra_msr64;
 128        unsigned long long extra_delta64;
 129        unsigned long long extra_msr32;
 130        unsigned long long extra_delta32;
 131        unsigned int smi_count;
 132        unsigned int cpu_id;
 133        unsigned int flags;
 134#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
 135#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
 136} *thread_even, *thread_odd;
 137
 138struct core_data {
 139        unsigned long long c3;
 140        unsigned long long c6;
 141        unsigned long long c7;
 142        unsigned int core_temp_c;
 143        unsigned int core_id;
 144} *core_even, *core_odd;
 145
 146struct pkg_data {
 147        unsigned long long pc2;
 148        unsigned long long pc3;
 149        unsigned long long pc6;
 150        unsigned long long pc7;
 151        unsigned long long pc8;
 152        unsigned long long pc9;
 153        unsigned long long pc10;
 154        unsigned int package_id;
 155        unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
 156        unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
 157        unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
 158        unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
 159        unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
 160        unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
 161        unsigned int pkg_temp_c;
 162
 163} *package_even, *package_odd;
 164
 165#define ODD_COUNTERS thread_odd, core_odd, package_odd
 166#define EVEN_COUNTERS thread_even, core_even, package_even
 167
 168#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
 169        (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
 170                topo.num_threads_per_core + \
 171                (core_no) * topo.num_threads_per_core + (thread_no))
 172#define GET_CORE(core_base, core_no, pkg_no) \
 173        (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
 174#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 175
 176struct system_summary {
 177        struct thread_data threads;
 178        struct core_data cores;
 179        struct pkg_data packages;
 180} sum, average;
 181
 182
 183struct topo_params {
 184        int num_packages;
 185        int num_cpus;
 186        int num_cores;
 187        int max_cpu_num;
 188        int num_cores_per_pkg;
 189        int num_threads_per_core;
 190} topo;
 191
 192struct timeval tv_even, tv_odd, tv_delta;
 193
 194void setup_all_buffers(void);
 195
 196int cpu_is_not_present(int cpu)
 197{
 198        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 199}
 200/*
 201 * run func(thread, core, package) in topology order
 202 * skip non-present cpus
 203 */
 204
 205int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
 206        struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 207{
 208        int retval, pkg_no, core_no, thread_no;
 209
 210        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 211                for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
 212                        for (thread_no = 0; thread_no <
 213                                topo.num_threads_per_core; ++thread_no) {
 214                                struct thread_data *t;
 215                                struct core_data *c;
 216                                struct pkg_data *p;
 217
 218                                t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
 219
 220                                if (cpu_is_not_present(t->cpu_id))
 221                                        continue;
 222
 223                                c = GET_CORE(core_base, core_no, pkg_no);
 224                                p = GET_PKG(pkg_base, pkg_no);
 225
 226                                retval = func(t, c, p);
 227                                if (retval)
 228                                        return retval;
 229                        }
 230                }
 231        }
 232        return 0;
 233}
 234
 235int cpu_migrate(int cpu)
 236{
 237        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 238        CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
 239        if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
 240                return -1;
 241        else
 242                return 0;
 243}
 244
 245int get_msr(int cpu, off_t offset, unsigned long long *msr)
 246{
 247        ssize_t retval;
 248        char pathname[32];
 249        int fd;
 250
 251        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
 252        fd = open(pathname, O_RDONLY);
 253        if (fd < 0)
 254                return -1;
 255
 256        retval = pread(fd, msr, sizeof *msr, offset);
 257        close(fd);
 258
 259        if (retval != sizeof *msr) {
 260                fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset);
 261                return -1;
 262        }
 263
 264        return 0;
 265}
 266
 267/*
 268 * Example Format w/ field column widths:
 269 *
 270 * Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
 271 * 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567
 272 */
 273
 274void print_header(void)
 275{
 276        if (show_pkg)
 277                outp += sprintf(outp, "Package ");
 278        if (show_core)
 279                outp += sprintf(outp, "    Core ");
 280        if (show_cpu)
 281                outp += sprintf(outp, "    CPU ");
 282        if (has_aperf)
 283                outp += sprintf(outp, "Avg_MHz ");
 284        if (do_nhm_cstates)
 285                outp += sprintf(outp, "  %%Busy ");
 286        if (has_aperf)
 287                outp += sprintf(outp, "Bzy_MHz ");
 288        outp += sprintf(outp, "TSC_MHz ");
 289        if (do_smi)
 290                outp += sprintf(outp, "    SMI ");
 291        if (extra_delta_offset32)
 292                outp += sprintf(outp, " count 0x%03X ", extra_delta_offset32);
 293        if (extra_delta_offset64)
 294                outp += sprintf(outp, " COUNT 0x%03X ", extra_delta_offset64);
 295        if (extra_msr_offset32)
 296                outp += sprintf(outp, "  MSR 0x%03X ", extra_msr_offset32);
 297        if (extra_msr_offset64)
 298                outp += sprintf(outp, "          MSR 0x%03X ", extra_msr_offset64);
 299        if (do_nhm_cstates)
 300                outp += sprintf(outp, " CPU%%c1 ");
 301        if (do_nhm_cstates && !do_slm_cstates)
 302                outp += sprintf(outp, " CPU%%c3 ");
 303        if (do_nhm_cstates)
 304                outp += sprintf(outp, " CPU%%c6 ");
 305        if (do_snb_cstates)
 306                outp += sprintf(outp, " CPU%%c7 ");
 307
 308        if (do_dts)
 309                outp += sprintf(outp, "CoreTmp ");
 310        if (do_ptm)
 311                outp += sprintf(outp, " PkgTmp ");
 312
 313        if (do_snb_cstates)
 314                outp += sprintf(outp, "Pkg%%pc2 ");
 315        if (do_nhm_cstates && !do_slm_cstates)
 316                outp += sprintf(outp, "Pkg%%pc3 ");
 317        if (do_nhm_cstates && !do_slm_cstates)
 318                outp += sprintf(outp, "Pkg%%pc6 ");
 319        if (do_snb_cstates)
 320                outp += sprintf(outp, "Pkg%%pc7 ");
 321        if (do_c8_c9_c10) {
 322                outp += sprintf(outp, "Pkg%%pc8 ");
 323                outp += sprintf(outp, "Pkg%%pc9 ");
 324                outp += sprintf(outp, "Pk%%pc10 ");
 325        }
 326
 327        if (do_rapl && !rapl_joules) {
 328                if (do_rapl & RAPL_PKG)
 329                        outp += sprintf(outp, "PkgWatt ");
 330                if (do_rapl & RAPL_CORES)
 331                        outp += sprintf(outp, "CorWatt ");
 332                if (do_rapl & RAPL_GFX)
 333                        outp += sprintf(outp, "GFXWatt ");
 334                if (do_rapl & RAPL_DRAM)
 335                        outp += sprintf(outp, "RAMWatt ");
 336                if (do_rapl & RAPL_PKG_PERF_STATUS)
 337                        outp += sprintf(outp, "  PKG_%% ");
 338                if (do_rapl & RAPL_DRAM_PERF_STATUS)
 339                        outp += sprintf(outp, "  RAM_%% ");
 340        } else {
 341                if (do_rapl & RAPL_PKG)
 342                        outp += sprintf(outp, "  Pkg_J ");
 343                if (do_rapl & RAPL_CORES)
 344                        outp += sprintf(outp, "  Cor_J ");
 345                if (do_rapl & RAPL_GFX)
 346                        outp += sprintf(outp, "  GFX_J ");
 347                if (do_rapl & RAPL_DRAM)
 348                        outp += sprintf(outp, "  RAM_W ");
 349                if (do_rapl & RAPL_PKG_PERF_STATUS)
 350                        outp += sprintf(outp, "  PKG_%% ");
 351                if (do_rapl & RAPL_DRAM_PERF_STATUS)
 352                        outp += sprintf(outp, "  RAM_%% ");
 353                outp += sprintf(outp, "  time ");
 354
 355        }
 356        outp += sprintf(outp, "\n");
 357}
 358
 359int dump_counters(struct thread_data *t, struct core_data *c,
 360        struct pkg_data *p)
 361{
 362        outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
 363
 364        if (t) {
 365                outp += sprintf(outp, "CPU: %d flags 0x%x\n",
 366                        t->cpu_id, t->flags);
 367                outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
 368                outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
 369                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
 370                outp += sprintf(outp, "c1: %016llX\n", t->c1);
 371                outp += sprintf(outp, "msr0x%x: %08llX\n",
 372                        extra_delta_offset32, t->extra_delta32);
 373                outp += sprintf(outp, "msr0x%x: %016llX\n",
 374                        extra_delta_offset64, t->extra_delta64);
 375                outp += sprintf(outp, "msr0x%x: %08llX\n",
 376                        extra_msr_offset32, t->extra_msr32);
 377                outp += sprintf(outp, "msr0x%x: %016llX\n",
 378                        extra_msr_offset64, t->extra_msr64);
 379                if (do_smi)
 380                        outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
 381        }
 382
 383        if (c) {
 384                outp += sprintf(outp, "core: %d\n", c->core_id);
 385                outp += sprintf(outp, "c3: %016llX\n", c->c3);
 386                outp += sprintf(outp, "c6: %016llX\n", c->c6);
 387                outp += sprintf(outp, "c7: %016llX\n", c->c7);
 388                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
 389        }
 390
 391        if (p) {
 392                outp += sprintf(outp, "package: %d\n", p->package_id);
 393                outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
 394                outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
 395                outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
 396                outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
 397                outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
 398                outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
 399                outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
 400                outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
 401                outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
 402                outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
 403                outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
 404                outp += sprintf(outp, "Throttle PKG: %0X\n",
 405                        p->rapl_pkg_perf_status);
 406                outp += sprintf(outp, "Throttle RAM: %0X\n",
 407                        p->rapl_dram_perf_status);
 408                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 409        }
 410
 411        outp += sprintf(outp, "\n");
 412
 413        return 0;
 414}
 415
 416/*
 417 * column formatting convention & formats
 418 */
 419int format_counters(struct thread_data *t, struct core_data *c,
 420        struct pkg_data *p)
 421{
 422        double interval_float;
 423        char *fmt8;
 424
 425         /* if showing only 1st thread in core and this isn't one, bail out */
 426        if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 427                return 0;
 428
 429         /* if showing only 1st thread in pkg and this isn't one, bail out */
 430        if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 431                return 0;
 432
 433        interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 434
 435        /* topo columns, print blanks on 1st (average) line */
 436        if (t == &average.threads) {
 437                if (show_pkg)
 438                        outp += sprintf(outp, "       -");
 439                if (show_core)
 440                        outp += sprintf(outp, "       -");
 441                if (show_cpu)
 442                        outp += sprintf(outp, "       -");
 443        } else {
 444                if (show_pkg) {
 445                        if (p)
 446                                outp += sprintf(outp, "%8d", p->package_id);
 447                        else
 448                                outp += sprintf(outp, "       -");
 449                }
 450                if (show_core) {
 451                        if (c)
 452                                outp += sprintf(outp, "%8d", c->core_id);
 453                        else
 454                                outp += sprintf(outp, "       -");
 455                }
 456                if (show_cpu)
 457                        outp += sprintf(outp, "%8d", t->cpu_id);
 458        }
 459
 460        /* AvgMHz */
 461        if (has_aperf)
 462                outp += sprintf(outp, "%8.0f",
 463                        1.0 / units * t->aperf / interval_float);
 464
 465        /* %c0 */
 466        if (do_nhm_cstates) {
 467                if (!skip_c0)
 468                        outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
 469                else
 470                        outp += sprintf(outp, "********");
 471        }
 472
 473        /* BzyMHz */
 474        if (has_aperf)
 475                outp += sprintf(outp, "%8.0f",
 476                        1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
 477
 478        /* TSC */
 479        outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
 480
 481        /* SMI */
 482        if (do_smi)
 483                outp += sprintf(outp, "%8d", t->smi_count);
 484
 485        /* delta */
 486        if (extra_delta_offset32)
 487                outp += sprintf(outp, "  %11llu", t->extra_delta32);
 488
 489        /* DELTA */
 490        if (extra_delta_offset64)
 491                outp += sprintf(outp, "  %11llu", t->extra_delta64);
 492        /* msr */
 493        if (extra_msr_offset32)
 494                outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
 495
 496        /* MSR */
 497        if (extra_msr_offset64)
 498                outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
 499
 500        if (do_nhm_cstates) {
 501                if (!skip_c1)
 502                        outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
 503                else
 504                        outp += sprintf(outp, "********");
 505        }
 506
 507        /* print per-core data only for 1st thread in core */
 508        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 509                goto done;
 510
 511        if (do_nhm_cstates && !do_slm_cstates)
 512                outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
 513        if (do_nhm_cstates)
 514                outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
 515        if (do_snb_cstates)
 516                outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc);
 517
 518        if (do_dts)
 519                outp += sprintf(outp, "%8d", c->core_temp_c);
 520
 521        /* print per-package data only for 1st core in package */
 522        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 523                goto done;
 524
 525        if (do_ptm)
 526                outp += sprintf(outp, "%8d", p->pkg_temp_c);
 527
 528        if (do_snb_cstates)
 529                outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
 530        if (do_nhm_cstates && !do_slm_cstates)
 531                outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
 532        if (do_nhm_cstates && !do_slm_cstates)
 533                outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
 534        if (do_snb_cstates)
 535                outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
 536        if (do_c8_c9_c10) {
 537                outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
 538                outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc);
 539                outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc);
 540        }
 541
 542        /*
 543         * If measurement interval exceeds minimum RAPL Joule Counter range,
 544         * indicate that results are suspect by printing "**" in fraction place.
 545         */
 546        if (interval_float < rapl_joule_counter_range)
 547                fmt8 = "%8.2f";
 548        else
 549                fmt8 = " %6.0f**";
 550
 551        if (do_rapl && !rapl_joules) {
 552                if (do_rapl & RAPL_PKG)
 553                        outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
 554                if (do_rapl & RAPL_CORES)
 555                        outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
 556                if (do_rapl & RAPL_GFX)
 557                        outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
 558                if (do_rapl & RAPL_DRAM)
 559                        outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
 560                if (do_rapl & RAPL_PKG_PERF_STATUS)
 561                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 562                if (do_rapl & RAPL_DRAM_PERF_STATUS)
 563                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
 564        } else {
 565                if (do_rapl & RAPL_PKG)
 566                        outp += sprintf(outp, fmt8,
 567                                        p->energy_pkg * rapl_energy_units);
 568                if (do_rapl & RAPL_CORES)
 569                        outp += sprintf(outp, fmt8,
 570                                        p->energy_cores * rapl_energy_units);
 571                if (do_rapl & RAPL_GFX)
 572                        outp += sprintf(outp, fmt8,
 573                                        p->energy_gfx * rapl_energy_units);
 574                if (do_rapl & RAPL_DRAM)
 575                        outp += sprintf(outp, fmt8,
 576                                        p->energy_dram * rapl_energy_units);
 577                if (do_rapl & RAPL_PKG_PERF_STATUS)
 578                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 579                if (do_rapl & RAPL_DRAM_PERF_STATUS)
 580                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
 581        outp += sprintf(outp, fmt8, interval_float);
 582
 583        }
 584done:
 585        outp += sprintf(outp, "\n");
 586
 587        return 0;
 588}
 589
 590void flush_stdout()
 591{
 592        fputs(output_buffer, stdout);
 593        fflush(stdout);
 594        outp = output_buffer;
 595}
 596void flush_stderr()
 597{
 598        fputs(output_buffer, stderr);
 599        outp = output_buffer;
 600}
 601void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 602{
 603        static int printed;
 604
 605        if (!printed || !summary_only)
 606                print_header();
 607
 608        if (topo.num_cpus > 1)
 609                format_counters(&average.threads, &average.cores,
 610                        &average.packages);
 611
 612        printed = 1;
 613
 614        if (summary_only)
 615                return;
 616
 617        for_all_cpus(format_counters, t, c, p);
 618}
 619
 620#define DELTA_WRAP32(new, old)                  \
 621        if (new > old) {                        \
 622                old = new - old;                \
 623        } else {                                \
 624                old = 0x100000000 + new - old;  \
 625        }
 626
 627void
 628delta_package(struct pkg_data *new, struct pkg_data *old)
 629{
 630        old->pc2 = new->pc2 - old->pc2;
 631        old->pc3 = new->pc3 - old->pc3;
 632        old->pc6 = new->pc6 - old->pc6;
 633        old->pc7 = new->pc7 - old->pc7;
 634        old->pc8 = new->pc8 - old->pc8;
 635        old->pc9 = new->pc9 - old->pc9;
 636        old->pc10 = new->pc10 - old->pc10;
 637        old->pkg_temp_c = new->pkg_temp_c;
 638
 639        DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
 640        DELTA_WRAP32(new->energy_cores, old->energy_cores);
 641        DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
 642        DELTA_WRAP32(new->energy_dram, old->energy_dram);
 643        DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
 644        DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
 645}
 646
 647void
 648delta_core(struct core_data *new, struct core_data *old)
 649{
 650        old->c3 = new->c3 - old->c3;
 651        old->c6 = new->c6 - old->c6;
 652        old->c7 = new->c7 - old->c7;
 653        old->core_temp_c = new->core_temp_c;
 654}
 655
 656/*
 657 * old = new - old
 658 */
 659void
 660delta_thread(struct thread_data *new, struct thread_data *old,
 661        struct core_data *core_delta)
 662{
 663        old->tsc = new->tsc - old->tsc;
 664
 665        /* check for TSC < 1 Mcycles over interval */
 666        if (old->tsc < (1000 * 1000))
 667                errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
 668                     "You can disable all c-states by booting with \"idle=poll\"\n"
 669                     "or just the deep ones with \"processor.max_cstate=1\"");
 670
 671        old->c1 = new->c1 - old->c1;
 672
 673        if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
 674                old->aperf = new->aperf - old->aperf;
 675                old->mperf = new->mperf - old->mperf;
 676        } else {
 677
 678                if (!aperf_mperf_unstable) {
 679                        fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
 680                        fprintf(stderr, "* Frequency results do not cover entire interval *\n");
 681                        fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
 682
 683                        aperf_mperf_unstable = 1;
 684                }
 685                /*
 686                 * mperf delta is likely a huge "positive" number
 687                 * can not use it for calculating c0 time
 688                 */
 689                skip_c0 = 1;
 690                skip_c1 = 1;
 691        }
 692
 693
 694        if (use_c1_residency_msr) {
 695                /*
 696                 * Some models have a dedicated C1 residency MSR,
 697                 * which should be more accurate than the derivation below.
 698                 */
 699        } else {
 700                /*
 701                 * As counter collection is not atomic,
 702                 * it is possible for mperf's non-halted cycles + idle states
 703                 * to exceed TSC's all cycles: show c1 = 0% in that case.
 704                 */
 705                if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
 706                        old->c1 = 0;
 707                else {
 708                        /* normal case, derive c1 */
 709                        old->c1 = old->tsc - old->mperf - core_delta->c3
 710                                - core_delta->c6 - core_delta->c7;
 711                }
 712        }
 713
 714        if (old->mperf == 0) {
 715                if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
 716                old->mperf = 1; /* divide by 0 protection */
 717        }
 718
 719        old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
 720        old->extra_delta32 &= 0xFFFFFFFF;
 721
 722        old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
 723
 724        /*
 725         * Extra MSR is just a snapshot, simply copy latest w/o subtracting
 726         */
 727        old->extra_msr32 = new->extra_msr32;
 728        old->extra_msr64 = new->extra_msr64;
 729
 730        if (do_smi)
 731                old->smi_count = new->smi_count - old->smi_count;
 732}
 733
 734int delta_cpu(struct thread_data *t, struct core_data *c,
 735        struct pkg_data *p, struct thread_data *t2,
 736        struct core_data *c2, struct pkg_data *p2)
 737{
 738        /* calculate core delta only for 1st thread in core */
 739        if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
 740                delta_core(c, c2);
 741
 742        /* always calculate thread delta */
 743        delta_thread(t, t2, c2);        /* c2 is core delta */
 744
 745        /* calculate package delta only for 1st core in package */
 746        if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
 747                delta_package(p, p2);
 748
 749        return 0;
 750}
 751
 752void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 753{
 754        t->tsc = 0;
 755        t->aperf = 0;
 756        t->mperf = 0;
 757        t->c1 = 0;
 758
 759        t->smi_count = 0;
 760        t->extra_delta32 = 0;
 761        t->extra_delta64 = 0;
 762
 763        /* tells format_counters to dump all fields from this set */
 764        t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
 765
 766        c->c3 = 0;
 767        c->c6 = 0;
 768        c->c7 = 0;
 769        c->core_temp_c = 0;
 770
 771        p->pc2 = 0;
 772        p->pc3 = 0;
 773        p->pc6 = 0;
 774        p->pc7 = 0;
 775        p->pc8 = 0;
 776        p->pc9 = 0;
 777        p->pc10 = 0;
 778
 779        p->energy_pkg = 0;
 780        p->energy_dram = 0;
 781        p->energy_cores = 0;
 782        p->energy_gfx = 0;
 783        p->rapl_pkg_perf_status = 0;
 784        p->rapl_dram_perf_status = 0;
 785        p->pkg_temp_c = 0;
 786}
 787int sum_counters(struct thread_data *t, struct core_data *c,
 788        struct pkg_data *p)
 789{
 790        average.threads.tsc += t->tsc;
 791        average.threads.aperf += t->aperf;
 792        average.threads.mperf += t->mperf;
 793        average.threads.c1 += t->c1;
 794
 795        average.threads.extra_delta32 += t->extra_delta32;
 796        average.threads.extra_delta64 += t->extra_delta64;
 797
 798        /* sum per-core values only for 1st thread in core */
 799        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 800                return 0;
 801
 802        average.cores.c3 += c->c3;
 803        average.cores.c6 += c->c6;
 804        average.cores.c7 += c->c7;
 805
 806        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
 807
 808        /* sum per-pkg values only for 1st core in pkg */
 809        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 810                return 0;
 811
 812        average.packages.pc2 += p->pc2;
 813        average.packages.pc3 += p->pc3;
 814        average.packages.pc6 += p->pc6;
 815        average.packages.pc7 += p->pc7;
 816        average.packages.pc8 += p->pc8;
 817        average.packages.pc9 += p->pc9;
 818        average.packages.pc10 += p->pc10;
 819
 820        average.packages.energy_pkg += p->energy_pkg;
 821        average.packages.energy_dram += p->energy_dram;
 822        average.packages.energy_cores += p->energy_cores;
 823        average.packages.energy_gfx += p->energy_gfx;
 824
 825        average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
 826
 827        average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
 828        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
 829        return 0;
 830}
 831/*
 832 * sum the counters for all cpus in the system
 833 * compute the weighted average
 834 */
 835void compute_average(struct thread_data *t, struct core_data *c,
 836        struct pkg_data *p)
 837{
 838        clear_counters(&average.threads, &average.cores, &average.packages);
 839
 840        for_all_cpus(sum_counters, t, c, p);
 841
 842        average.threads.tsc /= topo.num_cpus;
 843        average.threads.aperf /= topo.num_cpus;
 844        average.threads.mperf /= topo.num_cpus;
 845        average.threads.c1 /= topo.num_cpus;
 846
 847        average.threads.extra_delta32 /= topo.num_cpus;
 848        average.threads.extra_delta32 &= 0xFFFFFFFF;
 849
 850        average.threads.extra_delta64 /= topo.num_cpus;
 851
 852        average.cores.c3 /= topo.num_cores;
 853        average.cores.c6 /= topo.num_cores;
 854        average.cores.c7 /= topo.num_cores;
 855
 856        average.packages.pc2 /= topo.num_packages;
 857        average.packages.pc3 /= topo.num_packages;
 858        average.packages.pc6 /= topo.num_packages;
 859        average.packages.pc7 /= topo.num_packages;
 860
 861        average.packages.pc8 /= topo.num_packages;
 862        average.packages.pc9 /= topo.num_packages;
 863        average.packages.pc10 /= topo.num_packages;
 864}
 865
 866static unsigned long long rdtsc(void)
 867{
 868        unsigned int low, high;
 869
 870        asm volatile("rdtsc" : "=a" (low), "=d" (high));
 871
 872        return low | ((unsigned long long)high) << 32;
 873}
 874
 875
 876/*
 877 * get_counters(...)
 878 * migrate to cpu
 879 * acquire and record local counters for that cpu
 880 */
 881int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 882{
 883        int cpu = t->cpu_id;
 884        unsigned long long msr;
 885
 886        if (cpu_migrate(cpu)) {
 887                fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
 888                return -1;
 889        }
 890
 891        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 892
 893        if (has_aperf) {
 894                if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
 895                        return -3;
 896                if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
 897                        return -4;
 898        }
 899
 900        if (do_smi) {
 901                if (get_msr(cpu, MSR_SMI_COUNT, &msr))
 902                        return -5;
 903                t->smi_count = msr & 0xFFFFFFFF;
 904        }
 905        if (extra_delta_offset32) {
 906                if (get_msr(cpu, extra_delta_offset32, &msr))
 907                        return -5;
 908                t->extra_delta32 = msr & 0xFFFFFFFF;
 909        }
 910
 911        if (extra_delta_offset64)
 912                if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
 913                        return -5;
 914
 915        if (extra_msr_offset32) {
 916                if (get_msr(cpu, extra_msr_offset32, &msr))
 917                        return -5;
 918                t->extra_msr32 = msr & 0xFFFFFFFF;
 919        }
 920
 921        if (extra_msr_offset64)
 922                if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
 923                        return -5;
 924
 925        if (use_c1_residency_msr) {
 926                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
 927                        return -6;
 928        }
 929
 930        /* collect core counters only for 1st thread in core */
 931        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 932                return 0;
 933
 934        if (do_nhm_cstates && !do_slm_cstates) {
 935                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
 936                        return -6;
 937        }
 938
 939        if (do_nhm_cstates) {
 940                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
 941                        return -7;
 942        }
 943
 944        if (do_snb_cstates)
 945                if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
 946                        return -8;
 947
 948        if (do_dts) {
 949                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
 950                        return -9;
 951                c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
 952        }
 953
 954
 955        /* collect package counters only for 1st core in package */
 956        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 957                return 0;
 958
 959        if (do_nhm_cstates && !do_slm_cstates) {
 960                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
 961                        return -9;
 962                if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
 963                        return -10;
 964        }
 965        if (do_snb_cstates) {
 966                if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
 967                        return -11;
 968                if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
 969                        return -12;
 970        }
 971        if (do_c8_c9_c10) {
 972                if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
 973                        return -13;
 974                if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
 975                        return -13;
 976                if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
 977                        return -13;
 978        }
 979        if (do_rapl & RAPL_PKG) {
 980                if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
 981                        return -13;
 982                p->energy_pkg = msr & 0xFFFFFFFF;
 983        }
 984        if (do_rapl & RAPL_CORES) {
 985                if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
 986                        return -14;
 987                p->energy_cores = msr & 0xFFFFFFFF;
 988        }
 989        if (do_rapl & RAPL_DRAM) {
 990                if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
 991                        return -15;
 992                p->energy_dram = msr & 0xFFFFFFFF;
 993        }
 994        if (do_rapl & RAPL_GFX) {
 995                if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
 996                        return -16;
 997                p->energy_gfx = msr & 0xFFFFFFFF;
 998        }
 999        if (do_rapl & RAPL_PKG_PERF_STATUS) {
1000                if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1001                        return -16;
1002                p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1003        }
1004        if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1005                if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1006                        return -16;
1007                p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1008        }
1009        if (do_ptm) {
1010                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1011                        return -17;
1012                p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1013        }
1014        return 0;
1015}
1016
1017void print_verbose_header(void)
1018{
1019        unsigned long long msr;
1020        unsigned int ratio;
1021
1022        if (!do_nehalem_platform_info)
1023                return;
1024
1025        get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
1026
1027        fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
1028
1029        ratio = (msr >> 40) & 0xFF;
1030        fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
1031                ratio, bclk, ratio * bclk);
1032
1033        ratio = (msr >> 8) & 0xFF;
1034        fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
1035                ratio, bclk, ratio * bclk);
1036
1037        get_msr(0, MSR_IA32_POWER_CTL, &msr);
1038        fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1039                msr, msr & 0x2 ? "EN" : "DIS");
1040
1041        if (!do_ivt_turbo_ratio_limit)
1042                goto print_nhm_turbo_ratio_limits;
1043
1044        get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
1045
1046        fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1047
1048        ratio = (msr >> 56) & 0xFF;
1049        if (ratio)
1050                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1051                        ratio, bclk, ratio * bclk);
1052
1053        ratio = (msr >> 48) & 0xFF;
1054        if (ratio)
1055                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1056                        ratio, bclk, ratio * bclk);
1057
1058        ratio = (msr >> 40) & 0xFF;
1059        if (ratio)
1060                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1061                        ratio, bclk, ratio * bclk);
1062
1063        ratio = (msr >> 32) & 0xFF;
1064        if (ratio)
1065                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1066                        ratio, bclk, ratio * bclk);
1067
1068        ratio = (msr >> 24) & 0xFF;
1069        if (ratio)
1070                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1071                        ratio, bclk, ratio * bclk);
1072
1073        ratio = (msr >> 16) & 0xFF;
1074        if (ratio)
1075                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1076                        ratio, bclk, ratio * bclk);
1077
1078        ratio = (msr >> 8) & 0xFF;
1079        if (ratio)
1080                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1081                        ratio, bclk, ratio * bclk);
1082
1083        ratio = (msr >> 0) & 0xFF;
1084        if (ratio)
1085                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1086                        ratio, bclk, ratio * bclk);
1087
1088print_nhm_turbo_ratio_limits:
1089        get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1090
1091#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1092#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1093
1094        fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
1095
1096        fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
1097                (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1098                (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1099                (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1100                (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1101                (msr & (1 << 15)) ? "" : "UN",
1102                (unsigned int)msr & 7);
1103
1104
1105        switch(msr & 0x7) {
1106        case 0:
1107                fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
1108                break;
1109        case 1:
1110                fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
1111                break;
1112        case 2:
1113                fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
1114                break;
1115        case 3:
1116                fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
1117                break;
1118        case 4:
1119                fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
1120                break;
1121        case 5:
1122                fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
1123                break;
1124        case 6:
1125                fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
1126                break;
1127        case 7:
1128                fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
1129                break;
1130        default:
1131                fprintf(stderr, "invalid");
1132        }
1133        fprintf(stderr, ")\n");
1134
1135        if (!do_nehalem_turbo_ratio_limit)
1136                return;
1137
1138        get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1139
1140        fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1141
1142        ratio = (msr >> 56) & 0xFF;
1143        if (ratio)
1144                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1145                        ratio, bclk, ratio * bclk);
1146
1147        ratio = (msr >> 48) & 0xFF;
1148        if (ratio)
1149                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1150                        ratio, bclk, ratio * bclk);
1151
1152        ratio = (msr >> 40) & 0xFF;
1153        if (ratio)
1154                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1155                        ratio, bclk, ratio * bclk);
1156
1157        ratio = (msr >> 32) & 0xFF;
1158        if (ratio)
1159                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1160                        ratio, bclk, ratio * bclk);
1161
1162        ratio = (msr >> 24) & 0xFF;
1163        if (ratio)
1164                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1165                        ratio, bclk, ratio * bclk);
1166
1167        ratio = (msr >> 16) & 0xFF;
1168        if (ratio)
1169                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1170                        ratio, bclk, ratio * bclk);
1171
1172        ratio = (msr >> 8) & 0xFF;
1173        if (ratio)
1174                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1175                        ratio, bclk, ratio * bclk);
1176
1177        ratio = (msr >> 0) & 0xFF;
1178        if (ratio)
1179                fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1180                        ratio, bclk, ratio * bclk);
1181}
1182
1183void free_all_buffers(void)
1184{
1185        CPU_FREE(cpu_present_set);
1186        cpu_present_set = NULL;
1187        cpu_present_set = 0;
1188
1189        CPU_FREE(cpu_affinity_set);
1190        cpu_affinity_set = NULL;
1191        cpu_affinity_setsize = 0;
1192
1193        free(thread_even);
1194        free(core_even);
1195        free(package_even);
1196
1197        thread_even = NULL;
1198        core_even = NULL;
1199        package_even = NULL;
1200
1201        free(thread_odd);
1202        free(core_odd);
1203        free(package_odd);
1204
1205        thread_odd = NULL;
1206        core_odd = NULL;
1207        package_odd = NULL;
1208
1209        free(output_buffer);
1210        output_buffer = NULL;
1211        outp = NULL;
1212}
1213
/*
 * fopen_or_die()
 * Open path with the requested stdio mode and return the stream.
 * On failure, print a diagnostic via err() and exit with status 1,
 * so the return value is never NULL.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	/* honour the caller's mode instead of always opening read-only */
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}
1224
1225/*
1226 * Parse a file containing a single int.
1227 */
1228int parse_int_file(const char *fmt, ...)
1229{
1230        va_list args;
1231        char path[PATH_MAX];
1232        FILE *filep;
1233        int value;
1234
1235        va_start(args, fmt);
1236        vsnprintf(path, sizeof(path), fmt, args);
1237        va_end(args);
1238        filep = fopen_or_die(path, "r");
1239        if (fscanf(filep, "%d", &value) != 1)
1240                err(1, "%s: failed to parse number from file", path);
1241        fclose(filep);
1242        return value;
1243}
1244
/*
 * cpu_is_first_sibling_in_core(cpu)
 * return 1 if the first number in the CPU's thread_siblings_list
 * is the CPU itself, otherwise 0
 */
int cpu_is_first_sibling_in_core(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);

	return first_sibling == cpu;
}
1253
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if the first number in the CPU's core_siblings_list
 * is the CPU itself, otherwise 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
1262
/*
 * get_physical_package_id(cpu)
 * return the integer stored in the CPU's sysfs physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
1267
/*
 * get_core_id(cpu)
 * return the integer stored in the CPU's sysfs core_id file
 */
int get_core_id(int cpu)
{
	int core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
1272
/*
 * get_num_ht_siblings(cpu)
 * Read the CPU's thread_siblings_list and return 2 when it starts with
 * two numbers separated by a single character, otherwise 1.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	FILE *fp;
	int first, second;
	char separator;
	int fields;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	fp = fopen_or_die(path, "r");

	/*
	 * file format:
	 * a pair of numbers with a character between them: 2 siblings (e.g. 1-2, or 1,4)
	 * otherwise 1 sibling (self).
	 */
	fields = fscanf(fp, "%d%c%d\n", &first, &separator, &second);

	fclose(fp);

	return (fields == 3) ? 2 : 1;
}
1297
1298/*
1299 * run func(thread, core, package) in topology order
1300 * skip non-present cpus
1301 */
1302
1303int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1304        struct pkg_data *, struct thread_data *, struct core_data *,
1305        struct pkg_data *), struct thread_data *thread_base,
1306        struct core_data *core_base, struct pkg_data *pkg_base,
1307        struct thread_data *thread_base2, struct core_data *core_base2,
1308        struct pkg_data *pkg_base2)
1309{
1310        int retval, pkg_no, core_no, thread_no;
1311
1312        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1313                for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1314                        for (thread_no = 0; thread_no <
1315                                topo.num_threads_per_core; ++thread_no) {
1316                                struct thread_data *t, *t2;
1317                                struct core_data *c, *c2;
1318                                struct pkg_data *p, *p2;
1319
1320                                t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1321
1322                                if (cpu_is_not_present(t->cpu_id))
1323                                        continue;
1324
1325                                t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1326
1327                                c = GET_CORE(core_base, core_no, pkg_no);
1328                                c2 = GET_CORE(core_base2, core_no, pkg_no);
1329
1330                                p = GET_PKG(pkg_base, pkg_no);
1331                                p2 = GET_PKG(pkg_base2, pkg_no);
1332
1333                                retval = func(t, c, p, t2, c2, p2);
1334                                if (retval)
1335                                        return retval;
1336                        }
1337                }
1338        }
1339        return 0;
1340}
1341
1342/*
1343 * run func(cpu) on every cpu in /proc/stat
1344 * return max_cpu number
1345 */
1346int for_all_proc_cpus(int (func)(int))
1347{
1348        FILE *fp;
1349        int cpu_num;
1350        int retval;
1351
1352        fp = fopen_or_die(proc_stat, "r");
1353
1354        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1355        if (retval != 0)
1356                err(1, "%s: failed to parse format", proc_stat);
1357
1358        while (1) {
1359                retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1360                if (retval != 1)
1361                        break;
1362
1363                retval = func(cpu_num);
1364                if (retval) {
1365                        fclose(fp);
1366                        return(retval);
1367                }
1368        }
1369        fclose(fp);
1370        return 0;
1371}
1372
1373void re_initialize(void)
1374{
1375        free_all_buffers();
1376        setup_all_buffers();
1377        printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
1378}
1379
1380
1381/*
1382 * count_cpus()
1383 * remember the last one seen, it will be the max
1384 */
1385int count_cpus(int cpu)
1386{
1387        if (topo.max_cpu_num < cpu)
1388                topo.max_cpu_num = cpu;
1389
1390        topo.num_cpus += 1;
1391        return 0;
1392}
1393int mark_cpu_present(int cpu)
1394{
1395        CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
1396        return 0;
1397}
1398
1399void turbostat_loop()
1400{
1401        int retval;
1402        int restarted = 0;
1403
1404restart:
1405        restarted++;
1406
1407        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1408        if (retval < -1) {
1409                exit(retval);
1410        } else if (retval == -1) {
1411                if (restarted > 1) {
1412                        exit(retval);
1413                }
1414                re_initialize();
1415                goto restart;
1416        }
1417        restarted = 0;
1418        gettimeofday(&tv_even, (struct timezone *)NULL);
1419
1420        while (1) {
1421                if (for_all_proc_cpus(cpu_is_not_present)) {
1422                        re_initialize();
1423                        goto restart;
1424                }
1425                sleep(interval_sec);
1426                retval = for_all_cpus(get_counters, ODD_COUNTERS);
1427                if (retval < -1) {
1428                        exit(retval);
1429                } else if (retval == -1) {
1430                        re_initialize();
1431                        goto restart;
1432                }
1433                gettimeofday(&tv_odd, (struct timezone *)NULL);
1434                timersub(&tv_odd, &tv_even, &tv_delta);
1435                for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
1436                compute_average(EVEN_COUNTERS);
1437                format_all_counters(EVEN_COUNTERS);
1438                flush_stdout();
1439                sleep(interval_sec);
1440                retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1441                if (retval < -1) {
1442                        exit(retval);
1443                } else if (retval == -1) {
1444                        re_initialize();
1445                        goto restart;
1446                }
1447                gettimeofday(&tv_even, (struct timezone *)NULL);
1448                timersub(&tv_even, &tv_odd, &tv_delta);
1449                for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
1450                compute_average(ODD_COUNTERS);
1451                format_all_counters(ODD_COUNTERS);
1452                flush_stdout();
1453        }
1454}
1455
/*
 * check_dev_msr()
 * Exit through err() with a hint to load the msr module when
 * /dev/cpu/0/msr cannot be stat()ed.
 */
void check_dev_msr()
{
	struct stat st;
	int missing = stat("/dev/cpu/0/msr", &st);

	if (missing != 0)
		err(-5, "no /dev/cpu/0/msr\n"
		    "Try \"# modprobe msr\"");
}
1464
/*
 * check_super_user()
 * Exit through errx() unless the real user ID is 0.
 */
void check_super_user()
{
	if (getuid() == 0)
		return;

	errx(-6, "must be root");
}
1470
1471int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
1472{
1473        if (!genuine_intel)
1474                return 0;
1475
1476        if (family != 6)
1477                return 0;
1478
1479        switch (model) {
1480        case 0x1A:      /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
1481        case 0x1E:      /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
1482        case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
1483        case 0x25:      /* Westmere Client - Clarkdale, Arrandale */
1484        case 0x2C:      /* Westmere EP - Gulftown */
1485        case 0x2A:      /* SNB */
1486        case 0x2D:      /* SNB Xeon */
1487        case 0x3A:      /* IVB */
1488        case 0x3E:      /* IVB Xeon */
1489        case 0x3C:      /* HSW */
1490        case 0x3F:      /* HSX */
1491        case 0x45:      /* HSW */
1492        case 0x46:      /* HSW */
1493        case 0x37:      /* BYT */
1494        case 0x4D:      /* AVN */
1495        case 0x3D:      /* BDW */
1496        case 0x4F:      /* BDX */
1497        case 0x56:      /* BDX-DE */
1498                return 1;
1499        case 0x2E:      /* Nehalem-EX Xeon - Beckton */
1500        case 0x2F:      /* Westmere-EX Xeon - Eagleton */
1501        default:
1502                return 0;
1503        }
1504}
1505int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
1506{
1507        if (!genuine_intel)
1508                return 0;
1509
1510        if (family != 6)
1511                return 0;
1512
1513        switch (model) {
1514        case 0x3E:      /* IVB Xeon */
1515                return 1;
1516        default:
1517                return 0;
1518        }
1519}
1520
1521/*
1522 * print_epb()
1523 * Decode the ENERGY_PERF_BIAS MSR
1524 */
1525int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1526{
1527        unsigned long long msr;
1528        char *epb_string;
1529        int cpu;
1530
1531        if (!has_epb)
1532                return 0;
1533
1534        cpu = t->cpu_id;
1535
1536        /* EPB is per-package */
1537        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1538                return 0;
1539
1540        if (cpu_migrate(cpu)) {
1541                fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1542                return -1;
1543        }
1544
1545        if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
1546                return 0;
1547
1548        switch (msr & 0x7) {
1549        case ENERGY_PERF_BIAS_PERFORMANCE:
1550                epb_string = "performance";
1551                break;
1552        case ENERGY_PERF_BIAS_NORMAL:
1553                epb_string = "balanced";
1554                break;
1555        case ENERGY_PERF_BIAS_POWERSAVE:
1556                epb_string = "powersave";
1557                break;
1558        default:
1559                epb_string = "custom";
1560                break;
1561        }
1562        fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
1563
1564        return 0;
1565}
1566
1567#define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
1568#define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
1569
1570double get_tdp(model)
1571{
1572        unsigned long long msr;
1573
1574        if (do_rapl & RAPL_PKG_POWER_INFO)
1575                if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
1576                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
1577
1578        switch (model) {
1579        case 0x37:
1580        case 0x4D:
1581                return 30.0;
1582        default:
1583                return 135.0;
1584        }
1585}
1586
1587
1588/*
1589 * rapl_probe()
1590 *
1591 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
1592 */
1593void rapl_probe(unsigned int family, unsigned int model)
1594{
1595        unsigned long long msr;
1596        unsigned int time_unit;
1597        double tdp;
1598
1599        if (!genuine_intel)
1600                return;
1601
1602        if (family != 6)
1603                return;
1604
1605        switch (model) {
1606        case 0x2A:
1607        case 0x3A:
1608        case 0x3C:      /* HSW */
1609        case 0x45:      /* HSW */
1610        case 0x46:      /* HSW */
1611        case 0x3D:      /* BDW */
1612                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
1613                break;
1614        case 0x3F:      /* HSX */
1615        case 0x4F:      /* BDX */
1616        case 0x56:      /* BDX-DE */
1617                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
1618                break;
1619        case 0x2D:
1620        case 0x3E:
1621                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
1622                break;
1623        case 0x37:      /* BYT */
1624        case 0x4D:      /* AVN */
1625                do_rapl = RAPL_PKG | RAPL_CORES ;
1626                break;
1627        default:
1628                return;
1629        }
1630
1631        /* units on package 0, verify later other packages match */
1632        if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
1633                return;
1634
1635        rapl_power_units = 1.0 / (1 << (msr & 0xF));
1636        if (model == 0x37)
1637                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
1638        else
1639                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1640
1641        time_unit = msr >> 16 & 0xF;
1642        if (time_unit == 0)
1643                time_unit = 0xA;
1644
1645        rapl_time_units = 1.0 / (1 << (time_unit));
1646
1647        tdp = get_tdp(model);
1648
1649        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
1650        if (verbose)
1651                fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
1652
1653        return;
1654}
1655
1656int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1657{
1658        unsigned long long msr;
1659        unsigned int dts;
1660        int cpu;
1661
1662        if (!(do_dts || do_ptm))
1663                return 0;
1664
1665        cpu = t->cpu_id;
1666
1667        /* DTS is per-core, no need to print for each thread */
1668        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 
1669                return 0;
1670
1671        if (cpu_migrate(cpu)) {
1672                fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1673                return -1;
1674        }
1675
1676        if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
1677                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1678                        return 0;
1679
1680                dts = (msr >> 16) & 0x7F;
1681                fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
1682                        cpu, msr, tcc_activation_temp - dts);
1683
1684#ifdef  THERM_DEBUG
1685                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
1686                        return 0;
1687
1688                dts = (msr >> 16) & 0x7F;
1689                dts2 = (msr >> 8) & 0x7F;
1690                fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1691                        cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1692#endif
1693        }
1694
1695
1696        if (do_dts) {
1697                unsigned int resolution;
1698
1699                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1700                        return 0;
1701
1702                dts = (msr >> 16) & 0x7F;
1703                resolution = (msr >> 27) & 0xF;
1704                fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
1705                        cpu, msr, tcc_activation_temp - dts, resolution);
1706
1707#ifdef THERM_DEBUG
1708                if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
1709                        return 0;
1710
1711                dts = (msr >> 16) & 0x7F;
1712                dts2 = (msr >> 8) & 0x7F;
1713                fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1714                        cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1715#endif
1716        }
1717
1718        return 0;
1719}
1720        
1721void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
1722{
1723        fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
1724                cpu, label,
1725                ((msr >> 15) & 1) ? "EN" : "DIS",
1726                ((msr >> 0) & 0x7FFF) * rapl_power_units,
1727                (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
1728                (((msr >> 16) & 1) ? "EN" : "DIS"));
1729
1730        return;
1731}
1732
1733int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1734{
1735        unsigned long long msr;
1736        int cpu;
1737
1738        if (!do_rapl)
1739                return 0;
1740
1741        /* RAPL counters are per package, so print only for 1st thread/package */
1742        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1743                return 0;
1744
1745        cpu = t->cpu_id;
1746        if (cpu_migrate(cpu)) {
1747                fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1748                return -1;
1749        }
1750
1751        if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
1752                return -1;
1753
1754        if (verbose) {
1755                fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
1756                        "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
1757                        rapl_power_units, rapl_energy_units, rapl_time_units);
1758        }
1759        if (do_rapl & RAPL_PKG_POWER_INFO) {
1760
1761                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
1762                        return -5;
1763
1764
1765                fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1766                        cpu, msr,
1767                        ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1768                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1769                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1770                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1771
1772        }
1773        if (do_rapl & RAPL_PKG) {
1774
1775                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
1776                        return -9;
1777
1778                fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
1779                        cpu, msr, (msr >> 63) & 1 ? "": "UN");
1780
1781                print_power_limit_msr(cpu, msr, "PKG Limit #1");
1782                fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
1783                        cpu,
1784                        ((msr >> 47) & 1) ? "EN" : "DIS",
1785                        ((msr >> 32) & 0x7FFF) * rapl_power_units,
1786                        (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
1787                        ((msr >> 48) & 1) ? "EN" : "DIS");
1788        }
1789
1790        if (do_rapl & RAPL_DRAM) {
1791                if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
1792                        return -6;
1793
1794
1795                fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1796                        cpu, msr,
1797                        ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1798                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1799                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1800                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1801
1802
1803                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
1804                        return -9;
1805                fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
1806                                cpu, msr, (msr >> 31) & 1 ? "": "UN");
1807
1808                print_power_limit_msr(cpu, msr, "DRAM Limit");
1809        }
1810        if (do_rapl & RAPL_CORE_POLICY) {
1811                if (verbose) {
1812                        if (get_msr(cpu, MSR_PP0_POLICY, &msr))
1813                                return -7;
1814
1815                        fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
1816                }
1817        }
1818        if (do_rapl & RAPL_CORES) {
1819                if (verbose) {
1820
1821                        if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
1822                                return -9;
1823                        fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
1824                                        cpu, msr, (msr >> 31) & 1 ? "": "UN");
1825                        print_power_limit_msr(cpu, msr, "Cores Limit");
1826                }
1827        }
1828        if (do_rapl & RAPL_GFX) {
1829                if (verbose) {
1830                        if (get_msr(cpu, MSR_PP1_POLICY, &msr))
1831                                return -8;
1832
1833                        fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
1834
1835                        if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
1836                                return -9;
1837                        fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
1838                                        cpu, msr, (msr >> 31) & 1 ? "": "UN");
1839                        print_power_limit_msr(cpu, msr, "GFX Limit");
1840                }
1841        }
1842        return 0;
1843}
1844
1845
1846int is_snb(unsigned int family, unsigned int model)
1847{
1848        if (!genuine_intel)
1849                return 0;
1850
1851        switch (model) {
1852        case 0x2A:
1853        case 0x2D:
1854        case 0x3A:      /* IVB */
1855        case 0x3E:      /* IVB Xeon */
1856        case 0x3C:      /* HSW */
1857        case 0x3F:      /* HSW */
1858        case 0x45:      /* HSW */
1859        case 0x46:      /* HSW */
1860        case 0x3D:      /* BDW */
1861        case 0x4F:      /* BDX */
1862        case 0x56:      /* BDX-DE */
1863                return 1;
1864        }
1865        return 0;
1866}
1867
1868int has_c8_c9_c10(unsigned int family, unsigned int model)
1869{
1870        if (!genuine_intel)
1871                return 0;
1872
1873        switch (model) {
1874        case 0x45:      /* HSW */
1875        case 0x3D:      /* BDW */
1876                return 1;
1877        }
1878        return 0;
1879}
1880
1881
1882int is_slm(unsigned int family, unsigned int model)
1883{
1884        if (!genuine_intel)
1885                return 0;
1886        switch (model) {
1887        case 0x37:      /* BYT */
1888        case 0x4D:      /* AVN */
1889                return 1;
1890        }
1891        return 0;
1892}
1893
1894#define SLM_BCLK_FREQS 5
1895double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
1896
1897double slm_bclk(void)
1898{
1899        unsigned long long msr = 3;
1900        unsigned int i;
1901        double freq;
1902
1903        if (get_msr(0, MSR_FSB_FREQ, &msr))
1904                fprintf(stderr, "SLM BCLK: unknown\n");
1905
1906        i = msr & 0xf;
1907        if (i >= SLM_BCLK_FREQS) {
1908                fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
1909                msr = 3;
1910        }
1911        freq = slm_freq_table[i];
1912
1913        fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
1914
1915        return freq;
1916}
1917
/*
 * discover_bclk()
 *
 * Returns the bus clock in MHz: 100.00 when is_snb() matches, the value
 * from slm_bclk() when is_slm() matches, and 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (is_snb(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
1927
1928/*
1929 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
1930 * the Thermal Control Circuit (TCC) activates.
1931 * This is usually equal to tjMax.
1932 *
1933 * Older processors do not have this MSR, so there we guess,
1934 * but also allow cmdline over-ride with -T.
1935 *
1936 * Several MSR temperature values are in units of degrees-C
1937 * below this value, including the Digital Thermal Sensor (DTS),
1938 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
1939 */
1940int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1941{
1942        unsigned long long msr;
1943        unsigned int target_c_local;
1944        int cpu;
1945
1946        /* tcc_activation_temp is used only for dts or ptm */
1947        if (!(do_dts || do_ptm))
1948                return 0;
1949
1950        /* this is a per-package concept */
1951        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1952                return 0;
1953
1954        cpu = t->cpu_id;
1955        if (cpu_migrate(cpu)) {
1956                fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1957                return -1;
1958        }
1959
1960        if (tcc_activation_temp_override != 0) {
1961                tcc_activation_temp = tcc_activation_temp_override;
1962                fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
1963                        cpu, tcc_activation_temp);
1964                return 0;
1965        }
1966
1967        /* Temperature Target MSR is Nehalem and newer only */
1968        if (!do_nehalem_platform_info)
1969                goto guess;
1970
1971        if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
1972                goto guess;
1973
1974        target_c_local = (msr >> 16) & 0x7F;
1975
1976        if (verbose)
1977                fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
1978                        cpu, msr, target_c_local);
1979
1980        if (target_c_local < 85 || target_c_local > 127)
1981                goto guess;
1982
1983        tcc_activation_temp = target_c_local;
1984
1985        return 0;
1986
1987guess:
1988        tcc_activation_temp = TJMAX_DEFAULT;
1989        fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
1990                cpu, tcc_activation_temp);
1991
1992        return 0;
1993}
1994void check_cpuid()
1995{
1996        unsigned int eax, ebx, ecx, edx, max_level;
1997        unsigned int fms, family, model, stepping;
1998
1999        eax = ebx = ecx = edx = 0;
2000
2001        __get_cpuid(0, &max_level, &ebx, &ecx, &edx);
2002
2003        if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
2004                genuine_intel = 1;
2005
2006        if (verbose)
2007                fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
2008                        (char *)&ebx, (char *)&edx, (char *)&ecx);
2009
2010        __get_cpuid(1, &fms, &ebx, &ecx, &edx);
2011        family = (fms >> 8) & 0xf;
2012        model = (fms >> 4) & 0xf;
2013        stepping = fms & 0xf;
2014        if (family == 6 || family == 0xf)
2015                model += ((fms >> 16) & 0xf) << 4;
2016
2017        if (verbose)
2018                fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
2019                        max_level, family, model, stepping, family, model, stepping);
2020
2021        if (!(edx & (1 << 5)))
2022                errx(1, "CPUID: no MSR");
2023
2024        /*
2025         * check max extended function levels of CPUID.
2026         * This is needed to check for invariant TSC.
2027         * This check is valid for both Intel and AMD.
2028         */
2029        ebx = ecx = edx = 0;
2030        __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
2031
2032        if (max_level < 0x80000007)
2033                errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level);
2034
2035        /*
2036         * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
2037         * this check is valid for both Intel and AMD
2038         */
2039        __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
2040        has_invariant_tsc = edx & (1 << 8);
2041
2042        if (!has_invariant_tsc)
2043                errx(1, "No invariant TSC");
2044
2045        /*
2046         * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
2047         * this check is valid for both Intel and AMD
2048         */
2049
2050        __get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
2051        has_aperf = ecx & (1 << 0);
2052        do_dts = eax & (1 << 0);
2053        do_ptm = eax & (1 << 6);
2054        has_epb = ecx & (1 << 3);
2055
2056        if (verbose)
2057                fprintf(stderr, "CPUID(6): %s%s%s%s\n",
2058                        has_aperf ? "APERF" : "No APERF!",
2059                        do_dts ? ", DTS" : "",
2060                        do_ptm ? ", PTM": "",
2061                        has_epb ? ", EPB": "");
2062
2063        if (!has_aperf)
2064                errx(-1, "No APERF");
2065
2066        do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
2067        do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
2068        do_smi = do_nhm_cstates;
2069        do_snb_cstates = is_snb(family, model);
2070        do_c8_c9_c10 = has_c8_c9_c10(family, model);
2071        do_slm_cstates = is_slm(family, model);
2072        bclk = discover_bclk(family, model);
2073
2074        do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
2075        do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
2076        rapl_probe(family, model);
2077
2078        return;
2079}
2080
2081
2082void usage()
2083{
2084        errx(1, "%s: [-v][-R][-T][-p|-P|-S][-c MSR#][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
2085             progname);
2086}
2087
2088
/*
 * dir_filter()
 *
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of the name is examined.
 * Returns 1 when it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/* <ctype.h> functions need an unsigned char value; plain char may be negative */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
2100
/*
 * open_dev_cpu_msr()
 *
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
2105
2106void topology_probe()
2107{
2108        int i;
2109        int max_core_id = 0;
2110        int max_package_id = 0;
2111        int max_siblings = 0;
2112        struct cpu_topology {
2113                int core_id;
2114                int physical_package_id;
2115        } *cpus;
2116
2117        /* Initialize num_cpus, max_cpu_num */
2118        topo.num_cpus = 0;
2119        topo.max_cpu_num = 0;
2120        for_all_proc_cpus(count_cpus);
2121        if (!summary_only && topo.num_cpus > 1)
2122                show_cpu = 1;
2123
2124        if (verbose > 1)
2125                fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
2126
2127        cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
2128        if (cpus == NULL)
2129                err(1, "calloc cpus");
2130
2131        /*
2132         * Allocate and initialize cpu_present_set
2133         */
2134        cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
2135        if (cpu_present_set == NULL)
2136                err(3, "CPU_ALLOC");
2137        cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2138        CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
2139        for_all_proc_cpus(mark_cpu_present);
2140
2141        /*
2142         * Allocate and initialize cpu_affinity_set
2143         */
2144        cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
2145        if (cpu_affinity_set == NULL)
2146                err(3, "CPU_ALLOC");
2147        cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2148        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
2149
2150
2151        /*
2152         * For online cpus
2153         * find max_core_id, max_package_id
2154         */
2155        for (i = 0; i <= topo.max_cpu_num; ++i) {
2156                int siblings;
2157
2158                if (cpu_is_not_present(i)) {
2159                        if (verbose > 1)
2160                                fprintf(stderr, "cpu%d NOT PRESENT\n", i);
2161                        continue;
2162                }
2163                cpus[i].core_id = get_core_id(i);
2164                if (cpus[i].core_id > max_core_id)
2165                        max_core_id = cpus[i].core_id;
2166
2167                cpus[i].physical_package_id = get_physical_package_id(i);
2168                if (cpus[i].physical_package_id > max_package_id)
2169                        max_package_id = cpus[i].physical_package_id;
2170
2171                siblings = get_num_ht_siblings(i);
2172                if (siblings > max_siblings)
2173                        max_siblings = siblings;
2174                if (verbose > 1)
2175                        fprintf(stderr, "cpu %d pkg %d core %d\n",
2176                                i, cpus[i].physical_package_id, cpus[i].core_id);
2177        }
2178        topo.num_cores_per_pkg = max_core_id + 1;
2179        if (verbose > 1)
2180                fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
2181                        max_core_id, topo.num_cores_per_pkg);
2182        if (!summary_only && topo.num_cores_per_pkg > 1)
2183                show_core = 1;
2184
2185        topo.num_packages = max_package_id + 1;
2186        if (verbose > 1)
2187                fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
2188                        max_package_id, topo.num_packages);
2189        if (!summary_only && topo.num_packages > 1)
2190                show_pkg = 1;
2191
2192        topo.num_threads_per_core = max_siblings;
2193        if (verbose > 1)
2194                fprintf(stderr, "max_siblings %d\n", max_siblings);
2195
2196        free(cpus);
2197}
2198
2199void
2200allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
2201{
2202        int i;
2203
2204        *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
2205                topo.num_packages, sizeof(struct thread_data));
2206        if (*t == NULL)
2207                goto error;
2208
2209        for (i = 0; i < topo.num_threads_per_core *
2210                topo.num_cores_per_pkg * topo.num_packages; i++)
2211                (*t)[i].cpu_id = -1;
2212
2213        *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
2214                sizeof(struct core_data));
2215        if (*c == NULL)
2216                goto error;
2217
2218        for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
2219                (*c)[i].core_id = -1;
2220
2221        *p = calloc(topo.num_packages, sizeof(struct pkg_data));
2222        if (*p == NULL)
2223                goto error;
2224
2225        for (i = 0; i < topo.num_packages; i++)
2226                (*p)[i].package_id = i;
2227
2228        return;
2229error:
2230        err(1, "calloc counters");
2231}
2232/*
2233 * init_counter()
2234 *
2235 * set cpu_id, core_num, pkg_num
2236 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
2237 *
2238 * increment topo.num_cores when 1st core in pkg seen
2239 */
2240void init_counter(struct thread_data *thread_base, struct core_data *core_base,
2241        struct pkg_data *pkg_base, int thread_num, int core_num,
2242        int pkg_num, int cpu_id)
2243{
2244        struct thread_data *t;
2245        struct core_data *c;
2246        struct pkg_data *p;
2247
2248        t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
2249        c = GET_CORE(core_base, core_num, pkg_num);
2250        p = GET_PKG(pkg_base, pkg_num);
2251
2252        t->cpu_id = cpu_id;
2253        if (thread_num == 0) {
2254                t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
2255                if (cpu_is_first_core_in_package(cpu_id))
2256                        t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
2257        }
2258
2259        c->core_id = core_num;
2260        p->package_id = pkg_num;
2261}
2262
2263
2264int initialize_counters(int cpu_id)
2265{
2266        int my_thread_id, my_core_id, my_package_id;
2267
2268        my_package_id = get_physical_package_id(cpu_id);
2269        my_core_id = get_core_id(cpu_id);
2270
2271        if (cpu_is_first_sibling_in_core(cpu_id)) {
2272                my_thread_id = 0;
2273                topo.num_cores++;
2274        } else {
2275                my_thread_id = 1;
2276        }
2277
2278        init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2279        init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2280        return 0;
2281}
2282
2283void allocate_output_buffer()
2284{
2285        output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
2286        outp = output_buffer;
2287        if (outp == NULL)
2288                err(-1, "calloc output buffer");
2289}
2290
2291void setup_all_buffers(void)
2292{
2293        topology_probe();
2294        allocate_counters(&thread_even, &core_even, &package_even);
2295        allocate_counters(&thread_odd, &core_odd, &package_odd);
2296        allocate_output_buffer();
2297        for_all_proc_cpus(initialize_counters);
2298}
2299
2300void turbostat_init()
2301{
2302        check_cpuid();
2303
2304        check_dev_msr();
2305        check_super_user();
2306
2307        setup_all_buffers();
2308
2309        if (verbose)
2310                print_verbose_header();
2311
2312        if (verbose)
2313                for_all_cpus(print_epb, ODD_COUNTERS);
2314
2315        if (verbose)
2316                for_all_cpus(print_rapl, ODD_COUNTERS);
2317
2318        for_all_cpus(set_temperature_target, ODD_COUNTERS);
2319
2320        if (verbose)
2321                for_all_cpus(print_thermal, ODD_COUNTERS);
2322}
2323
2324int fork_it(char **argv)
2325{
2326        pid_t child_pid;
2327        int status;
2328
2329        status = for_all_cpus(get_counters, EVEN_COUNTERS);
2330        if (status)
2331                exit(status);
2332        /* clear affinity side-effect of get_counters() */
2333        sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
2334        gettimeofday(&tv_even, (struct timezone *)NULL);
2335
2336        child_pid = fork();
2337        if (!child_pid) {
2338                /* child */
2339                execvp(argv[0], argv);
2340        } else {
2341
2342                /* parent */
2343                if (child_pid == -1)
2344                        err(1, "fork");
2345
2346                signal(SIGINT, SIG_IGN);
2347                signal(SIGQUIT, SIG_IGN);
2348                if (waitpid(child_pid, &status, 0) == -1)
2349                        err(status, "waitpid");
2350        }
2351        /*
2352         * n.b. fork_it() does not check for errors from for_all_cpus()
2353         * because re-starting is problematic when forking
2354         */
2355        for_all_cpus(get_counters, ODD_COUNTERS);
2356        gettimeofday(&tv_odd, (struct timezone *)NULL);
2357        timersub(&tv_odd, &tv_even, &tv_delta);
2358        for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
2359        compute_average(EVEN_COUNTERS);
2360        format_all_counters(EVEN_COUNTERS);
2361        flush_stderr();
2362
2363        fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
2364
2365        return status;
2366}
2367
2368int get_and_dump_counters(void)
2369{
2370        int status;
2371
2372        status = for_all_cpus(get_counters, ODD_COUNTERS);
2373        if (status)
2374                return status;
2375
2376        status = for_all_cpus(dump_counters, ODD_COUNTERS);
2377        if (status)
2378                return status;
2379
2380        flush_stdout();
2381
2382        return status;
2383}
2384
2385void cmdline(int argc, char **argv)
2386{
2387        int opt;
2388
2389        progname = argv[0];
2390
2391        while ((opt = getopt(argc, argv, "+pPsSvi:c:C:m:M:RJT:")) != -1) {
2392                switch (opt) {
2393                case 'p':
2394                        show_core_only++;
2395                        break;
2396                case 'P':
2397                        show_pkg_only++;
2398                        break;
2399                case 's':
2400                        dump_only++;
2401                        break;
2402                case 'S':
2403                        summary_only++;
2404                        break;
2405                case 'v':
2406                        verbose++;
2407                        break;
2408                case 'i':
2409                        interval_sec = atoi(optarg);
2410                        break;
2411                case 'c':
2412                        sscanf(optarg, "%x", &extra_delta_offset32);
2413                        break;
2414                case 'C':
2415                        sscanf(optarg, "%x", &extra_delta_offset64);
2416                        break;
2417                case 'm':
2418                        sscanf(optarg, "%x", &extra_msr_offset32);
2419                        break;
2420                case 'M':
2421                        sscanf(optarg, "%x", &extra_msr_offset64);
2422                        break;
2423                case 'R':
2424                        rapl_verbose++;
2425                        break;
2426                case 'T':
2427                        tcc_activation_temp_override = atoi(optarg);
2428                        break;
2429                case 'J':
2430                        rapl_joules++;
2431                        break;
2432
2433                default:
2434                        usage();
2435                }
2436        }
2437}
2438
2439int main(int argc, char **argv)
2440{
2441        cmdline(argc, argv);
2442
2443        if (verbose)
2444                fprintf(stderr, "turbostat v3.7 Feb 6, 2014"
2445                        " - Len Brown <lenb@kernel.org>\n");
2446
2447        turbostat_init();
2448
2449        /* dump counters and exit */
2450        if (dump_only)
2451                return get_and_dump_counters();
2452
2453        /*
2454         * if any params left, it must be a command to fork
2455         */
2456        if (argc - optind)
2457                return fork_it(argv + optind);
2458        else
2459                turbostat_loop();
2460
2461        return 0;
2462}
2463