linux/tools/power/x86/turbostat/turbostat.c
<<
>>
Prefs
   1/*
   2 * turbostat -- show CPU frequency and C-state residency
   3 * on modern Intel turbo-capable processors.
   4 *
   5 * Copyright (c) 2013 Intel Corporation.
   6 * Len Brown <len.brown@intel.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify it
   9 * under the terms and conditions of the GNU General Public License,
  10 * version 2, as published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope it will be useful, but WITHOUT
  13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15 * more details.
  16 *
  17 * You should have received a copy of the GNU General Public License along with
  18 * this program; if not, write to the Free Software Foundation, Inc.,
  19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  20 */
  21
  22#define _GNU_SOURCE
  23#include MSRHEADER
  24#include INTEL_FAMILY_HEADER
  25#include <stdarg.h>
  26#include <stdio.h>
  27#include <err.h>
  28#include <unistd.h>
  29#include <sys/types.h>
  30#include <sys/wait.h>
  31#include <sys/stat.h>
  32#include <sys/resource.h>
  33#include <fcntl.h>
  34#include <signal.h>
  35#include <sys/time.h>
  36#include <stdlib.h>
  37#include <getopt.h>
  38#include <dirent.h>
  39#include <string.h>
  40#include <ctype.h>
  41#include <sched.h>
  42#include <time.h>
  43#include <cpuid.h>
  44#include <linux/capability.h>
  45#include <errno.h>
  46
  47char *proc_stat = "/proc/stat";
  48FILE *outf;
  49int *fd_percpu;
  50struct timespec interval_ts = {5, 0};
  51unsigned int debug;
  52unsigned int quiet;
  53unsigned int sums_need_wide_columns;
  54unsigned int rapl_joules;
  55unsigned int summary_only;
  56unsigned int list_header_only;
  57unsigned int dump_only;
  58unsigned int do_snb_cstates;
  59unsigned int do_knl_cstates;
  60unsigned int do_slm_cstates;
  61unsigned int use_c1_residency_msr;
  62unsigned int has_aperf;
  63unsigned int has_epb;
  64unsigned int do_irtl_snb;
  65unsigned int do_irtl_hsw;
  66unsigned int units = 1000000;   /* MHz etc */
  67unsigned int genuine_intel;
  68unsigned int has_invariant_tsc;
  69unsigned int do_nhm_platform_info;
  70unsigned int no_MSR_MISC_PWR_MGMT;
  71unsigned int aperf_mperf_multiplier = 1;
  72double bclk;
  73double base_hz;
  74unsigned int has_base_hz;
  75double tsc_tweak = 1.0;
  76unsigned int show_pkg_only;
  77unsigned int show_core_only;
  78char *output_buffer, *outp;
  79unsigned int do_rapl;
  80unsigned int do_dts;
  81unsigned int do_ptm;
  82unsigned long long  gfx_cur_rc6_ms;
  83unsigned int gfx_cur_mhz;
  84unsigned int tcc_activation_temp;
  85unsigned int tcc_activation_temp_override;
  86double rapl_power_units, rapl_time_units;
  87double rapl_dram_energy_units, rapl_energy_units;
  88double rapl_joule_counter_range;
  89unsigned int do_core_perf_limit_reasons;
  90unsigned int do_gfx_perf_limit_reasons;
  91unsigned int do_ring_perf_limit_reasons;
  92unsigned int crystal_hz;
  93unsigned long long tsc_hz;
  94int base_cpu;
  95double discover_bclk(unsigned int family, unsigned int model);
  96unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
  97                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
  98unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
  99unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
 100unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
 101unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
 102unsigned int has_misc_feature_control;
 103
 104#define RAPL_PKG                (1 << 0)
 105                                        /* 0x610 MSR_PKG_POWER_LIMIT */
 106                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
 107#define RAPL_PKG_PERF_STATUS    (1 << 1)
 108                                        /* 0x613 MSR_PKG_PERF_STATUS */
 109#define RAPL_PKG_POWER_INFO     (1 << 2)
 110                                        /* 0x614 MSR_PKG_POWER_INFO */
 111
 112#define RAPL_DRAM               (1 << 3)
 113                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
 114                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
 115#define RAPL_DRAM_PERF_STATUS   (1 << 4)
 116                                        /* 0x61b MSR_DRAM_PERF_STATUS */
 117#define RAPL_DRAM_POWER_INFO    (1 << 5)
 118                                        /* 0x61c MSR_DRAM_POWER_INFO */
 119
 120#define RAPL_CORES_POWER_LIMIT  (1 << 6)
 121                                        /* 0x638 MSR_PP0_POWER_LIMIT */
 122#define RAPL_CORE_POLICY        (1 << 7)
 123                                        /* 0x63a MSR_PP0_POLICY */
 124
 125#define RAPL_GFX                (1 << 8)
 126                                        /* 0x640 MSR_PP1_POWER_LIMIT */
 127                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
 128                                        /* 0x642 MSR_PP1_POLICY */
 129
 130#define RAPL_CORES_ENERGY_STATUS        (1 << 9)
 131                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
 132#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
 133#define TJMAX_DEFAULT   100
 134
 135#define MAX(a, b) ((a) > (b) ? (a) : (b))
 136
 137/*
 138 * buffer size used by sscanf() for added column names
 139 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 140 */
 141#define NAME_BYTES 20
 142#define PATH_BYTES 128
 143
 144int backwards_count;
 145char *progname;
 146
 147#define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
 148cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
 149size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
 150#define MAX_ADDED_COUNTERS 16
 151
 152struct thread_data {
 153        struct timeval tv_begin;
 154        struct timeval tv_end;
 155        unsigned long long tsc;
 156        unsigned long long aperf;
 157        unsigned long long mperf;
 158        unsigned long long c1;
 159        unsigned long long  irq_count;
 160        unsigned int smi_count;
 161        unsigned int cpu_id;
 162        unsigned int flags;
 163#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
 164#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
 165        unsigned long long counter[MAX_ADDED_COUNTERS];
 166} *thread_even, *thread_odd;
 167
 168struct core_data {
 169        unsigned long long c3;
 170        unsigned long long c6;
 171        unsigned long long c7;
 172        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
 173        unsigned int core_temp_c;
 174        unsigned int core_id;
 175        unsigned long long counter[MAX_ADDED_COUNTERS];
 176} *core_even, *core_odd;
 177
 178struct pkg_data {
 179        unsigned long long pc2;
 180        unsigned long long pc3;
 181        unsigned long long pc6;
 182        unsigned long long pc7;
 183        unsigned long long pc8;
 184        unsigned long long pc9;
 185        unsigned long long pc10;
 186        unsigned long long pkg_wtd_core_c0;
 187        unsigned long long pkg_any_core_c0;
 188        unsigned long long pkg_any_gfxe_c0;
 189        unsigned long long pkg_both_core_gfxe_c0;
 190        long long gfx_rc6_ms;
 191        unsigned int gfx_mhz;
 192        unsigned int package_id;
 193        unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
 194        unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
 195        unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
 196        unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
 197        unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
 198        unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
 199        unsigned int pkg_temp_c;
 200        unsigned long long counter[MAX_ADDED_COUNTERS];
 201} *package_even, *package_odd;
 202
 203#define ODD_COUNTERS thread_odd, core_odd, package_odd
 204#define EVEN_COUNTERS thread_even, core_even, package_even
 205
 206#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
 207        (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
 208                topo.num_threads_per_core + \
 209                (core_no) * topo.num_threads_per_core + (thread_no))
 210#define GET_CORE(core_base, core_no, pkg_no) \
 211        (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
 212#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 213
 214enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
 215enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
 216enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
 217
 218struct msr_counter {
 219        unsigned int msr_num;
 220        char name[NAME_BYTES];
 221        char path[PATH_BYTES];
 222        unsigned int width;
 223        enum counter_type type;
 224        enum counter_format format;
 225        struct msr_counter *next;
 226        unsigned int flags;
 227#define FLAGS_HIDE      (1 << 0)
 228#define FLAGS_SHOW      (1 << 1)
 229#define SYSFS_PERCPU    (1 << 1)
 230};
 231
 232struct sys_counters {
 233        unsigned int added_thread_counters;
 234        unsigned int added_core_counters;
 235        unsigned int added_package_counters;
 236        struct msr_counter *tp;
 237        struct msr_counter *cp;
 238        struct msr_counter *pp;
 239} sys;
 240
 241struct system_summary {
 242        struct thread_data threads;
 243        struct core_data cores;
 244        struct pkg_data packages;
 245} average;
 246
 247
 248struct topo_params {
 249        int num_packages;
 250        int num_cpus;
 251        int num_cores;
 252        int max_cpu_num;
 253        int num_cores_per_pkg;
 254        int num_threads_per_core;
 255} topo;
 256
 257struct timeval tv_even, tv_odd, tv_delta;
 258
 259int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
 260int *irqs_per_cpu;              /* indexed by cpu_num */
 261
 262void setup_all_buffers(void);
 263
 264int cpu_is_not_present(int cpu)
 265{
 266        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 267}
 268/*
 269 * run func(thread, core, package) in topology order
 270 * skip non-present cpus
 271 */
 272
 273int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
 274        struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 275{
 276        int retval, pkg_no, core_no, thread_no;
 277
 278        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 279                for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
 280                        for (thread_no = 0; thread_no <
 281                                topo.num_threads_per_core; ++thread_no) {
 282                                struct thread_data *t;
 283                                struct core_data *c;
 284                                struct pkg_data *p;
 285
 286                                t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
 287
 288                                if (cpu_is_not_present(t->cpu_id))
 289                                        continue;
 290
 291                                c = GET_CORE(core_base, core_no, pkg_no);
 292                                p = GET_PKG(pkg_base, pkg_no);
 293
 294                                retval = func(t, c, p);
 295                                if (retval)
 296                                        return retval;
 297                        }
 298                }
 299        }
 300        return 0;
 301}
 302
 303int cpu_migrate(int cpu)
 304{
 305        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 306        CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
 307        if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
 308                return -1;
 309        else
 310                return 0;
 311}
 312int get_msr_fd(int cpu)
 313{
 314        char pathname[32];
 315        int fd;
 316
 317        fd = fd_percpu[cpu];
 318
 319        if (fd)
 320                return fd;
 321
 322        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
 323        fd = open(pathname, O_RDONLY);
 324        if (fd < 0)
 325                err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
 326
 327        fd_percpu[cpu] = fd;
 328
 329        return fd;
 330}
 331
 332int get_msr(int cpu, off_t offset, unsigned long long *msr)
 333{
 334        ssize_t retval;
 335
 336        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 337
 338        if (retval != sizeof *msr)
 339                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
 340
 341        return 0;
 342}
 343
 344/*
 345 * Each string in this array is compared in --show and --hide cmdline.
 346 * Thus, strings that are proper sub-sets must follow their more specific peers.
 347 */
 348struct msr_counter bic[] = {
 349        { 0x0, "Package" },
 350        { 0x0, "Avg_MHz" },
 351        { 0x0, "Bzy_MHz" },
 352        { 0x0, "TSC_MHz" },
 353        { 0x0, "IRQ" },
 354        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
 355        { 0x0, "Busy%" },
 356        { 0x0, "CPU%c1" },
 357        { 0x0, "CPU%c3" },
 358        { 0x0, "CPU%c6" },
 359        { 0x0, "CPU%c7" },
 360        { 0x0, "ThreadC" },
 361        { 0x0, "CoreTmp" },
 362        { 0x0, "CoreCnt" },
 363        { 0x0, "PkgTmp" },
 364        { 0x0, "GFX%rc6" },
 365        { 0x0, "GFXMHz" },
 366        { 0x0, "Pkg%pc2" },
 367        { 0x0, "Pkg%pc3" },
 368        { 0x0, "Pkg%pc6" },
 369        { 0x0, "Pkg%pc7" },
 370        { 0x0, "Pkg%pc8" },
 371        { 0x0, "Pkg%pc9" },
 372        { 0x0, "Pkg%pc10" },
 373        { 0x0, "PkgWatt" },
 374        { 0x0, "CorWatt" },
 375        { 0x0, "GFXWatt" },
 376        { 0x0, "PkgCnt" },
 377        { 0x0, "RAMWatt" },
 378        { 0x0, "PKG_%" },
 379        { 0x0, "RAM_%" },
 380        { 0x0, "Pkg_J" },
 381        { 0x0, "Cor_J" },
 382        { 0x0, "GFX_J" },
 383        { 0x0, "RAM_J" },
 384        { 0x0, "Core" },
 385        { 0x0, "CPU" },
 386        { 0x0, "Mod%c6" },
 387        { 0x0, "sysfs" },
 388        { 0x0, "Totl%C0" },
 389        { 0x0, "Any%C0" },
 390        { 0x0, "GFX%C0" },
 391        { 0x0, "CPUGFX%" },
 392};
 393
 394
 395
 396#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 397#define BIC_Package     (1ULL << 0)
 398#define BIC_Avg_MHz     (1ULL << 1)
 399#define BIC_Bzy_MHz     (1ULL << 2)
 400#define BIC_TSC_MHz     (1ULL << 3)
 401#define BIC_IRQ         (1ULL << 4)
 402#define BIC_SMI         (1ULL << 5)
 403#define BIC_Busy        (1ULL << 6)
 404#define BIC_CPU_c1      (1ULL << 7)
 405#define BIC_CPU_c3      (1ULL << 8)
 406#define BIC_CPU_c6      (1ULL << 9)
 407#define BIC_CPU_c7      (1ULL << 10)
 408#define BIC_ThreadC     (1ULL << 11)
 409#define BIC_CoreTmp     (1ULL << 12)
 410#define BIC_CoreCnt     (1ULL << 13)
 411#define BIC_PkgTmp      (1ULL << 14)
 412#define BIC_GFX_rc6     (1ULL << 15)
 413#define BIC_GFXMHz      (1ULL << 16)
 414#define BIC_Pkgpc2      (1ULL << 17)
 415#define BIC_Pkgpc3      (1ULL << 18)
 416#define BIC_Pkgpc6      (1ULL << 19)
 417#define BIC_Pkgpc7      (1ULL << 20)
 418#define BIC_Pkgpc8      (1ULL << 21)
 419#define BIC_Pkgpc9      (1ULL << 22)
 420#define BIC_Pkgpc10     (1ULL << 23)
 421#define BIC_PkgWatt     (1ULL << 24)
 422#define BIC_CorWatt     (1ULL << 25)
 423#define BIC_GFXWatt     (1ULL << 26)
 424#define BIC_PkgCnt      (1ULL << 27)
 425#define BIC_RAMWatt     (1ULL << 28)
 426#define BIC_PKG__       (1ULL << 29)
 427#define BIC_RAM__       (1ULL << 30)
 428#define BIC_Pkg_J       (1ULL << 31)
 429#define BIC_Cor_J       (1ULL << 32)
 430#define BIC_GFX_J       (1ULL << 33)
 431#define BIC_RAM_J       (1ULL << 34)
 432#define BIC_Core        (1ULL << 35)
 433#define BIC_CPU         (1ULL << 36)
 434#define BIC_Mod_c6      (1ULL << 37)
 435#define BIC_sysfs       (1ULL << 38)
 436#define BIC_Totl_c0     (1ULL << 39)
 437#define BIC_Any_c0      (1ULL << 40)
 438#define BIC_GFX_c0      (1ULL << 41)
 439#define BIC_CPUGFX      (1ULL << 42)
 440
 441unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
 442unsigned long long bic_present = BIC_sysfs;
 443
 444#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
 445#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
 446#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 447
 448#define MAX_DEFERRED 16
 449char *deferred_skip_names[MAX_DEFERRED];
 450int deferred_skip_index;
 451
 452/*
 453 * HIDE_LIST - hide this list of counters, show the rest [default]
 454 * SHOW_LIST - show this list of counters, hide the rest
 455 */
 456enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
 457
 458void help(void)
 459{
 460        fprintf(outf,
 461        "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
 462        "\n"
 463        "Turbostat forks the specified COMMAND and prints statistics\n"
 464        "when COMMAND completes.\n"
 465        "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 466        "to print statistics, until interrupted.\n"
 467        "--add          add a counter\n"
 468        "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 469        "--cpu  cpu-set limit output to summary plus cpu-set:\n"
 470        "               {core | package | j,k,l..m,n-p }\n"
 471        "--quiet        skip decoding system configuration header\n"
 472        "--interval sec Override default 5-second measurement interval\n"
 473        "--help         print this help message\n"
 474        "--list         list column headers only\n"
 475        "--out file     create or truncate \"file\" for all output\n"
 476        "--version      print version information\n"
 477        "\n"
 478        "For more help, run \"man turbostat\"\n");
 479}
 480
 481/*
 482 * bic_lookup
 483 * for all the strings in comma separate name_list,
 484 * set the approprate bit in return value.
 485 */
 486unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
 487{
 488        int i;
 489        unsigned long long retval = 0;
 490
 491        while (name_list) {
 492                char *comma;
 493
 494                comma = strchr(name_list, ',');
 495
 496                if (comma)
 497                        *comma = '\0';
 498
 499                for (i = 0; i < MAX_BIC; ++i) {
 500                        if (!strcmp(name_list, bic[i].name)) {
 501                                retval |= (1ULL << i);
 502                                break;
 503                        }
 504                }
 505                if (i == MAX_BIC) {
 506                        if (mode == SHOW_LIST) {
 507                                fprintf(stderr, "Invalid counter name: %s\n", name_list);
 508                                exit(-1);
 509                        }
 510                        deferred_skip_names[deferred_skip_index++] = name_list;
 511                        if (debug)
 512                                fprintf(stderr, "deferred \"%s\"\n", name_list);
 513                        if (deferred_skip_index >= MAX_DEFERRED) {
 514                                fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
 515                                        MAX_DEFERRED, name_list);
 516                                help();
 517                                exit(1);
 518                        }
 519                }
 520
 521                name_list = comma;
 522                if (name_list)
 523                        name_list++;
 524
 525        }
 526        return retval;
 527}
 528
 529
 530void print_header(char *delim)
 531{
 532        struct msr_counter *mp;
 533        int printed = 0;
 534
 535        if (debug)
 536                outp += sprintf(outp, "usec %s", delim);
 537        if (DO_BIC(BIC_Package))
 538                outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
 539        if (DO_BIC(BIC_Core))
 540                outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
 541        if (DO_BIC(BIC_CPU))
 542                outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
 543        if (DO_BIC(BIC_Avg_MHz))
 544                outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
 545        if (DO_BIC(BIC_Busy))
 546                outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
 547        if (DO_BIC(BIC_Bzy_MHz))
 548                outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
 549        if (DO_BIC(BIC_TSC_MHz))
 550                outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
 551
 552        if (DO_BIC(BIC_IRQ)) {
 553                if (sums_need_wide_columns)
 554                        outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
 555                else
 556                        outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
 557        }
 558
 559        if (DO_BIC(BIC_SMI))
 560                outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
 561
 562        for (mp = sys.tp; mp; mp = mp->next) {
 563
 564                if (mp->format == FORMAT_RAW) {
 565                        if (mp->width == 64)
 566                                outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
 567                        else
 568                                outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
 569                } else {
 570                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 571                                outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
 572                        else
 573                                outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
 574                }
 575        }
 576
 577        if (DO_BIC(BIC_CPU_c1))
 578                outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
 579        if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
 580                outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
 581        if (DO_BIC(BIC_CPU_c6))
 582                outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
 583        if (DO_BIC(BIC_CPU_c7))
 584                outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
 585
 586        if (DO_BIC(BIC_Mod_c6))
 587                outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
 588
 589        if (DO_BIC(BIC_CoreTmp))
 590                outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
 591
 592        for (mp = sys.cp; mp; mp = mp->next) {
 593                if (mp->format == FORMAT_RAW) {
 594                        if (mp->width == 64)
 595                                outp += sprintf(outp, "%s%18.18s", delim, mp->name);
 596                        else
 597                                outp += sprintf(outp, "%s%10.10s", delim, mp->name);
 598                } else {
 599                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 600                                outp += sprintf(outp, "%s%8s", delim, mp->name);
 601                        else
 602                                outp += sprintf(outp, "%s%s", delim, mp->name);
 603                }
 604        }
 605
 606        if (DO_BIC(BIC_PkgTmp))
 607                outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
 608
 609        if (DO_BIC(BIC_GFX_rc6))
 610                outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
 611
 612        if (DO_BIC(BIC_GFXMHz))
 613                outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
 614
 615        if (DO_BIC(BIC_Totl_c0))
 616                outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
 617        if (DO_BIC(BIC_Any_c0))
 618                outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
 619        if (DO_BIC(BIC_GFX_c0))
 620                outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
 621        if (DO_BIC(BIC_CPUGFX))
 622                outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
 623
 624        if (DO_BIC(BIC_Pkgpc2))
 625                outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
 626        if (DO_BIC(BIC_Pkgpc3))
 627                outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
 628        if (DO_BIC(BIC_Pkgpc6))
 629                outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
 630        if (DO_BIC(BIC_Pkgpc7))
 631                outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
 632        if (DO_BIC(BIC_Pkgpc8))
 633                outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
 634        if (DO_BIC(BIC_Pkgpc9))
 635                outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
 636        if (DO_BIC(BIC_Pkgpc10))
 637                outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
 638
 639        if (do_rapl && !rapl_joules) {
 640                if (DO_BIC(BIC_PkgWatt))
 641                        outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
 642                if (DO_BIC(BIC_CorWatt))
 643                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
 644                if (DO_BIC(BIC_GFXWatt))
 645                        outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
 646                if (DO_BIC(BIC_RAMWatt))
 647                        outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
 648                if (DO_BIC(BIC_PKG__))
 649                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
 650                if (DO_BIC(BIC_RAM__))
 651                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
 652        } else if (do_rapl && rapl_joules) {
 653                if (DO_BIC(BIC_Pkg_J))
 654                        outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
 655                if (DO_BIC(BIC_Cor_J))
 656                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
 657                if (DO_BIC(BIC_GFX_J))
 658                        outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
 659                if (DO_BIC(BIC_RAM_J))
 660                        outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
 661                if (DO_BIC(BIC_PKG__))
 662                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
 663                if (DO_BIC(BIC_RAM__))
 664                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
 665        }
 666        for (mp = sys.pp; mp; mp = mp->next) {
 667                if (mp->format == FORMAT_RAW) {
 668                        if (mp->width == 64)
 669                                outp += sprintf(outp, "%s%18.18s", delim, mp->name);
 670                        else
 671                                outp += sprintf(outp, "%s%10.10s", delim, mp->name);
 672                } else {
 673                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 674                                outp += sprintf(outp, "%s%8s", delim, mp->name);
 675                        else
 676                                outp += sprintf(outp, "%s%s", delim, mp->name);
 677                }
 678        }
 679
 680        outp += sprintf(outp, "\n");
 681}
 682
 683int dump_counters(struct thread_data *t, struct core_data *c,
 684        struct pkg_data *p)
 685{
 686        int i;
 687        struct msr_counter *mp;
 688
 689        outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
 690
 691        if (t) {
 692                outp += sprintf(outp, "CPU: %d flags 0x%x\n",
 693                        t->cpu_id, t->flags);
 694                outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
 695                outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
 696                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
 697                outp += sprintf(outp, "c1: %016llX\n", t->c1);
 698
 699                if (DO_BIC(BIC_IRQ))
 700                        outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
 701                if (DO_BIC(BIC_SMI))
 702                        outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 703
 704                for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
 705                        outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
 706                                i, mp->msr_num, t->counter[i]);
 707                }
 708        }
 709
 710        if (c) {
 711                outp += sprintf(outp, "core: %d\n", c->core_id);
 712                outp += sprintf(outp, "c3: %016llX\n", c->c3);
 713                outp += sprintf(outp, "c6: %016llX\n", c->c6);
 714                outp += sprintf(outp, "c7: %016llX\n", c->c7);
 715                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
 716
 717                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
 718                        outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
 719                                i, mp->msr_num, c->counter[i]);
 720                }
 721                outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
 722        }
 723
 724        if (p) {
 725                outp += sprintf(outp, "package: %d\n", p->package_id);
 726
 727                outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
 728                outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
 729                outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
 730                outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
 731
 732                outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
 733                if (DO_BIC(BIC_Pkgpc3))
 734                        outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
 735                if (DO_BIC(BIC_Pkgpc6))
 736                        outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
 737                if (DO_BIC(BIC_Pkgpc7))
 738                        outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
 739                outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
 740                outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
 741                outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
 742                outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
 743                outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
 744                outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
 745                outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
 746                outp += sprintf(outp, "Throttle PKG: %0X\n",
 747                        p->rapl_pkg_perf_status);
 748                outp += sprintf(outp, "Throttle RAM: %0X\n",
 749                        p->rapl_dram_perf_status);
 750                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 751
 752                for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
 753                        outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
 754                                i, mp->msr_num, p->counter[i]);
 755                }
 756        }
 757
 758        outp += sprintf(outp, "\n");
 759
 760        return 0;
 761}
 762
 763/*
 764 * column formatting convention & formats
 765 */
 766int format_counters(struct thread_data *t, struct core_data *c,
 767        struct pkg_data *p)
 768{
 769        double interval_float, tsc;
 770        char *fmt8;
 771        int i;
 772        struct msr_counter *mp;
 773        char *delim = "\t";
 774        int printed = 0;
 775
 776         /* if showing only 1st thread in core and this isn't one, bail out */
 777        if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 778                return 0;
 779
 780         /* if showing only 1st thread in pkg and this isn't one, bail out */
 781        if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 782                return 0;
 783
 784        /*if not summary line and --cpu is used */
 785        if ((t != &average.threads) &&
 786                (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
 787                return 0;
 788
 789        if (debug) {
 790                /* on each row, print how many usec each timestamp took to gather */
 791                struct timeval tv;
 792
 793                timersub(&t->tv_end, &t->tv_begin, &tv);
 794                outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
 795        }
 796
 797        interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 798
 799        tsc = t->tsc * tsc_tweak;
 800
 801        /* topo columns, print blanks on 1st (average) line */
 802        if (t == &average.threads) {
 803                if (DO_BIC(BIC_Package))
 804                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 805                if (DO_BIC(BIC_Core))
 806                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 807                if (DO_BIC(BIC_CPU))
 808                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 809        } else {
 810                if (DO_BIC(BIC_Package)) {
 811                        if (p)
 812                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
 813                        else
 814                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 815                }
 816                if (DO_BIC(BIC_Core)) {
 817                        if (c)
 818                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
 819                        else
 820                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 821                }
 822                if (DO_BIC(BIC_CPU))
 823                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
 824        }
 825
 826        if (DO_BIC(BIC_Avg_MHz))
 827                outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
 828                        1.0 / units * t->aperf / interval_float);
 829
 830        if (DO_BIC(BIC_Busy))
 831                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
 832
 833        if (DO_BIC(BIC_Bzy_MHz)) {
 834                if (has_base_hz)
 835                        outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
 836                else
 837                        outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
 838                                tsc / units * t->aperf / t->mperf / interval_float);
 839        }
 840
 841        if (DO_BIC(BIC_TSC_MHz))
 842                outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
 843
 844        /* IRQ */
 845        if (DO_BIC(BIC_IRQ)) {
 846                if (sums_need_wide_columns)
 847                        outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
 848                else
 849                        outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
 850        }
 851
 852        /* SMI */
 853        if (DO_BIC(BIC_SMI))
 854                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
 855
 856        /* Added counters */
 857        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
 858                if (mp->format == FORMAT_RAW) {
 859                        if (mp->width == 32)
 860                                outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
 861                        else
 862                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
 863                } else if (mp->format == FORMAT_DELTA) {
 864                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 865                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
 866                        else
 867                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
 868                } else if (mp->format == FORMAT_PERCENT) {
 869                        if (mp->type == COUNTER_USEC)
 870                                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
 871                        else
 872                                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
 873                }
 874        }
 875
 876        /* C1 */
 877        if (DO_BIC(BIC_CPU_c1))
 878                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
 879
 880
 881        /* print per-core data only for 1st thread in core */
 882        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 883                goto done;
 884
 885        if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
 886                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
 887        if (DO_BIC(BIC_CPU_c6))
 888                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
 889        if (DO_BIC(BIC_CPU_c7))
 890                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
 891
 892        /* Mod%c6 */
 893        if (DO_BIC(BIC_Mod_c6))
 894                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
 895
 896        if (DO_BIC(BIC_CoreTmp))
 897                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
 898
 899        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
 900                if (mp->format == FORMAT_RAW) {
 901                        if (mp->width == 32)
 902                                outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
 903                        else
 904                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
 905                } else if (mp->format == FORMAT_DELTA) {
 906                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 907                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
 908                        else
 909                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
 910                } else if (mp->format == FORMAT_PERCENT) {
 911                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
 912                }
 913        }
 914
 915        /* print per-package data only for 1st core in package */
 916        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 917                goto done;
 918
 919        /* PkgTmp */
 920        if (DO_BIC(BIC_PkgTmp))
 921                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
 922
 923        /* GFXrc6 */
 924        if (DO_BIC(BIC_GFX_rc6)) {
 925                if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
 926                        outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
 927                } else {
 928                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
 929                                p->gfx_rc6_ms / 10.0 / interval_float);
 930                }
 931        }
 932
 933        /* GFXMHz */
 934        if (DO_BIC(BIC_GFXMHz))
 935                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
 936
 937        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
 938        if (DO_BIC(BIC_Totl_c0))
 939                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
 940        if (DO_BIC(BIC_Any_c0))
 941                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
 942        if (DO_BIC(BIC_GFX_c0))
 943                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
 944        if (DO_BIC(BIC_CPUGFX))
 945                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
 946
 947        if (DO_BIC(BIC_Pkgpc2))
 948                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
 949        if (DO_BIC(BIC_Pkgpc3))
 950                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
 951        if (DO_BIC(BIC_Pkgpc6))
 952                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
 953        if (DO_BIC(BIC_Pkgpc7))
 954                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
 955        if (DO_BIC(BIC_Pkgpc8))
 956                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
 957        if (DO_BIC(BIC_Pkgpc9))
 958                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
 959        if (DO_BIC(BIC_Pkgpc10))
 960                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
 961
 962        /*
 963         * If measurement interval exceeds minimum RAPL Joule Counter range,
 964         * indicate that results are suspect by printing "**" in fraction place.
 965         */
 966        if (interval_float < rapl_joule_counter_range)
 967                fmt8 = "%s%.2f";
 968        else
 969                fmt8 = "%6.0f**";
 970
 971        if (DO_BIC(BIC_PkgWatt))
 972                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
 973        if (DO_BIC(BIC_CorWatt))
 974                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
 975        if (DO_BIC(BIC_GFXWatt))
 976                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
 977        if (DO_BIC(BIC_RAMWatt))
 978                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
 979        if (DO_BIC(BIC_Pkg_J))
 980                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
 981        if (DO_BIC(BIC_Cor_J))
 982                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
 983        if (DO_BIC(BIC_GFX_J))
 984                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
 985        if (DO_BIC(BIC_RAM_J))
 986                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
 987        if (DO_BIC(BIC_PKG__))
 988                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 989        if (DO_BIC(BIC_RAM__))
 990                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
 991
 992        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
 993                if (mp->format == FORMAT_RAW) {
 994                        if (mp->width == 32)
 995                                outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
 996                        else
 997                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
 998                } else if (mp->format == FORMAT_DELTA) {
 999                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1000                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1001                        else
1002                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1003                } else if (mp->format == FORMAT_PERCENT) {
1004                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1005                }
1006        }
1007
1008done:
1009        outp += sprintf(outp, "\n");
1010
1011        return 0;
1012}
1013
1014void flush_output_stdout(void)
1015{
1016        FILE *filep;
1017
1018        if (outf == stderr)
1019                filep = stdout;
1020        else
1021                filep = outf;
1022
1023        fputs(output_buffer, filep);
1024        fflush(filep);
1025
1026        outp = output_buffer;
1027}
1028void flush_output_stderr(void)
1029{
1030        fputs(output_buffer, outf);
1031        fflush(outf);
1032        outp = output_buffer;
1033}
1034void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1035{
1036        static int printed;
1037
1038        if (!printed || !summary_only)
1039                print_header("\t");
1040
1041        if (topo.num_cpus > 1)
1042                format_counters(&average.threads, &average.cores,
1043                        &average.packages);
1044
1045        printed = 1;
1046
1047        if (summary_only)
1048                return;
1049
1050        for_all_cpus(format_counters, t, c, p);
1051}
1052
/*
 * DELTA_WRAP32(new, old)
 *
 * Replace 'old' with the interval delta (new - old) of a counter that
 * wraps at 2^32.  If 'new' is below 'old' the counter is assumed to have
 * wrapped exactly once.  An unchanged counter (new == old) yields 0.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement, e.g.
 * inside an unbraced if/else.  Both arguments are evaluated more than
 * once, so they must be free of side effects; 'old' must be an lvalue.
 */
#define DELTA_WRAP32(new, old)						\
	do {								\
		if ((new) >= (old))					\
			(old) = (new) - (old);				\
		else							\
			(old) = 0x100000000ULL + (new) - (old);		\
	} while (0)
1059
1060int
1061delta_package(struct pkg_data *new, struct pkg_data *old)
1062{
1063        int i;
1064        struct msr_counter *mp;
1065
1066
1067        if (DO_BIC(BIC_Totl_c0))
1068                old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1069        if (DO_BIC(BIC_Any_c0))
1070                old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1071        if (DO_BIC(BIC_GFX_c0))
1072                old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1073        if (DO_BIC(BIC_CPUGFX))
1074                old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1075
1076        old->pc2 = new->pc2 - old->pc2;
1077        if (DO_BIC(BIC_Pkgpc3))
1078                old->pc3 = new->pc3 - old->pc3;
1079        if (DO_BIC(BIC_Pkgpc6))
1080                old->pc6 = new->pc6 - old->pc6;
1081        if (DO_BIC(BIC_Pkgpc7))
1082                old->pc7 = new->pc7 - old->pc7;
1083        old->pc8 = new->pc8 - old->pc8;
1084        old->pc9 = new->pc9 - old->pc9;
1085        old->pc10 = new->pc10 - old->pc10;
1086        old->pkg_temp_c = new->pkg_temp_c;
1087
1088        /* flag an error when rc6 counter resets/wraps */
1089        if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1090                old->gfx_rc6_ms = -1;
1091        else
1092                old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1093
1094        old->gfx_mhz = new->gfx_mhz;
1095
1096        DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1097        DELTA_WRAP32(new->energy_cores, old->energy_cores);
1098        DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1099        DELTA_WRAP32(new->energy_dram, old->energy_dram);
1100        DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1101        DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1102
1103        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1104                if (mp->format == FORMAT_RAW)
1105                        old->counter[i] = new->counter[i];
1106                else
1107                        old->counter[i] = new->counter[i] - old->counter[i];
1108        }
1109
1110        return 0;
1111}
1112
1113void
1114delta_core(struct core_data *new, struct core_data *old)
1115{
1116        int i;
1117        struct msr_counter *mp;
1118
1119        old->c3 = new->c3 - old->c3;
1120        old->c6 = new->c6 - old->c6;
1121        old->c7 = new->c7 - old->c7;
1122        old->core_temp_c = new->core_temp_c;
1123        old->mc6_us = new->mc6_us - old->mc6_us;
1124
1125        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1126                if (mp->format == FORMAT_RAW)
1127                        old->counter[i] = new->counter[i];
1128                else
1129                        old->counter[i] = new->counter[i] - old->counter[i];
1130        }
1131}
1132
1133/*
1134 * old = new - old
1135 */
1136int
1137delta_thread(struct thread_data *new, struct thread_data *old,
1138        struct core_data *core_delta)
1139{
1140        int i;
1141        struct msr_counter *mp;
1142
1143        old->tsc = new->tsc - old->tsc;
1144
1145        /* check for TSC < 1 Mcycles over interval */
1146        if (old->tsc < (1000 * 1000))
1147                errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1148                     "You can disable all c-states by booting with \"idle=poll\"\n"
1149                     "or just the deep ones with \"processor.max_cstate=1\"");
1150
1151        old->c1 = new->c1 - old->c1;
1152
1153        if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1154                if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1155                        old->aperf = new->aperf - old->aperf;
1156                        old->mperf = new->mperf - old->mperf;
1157                } else {
1158                        return -1;
1159                }
1160        }
1161
1162
1163        if (use_c1_residency_msr) {
1164                /*
1165                 * Some models have a dedicated C1 residency MSR,
1166                 * which should be more accurate than the derivation below.
1167                 */
1168        } else {
1169                /*
1170                 * As counter collection is not atomic,
1171                 * it is possible for mperf's non-halted cycles + idle states
1172                 * to exceed TSC's all cycles: show c1 = 0% in that case.
1173                 */
1174                if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1175                        old->c1 = 0;
1176                else {
1177                        /* normal case, derive c1 */
1178                        old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1179                                - core_delta->c6 - core_delta->c7;
1180                }
1181        }
1182
1183        if (old->mperf == 0) {
1184                if (debug > 1)
1185                        fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1186                old->mperf = 1; /* divide by 0 protection */
1187        }
1188
1189        if (DO_BIC(BIC_IRQ))
1190                old->irq_count = new->irq_count - old->irq_count;
1191
1192        if (DO_BIC(BIC_SMI))
1193                old->smi_count = new->smi_count - old->smi_count;
1194
1195        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1196                if (mp->format == FORMAT_RAW)
1197                        old->counter[i] = new->counter[i];
1198                else
1199                        old->counter[i] = new->counter[i] - old->counter[i];
1200        }
1201        return 0;
1202}
1203
1204int delta_cpu(struct thread_data *t, struct core_data *c,
1205        struct pkg_data *p, struct thread_data *t2,
1206        struct core_data *c2, struct pkg_data *p2)
1207{
1208        int retval = 0;
1209
1210        /* calculate core delta only for 1st thread in core */
1211        if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1212                delta_core(c, c2);
1213
1214        /* always calculate thread delta */
1215        retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1216        if (retval)
1217                return retval;
1218
1219        /* calculate package delta only for 1st core in package */
1220        if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1221                retval = delta_package(p, p2);
1222
1223        return retval;
1224}
1225
1226void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1227{
1228        int i;
1229        struct msr_counter  *mp;
1230
1231        t->tsc = 0;
1232        t->aperf = 0;
1233        t->mperf = 0;
1234        t->c1 = 0;
1235
1236        t->irq_count = 0;
1237        t->smi_count = 0;
1238
1239        /* tells format_counters to dump all fields from this set */
1240        t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1241
1242        c->c3 = 0;
1243        c->c6 = 0;
1244        c->c7 = 0;
1245        c->mc6_us = 0;
1246        c->core_temp_c = 0;
1247
1248        p->pkg_wtd_core_c0 = 0;
1249        p->pkg_any_core_c0 = 0;
1250        p->pkg_any_gfxe_c0 = 0;
1251        p->pkg_both_core_gfxe_c0 = 0;
1252
1253        p->pc2 = 0;
1254        if (DO_BIC(BIC_Pkgpc3))
1255                p->pc3 = 0;
1256        if (DO_BIC(BIC_Pkgpc6))
1257                p->pc6 = 0;
1258        if (DO_BIC(BIC_Pkgpc7))
1259                p->pc7 = 0;
1260        p->pc8 = 0;
1261        p->pc9 = 0;
1262        p->pc10 = 0;
1263
1264        p->energy_pkg = 0;
1265        p->energy_dram = 0;
1266        p->energy_cores = 0;
1267        p->energy_gfx = 0;
1268        p->rapl_pkg_perf_status = 0;
1269        p->rapl_dram_perf_status = 0;
1270        p->pkg_temp_c = 0;
1271
1272        p->gfx_rc6_ms = 0;
1273        p->gfx_mhz = 0;
1274        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1275                t->counter[i] = 0;
1276
1277        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1278                c->counter[i] = 0;
1279
1280        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1281                p->counter[i] = 0;
1282}
1283int sum_counters(struct thread_data *t, struct core_data *c,
1284        struct pkg_data *p)
1285{
1286        int i;
1287        struct msr_counter *mp;
1288
1289        average.threads.tsc += t->tsc;
1290        average.threads.aperf += t->aperf;
1291        average.threads.mperf += t->mperf;
1292        average.threads.c1 += t->c1;
1293
1294        average.threads.irq_count += t->irq_count;
1295        average.threads.smi_count += t->smi_count;
1296
1297        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1298                if (mp->format == FORMAT_RAW)
1299                        continue;
1300                average.threads.counter[i] += t->counter[i];
1301        }
1302
1303        /* sum per-core values only for 1st thread in core */
1304        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1305                return 0;
1306
1307        average.cores.c3 += c->c3;
1308        average.cores.c6 += c->c6;
1309        average.cores.c7 += c->c7;
1310        average.cores.mc6_us += c->mc6_us;
1311
1312        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1313
1314        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1315                if (mp->format == FORMAT_RAW)
1316                        continue;
1317                average.cores.counter[i] += c->counter[i];
1318        }
1319
1320        /* sum per-pkg values only for 1st core in pkg */
1321        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1322                return 0;
1323
1324        if (DO_BIC(BIC_Totl_c0))
1325                average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1326        if (DO_BIC(BIC_Any_c0))
1327                average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1328        if (DO_BIC(BIC_GFX_c0))
1329                average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1330        if (DO_BIC(BIC_CPUGFX))
1331                average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1332
1333        average.packages.pc2 += p->pc2;
1334        if (DO_BIC(BIC_Pkgpc3))
1335                average.packages.pc3 += p->pc3;
1336        if (DO_BIC(BIC_Pkgpc6))
1337                average.packages.pc6 += p->pc6;
1338        if (DO_BIC(BIC_Pkgpc7))
1339                average.packages.pc7 += p->pc7;
1340        average.packages.pc8 += p->pc8;
1341        average.packages.pc9 += p->pc9;
1342        average.packages.pc10 += p->pc10;
1343
1344        average.packages.energy_pkg += p->energy_pkg;
1345        average.packages.energy_dram += p->energy_dram;
1346        average.packages.energy_cores += p->energy_cores;
1347        average.packages.energy_gfx += p->energy_gfx;
1348
1349        average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1350        average.packages.gfx_mhz = p->gfx_mhz;
1351
1352        average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1353
1354        average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1355        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1356
1357        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1358                if (mp->format == FORMAT_RAW)
1359                        continue;
1360                average.packages.counter[i] += p->counter[i];
1361        }
1362        return 0;
1363}
1364/*
1365 * sum the counters for all cpus in the system
1366 * compute the weighted average
1367 */
1368void compute_average(struct thread_data *t, struct core_data *c,
1369        struct pkg_data *p)
1370{
1371        int i;
1372        struct msr_counter *mp;
1373
1374        clear_counters(&average.threads, &average.cores, &average.packages);
1375
1376        for_all_cpus(sum_counters, t, c, p);
1377
1378        average.threads.tsc /= topo.num_cpus;
1379        average.threads.aperf /= topo.num_cpus;
1380        average.threads.mperf /= topo.num_cpus;
1381        average.threads.c1 /= topo.num_cpus;
1382
1383        if (average.threads.irq_count > 9999999)
1384                sums_need_wide_columns = 1;
1385
1386        average.cores.c3 /= topo.num_cores;
1387        average.cores.c6 /= topo.num_cores;
1388        average.cores.c7 /= topo.num_cores;
1389        average.cores.mc6_us /= topo.num_cores;
1390
1391        if (DO_BIC(BIC_Totl_c0))
1392                average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1393        if (DO_BIC(BIC_Any_c0))
1394                average.packages.pkg_any_core_c0 /= topo.num_packages;
1395        if (DO_BIC(BIC_GFX_c0))
1396                average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1397        if (DO_BIC(BIC_CPUGFX))
1398                average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1399
1400        average.packages.pc2 /= topo.num_packages;
1401        if (DO_BIC(BIC_Pkgpc3))
1402                average.packages.pc3 /= topo.num_packages;
1403        if (DO_BIC(BIC_Pkgpc6))
1404                average.packages.pc6 /= topo.num_packages;
1405        if (DO_BIC(BIC_Pkgpc7))
1406                average.packages.pc7 /= topo.num_packages;
1407
1408        average.packages.pc8 /= topo.num_packages;
1409        average.packages.pc9 /= topo.num_packages;
1410        average.packages.pc10 /= topo.num_packages;
1411
1412        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1413                if (mp->format == FORMAT_RAW)
1414                        continue;
1415                if (mp->type == COUNTER_ITEMS) {
1416                        if (average.threads.counter[i] > 9999999)
1417                                sums_need_wide_columns = 1;
1418                        continue;
1419                }
1420                average.threads.counter[i] /= topo.num_cpus;
1421        }
1422        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1423                if (mp->format == FORMAT_RAW)
1424                        continue;
1425                if (mp->type == COUNTER_ITEMS) {
1426                        if (average.cores.counter[i] > 9999999)
1427                                sums_need_wide_columns = 1;
1428                }
1429                average.cores.counter[i] /= topo.num_cores;
1430        }
1431        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1432                if (mp->format == FORMAT_RAW)
1433                        continue;
1434                if (mp->type == COUNTER_ITEMS) {
1435                        if (average.packages.counter[i] > 9999999)
1436                                sums_need_wide_columns = 1;
1437                }
1438                average.packages.counter[i] /= topo.num_packages;
1439        }
1440}
1441
1442static unsigned long long rdtsc(void)
1443{
1444        unsigned int low, high;
1445
1446        asm volatile("rdtsc" : "=a" (low), "=d" (high));
1447
1448        return low | ((unsigned long long)high) << 32;
1449}
1450
/*
 * fopen_or_die()
 *
 * fopen() the given path with the given mode.  On failure, exit with
 * status 1 via err(), reporting "<path>: open failed".
 * Returns the open stream; never returns NULL.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at 'path', parse a single integer from it and
 * return that value as a snapshot of the counter.
 *
 * Exits with status 1 (via fopen_or_die() or err()) when the file cannot
 * be opened or no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned long long destination */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1483
1484int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1485{
1486        if (mp->msr_num != 0) {
1487                if (get_msr(cpu, mp->msr_num, counterp))
1488                        return -1;
1489        } else {
1490                char path[128];
1491
1492                if (mp->flags & SYSFS_PERCPU) {
1493                        sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1494                                 cpu, mp->path);
1495
1496                        *counterp = snapshot_sysfs_counter(path);
1497                } else {
1498                        *counterp = snapshot_sysfs_counter(mp->path);
1499                }
1500        }
1501
1502        return 0;
1503}
1504
1505/*
1506 * get_counters(...)
1507 * migrate to cpu
1508 * acquire and record local counters for that cpu
1509 */
1510int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1511{
1512        int cpu = t->cpu_id;
1513        unsigned long long msr;
1514        int aperf_mperf_retry_count = 0;
1515        struct msr_counter *mp;
1516        int i;
1517
1518
1519        gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1520
1521        if (cpu_migrate(cpu)) {
1522                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1523                return -1;
1524        }
1525
1526retry:
1527        t->tsc = rdtsc();       /* we are running on local CPU of interest */
1528
1529        if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1530                unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1531
1532                /*
1533                 * The TSC, APERF and MPERF must be read together for
1534                 * APERF/MPERF and MPERF/TSC to give accurate results.
1535                 *
1536                 * Unfortunately, APERF and MPERF are read by
1537                 * individual system call, so delays may occur
1538                 * between them.  If the time to read them
1539                 * varies by a large amount, we re-read them.
1540                 */
1541
1542                /*
1543                 * This initial dummy APERF read has been seen to
1544                 * reduce jitter in the subsequent reads.
1545                 */
1546
1547                if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1548                        return -3;
1549
1550                t->tsc = rdtsc();       /* re-read close to APERF */
1551
1552                tsc_before = t->tsc;
1553
1554                if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1555                        return -3;
1556
1557                tsc_between = rdtsc();
1558
1559                if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1560                        return -4;
1561
1562                tsc_after = rdtsc();
1563
1564                aperf_time = tsc_between - tsc_before;
1565                mperf_time = tsc_after - tsc_between;
1566
1567                /*
1568                 * If the system call latency to read APERF and MPERF
1569                 * differ by more than 2x, then try again.
1570                 */
1571                if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1572                        aperf_mperf_retry_count++;
1573                        if (aperf_mperf_retry_count < 5)
1574                                goto retry;
1575                        else
1576                                warnx("cpu%d jitter %lld %lld",
1577                                        cpu, aperf_time, mperf_time);
1578                }
1579                aperf_mperf_retry_count = 0;
1580
1581                t->aperf = t->aperf * aperf_mperf_multiplier;
1582                t->mperf = t->mperf * aperf_mperf_multiplier;
1583        }
1584
1585        if (DO_BIC(BIC_IRQ))
1586                t->irq_count = irqs_per_cpu[cpu];
1587        if (DO_BIC(BIC_SMI)) {
1588                if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1589                        return -5;
1590                t->smi_count = msr & 0xFFFFFFFF;
1591        }
1592        if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1593                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1594                        return -6;
1595        }
1596
1597        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1598                if (get_mp(cpu, mp, &t->counter[i]))
1599                        return -10;
1600        }
1601
1602        /* collect core counters only for 1st thread in core */
1603        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1604                goto done;
1605
1606        if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
1607                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1608                        return -6;
1609        }
1610
1611        if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1612                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1613                        return -7;
1614        } else if (do_knl_cstates) {
1615                if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1616                        return -7;
1617        }
1618
1619        if (DO_BIC(BIC_CPU_c7))
1620                if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1621                        return -8;
1622
1623        if (DO_BIC(BIC_Mod_c6))
1624                if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1625                        return -8;
1626
1627        if (DO_BIC(BIC_CoreTmp)) {
1628                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1629                        return -9;
1630                c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1631        }
1632
1633        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1634                if (get_mp(cpu, mp, &c->counter[i]))
1635                        return -10;
1636        }
1637
1638        /* collect package counters only for 1st core in package */
1639        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1640                goto done;
1641
1642        if (DO_BIC(BIC_Totl_c0)) {
1643                if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1644                        return -10;
1645        }
1646        if (DO_BIC(BIC_Any_c0)) {
1647                if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1648                        return -11;
1649        }
1650        if (DO_BIC(BIC_GFX_c0)) {
1651                if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1652                        return -12;
1653        }
1654        if (DO_BIC(BIC_CPUGFX)) {
1655                if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1656                        return -13;
1657        }
1658        if (DO_BIC(BIC_Pkgpc3))
1659                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1660                        return -9;
1661        if (DO_BIC(BIC_Pkgpc6)) {
1662                if (do_slm_cstates) {
1663                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1664                                return -10;
1665                } else {
1666                        if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1667                                return -10;
1668                }
1669        }
1670
1671        if (DO_BIC(BIC_Pkgpc2))
1672                if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1673                        return -11;
1674        if (DO_BIC(BIC_Pkgpc7))
1675                if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1676                        return -12;
1677        if (DO_BIC(BIC_Pkgpc8))
1678                if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1679                        return -13;
1680        if (DO_BIC(BIC_Pkgpc9))
1681                if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1682                        return -13;
1683        if (DO_BIC(BIC_Pkgpc10))
1684                if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1685                        return -13;
1686
1687        if (do_rapl & RAPL_PKG) {
1688                if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1689                        return -13;
1690                p->energy_pkg = msr & 0xFFFFFFFF;
1691        }
1692        if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1693                if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1694                        return -14;
1695                p->energy_cores = msr & 0xFFFFFFFF;
1696        }
1697        if (do_rapl & RAPL_DRAM) {
1698                if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1699                        return -15;
1700                p->energy_dram = msr & 0xFFFFFFFF;
1701        }
1702        if (do_rapl & RAPL_GFX) {
1703                if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1704                        return -16;
1705                p->energy_gfx = msr & 0xFFFFFFFF;
1706        }
1707        if (do_rapl & RAPL_PKG_PERF_STATUS) {
1708                if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1709                        return -16;
1710                p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1711        }
1712        if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1713                if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1714                        return -16;
1715                p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1716        }
1717        if (DO_BIC(BIC_PkgTmp)) {
1718                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1719                        return -17;
1720                p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1721        }
1722
1723        if (DO_BIC(BIC_GFX_rc6))
1724                p->gfx_rc6_ms = gfx_cur_rc6_ms;
1725
1726        if (DO_BIC(BIC_GFXMHz))
1727                p->gfx_mhz = gfx_cur_mhz;
1728
1729        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1730                if (get_mp(cpu, mp, &p->counter[i]))
1731                        return -10;
1732        }
1733done:
1734        gettimeofday(&t->tv_end, (struct timezone *)NULL);
1735
1736        return 0;
1737}
1738
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* value.  Every entry is bound to its
 * constant by a designated initializer so that a name cannot drift
 * away from the value it describes (PCLUKN is "unknown", PCLRSV is
 * "reserved").
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};

/* Per-platform tables: 16 PCL* values each. */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1773
1774
1775static void
1776calculate_tsc_tweak()
1777{
1778        tsc_tweak = base_hz / tsc_hz;
1779}
1780
1781static void
1782dump_nhm_platform_info(void)
1783{
1784        unsigned long long msr;
1785        unsigned int ratio;
1786
1787        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1788
1789        fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1790
1791        ratio = (msr >> 40) & 0xFF;
1792        fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1793                ratio, bclk, ratio * bclk);
1794
1795        ratio = (msr >> 8) & 0xFF;
1796        fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1797                ratio, bclk, ratio * bclk);
1798
1799        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1800        fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1801                base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1802
1803        return;
1804}
1805
1806static void
1807dump_hsw_turbo_ratio_limits(void)
1808{
1809        unsigned long long msr;
1810        unsigned int ratio;
1811
1812        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1813
1814        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1815
1816        ratio = (msr >> 8) & 0xFF;
1817        if (ratio)
1818                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1819                        ratio, bclk, ratio * bclk);
1820
1821        ratio = (msr >> 0) & 0xFF;
1822        if (ratio)
1823                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1824                        ratio, bclk, ratio * bclk);
1825        return;
1826}
1827
1828static void
1829dump_ivt_turbo_ratio_limits(void)
1830{
1831        unsigned long long msr;
1832        unsigned int ratio;
1833
1834        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1835
1836        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1837
1838        ratio = (msr >> 56) & 0xFF;
1839        if (ratio)
1840                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1841                        ratio, bclk, ratio * bclk);
1842
1843        ratio = (msr >> 48) & 0xFF;
1844        if (ratio)
1845                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1846                        ratio, bclk, ratio * bclk);
1847
1848        ratio = (msr >> 40) & 0xFF;
1849        if (ratio)
1850                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1851                        ratio, bclk, ratio * bclk);
1852
1853        ratio = (msr >> 32) & 0xFF;
1854        if (ratio)
1855                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1856                        ratio, bclk, ratio * bclk);
1857
1858        ratio = (msr >> 24) & 0xFF;
1859        if (ratio)
1860                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1861                        ratio, bclk, ratio * bclk);
1862
1863        ratio = (msr >> 16) & 0xFF;
1864        if (ratio)
1865                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1866                        ratio, bclk, ratio * bclk);
1867
1868        ratio = (msr >> 8) & 0xFF;
1869        if (ratio)
1870                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1871                        ratio, bclk, ratio * bclk);
1872
1873        ratio = (msr >> 0) & 0xFF;
1874        if (ratio)
1875                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
1876                        ratio, bclk, ratio * bclk);
1877        return;
1878}
1879int has_turbo_ratio_group_limits(int family, int model)
1880{
1881
1882        if (!genuine_intel)
1883                return 0;
1884
1885        switch (model) {
1886        case INTEL_FAM6_ATOM_GOLDMONT:
1887        case INTEL_FAM6_SKYLAKE_X:
1888        case INTEL_FAM6_ATOM_DENVERTON:
1889                return 1;
1890        }
1891        return 0;
1892}
1893
1894static void
1895dump_turbo_ratio_limits(int family, int model)
1896{
1897        unsigned long long msr, core_counts;
1898        unsigned int ratio, group_size;
1899
1900        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1901        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1902
1903        if (has_turbo_ratio_group_limits(family, model)) {
1904                get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
1905                fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
1906        } else {
1907                core_counts = 0x0807060504030201;
1908        }
1909
1910        ratio = (msr >> 56) & 0xFF;
1911        group_size = (core_counts >> 56) & 0xFF;
1912        if (ratio)
1913                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1914                        ratio, bclk, ratio * bclk, group_size);
1915
1916        ratio = (msr >> 48) & 0xFF;
1917        group_size = (core_counts >> 48) & 0xFF;
1918        if (ratio)
1919                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1920                        ratio, bclk, ratio * bclk, group_size);
1921
1922        ratio = (msr >> 40) & 0xFF;
1923        group_size = (core_counts >> 40) & 0xFF;
1924        if (ratio)
1925                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1926                        ratio, bclk, ratio * bclk, group_size);
1927
1928        ratio = (msr >> 32) & 0xFF;
1929        group_size = (core_counts >> 32) & 0xFF;
1930        if (ratio)
1931                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1932                        ratio, bclk, ratio * bclk, group_size);
1933
1934        ratio = (msr >> 24) & 0xFF;
1935        group_size = (core_counts >> 24) & 0xFF;
1936        if (ratio)
1937                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1938                        ratio, bclk, ratio * bclk, group_size);
1939
1940        ratio = (msr >> 16) & 0xFF;
1941        group_size = (core_counts >> 16) & 0xFF;
1942        if (ratio)
1943                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1944                        ratio, bclk, ratio * bclk, group_size);
1945
1946        ratio = (msr >> 8) & 0xFF;
1947        group_size = (core_counts >> 8) & 0xFF;
1948        if (ratio)
1949                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1950                        ratio, bclk, ratio * bclk, group_size);
1951
1952        ratio = (msr >> 0) & 0xFF;
1953        group_size = (core_counts >> 0) & 0xFF;
1954        if (ratio)
1955                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1956                        ratio, bclk, ratio * bclk, group_size);
1957        return;
1958}
1959
1960static void
1961dump_atom_turbo_ratio_limits(void)
1962{
1963        unsigned long long msr;
1964        unsigned int ratio;
1965
1966        get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
1967        fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
1968
1969        ratio = (msr >> 0) & 0x3F;
1970        if (ratio)
1971                fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
1972                        ratio, bclk, ratio * bclk);
1973
1974        ratio = (msr >> 8) & 0x3F;
1975        if (ratio)
1976                fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
1977                        ratio, bclk, ratio * bclk);
1978
1979        ratio = (msr >> 16) & 0x3F;
1980        if (ratio)
1981                fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1982                        ratio, bclk, ratio * bclk);
1983
1984        get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
1985        fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
1986
1987        ratio = (msr >> 24) & 0x3F;
1988        if (ratio)
1989                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
1990                        ratio, bclk, ratio * bclk);
1991
1992        ratio = (msr >> 16) & 0x3F;
1993        if (ratio)
1994                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
1995                        ratio, bclk, ratio * bclk);
1996
1997        ratio = (msr >> 8) & 0x3F;
1998        if (ratio)
1999                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2000                        ratio, bclk, ratio * bclk);
2001
2002        ratio = (msr >> 0) & 0x3F;
2003        if (ratio)
2004                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2005                        ratio, bclk, ratio * bclk);
2006}
2007
2008static void
2009dump_knl_turbo_ratio_limits(void)
2010{
2011        const unsigned int buckets_no = 7;
2012
2013        unsigned long long msr;
2014        int delta_cores, delta_ratio;
2015        int i, b_nr;
2016        unsigned int cores[buckets_no];
2017        unsigned int ratio[buckets_no];
2018
2019        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2020
2021        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2022                base_cpu, msr);
2023
2024        /**
2025         * Turbo encoding in KNL is as follows:
2026         * [0] -- Reserved
2027         * [7:1] -- Base value of number of active cores of bucket 1.
2028         * [15:8] -- Base value of freq ratio of bucket 1.
2029         * [20:16] -- +ve delta of number of active cores of bucket 2.
2030         * i.e. active cores of bucket 2 =
2031         * active cores of bucket 1 + delta
2032         * [23:21] -- Negative delta of freq ratio of bucket 2.
2033         * i.e. freq ratio of bucket 2 =
2034         * freq ratio of bucket 1 - delta
2035         * [28:24]-- +ve delta of number of active cores of bucket 3.
2036         * [31:29]-- -ve delta of freq ratio of bucket 3.
2037         * [36:32]-- +ve delta of number of active cores of bucket 4.
2038         * [39:37]-- -ve delta of freq ratio of bucket 4.
2039         * [44:40]-- +ve delta of number of active cores of bucket 5.
2040         * [47:45]-- -ve delta of freq ratio of bucket 5.
2041         * [52:48]-- +ve delta of number of active cores of bucket 6.
2042         * [55:53]-- -ve delta of freq ratio of bucket 6.
2043         * [60:56]-- +ve delta of number of active cores of bucket 7.
2044         * [63:61]-- -ve delta of freq ratio of bucket 7.
2045         */
2046
2047        b_nr = 0;
2048        cores[b_nr] = (msr & 0xFF) >> 1;
2049        ratio[b_nr] = (msr >> 8) & 0xFF;
2050
2051        for (i = 16; i < 64; i += 8) {
2052                delta_cores = (msr >> i) & 0x1F;
2053                delta_ratio = (msr >> (i + 5)) & 0x7;
2054
2055                cores[b_nr + 1] = cores[b_nr] + delta_cores;
2056                ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2057                b_nr++;
2058        }
2059
2060        for (i = buckets_no - 1; i >= 0; i--)
2061                if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2062                        fprintf(outf,
2063                                "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2064                                ratio[i], bclk, ratio[i] * bclk, cores[i]);
2065}
2066
2067static void
2068dump_nhm_cst_cfg(void)
2069{
2070        unsigned long long msr;
2071
2072        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2073
2074#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
2075#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
2076
2077        fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2078
2079        fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
2080                (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2081                (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2082                (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2083                (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2084                (msr & (1 << 15)) ? "" : "UN",
2085                (unsigned int)msr & 0xF,
2086                pkg_cstate_limit_strings[pkg_cstate_limit]);
2087        return;
2088}
2089
2090static void
2091dump_config_tdp(void)
2092{
2093        unsigned long long msr;
2094
2095        get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2096        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2097        fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2098
2099        get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2100        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2101        if (msr) {
2102                fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2103                fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2104                fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2105                fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2106        }
2107        fprintf(outf, ")\n");
2108
2109        get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2110        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2111        if (msr) {
2112                fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2113                fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2114                fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2115                fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2116        }
2117        fprintf(outf, ")\n");
2118
2119        get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2120        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2121        if ((msr) & 0x3)
2122                fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2123        fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2124        fprintf(outf, ")\n");
2125
2126        get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2127        fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2128        fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2129        fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2130        fprintf(outf, ")\n");
2131}
2132
2133unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2134
2135void print_irtl(void)
2136{
2137        unsigned long long msr;
2138
2139        get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2140        fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2141        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2142                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2143
2144        get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2145        fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2146        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2147                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2148
2149        get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2150        fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2151        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2152                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2153
2154        if (!do_irtl_hsw)
2155                return;
2156
2157        get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2158        fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2159        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2160                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2161
2162        get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2163        fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2164        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2165                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2166
2167        get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2168        fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2169        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2170                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2171
2172}
2173void free_fd_percpu(void)
2174{
2175        int i;
2176
2177        for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2178                if (fd_percpu[i] != 0)
2179                        close(fd_percpu[i]);
2180        }
2181
2182        free(fd_percpu);
2183}
2184
2185void free_all_buffers(void)
2186{
2187        CPU_FREE(cpu_present_set);
2188        cpu_present_set = NULL;
2189        cpu_present_setsize = 0;
2190
2191        CPU_FREE(cpu_affinity_set);
2192        cpu_affinity_set = NULL;
2193        cpu_affinity_setsize = 0;
2194
2195        free(thread_even);
2196        free(core_even);
2197        free(package_even);
2198
2199        thread_even = NULL;
2200        core_even = NULL;
2201        package_even = NULL;
2202
2203        free(thread_odd);
2204        free(core_odd);
2205        free(package_odd);
2206
2207        thread_odd = NULL;
2208        core_odd = NULL;
2209        package_odd = NULL;
2210
2211        free(output_buffer);
2212        output_buffer = NULL;
2213        outp = NULL;
2214
2215        free_fd_percpu();
2216
2217        free(irq_column_2_cpu);
2218        free(irqs_per_cpu);
2219}
2220
2221
2222/*
2223 * Parse a file containing a single int.
2224 */
2225int parse_int_file(const char *fmt, ...)
2226{
2227        va_list args;
2228        char path[PATH_MAX];
2229        FILE *filep;
2230        int value;
2231
2232        va_start(args, fmt);
2233        vsnprintf(path, sizeof(path), fmt, args);
2234        va_end(args);
2235        filep = fopen_or_die(path, "r");
2236        if (fscanf(filep, "%d", &value) != 1)
2237                err(1, "%s: failed to parse number from file", path);
2238        fclose(filep);
2239        return value;
2240}
2241
2242/*
2243 * get_cpu_position_in_core(cpu)
2244 * return the position of the CPU among its HT siblings in the core
2245 * return -1 if the sibling is not in list
2246 */
2247int get_cpu_position_in_core(int cpu)
2248{
2249        char path[64];
2250        FILE *filep;
2251        int this_cpu;
2252        char character;
2253        int i;
2254
2255        sprintf(path,
2256                "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
2257                cpu);
2258        filep = fopen(path, "r");
2259        if (filep == NULL) {
2260                perror(path);
2261                exit(1);
2262        }
2263
2264        for (i = 0; i < topo.num_threads_per_core; i++) {
2265                fscanf(filep, "%d", &this_cpu);
2266                if (this_cpu == cpu) {
2267                        fclose(filep);
2268                        return i;
2269                }
2270
2271                /* Account for no separator after last thread*/
2272                if (i != (topo.num_threads_per_core - 1))
2273                        fscanf(filep, "%c", &character);
2274        }
2275
2276        fclose(filep);
2277        return -1;
2278}
2279
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if cpu equals the first number in its own
 * core_siblings_list, 0 otherwise
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_listed;

	first_listed = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_listed == cpu;
}
2288
/*
 * get_physical_package_id(cpu)
 * return the int stored in the cpu's topology/physical_package_id
 */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
2293
/*
 * get_core_id(cpu)
 * return the int stored in the cpu's topology/core_id
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2298
/*
 * get_num_ht_siblings(cpu)
 * return the number of cpus named in the cpu's thread_siblings_list
 *
 * file format:
 * a ',' separated list whose entries are either a single number or
 * an inclusive "first-last" range (eg 1-2 or 1,3,4,5)
 *
 * Only the first line, up to the size of the local buffer, is
 * examined.  Exits if the file cannot be opened or read, or if no
 * valid entry is found in it.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	const char *pos;
	char *end;
	long first, last;
	int count = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	if (fgets(str, sizeof(str), filep) == NULL)
		err(1, "%s: failed to read sibling list", path);
	fclose(filep);

	pos = str;
	for (;;) {
		first = strtol(pos, &end, 10);
		if (end == pos)
			break;		/* no number here: end of list */

		last = first;
		if (*end == '-') {
			/* step over the '-' so it is not read as a sign */
			pos = end + 1;
			last = strtol(pos, &end, 10);
			if (end == pos || last < first)
				errx(1, "%s: malformed sibling list", path);
		}
		count += (int)(last - first) + 1;

		if (*end != ',')
			break;
		pos = end + 1;
	}

	if (count == 0)
		errx(1, "%s: malformed sibling list", path);

	return count;
}
2329
2330/*
2331 * run func(thread, core, package) in topology order
2332 * skip non-present cpus
2333 */
2334
2335int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2336        struct pkg_data *, struct thread_data *, struct core_data *,
2337        struct pkg_data *), struct thread_data *thread_base,
2338        struct core_data *core_base, struct pkg_data *pkg_base,
2339        struct thread_data *thread_base2, struct core_data *core_base2,
2340        struct pkg_data *pkg_base2)
2341{
2342        int retval, pkg_no, core_no, thread_no;
2343
2344        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2345                for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
2346                        for (thread_no = 0; thread_no <
2347                                topo.num_threads_per_core; ++thread_no) {
2348                                struct thread_data *t, *t2;
2349                                struct core_data *c, *c2;
2350                                struct pkg_data *p, *p2;
2351
2352                                t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
2353
2354                                if (cpu_is_not_present(t->cpu_id))
2355                                        continue;
2356
2357                                t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
2358
2359                                c = GET_CORE(core_base, core_no, pkg_no);
2360                                c2 = GET_CORE(core_base2, core_no, pkg_no);
2361
2362                                p = GET_PKG(pkg_base, pkg_no);
2363                                p2 = GET_PKG(pkg_base2, pkg_no);
2364
2365                                retval = func(t, c, p, t2, c2, p2);
2366                                if (retval)
2367                                        return retval;
2368                        }
2369                }
2370        }
2371        return 0;
2372}
2373
2374/*
2375 * run func(cpu) on every cpu in /proc/stat
2376 * return max_cpu number
2377 */
2378int for_all_proc_cpus(int (func)(int))
2379{
2380        FILE *fp;
2381        int cpu_num;
2382        int retval;
2383
2384        fp = fopen_or_die(proc_stat, "r");
2385
2386        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2387        if (retval != 0)
2388                err(1, "%s: failed to parse format", proc_stat);
2389
2390        while (1) {
2391                retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2392                if (retval != 1)
2393                        break;
2394
2395                retval = func(cpu_num);
2396                if (retval) {
2397                        fclose(fp);
2398                        return(retval);
2399                }
2400        }
2401        fclose(fp);
2402        return 0;
2403}
2404
2405void re_initialize(void)
2406{
2407        free_all_buffers();
2408        setup_all_buffers();
2409        printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2410}
2411
2412
2413/*
2414 * count_cpus()
2415 * remember the last one seen, it will be the max
2416 */
2417int count_cpus(int cpu)
2418{
2419        if (topo.max_cpu_num < cpu)
2420                topo.max_cpu_num = cpu;
2421
2422        topo.num_cpus += 1;
2423        return 0;
2424}
2425int mark_cpu_present(int cpu)
2426{
2427        CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2428        return 0;
2429}
2430
2431/*
2432 * snapshot_proc_interrupts()
2433 *
2434 * read and record summary of /proc/interrupts
2435 *
2436 * return 1 if config change requires a restart, else return 0
2437 */
2438int snapshot_proc_interrupts(void)
2439{
2440        static FILE *fp;
2441        int column, retval;
2442
2443        if (fp == NULL)
2444                fp = fopen_or_die("/proc/interrupts", "r");
2445        else
2446                rewind(fp);
2447
2448        /* read 1st line of /proc/interrupts to get cpu* name for each column */
2449        for (column = 0; column < topo.num_cpus; ++column) {
2450                int cpu_number;
2451
2452                retval = fscanf(fp, " CPU%d", &cpu_number);
2453                if (retval != 1)
2454                        break;
2455
2456                if (cpu_number > topo.max_cpu_num) {
2457                        warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2458                        return 1;
2459                }
2460
2461                irq_column_2_cpu[column] = cpu_number;
2462                irqs_per_cpu[cpu_number] = 0;
2463        }
2464
2465        /* read /proc/interrupt count lines and sum up irqs per cpu */
2466        while (1) {
2467                int column;
2468                char buf[64];
2469
2470                retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2471                if (retval != 1)
2472                        break;
2473
2474                /* read the count per cpu */
2475                for (column = 0; column < topo.num_cpus; ++column) {
2476
2477                        int cpu_number, irq_count;
2478
2479                        retval = fscanf(fp, " %d", &irq_count);
2480                        if (retval != 1)
2481                                break;
2482
2483                        cpu_number = irq_column_2_cpu[column];
2484                        irqs_per_cpu[cpu_number] += irq_count;
2485
2486                }
2487
2488                while (getc(fp) != '\n')
2489                        ;       /* flush interrupt description */
2490
2491        }
2492        return 0;
2493}
2494/*
2495 * snapshot_gfx_rc6_ms()
2496 *
2497 * record snapshot of
2498 * /sys/class/drm/card0/power/rc6_residency_ms
2499 *
2500 * return 1 if config change requires a restart, else return 0
2501 */
2502int snapshot_gfx_rc6_ms(void)
2503{
2504        FILE *fp;
2505        int retval;
2506
2507        fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2508
2509        retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2510        if (retval != 1)
2511                err(1, "GFX rc6");
2512
2513        fclose(fp);
2514
2515        return 0;
2516}
2517/*
2518 * snapshot_gfx_mhz()
2519 *
2520 * record snapshot of
2521 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2522 *
2523 * return 1 if config change requires a restart, else return 0
2524 */
2525int snapshot_gfx_mhz(void)
2526{
2527        static FILE *fp;
2528        int retval;
2529
2530        if (fp == NULL)
2531                fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2532        else {
2533                rewind(fp);
2534                fflush(fp);
2535        }
2536
2537        retval = fscanf(fp, "%d", &gfx_cur_mhz);
2538        if (retval != 1)
2539                err(1, "GFX MHz");
2540
2541        return 0;
2542}
2543
2544/*
2545 * snapshot /proc and /sys files
2546 *
2547 * return 1 if configuration restart needed, else return 0
2548 */
2549int snapshot_proc_sysfs_files(void)
2550{
2551        if (DO_BIC(BIC_IRQ))
2552                if (snapshot_proc_interrupts())
2553                        return 1;
2554
2555        if (DO_BIC(BIC_GFX_rc6))
2556                snapshot_gfx_rc6_ms();
2557
2558        if (DO_BIC(BIC_GFXMHz))
2559                snapshot_gfx_mhz();
2560
2561        return 0;
2562}
2563
2564void turbostat_loop()
2565{
2566        int retval;
2567        int restarted = 0;
2568
2569restart:
2570        restarted++;
2571
2572        snapshot_proc_sysfs_files();
2573        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2574        if (retval < -1) {
2575                exit(retval);
2576        } else if (retval == -1) {
2577                if (restarted > 1) {
2578                        exit(retval);
2579                }
2580                re_initialize();
2581                goto restart;
2582        }
2583        restarted = 0;
2584        gettimeofday(&tv_even, (struct timezone *)NULL);
2585
2586        while (1) {
2587                if (for_all_proc_cpus(cpu_is_not_present)) {
2588                        re_initialize();
2589                        goto restart;
2590                }
2591                nanosleep(&interval_ts, NULL);
2592                if (snapshot_proc_sysfs_files())
2593                        goto restart;
2594                retval = for_all_cpus(get_counters, ODD_COUNTERS);
2595                if (retval < -1) {
2596                        exit(retval);
2597                } else if (retval == -1) {
2598                        re_initialize();
2599                        goto restart;
2600                }
2601                gettimeofday(&tv_odd, (struct timezone *)NULL);
2602                timersub(&tv_odd, &tv_even, &tv_delta);
2603                if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2604                        re_initialize();
2605                        goto restart;
2606                }
2607                compute_average(EVEN_COUNTERS);
2608                format_all_counters(EVEN_COUNTERS);
2609                flush_output_stdout();
2610                nanosleep(&interval_ts, NULL);
2611                if (snapshot_proc_sysfs_files())
2612                        goto restart;
2613                retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2614                if (retval < -1) {
2615                        exit(retval);
2616                } else if (retval == -1) {
2617                        re_initialize();
2618                        goto restart;
2619                }
2620                gettimeofday(&tv_even, (struct timezone *)NULL);
2621                timersub(&tv_even, &tv_odd, &tv_delta);
2622                if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2623                        re_initialize();
2624                        goto restart;
2625                }
2626                compute_average(ODD_COUNTERS);
2627                format_all_counters(ODD_COUNTERS);
2628                flush_output_stdout();
2629        }
2630}
2631
2632void check_dev_msr()
2633{
2634        struct stat sb;
2635        char pathname[32];
2636
2637        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2638        if (stat(pathname, &sb))
2639                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2640                        err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2641}
2642
2643void check_permissions()
2644{
2645        struct __user_cap_header_struct cap_header_data;
2646        cap_user_header_t cap_header = &cap_header_data;
2647        struct __user_cap_data_struct cap_data_data;
2648        cap_user_data_t cap_data = &cap_data_data;
2649        extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2650        int do_exit = 0;
2651        char pathname[32];
2652
2653        /* check for CAP_SYS_RAWIO */
2654        cap_header->pid = getpid();
2655        cap_header->version = _LINUX_CAPABILITY_VERSION;
2656        if (capget(cap_header, cap_data) < 0)
2657                err(-6, "capget(2) failed");
2658
2659        if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2660                do_exit++;
2661                warnx("capget(CAP_SYS_RAWIO) failed,"
2662                        " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2663        }
2664
2665        /* test file permissions */
2666        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2667        if (euidaccess(pathname, R_OK)) {
2668                do_exit++;
2669                warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2670        }
2671
2672        /* if all else fails, thell them to be root */
2673        if (do_exit)
2674                if (getuid() != 0)
2675                        warnx("... or simply run as root");
2676
2677        if (do_exit)
2678                exit(-6);
2679}
2680
2681/*
2682 * NHM adds support for additional MSRs:
2683 *
2684 * MSR_SMI_COUNT                   0x00000034
2685 *
2686 * MSR_PLATFORM_INFO               0x000000ce
2687 * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
2688 *
2689 * MSR_MISC_PWR_MGMT               0x000001aa
2690 *
2691 * MSR_PKG_C3_RESIDENCY            0x000003f8
2692 * MSR_PKG_C6_RESIDENCY            0x000003f9
2693 * MSR_CORE_C3_RESIDENCY           0x000003fc
2694 * MSR_CORE_C6_RESIDENCY           0x000003fd
2695 *
2696 * Side effect:
2697 * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
2698 * sets has_misc_feature_control
2699 */
2700int probe_nhm_msrs(unsigned int family, unsigned int model)
2701{
2702        unsigned long long msr;
2703        unsigned int base_ratio;
2704        int *pkg_cstate_limits;
2705
2706        if (!genuine_intel)
2707                return 0;
2708
2709        if (family != 6)
2710                return 0;
2711
2712        bclk = discover_bclk(family, model);
2713
2714        switch (model) {
2715        case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2716        case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2717        case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
2718        case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
2719        case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
2720        case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2721        case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2722                pkg_cstate_limits = nhm_pkg_cstate_limits;
2723                break;
2724        case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
2725        case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
2726        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2727        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2728                pkg_cstate_limits = snb_pkg_cstate_limits;
2729                has_misc_feature_control = 1;
2730                break;
2731        case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2732        case INTEL_FAM6_HASWELL_X:      /* HSX */
2733        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2734        case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2735        case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2736        case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2737        case INTEL_FAM6_BROADWELL_X:    /* BDX */
2738        case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2739        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2740        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2741        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2742        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2743                pkg_cstate_limits = hsw_pkg_cstate_limits;
2744                has_misc_feature_control = 1;
2745                break;
2746        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2747                pkg_cstate_limits = skx_pkg_cstate_limits;
2748                has_misc_feature_control = 1;
2749                break;
2750        case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
2751                no_MSR_MISC_PWR_MGMT = 1;
2752        case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
2753                pkg_cstate_limits = slv_pkg_cstate_limits;
2754                break;
2755        case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
2756                pkg_cstate_limits = amt_pkg_cstate_limits;
2757                no_MSR_MISC_PWR_MGMT = 1;
2758                break;
2759        case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
2760        case INTEL_FAM6_XEON_PHI_KNM:
2761                pkg_cstate_limits = phi_pkg_cstate_limits;
2762                break;
2763        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
2764        case INTEL_FAM6_ATOM_GEMINI_LAKE:
2765        case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2766                pkg_cstate_limits = bxt_pkg_cstate_limits;
2767                break;
2768        default:
2769                return 0;
2770        }
2771        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2772        pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2773
2774        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2775        base_ratio = (msr >> 8) & 0xFF;
2776
2777        base_hz = base_ratio * bclk * 1000000;
2778        has_base_hz = 1;
2779        return 1;
2780}
2781/*
2782 * SLV client has support for unique MSRs:
2783 *
2784 * MSR_CC6_DEMOTION_POLICY_CONFIG
2785 * MSR_MC6_DEMOTION_POLICY_CONFIG
2786 */
2787
2788int has_slv_msrs(unsigned int family, unsigned int model)
2789{
2790        if (!genuine_intel)
2791                return 0;
2792
2793        switch (model) {
2794        case INTEL_FAM6_ATOM_SILVERMONT1:
2795        case INTEL_FAM6_ATOM_MERRIFIELD:
2796        case INTEL_FAM6_ATOM_MOOREFIELD:
2797                return 1;
2798        }
2799        return 0;
2800}
2801int is_dnv(unsigned int family, unsigned int model)
2802{
2803
2804        if (!genuine_intel)
2805                return 0;
2806
2807        switch (model) {
2808        case INTEL_FAM6_ATOM_DENVERTON:
2809                return 1;
2810        }
2811        return 0;
2812}
2813int is_bdx(unsigned int family, unsigned int model)
2814{
2815
2816        if (!genuine_intel)
2817                return 0;
2818
2819        switch (model) {
2820        case INTEL_FAM6_BROADWELL_X:
2821        case INTEL_FAM6_BROADWELL_XEON_D:
2822                return 1;
2823        }
2824        return 0;
2825}
2826int is_skx(unsigned int family, unsigned int model)
2827{
2828
2829        if (!genuine_intel)
2830                return 0;
2831
2832        switch (model) {
2833        case INTEL_FAM6_SKYLAKE_X:
2834                return 1;
2835        }
2836        return 0;
2837}
2838
2839int has_turbo_ratio_limit(unsigned int family, unsigned int model)
2840{
2841        if (has_slv_msrs(family, model))
2842                return 0;
2843
2844        switch (model) {
2845        /* Nehalem compatible, but do not include turbo-ratio limit support */
2846        case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2847        case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2848        case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
2849        case INTEL_FAM6_XEON_PHI_KNM:
2850                return 0;
2851        default:
2852                return 1;
2853        }
2854}
/*
 * has_atom_turbo_ratio_limit()
 * return 1 exactly when has_slv_msrs() is true for this CPU, else 0
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
2862int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2863{
2864        if (!genuine_intel)
2865                return 0;
2866
2867        if (family != 6)
2868                return 0;
2869
2870        switch (model) {
2871        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2872        case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2873                return 1;
2874        default:
2875                return 0;
2876        }
2877}
2878int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2879{
2880        if (!genuine_intel)
2881                return 0;
2882
2883        if (family != 6)
2884                return 0;
2885
2886        switch (model) {
2887        case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2888                return 1;
2889        default:
2890                return 0;
2891        }
2892}
2893
2894int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2895{
2896        if (!genuine_intel)
2897                return 0;
2898
2899        if (family != 6)
2900                return 0;
2901
2902        switch (model) {
2903        case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2904        case INTEL_FAM6_XEON_PHI_KNM:
2905                return 1;
2906        default:
2907                return 0;
2908        }
2909}
2910int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
2911{
2912        if (!genuine_intel)
2913                return 0;
2914
2915        if (family != 6)
2916                return 0;
2917
2918        switch (model) {
2919        case INTEL_FAM6_ATOM_GOLDMONT:
2920        case INTEL_FAM6_SKYLAKE_X:
2921                return 1;
2922        default:
2923                return 0;
2924        }
2925}
2926int has_config_tdp(unsigned int family, unsigned int model)
2927{
2928        if (!genuine_intel)
2929                return 0;
2930
2931        if (family != 6)
2932                return 0;
2933
2934        switch (model) {
2935        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2936        case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2937        case INTEL_FAM6_HASWELL_X:      /* HSX */
2938        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2939        case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2940        case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2941        case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2942        case INTEL_FAM6_BROADWELL_X:    /* BDX */
2943        case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2944        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2945        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2946        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2947        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2948        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2949
2950        case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2951        case INTEL_FAM6_XEON_PHI_KNM:
2952                return 1;
2953        default:
2954                return 0;
2955        }
2956}
2957
2958static void
2959dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2960{
2961        if (!do_nhm_platform_info)
2962                return;
2963
2964        dump_nhm_platform_info();
2965
2966        if (has_hsw_turbo_ratio_limit(family, model))
2967                dump_hsw_turbo_ratio_limits();
2968
2969        if (has_ivt_turbo_ratio_limit(family, model))
2970                dump_ivt_turbo_ratio_limits();
2971
2972        if (has_turbo_ratio_limit(family, model))
2973                dump_turbo_ratio_limits(family, model);
2974
2975        if (has_atom_turbo_ratio_limit(family, model))
2976                dump_atom_turbo_ratio_limits();
2977
2978        if (has_knl_turbo_ratio_limit(family, model))
2979                dump_knl_turbo_ratio_limits();
2980
2981        if (has_config_tdp(family, model))
2982                dump_config_tdp();
2983
2984        dump_nhm_cst_cfg();
2985}
2986
2987static void
2988dump_sysfs_cstate_config(void)
2989{
2990        char path[64];
2991        char name_buf[16];
2992        char desc[64];
2993        FILE *input;
2994        int state;
2995        char *sp;
2996
2997        if (!DO_BIC(BIC_sysfs))
2998                return;
2999
3000        for (state = 0; state < 10; ++state) {
3001
3002                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3003                        base_cpu, state);
3004                input = fopen(path, "r");
3005                if (input == NULL)
3006                        continue;
3007                fgets(name_buf, sizeof(name_buf), input);
3008
3009                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3010                sp = strchr(name_buf, '-');
3011                if (!sp)
3012                        sp = strchrnul(name_buf, '\n');
3013                *sp = '\0';
3014
3015                fclose(input);
3016
3017                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3018                        base_cpu, state);
3019                input = fopen(path, "r");
3020                if (input == NULL)
3021                        continue;
3022                fgets(desc, sizeof(desc), input);
3023
3024                fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3025                fclose(input);
3026        }
3027}
3028static void
3029dump_sysfs_pstate_config(void)
3030{
3031        char path[64];
3032        char driver_buf[64];
3033        char governor_buf[64];
3034        FILE *input;
3035        int turbo;
3036
3037        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3038                        base_cpu);
3039        input = fopen(path, "r");
3040        if (input == NULL) {
3041                fprintf(stderr, "NSFOD %s\n", path);
3042                return;
3043        }
3044        fgets(driver_buf, sizeof(driver_buf), input);
3045        fclose(input);
3046
3047        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3048                        base_cpu);
3049        input = fopen(path, "r");
3050        if (input == NULL) {
3051                fprintf(stderr, "NSFOD %s\n", path);
3052                return;
3053        }
3054        fgets(governor_buf, sizeof(governor_buf), input);
3055        fclose(input);
3056
3057        fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3058        fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3059
3060        sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3061        input = fopen(path, "r");
3062        if (input != NULL) {
3063                fscanf(input, "%d", &turbo);
3064                fprintf(outf, "cpufreq boost: %d\n", turbo);
3065                fclose(input);
3066        }
3067
3068        sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3069        input = fopen(path, "r");
3070        if (input != NULL) {
3071                fscanf(input, "%d", &turbo);
3072                fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3073                fclose(input);
3074        }
3075}
3076
3077
3078/*
3079 * print_epb()
3080 * Decode the ENERGY_PERF_BIAS MSR
3081 */
3082int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3083{
3084        unsigned long long msr;
3085        char *epb_string;
3086        int cpu;
3087
3088        if (!has_epb)
3089                return 0;
3090
3091        cpu = t->cpu_id;
3092
3093        /* EPB is per-package */
3094        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3095                return 0;
3096
3097        if (cpu_migrate(cpu)) {
3098                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3099                return -1;
3100        }
3101
3102        if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3103                return 0;
3104
3105        switch (msr & 0xF) {
3106        case ENERGY_PERF_BIAS_PERFORMANCE:
3107                epb_string = "performance";
3108                break;
3109        case ENERGY_PERF_BIAS_NORMAL:
3110                epb_string = "balanced";
3111                break;
3112        case ENERGY_PERF_BIAS_POWERSAVE:
3113                epb_string = "powersave";
3114                break;
3115        default:
3116                epb_string = "custom";
3117                break;
3118        }
3119        fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3120
3121        return 0;
3122}
3123/*
3124 * print_hwp()
3125 * Decode the MSR_HWP_CAPABILITIES
3126 */
3127int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3128{
3129        unsigned long long msr;
3130        int cpu;
3131
3132        if (!has_hwp)
3133                return 0;
3134
3135        cpu = t->cpu_id;
3136
3137        /* MSR_HWP_CAPABILITIES is per-package */
3138        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3139                return 0;
3140
3141        if (cpu_migrate(cpu)) {
3142                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3143                return -1;
3144        }
3145
3146        if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3147                return 0;
3148
3149        fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3150                cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3151
3152        /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3153        if ((msr & (1 << 0)) == 0)
3154                return 0;
3155
3156        if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3157                return 0;
3158
3159        fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3160                        "(high %d guar %d eff %d low %d)\n",
3161                        cpu, msr,
3162                        (unsigned int)HWP_HIGHEST_PERF(msr),
3163                        (unsigned int)HWP_GUARANTEED_PERF(msr),
3164                        (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3165                        (unsigned int)HWP_LOWEST_PERF(msr));
3166
3167        if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3168                return 0;
3169
3170        fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3171                        "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3172                        cpu, msr,
3173                        (unsigned int)(((msr) >> 0) & 0xff),
3174                        (unsigned int)(((msr) >> 8) & 0xff),
3175                        (unsigned int)(((msr) >> 16) & 0xff),
3176                        (unsigned int)(((msr) >> 24) & 0xff),
3177                        (unsigned int)(((msr) >> 32) & 0xff3),
3178                        (unsigned int)(((msr) >> 42) & 0x1));
3179
3180        if (has_hwp_pkg) {
3181                if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3182                        return 0;
3183
3184                fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3185                        "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3186                        cpu, msr,
3187                        (unsigned int)(((msr) >> 0) & 0xff),
3188                        (unsigned int)(((msr) >> 8) & 0xff),
3189                        (unsigned int)(((msr) >> 16) & 0xff),
3190                        (unsigned int)(((msr) >> 24) & 0xff),
3191                        (unsigned int)(((msr) >> 32) & 0xff3));
3192        }
3193        if (has_hwp_notify) {
3194                if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3195                        return 0;
3196
3197                fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3198                        "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3199                        cpu, msr,
3200                        ((msr) & 0x1) ? "EN" : "Dis",
3201                        ((msr) & 0x2) ? "EN" : "Dis");
3202        }
3203        if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3204                return 0;
3205
3206        fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3207                        "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3208                        cpu, msr,
3209                        ((msr) & 0x1) ? "" : "No-",
3210                        ((msr) & 0x2) ? "" : "No-");
3211
3212        return 0;
3213}
3214
3215/*
3216 * print_perf_limit()
3217 */
3218int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3219{
3220        unsigned long long msr;
3221        int cpu;
3222
3223        cpu = t->cpu_id;
3224
3225        /* per-package */
3226        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3227                return 0;
3228
3229        if (cpu_migrate(cpu)) {
3230                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3231                return -1;
3232        }
3233
3234        if (do_core_perf_limit_reasons) {
3235                get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3236                fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3237                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3238                        (msr & 1 << 15) ? "bit15, " : "",
3239                        (msr & 1 << 14) ? "bit14, " : "",
3240                        (msr & 1 << 13) ? "Transitions, " : "",
3241                        (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3242                        (msr & 1 << 11) ? "PkgPwrL2, " : "",
3243                        (msr & 1 << 10) ? "PkgPwrL1, " : "",
3244                        (msr & 1 << 9) ? "CorePwr, " : "",
3245                        (msr & 1 << 8) ? "Amps, " : "",
3246                        (msr & 1 << 6) ? "VR-Therm, " : "",
3247                        (msr & 1 << 5) ? "Auto-HWP, " : "",
3248                        (msr & 1 << 4) ? "Graphics, " : "",
3249                        (msr & 1 << 2) ? "bit2, " : "",
3250                        (msr & 1 << 1) ? "ThermStatus, " : "",
3251                        (msr & 1 << 0) ? "PROCHOT, " : "");
3252                fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3253                        (msr & 1 << 31) ? "bit31, " : "",
3254                        (msr & 1 << 30) ? "bit30, " : "",
3255                        (msr & 1 << 29) ? "Transitions, " : "",
3256                        (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3257                        (msr & 1 << 27) ? "PkgPwrL2, " : "",
3258                        (msr & 1 << 26) ? "PkgPwrL1, " : "",
3259                        (msr & 1 << 25) ? "CorePwr, " : "",
3260                        (msr & 1 << 24) ? "Amps, " : "",
3261                        (msr & 1 << 22) ? "VR-Therm, " : "",
3262                        (msr & 1 << 21) ? "Auto-HWP, " : "",
3263                        (msr & 1 << 20) ? "Graphics, " : "",
3264                        (msr & 1 << 18) ? "bit18, " : "",
3265                        (msr & 1 << 17) ? "ThermStatus, " : "",
3266                        (msr & 1 << 16) ? "PROCHOT, " : "");
3267
3268        }
3269        if (do_gfx_perf_limit_reasons) {
3270                get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3271                fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3272                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3273                        (msr & 1 << 0) ? "PROCHOT, " : "",
3274                        (msr & 1 << 1) ? "ThermStatus, " : "",
3275                        (msr & 1 << 4) ? "Graphics, " : "",
3276                        (msr & 1 << 6) ? "VR-Therm, " : "",
3277                        (msr & 1 << 8) ? "Amps, " : "",
3278                        (msr & 1 << 9) ? "GFXPwr, " : "",
3279                        (msr & 1 << 10) ? "PkgPwrL1, " : "",
3280                        (msr & 1 << 11) ? "PkgPwrL2, " : "");
3281                fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3282                        (msr & 1 << 16) ? "PROCHOT, " : "",
3283                        (msr & 1 << 17) ? "ThermStatus, " : "",
3284                        (msr & 1 << 20) ? "Graphics, " : "",
3285                        (msr & 1 << 22) ? "VR-Therm, " : "",
3286                        (msr & 1 << 24) ? "Amps, " : "",
3287                        (msr & 1 << 25) ? "GFXPwr, " : "",
3288                        (msr & 1 << 26) ? "PkgPwrL1, " : "",
3289                        (msr & 1 << 27) ? "PkgPwrL2, " : "");
3290        }
3291        if (do_ring_perf_limit_reasons) {
3292                get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3293                fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3294                fprintf(outf, " (Active: %s%s%s%s%s%s)",
3295                        (msr & 1 << 0) ? "PROCHOT, " : "",
3296                        (msr & 1 << 1) ? "ThermStatus, " : "",
3297                        (msr & 1 << 6) ? "VR-Therm, " : "",
3298                        (msr & 1 << 8) ? "Amps, " : "",
3299                        (msr & 1 << 10) ? "PkgPwrL1, " : "",
3300                        (msr & 1 << 11) ? "PkgPwrL2, " : "");
3301                fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3302                        (msr & 1 << 16) ? "PROCHOT, " : "",
3303                        (msr & 1 << 17) ? "ThermStatus, " : "",
3304                        (msr & 1 << 22) ? "VR-Therm, " : "",
3305                        (msr & 1 << 24) ? "Amps, " : "",
3306                        (msr & 1 << 26) ? "PkgPwrL1, " : "",
3307                        (msr & 1 << 27) ? "PkgPwrL2, " : "");
3308        }
3309        return 0;
3310}
3311
3312#define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3313#define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3314
3315double get_tdp(unsigned int model)
3316{
3317        unsigned long long msr;
3318
3319        if (do_rapl & RAPL_PKG_POWER_INFO)
3320                if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3321                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3322
3323        switch (model) {
3324        case INTEL_FAM6_ATOM_SILVERMONT1:
3325        case INTEL_FAM6_ATOM_SILVERMONT2:
3326                return 30.0;
3327        default:
3328                return 135.0;
3329        }
3330}
3331
3332/*
3333 * rapl_dram_energy_units_probe()
3334 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3335 */
3336static double
3337rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3338{
3339        /* only called for genuine_intel, family 6 */
3340
3341        switch (model) {
3342        case INTEL_FAM6_HASWELL_X:      /* HSX */
3343        case INTEL_FAM6_BROADWELL_X:    /* BDX */
3344        case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3345        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3346        case INTEL_FAM6_XEON_PHI_KNM:
3347                return (rapl_dram_energy_units = 15.3 / 1000000);
3348        default:
3349                return (rapl_energy_units);
3350        }
3351}
3352
3353
3354/*
3355 * rapl_probe()
3356 *
3357 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3358 */
3359void rapl_probe(unsigned int family, unsigned int model)
3360{
3361        unsigned long long msr;
3362        unsigned int time_unit;
3363        double tdp;
3364
3365        if (!genuine_intel)
3366                return;
3367
3368        if (family != 6)
3369                return;
3370
3371        switch (model) {
3372        case INTEL_FAM6_SANDYBRIDGE:
3373        case INTEL_FAM6_IVYBRIDGE:
3374        case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3375        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3376        case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3377        case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3378        case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3379                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3380                if (rapl_joules) {
3381                        BIC_PRESENT(BIC_Pkg_J);
3382                        BIC_PRESENT(BIC_Cor_J);
3383                        BIC_PRESENT(BIC_GFX_J);
3384                } else {
3385                        BIC_PRESENT(BIC_PkgWatt);
3386                        BIC_PRESENT(BIC_CorWatt);
3387                        BIC_PRESENT(BIC_GFXWatt);
3388                }
3389                break;
3390        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3391        case INTEL_FAM6_ATOM_GEMINI_LAKE:
3392                do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3393                if (rapl_joules)
3394                        BIC_PRESENT(BIC_Pkg_J);
3395                else
3396                        BIC_PRESENT(BIC_PkgWatt);
3397                break;
3398        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3399        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3400        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3401        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3402                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3403                BIC_PRESENT(BIC_PKG__);
3404                BIC_PRESENT(BIC_RAM__);
3405                if (rapl_joules) {
3406                        BIC_PRESENT(BIC_Pkg_J);
3407                        BIC_PRESENT(BIC_Cor_J);
3408                        BIC_PRESENT(BIC_RAM_J);
3409                        BIC_PRESENT(BIC_GFX_J);
3410                } else {
3411                        BIC_PRESENT(BIC_PkgWatt);
3412                        BIC_PRESENT(BIC_CorWatt);
3413                        BIC_PRESENT(BIC_RAMWatt);
3414                        BIC_PRESENT(BIC_GFXWatt);
3415                }
3416                break;
3417        case INTEL_FAM6_HASWELL_X:      /* HSX */
3418        case INTEL_FAM6_BROADWELL_X:    /* BDX */
3419        case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3420        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3421        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3422        case INTEL_FAM6_XEON_PHI_KNM:
3423                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3424                BIC_PRESENT(BIC_PKG__);
3425                BIC_PRESENT(BIC_RAM__);
3426                if (rapl_joules) {
3427                        BIC_PRESENT(BIC_Pkg_J);
3428                        BIC_PRESENT(BIC_RAM_J);
3429                } else {
3430                        BIC_PRESENT(BIC_PkgWatt);
3431                        BIC_PRESENT(BIC_RAMWatt);
3432                }
3433                break;
3434        case INTEL_FAM6_SANDYBRIDGE_X:
3435        case INTEL_FAM6_IVYBRIDGE_X:
3436                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3437                BIC_PRESENT(BIC_PKG__);
3438                BIC_PRESENT(BIC_RAM__);
3439                if (rapl_joules) {
3440                        BIC_PRESENT(BIC_Pkg_J);
3441                        BIC_PRESENT(BIC_Cor_J);
3442                        BIC_PRESENT(BIC_RAM_J);
3443                } else {
3444                        BIC_PRESENT(BIC_PkgWatt);
3445                        BIC_PRESENT(BIC_CorWatt);
3446                        BIC_PRESENT(BIC_RAMWatt);
3447                }
3448                break;
3449        case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3450        case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3451                do_rapl = RAPL_PKG | RAPL_CORES;
3452                if (rapl_joules) {
3453                        BIC_PRESENT(BIC_Pkg_J);
3454                        BIC_PRESENT(BIC_Cor_J);
3455                } else {
3456                        BIC_PRESENT(BIC_PkgWatt);
3457                        BIC_PRESENT(BIC_CorWatt);
3458                }
3459                break;
3460        case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3461                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3462                BIC_PRESENT(BIC_PKG__);
3463                BIC_PRESENT(BIC_RAM__);
3464                if (rapl_joules) {
3465                        BIC_PRESENT(BIC_Pkg_J);
3466                        BIC_PRESENT(BIC_Cor_J);
3467                        BIC_PRESENT(BIC_RAM_J);
3468                } else {
3469                        BIC_PRESENT(BIC_PkgWatt);
3470                        BIC_PRESENT(BIC_CorWatt);
3471                        BIC_PRESENT(BIC_RAMWatt);
3472                }
3473                break;
3474        default:
3475                return;
3476        }
3477
3478        /* units on package 0, verify later other packages match */
3479        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3480                return;
3481
3482        rapl_power_units = 1.0 / (1 << (msr & 0xF));
3483        if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3484                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3485        else
3486                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3487
3488        rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3489
3490        time_unit = msr >> 16 & 0xF;
3491        if (time_unit == 0)
3492                time_unit = 0xA;
3493
3494        rapl_time_units = 1.0 / (1 << (time_unit));
3495
3496        tdp = get_tdp(model);
3497
3498        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3499        if (!quiet)
3500                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3501
3502        return;
3503}
3504
3505void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3506{
3507        if (!genuine_intel)
3508                return;
3509
3510        if (family != 6)
3511                return;
3512
3513        switch (model) {
3514        case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3515        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3516        case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3517                do_gfx_perf_limit_reasons = 1;
3518        case INTEL_FAM6_HASWELL_X:      /* HSX */
3519                do_core_perf_limit_reasons = 1;
3520                do_ring_perf_limit_reasons = 1;
3521        default:
3522                return;
3523        }
3524}
3525
3526int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3527{
3528        unsigned long long msr;
3529        unsigned int dts, dts2;
3530        int cpu;
3531
3532        if (!(do_dts || do_ptm))
3533                return 0;
3534
3535        cpu = t->cpu_id;
3536
3537        /* DTS is per-core, no need to print for each thread */
3538        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3539                return 0;
3540
3541        if (cpu_migrate(cpu)) {
3542                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3543                return -1;
3544        }
3545
3546        if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3547                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3548                        return 0;
3549
3550                dts = (msr >> 16) & 0x7F;
3551                fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3552                        cpu, msr, tcc_activation_temp - dts);
3553
3554                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3555                        return 0;
3556
3557                dts = (msr >> 16) & 0x7F;
3558                dts2 = (msr >> 8) & 0x7F;
3559                fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3560                        cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3561        }
3562
3563
3564        if (do_dts && debug) {
3565                unsigned int resolution;
3566
3567                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3568                        return 0;
3569
3570                dts = (msr >> 16) & 0x7F;
3571                resolution = (msr >> 27) & 0xF;
3572                fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3573                        cpu, msr, tcc_activation_temp - dts, resolution);
3574
3575                if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3576                        return 0;
3577
3578                dts = (msr >> 16) & 0x7F;
3579                dts2 = (msr >> 8) & 0x7F;
3580                fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3581                        cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3582        }
3583
3584        return 0;
3585}
3586
3587void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3588{
3589        fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3590                cpu, label,
3591                ((msr >> 15) & 1) ? "EN" : "DIS",
3592                ((msr >> 0) & 0x7FFF) * rapl_power_units,
3593                (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3594                (((msr >> 16) & 1) ? "EN" : "DIS"));
3595
3596        return;
3597}
3598
3599int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3600{
3601        unsigned long long msr;
3602        int cpu;
3603
3604        if (!do_rapl)
3605                return 0;
3606
3607        /* RAPL counters are per package, so print only for 1st thread/package */
3608        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3609                return 0;
3610
3611        cpu = t->cpu_id;
3612        if (cpu_migrate(cpu)) {
3613                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3614                return -1;
3615        }
3616
3617        if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3618                return -1;
3619
3620        fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3621                rapl_power_units, rapl_energy_units, rapl_time_units);
3622
3623        if (do_rapl & RAPL_PKG_POWER_INFO) {
3624
3625                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3626                        return -5;
3627
3628
3629                fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3630                        cpu, msr,
3631                        ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3632                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3633                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3634                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3635
3636        }
3637        if (do_rapl & RAPL_PKG) {
3638
3639                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3640                        return -9;
3641
3642                fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3643                        cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3644
3645                print_power_limit_msr(cpu, msr, "PKG Limit #1");
3646                fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3647                        cpu,
3648                        ((msr >> 47) & 1) ? "EN" : "DIS",
3649                        ((msr >> 32) & 0x7FFF) * rapl_power_units,
3650                        (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3651                        ((msr >> 48) & 1) ? "EN" : "DIS");
3652        }
3653
3654        if (do_rapl & RAPL_DRAM_POWER_INFO) {
3655                if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3656                        return -6;
3657
3658                fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3659                        cpu, msr,
3660                        ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3661                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3662                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3663                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3664        }
3665        if (do_rapl & RAPL_DRAM) {
3666                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
3667                        return -9;
3668                fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3669                                cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3670
3671                print_power_limit_msr(cpu, msr, "DRAM Limit");
3672        }
3673        if (do_rapl & RAPL_CORE_POLICY) {
3674                if (get_msr(cpu, MSR_PP0_POLICY, &msr))
3675                        return -7;
3676
3677                fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3678        }
3679        if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3680                if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
3681                        return -9;
3682                fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
3683                                cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3684                print_power_limit_msr(cpu, msr, "Cores Limit");
3685        }
3686        if (do_rapl & RAPL_GFX) {
3687                if (get_msr(cpu, MSR_PP1_POLICY, &msr))
3688                        return -8;
3689
3690                fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3691
3692                if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
3693                        return -9;
3694                fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
3695                                cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3696                print_power_limit_msr(cpu, msr, "GFX Limit");
3697        }
3698        return 0;
3699}
3700
3701/*
3702 * SNB adds support for additional MSRs:
3703 *
3704 * MSR_PKG_C7_RESIDENCY            0x000003fa
3705 * MSR_CORE_C7_RESIDENCY           0x000003fe
3706 * MSR_PKG_C2_RESIDENCY            0x0000060d
3707 */
3708
3709int has_snb_msrs(unsigned int family, unsigned int model)
3710{
3711        if (!genuine_intel)
3712                return 0;
3713
3714        switch (model) {
3715        case INTEL_FAM6_SANDYBRIDGE:
3716        case INTEL_FAM6_SANDYBRIDGE_X:
3717        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3718        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3719        case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3720        case INTEL_FAM6_HASWELL_X:      /* HSW */
3721        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3722        case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3723        case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3724        case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3725        case INTEL_FAM6_BROADWELL_X:    /* BDX */
3726        case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3727        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3728        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3729        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3730        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3731        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3732        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3733        case INTEL_FAM6_ATOM_GEMINI_LAKE:
3734        case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3735                return 1;
3736        }
3737        return 0;
3738}
3739
3740/*
3741 * HSW adds support for additional MSRs:
3742 *
3743 * MSR_PKG_C8_RESIDENCY         0x00000630
3744 * MSR_PKG_C9_RESIDENCY         0x00000631
3745 * MSR_PKG_C10_RESIDENCY        0x00000632
3746 *
3747 * MSR_PKGC8_IRTL               0x00000633
3748 * MSR_PKGC9_IRTL               0x00000634
3749 * MSR_PKGC10_IRTL              0x00000635
3750 *
3751 */
3752int has_hsw_msrs(unsigned int family, unsigned int model)
3753{
3754        if (!genuine_intel)
3755                return 0;
3756
3757        switch (model) {
3758        case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3759        case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3760        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3761        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3762        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3763        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3764        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3765        case INTEL_FAM6_ATOM_GEMINI_LAKE:
3766                return 1;
3767        }
3768        return 0;
3769}
3770
3771/*
3772 * SKL adds support for additional MSRS:
3773 *
3774 * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
3775 * MSR_PKG_ANY_CORE_C0_RES         0x00000659
3776 * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
3777 * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
3778 */
3779int has_skl_msrs(unsigned int family, unsigned int model)
3780{
3781        if (!genuine_intel)
3782                return 0;
3783
3784        switch (model) {
3785        case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3786        case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3787        case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3788        case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3789                return 1;
3790        }
3791        return 0;
3792}
3793
3794int is_slm(unsigned int family, unsigned int model)
3795{
3796        if (!genuine_intel)
3797                return 0;
3798        switch (model) {
3799        case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3800        case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3801                return 1;
3802        }
3803        return 0;
3804}
3805
3806int is_knl(unsigned int family, unsigned int model)
3807{
3808        if (!genuine_intel)
3809                return 0;
3810        switch (model) {
3811        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3812        case INTEL_FAM6_XEON_PHI_KNM:
3813                return 1;
3814        }
3815        return 0;
3816}
3817
/*
 * get_aperf_mperf_multiplier()
 *
 * Returns 1024 when is_knl() reports a KNL-class CPU, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
        return is_knl(family, model) ? 1024 : 1;
}
3824
3825#define SLM_BCLK_FREQS 5
3826double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
3827
3828double slm_bclk(void)
3829{
3830        unsigned long long msr = 3;
3831        unsigned int i;
3832        double freq;
3833
3834        if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
3835                fprintf(outf, "SLM BCLK: unknown\n");
3836
3837        i = msr & 0xf;
3838        if (i >= SLM_BCLK_FREQS) {
3839                fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3840                i = 3;
3841        }
3842        freq = slm_freq_table[i];
3843
3844        if (!quiet)
3845                fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
3846
3847        return freq;
3848}
3849
/*
 * discover_bclk()
 *
 * Returns the bus clock for this family/model:
 *   100.00 when has_snb_msrs() or is_knl() matches,
 *   the value from slm_bclk() when is_slm() matches,
 *   133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
        if (has_snb_msrs(family, model) || is_knl(family, model))
                return 100.00;

        if (is_slm(family, model))
                return slm_bclk();

        return 133.33;
}
3859
3860/*
3861 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
3862 * the Thermal Control Circuit (TCC) activates.
3863 * This is usually equal to tjMax.
3864 *
3865 * Older processors do not have this MSR, so there we guess,
3866 * but also allow cmdline over-ride with -T.
3867 *
3868 * Several MSR temperature values are in units of degrees-C
3869 * below this value, including the Digital Thermal Sensor (DTS),
3870 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
3871 */
3872int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3873{
3874        unsigned long long msr;
3875        unsigned int target_c_local;
3876        int cpu;
3877
3878        /* tcc_activation_temp is used only for dts or ptm */
3879        if (!(do_dts || do_ptm))
3880                return 0;
3881
3882        /* this is a per-package concept */
3883        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3884                return 0;
3885
3886        cpu = t->cpu_id;
3887        if (cpu_migrate(cpu)) {
3888                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3889                return -1;
3890        }
3891
3892        if (tcc_activation_temp_override != 0) {
3893                tcc_activation_temp = tcc_activation_temp_override;
3894                fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3895                        cpu, tcc_activation_temp);
3896                return 0;
3897        }
3898
3899        /* Temperature Target MSR is Nehalem and newer only */
3900        if (!do_nhm_platform_info)
3901                goto guess;
3902
3903        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3904                goto guess;
3905
3906        target_c_local = (msr >> 16) & 0xFF;
3907
3908        if (!quiet)
3909                fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3910                        cpu, msr, target_c_local);
3911
3912        if (!target_c_local)
3913                goto guess;
3914
3915        tcc_activation_temp = target_c_local;
3916
3917        return 0;
3918
3919guess:
3920        tcc_activation_temp = TJMAX_DEFAULT;
3921        fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3922                cpu, tcc_activation_temp);
3923
3924        return 0;
3925}
3926
3927void decode_feature_control_msr(void)
3928{
3929        unsigned long long msr;
3930
3931        if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3932                fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3933                        base_cpu, msr,
3934                        msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3935                        msr & (1 << 18) ? "SGX" : "");
3936}
3937
3938void decode_misc_enable_msr(void)
3939{
3940        unsigned long long msr;
3941
3942        if (!genuine_intel)
3943                return;
3944
3945        if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3946                fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
3947                        base_cpu, msr,
3948                        msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
3949                        msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
3950                        msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
3951                        msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
3952                        msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
3953}
3954
3955void decode_misc_feature_control(void)
3956{
3957        unsigned long long msr;
3958
3959        if (!has_misc_feature_control)
3960                return;
3961
3962        if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
3963                fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
3964                        base_cpu, msr,
3965                        msr & (0 << 0) ? "No-" : "",
3966                        msr & (1 << 0) ? "No-" : "",
3967                        msr & (2 << 0) ? "No-" : "",
3968                        msr & (3 << 0) ? "No-" : "");
3969}
3970/*
3971 * Decode MSR_MISC_PWR_MGMT
3972 *
3973 * Decode the bits according to the Nehalem documentation
3974 * bit[0] seems to continue to have same meaning going forward
3975 * bit[1] less so...
3976 */
3977void decode_misc_pwr_mgmt_msr(void)
3978{
3979        unsigned long long msr;
3980
3981        if (!do_nhm_platform_info)
3982                return;
3983
3984        if (no_MSR_MISC_PWR_MGMT)
3985                return;
3986
3987        if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3988                fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
3989                        base_cpu, msr,
3990                        msr & (1 << 0) ? "DIS" : "EN",
3991                        msr & (1 << 1) ? "EN" : "DIS",
3992                        msr & (1 << 8) ? "EN" : "DIS");
3993}
3994/*
3995 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
3996 *
3997 * This MSRs are present on Silvermont processors,
3998 * Intel Atom processor E3000 series (Baytrail), and friends.
3999 */
4000void decode_c6_demotion_policy_msr(void)
4001{
4002        unsigned long long msr;
4003
4004        if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4005                fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4006                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4007
4008        if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4009                fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4010                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4011}
4012
4013void process_cpuid()
4014{
4015        unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
4016        unsigned int fms, family, model, stepping;
4017        unsigned int has_turbo;
4018
4019        eax = ebx = ecx = edx = 0;
4020
4021        __cpuid(0, max_level, ebx, ecx, edx);
4022
4023        if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
4024                genuine_intel = 1;
4025
4026        if (!quiet)
4027                fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4028                        (char *)&ebx, (char *)&edx, (char *)&ecx);
4029
4030        __cpuid(1, fms, ebx, ecx, edx);
4031        family = (fms >> 8) & 0xf;
4032        model = (fms >> 4) & 0xf;
4033        stepping = fms & 0xf;
4034        if (family == 6 || family == 0xf)
4035                model += ((fms >> 16) & 0xf) << 4;
4036
4037        if (!quiet) {
4038                fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4039                        max_level, family, model, stepping, family, model, stepping);
4040                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
4041                        ecx & (1 << 0) ? "SSE3" : "-",
4042                        ecx & (1 << 3) ? "MONITOR" : "-",
4043                        ecx & (1 << 6) ? "SMX" : "-",
4044                        ecx & (1 << 7) ? "EIST" : "-",
4045                        ecx & (1 << 8) ? "TM2" : "-",
4046                        edx & (1 << 4) ? "TSC" : "-",
4047                        edx & (1 << 5) ? "MSR" : "-",
4048                        edx & (1 << 22) ? "ACPI-TM" : "-",
4049                        edx & (1 << 29) ? "TM" : "-");
4050        }
4051
4052        if (!(edx & (1 << 5)))
4053                errx(1, "CPUID: no MSR");
4054
4055        /*
4056         * check max extended function levels of CPUID.
4057         * This is needed to check for invariant TSC.
4058         * This check is valid for both Intel and AMD.
4059         */
4060        ebx = ecx = edx = 0;
4061        __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4062
4063        if (max_extended_level >= 0x80000007) {
4064
4065                /*
4066                 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4067                 * this check is valid for both Intel and AMD
4068                 */
4069                __cpuid(0x80000007, eax, ebx, ecx, edx);
4070                has_invariant_tsc = edx & (1 << 8);
4071        }
4072
4073        /*
4074         * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4075         * this check is valid for both Intel and AMD
4076         */
4077
4078        __cpuid(0x6, eax, ebx, ecx, edx);
4079        has_aperf = ecx & (1 << 0);
4080        if (has_aperf) {
4081                BIC_PRESENT(BIC_Avg_MHz);
4082                BIC_PRESENT(BIC_Busy);
4083                BIC_PRESENT(BIC_Bzy_MHz);
4084        }
4085        do_dts = eax & (1 << 0);
4086        if (do_dts)
4087                BIC_PRESENT(BIC_CoreTmp);
4088        has_turbo = eax & (1 << 1);
4089        do_ptm = eax & (1 << 6);
4090        if (do_ptm)
4091                BIC_PRESENT(BIC_PkgTmp);
4092        has_hwp = eax & (1 << 7);
4093        has_hwp_notify = eax & (1 << 8);
4094        has_hwp_activity_window = eax & (1 << 9);
4095        has_hwp_epp = eax & (1 << 10);
4096        has_hwp_pkg = eax & (1 << 11);
4097        has_epb = ecx & (1 << 3);
4098
4099        if (!quiet)
4100                fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4101                        "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4102                        has_aperf ? "" : "No-",
4103                        has_turbo ? "" : "No-",
4104                        do_dts ? "" : "No-",
4105                        do_ptm ? "" : "No-",
4106                        has_hwp ? "" : "No-",
4107                        has_hwp_notify ? "" : "No-",
4108                        has_hwp_activity_window ? "" : "No-",
4109                        has_hwp_epp ? "" : "No-",
4110                        has_hwp_pkg ? "" : "No-",
4111                        has_epb ? "" : "No-");
4112
4113        if (!quiet)
4114                decode_misc_enable_msr();
4115
4116
4117        if (max_level >= 0x7 && !quiet) {
4118                int has_sgx;
4119
4120                ecx = 0;
4121
4122                __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4123
4124                has_sgx = ebx & (1 << 2);
4125                fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4126
4127                if (has_sgx)
4128                        decode_feature_control_msr();
4129        }
4130
4131        if (max_level >= 0x15) {
4132                unsigned int eax_crystal;
4133                unsigned int ebx_tsc;
4134
4135                /*
4136                 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4137                 */
4138                eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4139                __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4140
4141                if (ebx_tsc != 0) {
4142
4143                        if (!quiet && (ebx != 0))
4144                                fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4145                                        eax_crystal, ebx_tsc, crystal_hz);
4146
4147                        if (crystal_hz == 0)
4148                                switch(model) {
4149                                case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4150                                case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4151                                case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4152                                case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4153                                        crystal_hz = 24000000;  /* 24.0 MHz */
4154                                        break;
4155                                case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4156                                case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
4157                                        crystal_hz = 25000000;  /* 25.0 MHz */
4158                                        break;
4159                                case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4160                                case INTEL_FAM6_ATOM_GEMINI_LAKE:
4161                                        crystal_hz = 19200000;  /* 19.2 MHz */
4162                                        break;
4163                                default:
4164                                        crystal_hz = 0;
4165                        }
4166
4167                        if (crystal_hz) {
4168                                tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4169                                if (!quiet)
4170                                        fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4171                                                tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4172                        }
4173                }
4174        }
4175        if (max_level >= 0x16) {
4176                unsigned int base_mhz, max_mhz, bus_mhz, edx;
4177
4178                /*
4179                 * CPUID 16H Base MHz, Max MHz, Bus MHz
4180                 */
4181                base_mhz = max_mhz = bus_mhz = edx = 0;
4182
4183                __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4184                if (!quiet)
4185                        fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4186                                base_mhz, max_mhz, bus_mhz);
4187        }
4188
4189        if (has_aperf)
4190                aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4191
4192        BIC_PRESENT(BIC_IRQ);
4193        BIC_PRESENT(BIC_TSC_MHz);
4194
4195        if (probe_nhm_msrs(family, model)) {
4196                do_nhm_platform_info = 1;
4197                BIC_PRESENT(BIC_CPU_c1);
4198                BIC_PRESENT(BIC_CPU_c3);
4199                BIC_PRESENT(BIC_CPU_c6);
4200                BIC_PRESENT(BIC_SMI);
4201        }
4202        do_snb_cstates = has_snb_msrs(family, model);
4203
4204        if (do_snb_cstates)
4205                BIC_PRESENT(BIC_CPU_c7);
4206
4207        do_irtl_snb = has_snb_msrs(family, model);
4208        if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4209                BIC_PRESENT(BIC_Pkgpc2);
4210        if (pkg_cstate_limit >= PCL__3)
4211                BIC_PRESENT(BIC_Pkgpc3);
4212        if (pkg_cstate_limit >= PCL__6)
4213                BIC_PRESENT(BIC_Pkgpc6);
4214        if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4215                BIC_PRESENT(BIC_Pkgpc7);
4216        if (has_slv_msrs(family, model)) {
4217                BIC_NOT_PRESENT(BIC_Pkgpc2);
4218                BIC_NOT_PRESENT(BIC_Pkgpc3);
4219                BIC_PRESENT(BIC_Pkgpc6);
4220                BIC_NOT_PRESENT(BIC_Pkgpc7);
4221                BIC_PRESENT(BIC_Mod_c6);
4222                use_c1_residency_msr = 1;
4223        }
4224        if (is_dnv(family, model)) {
4225                BIC_PRESENT(BIC_CPU_c1);
4226                BIC_NOT_PRESENT(BIC_CPU_c3);
4227                BIC_NOT_PRESENT(BIC_Pkgpc3);
4228                BIC_NOT_PRESENT(BIC_CPU_c7);
4229                BIC_NOT_PRESENT(BIC_Pkgpc7);
4230                use_c1_residency_msr = 1;
4231        }
4232        if (is_skx(family, model)) {
4233                BIC_NOT_PRESENT(BIC_CPU_c3);
4234                BIC_NOT_PRESENT(BIC_Pkgpc3);
4235                BIC_NOT_PRESENT(BIC_CPU_c7);
4236                BIC_NOT_PRESENT(BIC_Pkgpc7);
4237        }
4238        if (is_bdx(family, model)) {
4239                BIC_NOT_PRESENT(BIC_CPU_c7);
4240                BIC_NOT_PRESENT(BIC_Pkgpc7);
4241        }
4242        if (has_hsw_msrs(family, model)) {
4243                BIC_PRESENT(BIC_Pkgpc8);
4244                BIC_PRESENT(BIC_Pkgpc9);
4245                BIC_PRESENT(BIC_Pkgpc10);
4246        }
4247        do_irtl_hsw = has_hsw_msrs(family, model);
4248        if (has_skl_msrs(family, model)) {
4249                BIC_PRESENT(BIC_Totl_c0);
4250                BIC_PRESENT(BIC_Any_c0);
4251                BIC_PRESENT(BIC_GFX_c0);
4252                BIC_PRESENT(BIC_CPUGFX);
4253        }
4254        do_slm_cstates = is_slm(family, model);
4255        do_knl_cstates  = is_knl(family, model);
4256
4257        if (!quiet)
4258                decode_misc_pwr_mgmt_msr();
4259
4260        if (!quiet && has_slv_msrs(family, model))
4261                decode_c6_demotion_policy_msr();
4262
4263        rapl_probe(family, model);
4264        perf_limit_reasons_probe(family, model);
4265
4266        if (!quiet)
4267                dump_cstate_pstate_config_info(family, model);
4268
4269        if (!quiet)
4270                dump_sysfs_cstate_config();
4271        if (!quiet)
4272                dump_sysfs_pstate_config();
4273
4274        if (has_skl_msrs(family, model))
4275                calculate_tsc_tweak();
4276
4277        if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4278                BIC_PRESENT(BIC_GFX_rc6);
4279
4280        if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4281                BIC_PRESENT(BIC_GFXMHz);
4282
4283        if (!quiet)
4284                decode_misc_feature_control();
4285
4286        return;
4287}
4288
4289
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of d_name is examined.
 * Returns 1 when it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() is only defined for EOF and values representable as
	 * unsigned char; plain char may be negative, so convert first.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4301
/*
 * Placeholder: ignores its argument and always reports success (0).
 */
int open_dev_cpu_msr(int dummy1)
{
	const int success = 0;

	(void)dummy1;	/* intentionally unused */

	return success;
}
4306
4307void topology_probe()
4308{
4309        int i;
4310        int max_core_id = 0;
4311        int max_package_id = 0;
4312        int max_siblings = 0;
4313        struct cpu_topology {
4314                int core_id;
4315                int physical_package_id;
4316        } *cpus;
4317
4318        /* Initialize num_cpus, max_cpu_num */
4319        topo.num_cpus = 0;
4320        topo.max_cpu_num = 0;
4321        for_all_proc_cpus(count_cpus);
4322        if (!summary_only && topo.num_cpus > 1)
4323                BIC_PRESENT(BIC_CPU);
4324
4325        if (debug > 1)
4326                fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4327
4328        cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4329        if (cpus == NULL)
4330                err(1, "calloc cpus");
4331
4332        /*
4333         * Allocate and initialize cpu_present_set
4334         */
4335        cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4336        if (cpu_present_set == NULL)
4337                err(3, "CPU_ALLOC");
4338        cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4339        CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4340        for_all_proc_cpus(mark_cpu_present);
4341
4342        /*
4343         * Validate that all cpus in cpu_subset are also in cpu_present_set
4344         */
4345        for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4346                if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4347                        if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4348                                err(1, "cpu%d not present", i);
4349        }
4350
4351        /*
4352         * Allocate and initialize cpu_affinity_set
4353         */
4354        cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4355        if (cpu_affinity_set == NULL)
4356                err(3, "CPU_ALLOC");
4357        cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4358        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4359
4360
4361        /*
4362         * For online cpus
4363         * find max_core_id, max_package_id
4364         */
4365        for (i = 0; i <= topo.max_cpu_num; ++i) {
4366                int siblings;
4367
4368                if (cpu_is_not_present(i)) {
4369                        if (debug > 1)
4370                                fprintf(outf, "cpu%d NOT PRESENT\n", i);
4371                        continue;
4372                }
4373                cpus[i].core_id = get_core_id(i);
4374                if (cpus[i].core_id > max_core_id)
4375                        max_core_id = cpus[i].core_id;
4376
4377                cpus[i].physical_package_id = get_physical_package_id(i);
4378                if (cpus[i].physical_package_id > max_package_id)
4379                        max_package_id = cpus[i].physical_package_id;
4380
4381                siblings = get_num_ht_siblings(i);
4382                if (siblings > max_siblings)
4383                        max_siblings = siblings;
4384                if (debug > 1)
4385                        fprintf(outf, "cpu %d pkg %d core %d\n",
4386                                i, cpus[i].physical_package_id, cpus[i].core_id);
4387        }
4388        topo.num_cores_per_pkg = max_core_id + 1;
4389        if (debug > 1)
4390                fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4391                        max_core_id, topo.num_cores_per_pkg);
4392        if (!summary_only && topo.num_cores_per_pkg > 1)
4393                BIC_PRESENT(BIC_Core);
4394
4395        topo.num_packages = max_package_id + 1;
4396        if (debug > 1)
4397                fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4398                        max_package_id, topo.num_packages);
4399        if (!summary_only && topo.num_packages > 1)
4400                BIC_PRESENT(BIC_Package);
4401
4402        topo.num_threads_per_core = max_siblings;
4403        if (debug > 1)
4404                fprintf(outf, "max_siblings %d\n", max_siblings);
4405
4406        free(cpus);
4407}
4408
4409void
4410allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
4411{
4412        int i;
4413
4414        *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
4415                topo.num_packages, sizeof(struct thread_data));
4416        if (*t == NULL)
4417                goto error;
4418
4419        for (i = 0; i < topo.num_threads_per_core *
4420                topo.num_cores_per_pkg * topo.num_packages; i++)
4421                (*t)[i].cpu_id = -1;
4422
4423        *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
4424                sizeof(struct core_data));
4425        if (*c == NULL)
4426                goto error;
4427
4428        for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
4429                (*c)[i].core_id = -1;
4430
4431        *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4432        if (*p == NULL)
4433                goto error;
4434
4435        for (i = 0; i < topo.num_packages; i++)
4436                (*p)[i].package_id = i;
4437
4438        return;
4439error:
4440        err(1, "calloc counters");
4441}
4442/*
4443 * init_counter()
4444 *
4445 * set cpu_id, core_num, pkg_num
4446 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4447 *
4448 * increment topo.num_cores when 1st core in pkg seen
4449 */
4450void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4451        struct pkg_data *pkg_base, int thread_num, int core_num,
4452        int pkg_num, int cpu_id)
4453{
4454        struct thread_data *t;
4455        struct core_data *c;
4456        struct pkg_data *p;
4457
4458        t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
4459        c = GET_CORE(core_base, core_num, pkg_num);
4460        p = GET_PKG(pkg_base, pkg_num);
4461
4462        t->cpu_id = cpu_id;
4463        if (thread_num == 0) {
4464                t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4465                if (cpu_is_first_core_in_package(cpu_id))
4466                        t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4467        }
4468
4469        c->core_id = core_num;
4470        p->package_id = pkg_num;
4471}
4472
4473
4474int initialize_counters(int cpu_id)
4475{
4476        int my_thread_id, my_core_id, my_package_id;
4477
4478        my_package_id = get_physical_package_id(cpu_id);
4479        my_core_id = get_core_id(cpu_id);
4480        my_thread_id = get_cpu_position_in_core(cpu_id);
4481        if (!my_thread_id)
4482                topo.num_cores++;
4483
4484        init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4485        init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4486        return 0;
4487}
4488
4489void allocate_output_buffer()
4490{
4491        output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4492        outp = output_buffer;
4493        if (outp == NULL)
4494                err(-1, "calloc output buffer");
4495}
4496void allocate_fd_percpu(void)
4497{
4498        fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4499        if (fd_percpu == NULL)
4500                err(-1, "calloc fd_percpu");
4501}
4502void allocate_irq_buffers(void)
4503{
4504        irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4505        if (irq_column_2_cpu == NULL)
4506                err(-1, "calloc %d", topo.num_cpus);
4507
4508        irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4509        if (irqs_per_cpu == NULL)
4510                err(-1, "calloc %d", topo.max_cpu_num + 1);
4511}
4512void setup_all_buffers(void)
4513{
4514        topology_probe();
4515        allocate_irq_buffers();
4516        allocate_fd_percpu();
4517        allocate_counters(&thread_even, &core_even, &package_even);
4518        allocate_counters(&thread_odd, &core_odd, &package_odd);
4519        allocate_output_buffer();
4520        for_all_proc_cpus(initialize_counters);
4521}
4522
4523void set_base_cpu(void)
4524{
4525        base_cpu = sched_getcpu();
4526        if (base_cpu < 0)
4527                err(-ENODEV, "No valid cpus found");
4528
4529        if (debug > 1)
4530                fprintf(outf, "base_cpu = %d\n", base_cpu);
4531}
4532
4533void turbostat_init()
4534{
4535        setup_all_buffers();
4536        set_base_cpu();
4537        check_dev_msr();
4538        check_permissions();
4539        process_cpuid();
4540
4541
4542        if (!quiet)
4543                for_all_cpus(print_hwp, ODD_COUNTERS);
4544
4545        if (!quiet)
4546                for_all_cpus(print_epb, ODD_COUNTERS);
4547
4548        if (!quiet)
4549                for_all_cpus(print_perf_limit, ODD_COUNTERS);
4550
4551        if (!quiet)
4552                for_all_cpus(print_rapl, ODD_COUNTERS);
4553
4554        for_all_cpus(set_temperature_target, ODD_COUNTERS);
4555
4556        if (!quiet)
4557                for_all_cpus(print_thermal, ODD_COUNTERS);
4558
4559        if (!quiet && do_irtl_snb)
4560                print_irtl();
4561}
4562
4563int fork_it(char **argv)
4564{
4565        pid_t child_pid;
4566        int status;
4567
4568        snapshot_proc_sysfs_files();
4569        status = for_all_cpus(get_counters, EVEN_COUNTERS);
4570        if (status)
4571                exit(status);
4572        /* clear affinity side-effect of get_counters() */
4573        sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4574        gettimeofday(&tv_even, (struct timezone *)NULL);
4575
4576        child_pid = fork();
4577        if (!child_pid) {
4578                /* child */
4579                execvp(argv[0], argv);
4580                err(errno, "exec %s", argv[0]);
4581        } else {
4582
4583                /* parent */
4584                if (child_pid == -1)
4585                        err(1, "fork");
4586
4587                signal(SIGINT, SIG_IGN);
4588                signal(SIGQUIT, SIG_IGN);
4589                if (waitpid(child_pid, &status, 0) == -1)
4590                        err(status, "waitpid");
4591        }
4592        /*
4593         * n.b. fork_it() does not check for errors from for_all_cpus()
4594         * because re-starting is problematic when forking
4595         */
4596        snapshot_proc_sysfs_files();
4597        for_all_cpus(get_counters, ODD_COUNTERS);
4598        gettimeofday(&tv_odd, (struct timezone *)NULL);
4599        timersub(&tv_odd, &tv_even, &tv_delta);
4600        if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4601                fprintf(outf, "%s: Counter reset detected\n", progname);
4602        else {
4603                compute_average(EVEN_COUNTERS);
4604                format_all_counters(EVEN_COUNTERS);
4605        }
4606
4607        fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4608
4609        flush_output_stderr();
4610
4611        return status;
4612}
4613
4614int get_and_dump_counters(void)
4615{
4616        int status;
4617
4618        snapshot_proc_sysfs_files();
4619        status = for_all_cpus(get_counters, ODD_COUNTERS);
4620        if (status)
4621                return status;
4622
4623        status = for_all_cpus(dump_counters, ODD_COUNTERS);
4624        if (status)
4625                return status;
4626
4627        flush_output_stdout();
4628
4629        return status;
4630}
4631
4632void print_version() {
4633        fprintf(outf, "turbostat version 17.06.23"
4634                " - Len Brown <lenb@kernel.org>\n");
4635}
4636
4637int add_counter(unsigned int msr_num, char *path, char *name,
4638        unsigned int width, enum counter_scope scope,
4639        enum counter_type type, enum counter_format format, int flags)
4640{
4641        struct msr_counter *msrp;
4642
4643        msrp = calloc(1, sizeof(struct msr_counter));
4644        if (msrp == NULL) {
4645                perror("calloc");
4646                exit(1);
4647        }
4648
4649        msrp->msr_num = msr_num;
4650        strncpy(msrp->name, name, NAME_BYTES);
4651        if (path)
4652                strncpy(msrp->path, path, PATH_BYTES);
4653        msrp->width = width;
4654        msrp->type = type;
4655        msrp->format = format;
4656        msrp->flags = flags;
4657
4658        switch (scope) {
4659
4660        case SCOPE_CPU:
4661                msrp->next = sys.tp;
4662                sys.tp = msrp;
4663                sys.added_thread_counters++;
4664                if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
4665                        fprintf(stderr, "exceeded max %d added thread counters\n",
4666                                MAX_ADDED_COUNTERS);
4667                        exit(-1);
4668                }
4669                break;
4670
4671        case SCOPE_CORE:
4672                msrp->next = sys.cp;
4673                sys.cp = msrp;
4674                sys.added_core_counters++;
4675                if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
4676                        fprintf(stderr, "exceeded max %d added core counters\n",
4677                                MAX_ADDED_COUNTERS);
4678                        exit(-1);
4679                }
4680                break;
4681
4682        case SCOPE_PACKAGE:
4683                msrp->next = sys.pp;
4684                sys.pp = msrp;
4685                sys.added_package_counters++;
4686                if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
4687                        fprintf(stderr, "exceeded max %d added package counters\n",
4688                                MAX_ADDED_COUNTERS);
4689                        exit(-1);
4690                }
4691                break;
4692        }
4693
4694        return 0;
4695}
4696
4697void parse_add_command(char *add_command)
4698{
4699        int msr_num = 0;
4700        char *path = NULL;
4701        char name_buffer[NAME_BYTES] = "";
4702        int width = 64;
4703        int fail = 0;
4704        enum counter_scope scope = SCOPE_CPU;
4705        enum counter_type type = COUNTER_CYCLES;
4706        enum counter_format format = FORMAT_DELTA;
4707
4708        while (add_command) {
4709
4710                if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
4711                        goto next;
4712
4713                if (sscanf(add_command, "msr%d", &msr_num) == 1)
4714                        goto next;
4715
4716                if (*add_command == '/') {
4717                        path = add_command;
4718                        goto next;
4719                }
4720
4721                if (sscanf(add_command, "u%d", &width) == 1) {
4722                        if ((width == 32) || (width == 64))
4723                                goto next;
4724                        width = 64;
4725                }
4726                if (!strncmp(add_command, "cpu", strlen("cpu"))) {
4727                        scope = SCOPE_CPU;
4728                        goto next;
4729                }
4730                if (!strncmp(add_command, "core", strlen("core"))) {
4731                        scope = SCOPE_CORE;
4732                        goto next;
4733                }
4734                if (!strncmp(add_command, "package", strlen("package"))) {
4735                        scope = SCOPE_PACKAGE;
4736                        goto next;
4737                }
4738                if (!strncmp(add_command, "cycles", strlen("cycles"))) {
4739                        type = COUNTER_CYCLES;
4740                        goto next;
4741                }
4742                if (!strncmp(add_command, "seconds", strlen("seconds"))) {
4743                        type = COUNTER_SECONDS;
4744                        goto next;
4745                }
4746                if (!strncmp(add_command, "usec", strlen("usec"))) {
4747                        type = COUNTER_USEC;
4748                        goto next;
4749                }
4750                if (!strncmp(add_command, "raw", strlen("raw"))) {
4751                        format = FORMAT_RAW;
4752                        goto next;
4753                }
4754                if (!strncmp(add_command, "delta", strlen("delta"))) {
4755                        format = FORMAT_DELTA;
4756                        goto next;
4757                }
4758                if (!strncmp(add_command, "percent", strlen("percent"))) {
4759                        format = FORMAT_PERCENT;
4760                        goto next;
4761                }
4762
4763                if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
4764                        char *eos;
4765
4766                        eos = strchr(name_buffer, ',');
4767                        if (eos)
4768                                *eos = '\0';
4769                        goto next;
4770                }
4771
4772next:
4773                add_command = strchr(add_command, ',');
4774                if (add_command) {
4775                        *add_command = '\0';
4776                        add_command++;
4777                }
4778
4779        }
4780        if ((msr_num == 0) && (path == NULL)) {
4781                fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
4782                fail++;
4783        }
4784
4785        /* generate default column header */
4786        if (*name_buffer == '\0') {
4787                if (width == 32)
4788                        sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
4789                else
4790                        sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
4791        }
4792
4793        if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
4794                fail++;
4795
4796        if (fail) {
4797                help();
4798                exit(1);
4799        }
4800}
4801
4802int is_deferred_skip(char *name)
4803{
4804        int i;
4805
4806        for (i = 0; i < deferred_skip_index; ++i)
4807                if (!strcmp(name, deferred_skip_names[i]))
4808                        return 1;
4809        return 0;
4810}
4811
4812void probe_sysfs(void)
4813{
4814        char path[64];
4815        char name_buf[16];
4816        FILE *input;
4817        int state;
4818        char *sp;
4819
4820        if (!DO_BIC(BIC_sysfs))
4821                return;
4822
4823        for (state = 10; state > 0; --state) {
4824
4825                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4826                        base_cpu, state);
4827                input = fopen(path, "r");
4828                if (input == NULL)
4829                        continue;
4830                fgets(name_buf, sizeof(name_buf), input);
4831
4832                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4833                sp = strchr(name_buf, '-');
4834                if (!sp)
4835                        sp = strchrnul(name_buf, '\n');
4836                *sp = '%';
4837                *(sp + 1) = '\0';
4838
4839                fclose(input);
4840
4841                sprintf(path, "cpuidle/state%d/time", state);
4842
4843                if (is_deferred_skip(name_buf))
4844                        continue;
4845
4846                add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
4847                                FORMAT_PERCENT, SYSFS_PERCPU);
4848        }
4849
4850        for (state = 10; state > 0; --state) {
4851
4852                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4853                        base_cpu, state);
4854                input = fopen(path, "r");
4855                if (input == NULL)
4856                        continue;
4857                fgets(name_buf, sizeof(name_buf), input);
4858                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4859                sp = strchr(name_buf, '-');
4860                if (!sp)
4861                        sp = strchrnul(name_buf, '\n');
4862                *sp = '\0';
4863                fclose(input);
4864
4865                sprintf(path, "cpuidle/state%d/usage", state);
4866
4867                if (is_deferred_skip(name_buf))
4868                        continue;
4869
4870                add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
4871                                FORMAT_DELTA, SYSFS_PERCPU);
4872        }
4873
4874}
4875
4876
4877/*
4878 * parse cpuset with following syntax
4879 * 1,2,4..6,8-10 and set bits in cpu_subset
4880 */
4881void parse_cpu_command(char *optarg)
4882{
4883        unsigned int start, end;
4884        char *next;
4885
4886        if (!strcmp(optarg, "core")) {
4887                if (cpu_subset)
4888                        goto error;
4889                show_core_only++;
4890                return;
4891        }
4892        if (!strcmp(optarg, "package")) {
4893                if (cpu_subset)
4894                        goto error;
4895                show_pkg_only++;
4896                return;
4897        }
4898        if (show_core_only || show_pkg_only)
4899                goto error;
4900
4901        cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
4902        if (cpu_subset == NULL)
4903                err(3, "CPU_ALLOC");
4904        cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
4905
4906        CPU_ZERO_S(cpu_subset_size, cpu_subset);
4907
4908        next = optarg;
4909
4910        while (next && *next) {
4911
4912                if (*next == '-')       /* no negative cpu numbers */
4913                        goto error;
4914
4915                start = strtoul(next, &next, 10);
4916
4917                if (start >= CPU_SUBSET_MAXCPUS)
4918                        goto error;
4919                CPU_SET_S(start, cpu_subset_size, cpu_subset);
4920
4921                if (*next == '\0')
4922                        break;
4923
4924                if (*next == ',') {
4925                        next += 1;
4926                        continue;
4927                }
4928
4929                if (*next == '-') {
4930                        next += 1;      /* start range */
4931                } else if (*next == '.') {
4932                        next += 1;
4933                        if (*next == '.')
4934                                next += 1;      /* start range */
4935                        else
4936                                goto error;
4937                }
4938
4939                end = strtoul(next, &next, 10);
4940                if (end <= start)
4941                        goto error;
4942
4943                while (++start <= end) {
4944                        if (start >= CPU_SUBSET_MAXCPUS)
4945                                goto error;
4946                        CPU_SET_S(start, cpu_subset_size, cpu_subset);
4947                }
4948
4949                if (*next == ',')
4950                        next += 1;
4951                else if (*next != '\0')
4952                        goto error;
4953        }
4954
4955        return;
4956
4957error:
4958        fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
4959        help();
4960        exit(-1);
4961}
4962
4963int shown;
4964/*
4965 * parse_show_hide() - process cmdline to set default counter action
4966 */
4967void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
4968{
4969        /*
4970         * --show: show only those specified
4971         *  The 1st invocation will clear and replace the enabled mask
4972         *  subsequent invocations can add to it.
4973         */
4974        if (new_mode == SHOW_LIST) {
4975                if (shown == 0)
4976                        bic_enabled = bic_lookup(optarg, new_mode);
4977                else
4978                        bic_enabled |= bic_lookup(optarg, new_mode);
4979                shown = 1;
4980
4981                return;
4982        }
4983
4984        /*
4985         * --hide: do not show those specified
4986         *  multiple invocations simply clear more bits in enabled mask
4987         */
4988        bic_enabled &= ~bic_lookup(optarg, new_mode);
4989
4990}
4991
4992void cmdline(int argc, char **argv)
4993{
4994        int opt;
4995        int option_index = 0;
4996        static struct option long_options[] = {
4997                {"add",         required_argument,      0, 'a'},
4998                {"cpu",         required_argument,      0, 'c'},
4999                {"Dump",        no_argument,            0, 'D'},
5000                {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5001                {"interval",    required_argument,      0, 'i'},
5002                {"help",        no_argument,            0, 'h'},
5003                {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5004                {"Joules",      no_argument,            0, 'J'},
5005                {"list",        no_argument,            0, 'l'},
5006                {"out",         required_argument,      0, 'o'},
5007                {"quiet",       no_argument,            0, 'q'},
5008                {"show",        required_argument,      0, 's'},
5009                {"Summary",     no_argument,            0, 'S'},
5010                {"TCC",         required_argument,      0, 'T'},
5011                {"version",     no_argument,            0, 'v' },
5012                {0,             0,                      0,  0 }
5013        };
5014
5015        progname = argv[0];
5016
5017        while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
5018                                long_options, &option_index)) != -1) {
5019                switch (opt) {
5020                case 'a':
5021                        parse_add_command(optarg);
5022                        break;
5023                case 'c':
5024                        parse_cpu_command(optarg);
5025                        break;
5026                case 'D':
5027                        dump_only++;
5028                        break;
5029                case 'd':
5030                        debug++;
5031                        break;
5032                case 'H':
5033                        parse_show_hide(optarg, HIDE_LIST);
5034                        break;
5035                case 'h':
5036                default:
5037                        help();
5038                        exit(1);
5039                case 'i':
5040                        {
5041                                double interval = strtod(optarg, NULL);
5042
5043                                if (interval < 0.001) {
5044                                        fprintf(outf, "interval %f seconds is too small\n",
5045                                                interval);
5046                                        exit(2);
5047                                }
5048
5049                                interval_ts.tv_sec = interval;
5050                                interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5051                        }
5052                        break;
5053                case 'J':
5054                        rapl_joules++;
5055                        break;
5056                case 'l':
5057                        list_header_only++;
5058                        quiet++;
5059                        break;
5060                case 'o':
5061                        outf = fopen_or_die(optarg, "w");
5062                        break;
5063                case 'q':
5064                        quiet = 1;
5065                        break;
5066                case 's':
5067                        parse_show_hide(optarg, SHOW_LIST);
5068                        break;
5069                case 'S':
5070                        summary_only++;
5071                        break;
5072                case 'T':
5073                        tcc_activation_temp_override = atoi(optarg);
5074                        break;
5075                case 'v':
5076                        print_version();
5077                        exit(0);
5078                        break;
5079                }
5080        }
5081}
5082
5083int main(int argc, char **argv)
5084{
5085        outf = stderr;
5086
5087        cmdline(argc, argv);
5088
5089        if (!quiet)
5090                print_version();
5091
5092        probe_sysfs();
5093
5094        turbostat_init();
5095
5096        /* dump counters and exit */
5097        if (dump_only)
5098                return get_and_dump_counters();
5099
5100        /* list header and exit */
5101        if (list_header_only) {
5102                print_header(",");
5103                flush_output_stdout();
5104                return 0;
5105        }
5106
5107        /*
5108         * if any params left, it must be a command to fork
5109         */
5110        if (argc - optind)
5111                return fork_it(argv + optind);
5112        else
5113                turbostat_loop();
5114
5115        return 0;
5116}
5117