linux/tools/power/x86/turbostat/turbostat.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * turbostat -- show CPU frequency and C-state residency
   4 * on modern Intel and AMD processors.
   5 *
   6 * Copyright (c) 2021 Intel Corporation.
   7 * Len Brown <len.brown@intel.com>
   8 */
   9
  10#define _GNU_SOURCE
  11#include MSRHEADER
  12#include INTEL_FAMILY_HEADER
  13#include <stdarg.h>
  14#include <stdio.h>
  15#include <err.h>
  16#include <unistd.h>
  17#include <sys/types.h>
  18#include <sys/wait.h>
  19#include <sys/stat.h>
  20#include <sys/select.h>
  21#include <sys/resource.h>
  22#include <fcntl.h>
  23#include <signal.h>
  24#include <sys/time.h>
  25#include <stdlib.h>
  26#include <getopt.h>
  27#include <dirent.h>
  28#include <string.h>
  29#include <ctype.h>
  30#include <sched.h>
  31#include <time.h>
  32#include <cpuid.h>
  33#include <sys/capability.h>
  34#include <errno.h>
  35#include <math.h>
  36#include <linux/perf_event.h>
  37#include <asm/unistd.h>
  38#include <stdbool.h>
  39
  40char *proc_stat = "/proc/stat";
  41FILE *outf;
  42int *fd_percpu;
  43int *fd_instr_count_percpu;
  44struct timeval interval_tv = { 5, 0 };
  45struct timespec interval_ts = { 5, 0 };
  46
  47/* Save original CPU model */
  48unsigned int model_orig;
  49
  50unsigned int num_iterations;
  51unsigned int debug;
  52unsigned int quiet;
  53unsigned int shown;
  54unsigned int sums_need_wide_columns;
  55unsigned int rapl_joules;
  56unsigned int summary_only;
  57unsigned int list_header_only;
  58unsigned int dump_only;
  59unsigned int do_snb_cstates;
  60unsigned int do_knl_cstates;
  61unsigned int do_slm_cstates;
  62unsigned int use_c1_residency_msr;
  63unsigned int has_aperf;
  64unsigned int has_epb;
  65unsigned int do_irtl_snb;
  66unsigned int do_irtl_hsw;
  67unsigned int units = 1000000;   /* MHz etc */
  68unsigned int genuine_intel;
  69unsigned int authentic_amd;
  70unsigned int hygon_genuine;
  71unsigned int max_level, max_extended_level;
  72unsigned int has_invariant_tsc;
  73unsigned int do_nhm_platform_info;
  74unsigned int no_MSR_MISC_PWR_MGMT;
  75unsigned int aperf_mperf_multiplier = 1;
  76double bclk;
  77double base_hz;
  78unsigned int has_base_hz;
  79double tsc_tweak = 1.0;
  80unsigned int show_pkg_only;
  81unsigned int show_core_only;
  82char *output_buffer, *outp;
  83unsigned int do_rapl;
  84unsigned int do_dts;
  85unsigned int do_ptm;
  86unsigned int do_ipc;
  87unsigned long long gfx_cur_rc6_ms;
  88unsigned long long cpuidle_cur_cpu_lpi_us;
  89unsigned long long cpuidle_cur_sys_lpi_us;
  90unsigned int gfx_cur_mhz;
  91unsigned int gfx_act_mhz;
  92unsigned int tj_max;
  93unsigned int tj_max_override;
  94int tcc_offset_bits;
  95double rapl_power_units, rapl_time_units;
  96double rapl_dram_energy_units, rapl_energy_units;
  97double rapl_joule_counter_range;
  98unsigned int do_core_perf_limit_reasons;
  99unsigned int has_automatic_cstate_conversion;
 100unsigned int dis_cstate_prewake;
 101unsigned int do_gfx_perf_limit_reasons;
 102unsigned int do_ring_perf_limit_reasons;
 103unsigned int crystal_hz;
 104unsigned long long tsc_hz;
 105int base_cpu;
 106double discover_bclk(unsigned int family, unsigned int model);
 107unsigned int has_hwp;           /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
 108                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
 109unsigned int has_hwp_notify;    /* IA32_HWP_INTERRUPT */
 110unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
 111unsigned int has_hwp_epp;       /* IA32_HWP_REQUEST[bits 31:24] */
 112unsigned int has_hwp_pkg;       /* IA32_HWP_REQUEST_PKG */
 113unsigned int has_misc_feature_control;
 114unsigned int first_counter_read = 1;
 115int ignore_stdin;
 116
 117#define RAPL_PKG                (1 << 0)
 118                                        /* 0x610 MSR_PKG_POWER_LIMIT */
 119                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
 120#define RAPL_PKG_PERF_STATUS    (1 << 1)
 121                                        /* 0x613 MSR_PKG_PERF_STATUS */
 122#define RAPL_PKG_POWER_INFO     (1 << 2)
 123                                        /* 0x614 MSR_PKG_POWER_INFO */
 124
 125#define RAPL_DRAM               (1 << 3)
 126                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
 127                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
 128#define RAPL_DRAM_PERF_STATUS   (1 << 4)
 129                                        /* 0x61b MSR_DRAM_PERF_STATUS */
 130#define RAPL_DRAM_POWER_INFO    (1 << 5)
 131                                        /* 0x61c MSR_DRAM_POWER_INFO */
 132
 133#define RAPL_CORES_POWER_LIMIT  (1 << 6)
 134                                        /* 0x638 MSR_PP0_POWER_LIMIT */
 135#define RAPL_CORE_POLICY        (1 << 7)
 136                                        /* 0x63a MSR_PP0_POLICY */
 137
 138#define RAPL_GFX                (1 << 8)
 139                                        /* 0x640 MSR_PP1_POWER_LIMIT */
 140                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
 141                                        /* 0x642 MSR_PP1_POLICY */
 142
 143#define RAPL_CORES_ENERGY_STATUS        (1 << 9)
 144                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
 145#define RAPL_PER_CORE_ENERGY    (1 << 10)
 146                                        /* Indicates cores energy collection is per-core,
 147                                         * not per-package. */
 148#define RAPL_AMD_F17H           (1 << 11)
 149                                        /* 0xc0010299 MSR_RAPL_PWR_UNIT */
 150                                        /* 0xc001029a MSR_CORE_ENERGY_STAT */
 151                                        /* 0xc001029b MSR_PKG_ENERGY_STAT */
 152#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
 153#define TJMAX_DEFAULT   100
 154
 155/* MSRs that are not yet in the kernel-provided header. */
 156#define MSR_RAPL_PWR_UNIT       0xc0010299
 157#define MSR_CORE_ENERGY_STAT    0xc001029a
 158#define MSR_PKG_ENERGY_STAT     0xc001029b
 159
 160#define MAX(a, b) ((a) > (b) ? (a) : (b))
 161
 162/*
 163 * buffer size used by sscanf() for added column names
 164 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 165 */
 166#define NAME_BYTES 20
 167#define PATH_BYTES 128
 168
 169int backwards_count;
 170char *progname;
 171
 172#define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
 173cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
 174size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
 175#define MAX_ADDED_COUNTERS 8
 176#define MAX_ADDED_THREAD_COUNTERS 24
 177#define BITMASK_SIZE 32
 178
 179struct thread_data {
 180        struct timeval tv_begin;
 181        struct timeval tv_end;
 182        struct timeval tv_delta;
 183        unsigned long long tsc;
 184        unsigned long long aperf;
 185        unsigned long long mperf;
 186        unsigned long long c1;
 187        unsigned long long instr_count;
 188        unsigned long long irq_count;
 189        unsigned int smi_count;
 190        unsigned int cpu_id;
 191        unsigned int apic_id;
 192        unsigned int x2apic_id;
 193        unsigned int flags;
 194        bool is_atom;
 195#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
 196#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
 197        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 198} *thread_even, *thread_odd;
 199
 200struct core_data {
 201        unsigned long long c3;
 202        unsigned long long c6;
 203        unsigned long long c7;
 204        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
 205        unsigned int core_temp_c;
 206        unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
 207        unsigned int core_id;
 208        unsigned long long counter[MAX_ADDED_COUNTERS];
 209} *core_even, *core_odd;
 210
 211struct pkg_data {
 212        unsigned long long pc2;
 213        unsigned long long pc3;
 214        unsigned long long pc6;
 215        unsigned long long pc7;
 216        unsigned long long pc8;
 217        unsigned long long pc9;
 218        unsigned long long pc10;
 219        unsigned long long cpu_lpi;
 220        unsigned long long sys_lpi;
 221        unsigned long long pkg_wtd_core_c0;
 222        unsigned long long pkg_any_core_c0;
 223        unsigned long long pkg_any_gfxe_c0;
 224        unsigned long long pkg_both_core_gfxe_c0;
 225        long long gfx_rc6_ms;
 226        unsigned int gfx_mhz;
 227        unsigned int gfx_act_mhz;
 228        unsigned int package_id;
 229        unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
 230        unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
 231        unsigned long long energy_cores;        /* MSR_PP0_ENERGY_STATUS */
 232        unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
 233        unsigned long long rapl_pkg_perf_status;        /* MSR_PKG_PERF_STATUS */
 234        unsigned long long rapl_dram_perf_status;       /* MSR_DRAM_PERF_STATUS */
 235        unsigned int pkg_temp_c;
 236        unsigned long long counter[MAX_ADDED_COUNTERS];
 237} *package_even, *package_odd;
 238
 239#define ODD_COUNTERS thread_odd, core_odd, package_odd
 240#define EVEN_COUNTERS thread_even, core_even, package_even
 241
 242#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
 243        ((thread_base) +                                                      \
 244         ((pkg_no) *                                                          \
 245          topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
 246         ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
 247         ((core_no) * topo.threads_per_core) +                                \
 248         (thread_no))
 249
 250#define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
 251        ((core_base) +                                                  \
 252         ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
 253         ((node_no) * topo.cores_per_node) +                            \
 254         (core_no))
 255
 256#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 257
 258enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 259enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
 260enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
 261
 262struct msr_counter {
 263        unsigned int msr_num;
 264        char name[NAME_BYTES];
 265        char path[PATH_BYTES];
 266        unsigned int width;
 267        enum counter_type type;
 268        enum counter_format format;
 269        struct msr_counter *next;
 270        unsigned int flags;
 271#define FLAGS_HIDE      (1 << 0)
 272#define FLAGS_SHOW      (1 << 1)
 273#define SYSFS_PERCPU    (1 << 1)
 274};
 275
 276/*
 277 * The accumulated sum of MSR is defined as a monotonic
 278 * increasing MSR, it will be accumulated periodically,
 279 * despite its register's bit width.
 280 */
 281enum {
 282        IDX_PKG_ENERGY,
 283        IDX_DRAM_ENERGY,
 284        IDX_PP0_ENERGY,
 285        IDX_PP1_ENERGY,
 286        IDX_PKG_PERF,
 287        IDX_DRAM_PERF,
 288        IDX_COUNT,
 289};
 290
 291int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
 292
 293struct msr_sum_array {
 294        /* get_msr_sum() = sum + (get_msr() - last) */
 295        struct {
 296                /*The accumulated MSR value is updated by the timer */
 297                unsigned long long sum;
 298                /*The MSR footprint recorded in last timer */
 299                unsigned long long last;
 300        } entries[IDX_COUNT];
 301};
 302
 303/* The percpu MSR sum array.*/
 304struct msr_sum_array *per_cpu_msr_sum;
 305
 306off_t idx_to_offset(int idx)
 307{
 308        off_t offset;
 309
 310        switch (idx) {
 311        case IDX_PKG_ENERGY:
 312                if (do_rapl & RAPL_AMD_F17H)
 313                        offset = MSR_PKG_ENERGY_STAT;
 314                else
 315                        offset = MSR_PKG_ENERGY_STATUS;
 316                break;
 317        case IDX_DRAM_ENERGY:
 318                offset = MSR_DRAM_ENERGY_STATUS;
 319                break;
 320        case IDX_PP0_ENERGY:
 321                offset = MSR_PP0_ENERGY_STATUS;
 322                break;
 323        case IDX_PP1_ENERGY:
 324                offset = MSR_PP1_ENERGY_STATUS;
 325                break;
 326        case IDX_PKG_PERF:
 327                offset = MSR_PKG_PERF_STATUS;
 328                break;
 329        case IDX_DRAM_PERF:
 330                offset = MSR_DRAM_PERF_STATUS;
 331                break;
 332        default:
 333                offset = -1;
 334        }
 335        return offset;
 336}
 337
 338int offset_to_idx(off_t offset)
 339{
 340        int idx;
 341
 342        switch (offset) {
 343        case MSR_PKG_ENERGY_STATUS:
 344        case MSR_PKG_ENERGY_STAT:
 345                idx = IDX_PKG_ENERGY;
 346                break;
 347        case MSR_DRAM_ENERGY_STATUS:
 348                idx = IDX_DRAM_ENERGY;
 349                break;
 350        case MSR_PP0_ENERGY_STATUS:
 351                idx = IDX_PP0_ENERGY;
 352                break;
 353        case MSR_PP1_ENERGY_STATUS:
 354                idx = IDX_PP1_ENERGY;
 355                break;
 356        case MSR_PKG_PERF_STATUS:
 357                idx = IDX_PKG_PERF;
 358                break;
 359        case MSR_DRAM_PERF_STATUS:
 360                idx = IDX_DRAM_PERF;
 361                break;
 362        default:
 363                idx = -1;
 364        }
 365        return idx;
 366}
 367
 368int idx_valid(int idx)
 369{
 370        switch (idx) {
 371        case IDX_PKG_ENERGY:
 372                return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
 373        case IDX_DRAM_ENERGY:
 374                return do_rapl & RAPL_DRAM;
 375        case IDX_PP0_ENERGY:
 376                return do_rapl & RAPL_CORES_ENERGY_STATUS;
 377        case IDX_PP1_ENERGY:
 378                return do_rapl & RAPL_GFX;
 379        case IDX_PKG_PERF:
 380                return do_rapl & RAPL_PKG_PERF_STATUS;
 381        case IDX_DRAM_PERF:
 382                return do_rapl & RAPL_DRAM_PERF_STATUS;
 383        default:
 384                return 0;
 385        }
 386}
 387
 388struct sys_counters {
 389        unsigned int added_thread_counters;
 390        unsigned int added_core_counters;
 391        unsigned int added_package_counters;
 392        struct msr_counter *tp;
 393        struct msr_counter *cp;
 394        struct msr_counter *pp;
 395} sys;
 396
 397struct system_summary {
 398        struct thread_data threads;
 399        struct core_data cores;
 400        struct pkg_data packages;
 401} average;
 402
 403struct cpu_topology {
 404        int physical_package_id;
 405        int die_id;
 406        int logical_cpu_id;
 407        int physical_node_id;
 408        int logical_node_id;    /* 0-based count within the package */
 409        int physical_core_id;
 410        int thread_id;
 411        cpu_set_t *put_ids;     /* Processing Unit/Thread IDs */
 412} *cpus;
 413
 414struct topo_params {
 415        int num_packages;
 416        int num_die;
 417        int num_cpus;
 418        int num_cores;
 419        int max_cpu_num;
 420        int max_node_num;
 421        int nodes_per_pkg;
 422        int cores_per_node;
 423        int threads_per_core;
 424} topo;
 425
 426struct timeval tv_even, tv_odd, tv_delta;
 427
 428int *irq_column_2_cpu;          /* /proc/interrupts column numbers */
 429int *irqs_per_cpu;              /* indexed by cpu_num */
 430
 431void setup_all_buffers(void);
 432
 433char *sys_lpi_file;
 434char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
 435char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
 436
 437int cpu_is_not_present(int cpu)
 438{
 439        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 440}
 441
 442/*
 443 * run func(thread, core, package) in topology order
 444 * skip non-present cpus
 445 */
 446
 447int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
 448                 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 449{
 450        int retval, pkg_no, core_no, thread_no, node_no;
 451
 452        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 453                for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
 454                        for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
 455                                for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
 456                                        struct thread_data *t;
 457                                        struct core_data *c;
 458                                        struct pkg_data *p;
 459
 460                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 461
 462                                        if (cpu_is_not_present(t->cpu_id))
 463                                                continue;
 464
 465                                        c = GET_CORE(core_base, core_no, node_no, pkg_no);
 466                                        p = GET_PKG(pkg_base, pkg_no);
 467
 468                                        retval = func(t, c, p);
 469                                        if (retval)
 470                                                return retval;
 471                                }
 472                        }
 473                }
 474        }
 475        return 0;
 476}
 477
 478int cpu_migrate(int cpu)
 479{
 480        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 481        CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
 482        if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
 483                return -1;
 484        else
 485                return 0;
 486}
 487
 488int get_msr_fd(int cpu)
 489{
 490        char pathname[32];
 491        int fd;
 492
 493        fd = fd_percpu[cpu];
 494
 495        if (fd)
 496                return fd;
 497
 498        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
 499        fd = open(pathname, O_RDONLY);
 500        if (fd < 0)
 501                err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
 502
 503        fd_percpu[cpu] = fd;
 504
 505        return fd;
 506}
 507
 508static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
 509{
 510        return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
 511}
 512
 513static int perf_instr_count_open(int cpu_num)
 514{
 515        struct perf_event_attr pea;
 516        int fd;
 517
 518        memset(&pea, 0, sizeof(struct perf_event_attr));
 519        pea.type = PERF_TYPE_HARDWARE;
 520        pea.size = sizeof(struct perf_event_attr);
 521        pea.config = PERF_COUNT_HW_INSTRUCTIONS;
 522
 523        /* counter for cpu_num, including user + kernel and all processes */
 524        fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
 525        if (fd == -1)
 526                err(-1, "cpu%d: perf instruction counter\n", cpu_num);
 527
 528        return fd;
 529}
 530
 531int get_instr_count_fd(int cpu)
 532{
 533        if (fd_instr_count_percpu[cpu])
 534                return fd_instr_count_percpu[cpu];
 535
 536        fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
 537
 538        return fd_instr_count_percpu[cpu];
 539}
 540
 541int get_msr(int cpu, off_t offset, unsigned long long *msr)
 542{
 543        ssize_t retval;
 544
 545        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 546
 547        if (retval != sizeof *msr)
 548                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
 549
 550        return 0;
 551}
 552
 553/*
 554 * This list matches the column headers, except
 555 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 556 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 557 *    matching on them for --show and --hide.
 558 */
 559struct msr_counter bic[] = {
 560        { 0x0, "usec" },
 561        { 0x0, "Time_Of_Day_Seconds" },
 562        { 0x0, "Package" },
 563        { 0x0, "Node" },
 564        { 0x0, "Avg_MHz" },
 565        { 0x0, "Busy%" },
 566        { 0x0, "Bzy_MHz" },
 567        { 0x0, "TSC_MHz" },
 568        { 0x0, "IRQ" },
 569        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
 570        { 0x0, "sysfs" },
 571        { 0x0, "CPU%c1" },
 572        { 0x0, "CPU%c3" },
 573        { 0x0, "CPU%c6" },
 574        { 0x0, "CPU%c7" },
 575        { 0x0, "ThreadC" },
 576        { 0x0, "CoreTmp" },
 577        { 0x0, "CoreCnt" },
 578        { 0x0, "PkgTmp" },
 579        { 0x0, "GFX%rc6" },
 580        { 0x0, "GFXMHz" },
 581        { 0x0, "Pkg%pc2" },
 582        { 0x0, "Pkg%pc3" },
 583        { 0x0, "Pkg%pc6" },
 584        { 0x0, "Pkg%pc7" },
 585        { 0x0, "Pkg%pc8" },
 586        { 0x0, "Pkg%pc9" },
 587        { 0x0, "Pk%pc10" },
 588        { 0x0, "CPU%LPI" },
 589        { 0x0, "SYS%LPI" },
 590        { 0x0, "PkgWatt" },
 591        { 0x0, "CorWatt" },
 592        { 0x0, "GFXWatt" },
 593        { 0x0, "PkgCnt" },
 594        { 0x0, "RAMWatt" },
 595        { 0x0, "PKG_%" },
 596        { 0x0, "RAM_%" },
 597        { 0x0, "Pkg_J" },
 598        { 0x0, "Cor_J" },
 599        { 0x0, "GFX_J" },
 600        { 0x0, "RAM_J" },
 601        { 0x0, "Mod%c6" },
 602        { 0x0, "Totl%C0" },
 603        { 0x0, "Any%C0" },
 604        { 0x0, "GFX%C0" },
 605        { 0x0, "CPUGFX%" },
 606        { 0x0, "Core" },
 607        { 0x0, "CPU" },
 608        { 0x0, "APIC" },
 609        { 0x0, "X2APIC" },
 610        { 0x0, "Die" },
 611        { 0x0, "GFXAMHz" },
 612        { 0x0, "IPC" },
 613};
 614
 615#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 616#define BIC_USEC        (1ULL << 0)
 617#define BIC_TOD         (1ULL << 1)
 618#define BIC_Package     (1ULL << 2)
 619#define BIC_Node        (1ULL << 3)
 620#define BIC_Avg_MHz     (1ULL << 4)
 621#define BIC_Busy        (1ULL << 5)
 622#define BIC_Bzy_MHz     (1ULL << 6)
 623#define BIC_TSC_MHz     (1ULL << 7)
 624#define BIC_IRQ         (1ULL << 8)
 625#define BIC_SMI         (1ULL << 9)
 626#define BIC_sysfs       (1ULL << 10)
 627#define BIC_CPU_c1      (1ULL << 11)
 628#define BIC_CPU_c3      (1ULL << 12)
 629#define BIC_CPU_c6      (1ULL << 13)
 630#define BIC_CPU_c7      (1ULL << 14)
 631#define BIC_ThreadC     (1ULL << 15)
 632#define BIC_CoreTmp     (1ULL << 16)
 633#define BIC_CoreCnt     (1ULL << 17)
 634#define BIC_PkgTmp      (1ULL << 18)
 635#define BIC_GFX_rc6     (1ULL << 19)
 636#define BIC_GFXMHz      (1ULL << 20)
 637#define BIC_Pkgpc2      (1ULL << 21)
 638#define BIC_Pkgpc3      (1ULL << 22)
 639#define BIC_Pkgpc6      (1ULL << 23)
 640#define BIC_Pkgpc7      (1ULL << 24)
 641#define BIC_Pkgpc8      (1ULL << 25)
 642#define BIC_Pkgpc9      (1ULL << 26)
 643#define BIC_Pkgpc10     (1ULL << 27)
 644#define BIC_CPU_LPI     (1ULL << 28)
 645#define BIC_SYS_LPI     (1ULL << 29)
 646#define BIC_PkgWatt     (1ULL << 30)
 647#define BIC_CorWatt     (1ULL << 31)
 648#define BIC_GFXWatt     (1ULL << 32)
 649#define BIC_PkgCnt      (1ULL << 33)
 650#define BIC_RAMWatt     (1ULL << 34)
 651#define BIC_PKG__       (1ULL << 35)
 652#define BIC_RAM__       (1ULL << 36)
 653#define BIC_Pkg_J       (1ULL << 37)
 654#define BIC_Cor_J       (1ULL << 38)
 655#define BIC_GFX_J       (1ULL << 39)
 656#define BIC_RAM_J       (1ULL << 40)
 657#define BIC_Mod_c6      (1ULL << 41)
 658#define BIC_Totl_c0     (1ULL << 42)
 659#define BIC_Any_c0      (1ULL << 43)
 660#define BIC_GFX_c0      (1ULL << 44)
 661#define BIC_CPUGFX      (1ULL << 45)
 662#define BIC_Core        (1ULL << 46)
 663#define BIC_CPU         (1ULL << 47)
 664#define BIC_APIC        (1ULL << 48)
 665#define BIC_X2APIC      (1ULL << 49)
 666#define BIC_Die         (1ULL << 50)
 667#define BIC_GFXACTMHz   (1ULL << 51)
 668#define BIC_IPC         (1ULL << 52)
 669
 670#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
 671#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
 672#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
 673#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
 674#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 675
 676#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 677
 678unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
 679unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
 680
 681#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
 682#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
 683#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
 684#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
 685#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 686#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
 687
 688#define MAX_DEFERRED 16
 689char *deferred_skip_names[MAX_DEFERRED];
 690int deferred_skip_index;
 691
 692/*
 693 * HIDE_LIST - hide this list of counters, show the rest [default]
 694 * SHOW_LIST - show this list of counters, hide the rest
 695 */
 696enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
 697
 698void help(void)
 699{
 700        fprintf(outf,
 701                "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
 702                "\n"
 703                "Turbostat forks the specified COMMAND and prints statistics\n"
 704                "when COMMAND completes.\n"
 705                "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 706                "to print statistics, until interrupted.\n"
 707                "  -a, --add    add a counter\n"
 708                "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 709                "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
 710                "                 {core | package | j,k,l..m,n-p }\n"
 711                "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
 712                "  -D, --Dump   displays the raw counter values\n"
 713                "  -e, --enable [all | column]\n"
 714                "               shows all or the specified disabled column\n"
 715                "  -H, --hide [column|column,column,...]\n"
 716                "               hide the specified column(s)\n"
 717                "  -i, --interval sec.subsec\n"
 718                "               Override default 5-second measurement interval\n"
 719                "  -J, --Joules displays energy in Joules instead of Watts\n"
 720                "  -l, --list   list column headers only\n"
 721                "  -n, --num_iterations num\n"
 722                "               number of the measurement iterations\n"
 723                "  -o, --out file\n"
 724                "               create or truncate \"file\" for all output\n"
 725                "  -q, --quiet  skip decoding system configuration header\n"
 726                "  -s, --show [column|column,column,...]\n"
 727                "               show only the specified column(s)\n"
 728                "  -S, --Summary\n"
 729                "               limits output to 1-line system summary per interval\n"
 730                "  -T, --TCC temperature\n"
 731                "               sets the Thermal Control Circuit temperature in\n"
 732                "                 degrees Celsius\n"
 733                "  -h, --help   print this help message\n"
 734                "  -v, --version        print version information\n" "\n" "For more help, run \"man turbostat\"\n");
 735}
 736
 737/*
 738 * bic_lookup
 739 * for all the strings in comma separate name_list,
 740 * set the approprate bit in return value.
 741 */
 742unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
 743{
 744        int i;
 745        unsigned long long retval = 0;
 746
 747        while (name_list) {
 748                char *comma;
 749
 750                comma = strchr(name_list, ',');
 751
 752                if (comma)
 753                        *comma = '\0';
 754
 755                if (!strcmp(name_list, "all"))
 756                        return ~0;
 757                if (!strcmp(name_list, "topology"))
 758                        return BIC_TOPOLOGY;
 759                if (!strcmp(name_list, "power"))
 760                        return BIC_THERMAL_PWR;
 761                if (!strcmp(name_list, "idle"))
 762                        return BIC_IDLE;
 763                if (!strcmp(name_list, "frequency"))
 764                        return BIC_FREQUENCY;
 765                if (!strcmp(name_list, "other"))
 766                        return BIC_OTHER;
 767                if (!strcmp(name_list, "all"))
 768                        return 0;
 769
 770                for (i = 0; i < MAX_BIC; ++i) {
 771                        if (!strcmp(name_list, bic[i].name)) {
 772                                retval |= (1ULL << i);
 773                                break;
 774                        }
 775                }
 776                if (i == MAX_BIC) {
 777                        if (mode == SHOW_LIST) {
 778                                fprintf(stderr, "Invalid counter name: %s\n", name_list);
 779                                exit(-1);
 780                        }
 781                        deferred_skip_names[deferred_skip_index++] = name_list;
 782                        if (debug)
 783                                fprintf(stderr, "deferred \"%s\"\n", name_list);
 784                        if (deferred_skip_index >= MAX_DEFERRED) {
 785                                fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
 786                                        MAX_DEFERRED, name_list);
 787                                help();
 788                                exit(1);
 789                        }
 790                }
 791
 792                name_list = comma;
 793                if (name_list)
 794                        name_list++;
 795
 796        }
 797        return retval;
 798}
 799
 800void print_header(char *delim)
 801{
 802        struct msr_counter *mp;
 803        int printed = 0;
 804
 805        if (DO_BIC(BIC_USEC))
 806                outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
 807        if (DO_BIC(BIC_TOD))
 808                outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
 809        if (DO_BIC(BIC_Package))
 810                outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
 811        if (DO_BIC(BIC_Die))
 812                outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
 813        if (DO_BIC(BIC_Node))
 814                outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
 815        if (DO_BIC(BIC_Core))
 816                outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
 817        if (DO_BIC(BIC_CPU))
 818                outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
 819        if (DO_BIC(BIC_APIC))
 820                outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
 821        if (DO_BIC(BIC_X2APIC))
 822                outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
 823        if (DO_BIC(BIC_Avg_MHz))
 824                outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
 825        if (DO_BIC(BIC_Busy))
 826                outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
 827        if (DO_BIC(BIC_Bzy_MHz))
 828                outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
 829        if (DO_BIC(BIC_TSC_MHz))
 830                outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
 831
 832        if (DO_BIC(BIC_IPC))
 833                outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
 834
 835        if (DO_BIC(BIC_IRQ)) {
 836                if (sums_need_wide_columns)
 837                        outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
 838                else
 839                        outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
 840        }
 841
 842        if (DO_BIC(BIC_SMI))
 843                outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
 844
 845        for (mp = sys.tp; mp; mp = mp->next) {
 846
 847                if (mp->format == FORMAT_RAW) {
 848                        if (mp->width == 64)
 849                                outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
 850                        else
 851                                outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
 852                } else {
 853                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 854                                outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
 855                        else
 856                                outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
 857                }
 858        }
 859
 860        if (DO_BIC(BIC_CPU_c1))
 861                outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
 862        if (DO_BIC(BIC_CPU_c3))
 863                outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
 864        if (DO_BIC(BIC_CPU_c6))
 865                outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
 866        if (DO_BIC(BIC_CPU_c7))
 867                outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
 868
 869        if (DO_BIC(BIC_Mod_c6))
 870                outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
 871
 872        if (DO_BIC(BIC_CoreTmp))
 873                outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
 874
 875        if (do_rapl && !rapl_joules) {
 876                if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
 877                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
 878        } else if (do_rapl && rapl_joules) {
 879                if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
 880                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
 881        }
 882
 883        for (mp = sys.cp; mp; mp = mp->next) {
 884                if (mp->format == FORMAT_RAW) {
 885                        if (mp->width == 64)
 886                                outp += sprintf(outp, "%s%18.18s", delim, mp->name);
 887                        else
 888                                outp += sprintf(outp, "%s%10.10s", delim, mp->name);
 889                } else {
 890                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 891                                outp += sprintf(outp, "%s%8s", delim, mp->name);
 892                        else
 893                                outp += sprintf(outp, "%s%s", delim, mp->name);
 894                }
 895        }
 896
 897        if (DO_BIC(BIC_PkgTmp))
 898                outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
 899
 900        if (DO_BIC(BIC_GFX_rc6))
 901                outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
 902
 903        if (DO_BIC(BIC_GFXMHz))
 904                outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
 905
 906        if (DO_BIC(BIC_GFXACTMHz))
 907                outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
 908
 909        if (DO_BIC(BIC_Totl_c0))
 910                outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
 911        if (DO_BIC(BIC_Any_c0))
 912                outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
 913        if (DO_BIC(BIC_GFX_c0))
 914                outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
 915        if (DO_BIC(BIC_CPUGFX))
 916                outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
 917
 918        if (DO_BIC(BIC_Pkgpc2))
 919                outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
 920        if (DO_BIC(BIC_Pkgpc3))
 921                outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
 922        if (DO_BIC(BIC_Pkgpc6))
 923                outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
 924        if (DO_BIC(BIC_Pkgpc7))
 925                outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
 926        if (DO_BIC(BIC_Pkgpc8))
 927                outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
 928        if (DO_BIC(BIC_Pkgpc9))
 929                outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
 930        if (DO_BIC(BIC_Pkgpc10))
 931                outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
 932        if (DO_BIC(BIC_CPU_LPI))
 933                outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
 934        if (DO_BIC(BIC_SYS_LPI))
 935                outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 936
 937        if (do_rapl && !rapl_joules) {
 938                if (DO_BIC(BIC_PkgWatt))
 939                        outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
 940                if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
 941                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
 942                if (DO_BIC(BIC_GFXWatt))
 943                        outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
 944                if (DO_BIC(BIC_RAMWatt))
 945                        outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
 946                if (DO_BIC(BIC_PKG__))
 947                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
 948                if (DO_BIC(BIC_RAM__))
 949                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
 950        } else if (do_rapl && rapl_joules) {
 951                if (DO_BIC(BIC_Pkg_J))
 952                        outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
 953                if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
 954                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
 955                if (DO_BIC(BIC_GFX_J))
 956                        outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
 957                if (DO_BIC(BIC_RAM_J))
 958                        outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
 959                if (DO_BIC(BIC_PKG__))
 960                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
 961                if (DO_BIC(BIC_RAM__))
 962                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
 963        }
 964        for (mp = sys.pp; mp; mp = mp->next) {
 965                if (mp->format == FORMAT_RAW) {
 966                        if (mp->width == 64)
 967                                outp += sprintf(outp, "%s%18.18s", delim, mp->name);
 968                        else
 969                                outp += sprintf(outp, "%s%10.10s", delim, mp->name);
 970                } else {
 971                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
 972                                outp += sprintf(outp, "%s%8s", delim, mp->name);
 973                        else
 974                                outp += sprintf(outp, "%s%s", delim, mp->name);
 975                }
 976        }
 977
 978        outp += sprintf(outp, "\n");
 979}
 980
 981int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 982{
 983        int i;
 984        struct msr_counter *mp;
 985
 986        outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
 987
 988        if (t) {
 989                outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
 990                outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
 991                outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
 992                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
 993                outp += sprintf(outp, "c1: %016llX\n", t->c1);
 994
 995                if (DO_BIC(BIC_IPC))
 996                        outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
 997
 998                if (DO_BIC(BIC_IRQ))
 999                        outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
1000                if (DO_BIC(BIC_SMI))
1001                        outp += sprintf(outp, "SMI: %d\n", t->smi_count);
1002
1003                for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1004                        outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
1005                }
1006        }
1007
1008        if (c) {
1009                outp += sprintf(outp, "core: %d\n", c->core_id);
1010                outp += sprintf(outp, "c3: %016llX\n", c->c3);
1011                outp += sprintf(outp, "c6: %016llX\n", c->c6);
1012                outp += sprintf(outp, "c7: %016llX\n", c->c7);
1013                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
1014                outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
1015
1016                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1017                        outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
1018                }
1019                outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
1020        }
1021
1022        if (p) {
1023                outp += sprintf(outp, "package: %d\n", p->package_id);
1024
1025                outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
1026                outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
1027                outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
1028                outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
1029
1030                outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
1031                if (DO_BIC(BIC_Pkgpc3))
1032                        outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
1033                if (DO_BIC(BIC_Pkgpc6))
1034                        outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
1035                if (DO_BIC(BIC_Pkgpc7))
1036                        outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
1037                outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
1038                outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
1039                outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
1040                outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
1041                outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
1042                outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
1043                outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
1044                outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
1045                outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
1046                outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
1047                outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
1048                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
1049
1050                for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1051                        outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
1052                }
1053        }
1054
1055        outp += sprintf(outp, "\n");
1056
1057        return 0;
1058}
1059
1060/*
1061 * column formatting convention & formats
1062 */
1063int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1064{
1065        double interval_float, tsc;
1066        char *fmt8;
1067        int i;
1068        struct msr_counter *mp;
1069        char *delim = "\t";
1070        int printed = 0;
1071
1072        /* if showing only 1st thread in core and this isn't one, bail out */
1073        if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1074                return 0;
1075
1076        /* if showing only 1st thread in pkg and this isn't one, bail out */
1077        if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1078                return 0;
1079
1080        /*if not summary line and --cpu is used */
1081        if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
1082                return 0;
1083
1084        if (DO_BIC(BIC_USEC)) {
1085                /* on each row, print how many usec each timestamp took to gather */
1086                struct timeval tv;
1087
1088                timersub(&t->tv_end, &t->tv_begin, &tv);
1089                outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
1090        }
1091
1092        /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
1093        if (DO_BIC(BIC_TOD))
1094                outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
1095
1096        interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
1097
1098        tsc = t->tsc * tsc_tweak;
1099
1100        /* topo columns, print blanks on 1st (average) line */
1101        if (t == &average.threads) {
1102                if (DO_BIC(BIC_Package))
1103                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1104                if (DO_BIC(BIC_Die))
1105                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1106                if (DO_BIC(BIC_Node))
1107                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1108                if (DO_BIC(BIC_Core))
1109                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1110                if (DO_BIC(BIC_CPU))
1111                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1112                if (DO_BIC(BIC_APIC))
1113                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1114                if (DO_BIC(BIC_X2APIC))
1115                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1116        } else {
1117                if (DO_BIC(BIC_Package)) {
1118                        if (p)
1119                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
1120                        else
1121                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1122                }
1123                if (DO_BIC(BIC_Die)) {
1124                        if (c)
1125                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
1126                        else
1127                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1128                }
1129                if (DO_BIC(BIC_Node)) {
1130                        if (t)
1131                                outp += sprintf(outp, "%s%d",
1132                                                (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
1133                        else
1134                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1135                }
1136                if (DO_BIC(BIC_Core)) {
1137                        if (c)
1138                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
1139                        else
1140                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1141                }
1142                if (DO_BIC(BIC_CPU))
1143                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
1144                if (DO_BIC(BIC_APIC))
1145                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
1146                if (DO_BIC(BIC_X2APIC))
1147                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
1148        }
1149
1150        if (DO_BIC(BIC_Avg_MHz))
1151                outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
1152
1153        if (DO_BIC(BIC_Busy))
1154                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
1155
1156        if (DO_BIC(BIC_Bzy_MHz)) {
1157                if (has_base_hz)
1158                        outp +=
1159                            sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
1160                else
1161                        outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1162                                        tsc / units * t->aperf / t->mperf / interval_float);
1163        }
1164
1165        if (DO_BIC(BIC_TSC_MHz))
1166                outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
1167
1168        if (DO_BIC(BIC_IPC))
1169                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
1170
1171        /* IRQ */
1172        if (DO_BIC(BIC_IRQ)) {
1173                if (sums_need_wide_columns)
1174                        outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
1175                else
1176                        outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1177        }
1178
1179        /* SMI */
1180        if (DO_BIC(BIC_SMI))
1181                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1182
1183        /* Added counters */
1184        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1185                if (mp->format == FORMAT_RAW) {
1186                        if (mp->width == 32)
1187                                outp +=
1188                                    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
1189                        else
1190                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1191                } else if (mp->format == FORMAT_DELTA) {
1192                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1193                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1194                        else
1195                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1196                } else if (mp->format == FORMAT_PERCENT) {
1197                        if (mp->type == COUNTER_USEC)
1198                                outp +=
1199                                    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1200                                            t->counter[i] / interval_float / 10000);
1201                        else
1202                                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
1203                }
1204        }
1205
1206        /* C1 */
1207        if (DO_BIC(BIC_CPU_c1))
1208                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
1209
1210        /* print per-core data only for 1st thread in core */
1211        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1212                goto done;
1213
1214        if (DO_BIC(BIC_CPU_c3))
1215                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
1216        if (DO_BIC(BIC_CPU_c6))
1217                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
1218        if (DO_BIC(BIC_CPU_c7))
1219                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
1220
1221        /* Mod%c6 */
1222        if (DO_BIC(BIC_Mod_c6))
1223                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1224
1225        if (DO_BIC(BIC_CoreTmp))
1226                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1227
1228        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1229                if (mp->format == FORMAT_RAW) {
1230                        if (mp->width == 32)
1231                                outp +=
1232                                    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
1233                        else
1234                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1235                } else if (mp->format == FORMAT_DELTA) {
1236                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1237                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1238                        else
1239                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1240                } else if (mp->format == FORMAT_PERCENT) {
1241                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
1242                }
1243        }
1244
1245        fmt8 = "%s%.2f";
1246
1247        if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1248                outp +=
1249                    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1250        if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1251                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1252
1253        /* print per-package data only for 1st core in package */
1254        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1255                goto done;
1256
1257        /* PkgTmp */
1258        if (DO_BIC(BIC_PkgTmp))
1259                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1260
1261        /* GFXrc6 */
1262        if (DO_BIC(BIC_GFX_rc6)) {
1263                if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1264                        outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1265                } else {
1266                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1267                                        p->gfx_rc6_ms / 10.0 / interval_float);
1268                }
1269        }
1270
1271        /* GFXMHz */
1272        if (DO_BIC(BIC_GFXMHz))
1273                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1274
1275        /* GFXACTMHz */
1276        if (DO_BIC(BIC_GFXACTMHz))
1277                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
1278
1279        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1280        if (DO_BIC(BIC_Totl_c0))
1281                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
1282        if (DO_BIC(BIC_Any_c0))
1283                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
1284        if (DO_BIC(BIC_GFX_c0))
1285                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
1286        if (DO_BIC(BIC_CPUGFX))
1287                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
1288
1289        if (DO_BIC(BIC_Pkgpc2))
1290                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
1291        if (DO_BIC(BIC_Pkgpc3))
1292                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
1293        if (DO_BIC(BIC_Pkgpc6))
1294                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
1295        if (DO_BIC(BIC_Pkgpc7))
1296                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
1297        if (DO_BIC(BIC_Pkgpc8))
1298                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
1299        if (DO_BIC(BIC_Pkgpc9))
1300                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
1301        if (DO_BIC(BIC_Pkgpc10))
1302                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
1303
1304        if (DO_BIC(BIC_CPU_LPI))
1305                outp +=
1306                    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1307        if (DO_BIC(BIC_SYS_LPI))
1308                outp +=
1309                    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1310
1311        if (DO_BIC(BIC_PkgWatt))
1312                outp +=
1313                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1314        if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1315                outp +=
1316                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1317        if (DO_BIC(BIC_GFXWatt))
1318                outp +=
1319                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1320        if (DO_BIC(BIC_RAMWatt))
1321                outp +=
1322                    sprintf(outp, fmt8, (printed++ ? delim : ""),
1323                            p->energy_dram * rapl_dram_energy_units / interval_float);
1324        if (DO_BIC(BIC_Pkg_J))
1325                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1326        if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1327                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1328        if (DO_BIC(BIC_GFX_J))
1329                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1330        if (DO_BIC(BIC_RAM_J))
1331                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1332        if (DO_BIC(BIC_PKG__))
1333                outp +=
1334                    sprintf(outp, fmt8, (printed++ ? delim : ""),
1335                            100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1336        if (DO_BIC(BIC_RAM__))
1337                outp +=
1338                    sprintf(outp, fmt8, (printed++ ? delim : ""),
1339                            100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1340
1341        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1342                if (mp->format == FORMAT_RAW) {
1343                        if (mp->width == 32)
1344                                outp +=
1345                                    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
1346                        else
1347                                outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1348                } else if (mp->format == FORMAT_DELTA) {
1349                        if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1350                                outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1351                        else
1352                                outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1353                } else if (mp->format == FORMAT_PERCENT) {
1354                        outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
1355                }
1356        }
1357
1358done:
1359        if (*(outp - 1) != '\n')
1360                outp += sprintf(outp, "\n");
1361
1362        return 0;
1363}
1364
1365void flush_output_stdout(void)
1366{
1367        FILE *filep;
1368
1369        if (outf == stderr)
1370                filep = stdout;
1371        else
1372                filep = outf;
1373
1374        fputs(output_buffer, filep);
1375        fflush(filep);
1376
1377        outp = output_buffer;
1378}
1379
1380void flush_output_stderr(void)
1381{
1382        fputs(output_buffer, outf);
1383        fflush(outf);
1384        outp = output_buffer;
1385}
1386
1387void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1388{
1389        static int printed;
1390
1391        if (!printed || !summary_only)
1392                print_header("\t");
1393
1394        format_counters(&average.threads, &average.cores, &average.packages);
1395
1396        printed = 1;
1397
1398        if (summary_only)
1399                return;
1400
1401        for_all_cpus(format_counters, t, c, p);
1402}
1403
/*
 * DELTA_WRAP32(new, old) - replace 'old' with (new - old) reduced modulo 2^32.
 *
 * Both values are shifted into the upper half of a 64-bit unsigned value
 * before subtracting, so a counter that wrapped past 32 bits between the
 * two samples still yields the correct positive difference.
 *
 * The macro expands to a single parenthesized expression with no trailing
 * semicolon, so "DELTA_WRAP32(a, b);" is exactly one statement and can be
 * used as the body of an unbraced if/else.  'old' must be an lvalue and is
 * evaluated twice.
 */
#define DELTA_WRAP32(new, old)			\
	((old) = ((((unsigned long long)(new) << 32) - ((unsigned long long)(old) << 32)) >> 32))
1406
1407int delta_package(struct pkg_data *new, struct pkg_data *old)
1408{
1409        int i;
1410        struct msr_counter *mp;
1411
1412        if (DO_BIC(BIC_Totl_c0))
1413                old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1414        if (DO_BIC(BIC_Any_c0))
1415                old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1416        if (DO_BIC(BIC_GFX_c0))
1417                old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1418        if (DO_BIC(BIC_CPUGFX))
1419                old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1420
1421        old->pc2 = new->pc2 - old->pc2;
1422        if (DO_BIC(BIC_Pkgpc3))
1423                old->pc3 = new->pc3 - old->pc3;
1424        if (DO_BIC(BIC_Pkgpc6))
1425                old->pc6 = new->pc6 - old->pc6;
1426        if (DO_BIC(BIC_Pkgpc7))
1427                old->pc7 = new->pc7 - old->pc7;
1428        old->pc8 = new->pc8 - old->pc8;
1429        old->pc9 = new->pc9 - old->pc9;
1430        old->pc10 = new->pc10 - old->pc10;
1431        old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1432        old->sys_lpi = new->sys_lpi - old->sys_lpi;
1433        old->pkg_temp_c = new->pkg_temp_c;
1434
1435        /* flag an error when rc6 counter resets/wraps */
1436        if (old->gfx_rc6_ms > new->gfx_rc6_ms)
1437                old->gfx_rc6_ms = -1;
1438        else
1439                old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1440
1441        old->gfx_mhz = new->gfx_mhz;
1442        old->gfx_act_mhz = new->gfx_act_mhz;
1443
1444        old->energy_pkg = new->energy_pkg - old->energy_pkg;
1445        old->energy_cores = new->energy_cores - old->energy_cores;
1446        old->energy_gfx = new->energy_gfx - old->energy_gfx;
1447        old->energy_dram = new->energy_dram - old->energy_dram;
1448        old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
1449        old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
1450
1451        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1452                if (mp->format == FORMAT_RAW)
1453                        old->counter[i] = new->counter[i];
1454                else
1455                        old->counter[i] = new->counter[i] - old->counter[i];
1456        }
1457
1458        return 0;
1459}
1460
1461void delta_core(struct core_data *new, struct core_data *old)
1462{
1463        int i;
1464        struct msr_counter *mp;
1465
1466        old->c3 = new->c3 - old->c3;
1467        old->c6 = new->c6 - old->c6;
1468        old->c7 = new->c7 - old->c7;
1469        old->core_temp_c = new->core_temp_c;
1470        old->mc6_us = new->mc6_us - old->mc6_us;
1471
1472        DELTA_WRAP32(new->core_energy, old->core_energy);
1473
1474        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1475                if (mp->format == FORMAT_RAW)
1476                        old->counter[i] = new->counter[i];
1477                else
1478                        old->counter[i] = new->counter[i] - old->counter[i];
1479        }
1480}
1481
1482int soft_c1_residency_display(int bic)
1483{
1484        if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1485                return 0;
1486
1487        return DO_BIC_READ(bic);
1488}
1489
1490/*
1491 * old = new - old
1492 */
1493int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
1494{
1495        int i;
1496        struct msr_counter *mp;
1497
1498        /* we run cpuid just the 1st time, copy the results */
1499        if (DO_BIC(BIC_APIC))
1500                new->apic_id = old->apic_id;
1501        if (DO_BIC(BIC_X2APIC))
1502                new->x2apic_id = old->x2apic_id;
1503
1504        /*
1505         * the timestamps from start of measurement interval are in "old"
1506         * the timestamp from end of measurement interval are in "new"
1507         * over-write old w/ new so we can print end of interval values
1508         */
1509
1510        timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1511        old->tv_begin = new->tv_begin;
1512        old->tv_end = new->tv_end;
1513
1514        old->tsc = new->tsc - old->tsc;
1515
1516        /* check for TSC < 1 Mcycles over interval */
1517        if (old->tsc < (1000 * 1000))
1518                errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1519                     "You can disable all c-states by booting with \"idle=poll\"\n"
1520                     "or just the deep ones with \"processor.max_cstate=1\"");
1521
1522        old->c1 = new->c1 - old->c1;
1523
1524        if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
1525                if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1526                        old->aperf = new->aperf - old->aperf;
1527                        old->mperf = new->mperf - old->mperf;
1528                } else {
1529                        return -1;
1530                }
1531        }
1532
1533        if (use_c1_residency_msr) {
1534                /*
1535                 * Some models have a dedicated C1 residency MSR,
1536                 * which should be more accurate than the derivation below.
1537                 */
1538        } else {
1539                /*
1540                 * As counter collection is not atomic,
1541                 * it is possible for mperf's non-halted cycles + idle states
1542                 * to exceed TSC's all cycles: show c1 = 0% in that case.
1543                 */
1544                if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1545                        old->c1 = 0;
1546                else {
1547                        /* normal case, derive c1 */
1548                        old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1549                            - core_delta->c6 - core_delta->c7;
1550                }
1551        }
1552
1553        if (old->mperf == 0) {
1554                if (debug > 1)
1555                        fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1556                old->mperf = 1; /* divide by 0 protection */
1557        }
1558
1559        if (DO_BIC(BIC_IPC))
1560                old->instr_count = new->instr_count - old->instr_count;
1561
1562        if (DO_BIC(BIC_IRQ))
1563                old->irq_count = new->irq_count - old->irq_count;
1564
1565        if (DO_BIC(BIC_SMI))
1566                old->smi_count = new->smi_count - old->smi_count;
1567
1568        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1569                if (mp->format == FORMAT_RAW)
1570                        old->counter[i] = new->counter[i];
1571                else
1572                        old->counter[i] = new->counter[i] - old->counter[i];
1573        }
1574        return 0;
1575}
1576
1577int delta_cpu(struct thread_data *t, struct core_data *c,
1578              struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
1579{
1580        int retval = 0;
1581
1582        /* calculate core delta only for 1st thread in core */
1583        if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1584                delta_core(c, c2);
1585
1586        /* always calculate thread delta */
1587        retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1588        if (retval)
1589                return retval;
1590
1591        /* calculate package delta only for 1st core in package */
1592        if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1593                retval = delta_package(p, p2);
1594
1595        return retval;
1596}
1597
1598void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1599{
1600        int i;
1601        struct msr_counter *mp;
1602
1603        t->tv_begin.tv_sec = 0;
1604        t->tv_begin.tv_usec = 0;
1605        t->tv_end.tv_sec = 0;
1606        t->tv_end.tv_usec = 0;
1607        t->tv_delta.tv_sec = 0;
1608        t->tv_delta.tv_usec = 0;
1609
1610        t->tsc = 0;
1611        t->aperf = 0;
1612        t->mperf = 0;
1613        t->c1 = 0;
1614
1615        t->instr_count = 0;
1616
1617        t->irq_count = 0;
1618        t->smi_count = 0;
1619
1620        /* tells format_counters to dump all fields from this set */
1621        t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1622
1623        c->c3 = 0;
1624        c->c6 = 0;
1625        c->c7 = 0;
1626        c->mc6_us = 0;
1627        c->core_temp_c = 0;
1628        c->core_energy = 0;
1629
1630        p->pkg_wtd_core_c0 = 0;
1631        p->pkg_any_core_c0 = 0;
1632        p->pkg_any_gfxe_c0 = 0;
1633        p->pkg_both_core_gfxe_c0 = 0;
1634
1635        p->pc2 = 0;
1636        if (DO_BIC(BIC_Pkgpc3))
1637                p->pc3 = 0;
1638        if (DO_BIC(BIC_Pkgpc6))
1639                p->pc6 = 0;
1640        if (DO_BIC(BIC_Pkgpc7))
1641                p->pc7 = 0;
1642        p->pc8 = 0;
1643        p->pc9 = 0;
1644        p->pc10 = 0;
1645        p->cpu_lpi = 0;
1646        p->sys_lpi = 0;
1647
1648        p->energy_pkg = 0;
1649        p->energy_dram = 0;
1650        p->energy_cores = 0;
1651        p->energy_gfx = 0;
1652        p->rapl_pkg_perf_status = 0;
1653        p->rapl_dram_perf_status = 0;
1654        p->pkg_temp_c = 0;
1655
1656        p->gfx_rc6_ms = 0;
1657        p->gfx_mhz = 0;
1658        p->gfx_act_mhz = 0;
1659        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1660                t->counter[i] = 0;
1661
1662        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1663                c->counter[i] = 0;
1664
1665        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1666                p->counter[i] = 0;
1667}
1668
1669int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1670{
1671        int i;
1672        struct msr_counter *mp;
1673
1674        /* copy un-changing apic_id's */
1675        if (DO_BIC(BIC_APIC))
1676                average.threads.apic_id = t->apic_id;
1677        if (DO_BIC(BIC_X2APIC))
1678                average.threads.x2apic_id = t->x2apic_id;
1679
1680        /* remember first tv_begin */
1681        if (average.threads.tv_begin.tv_sec == 0)
1682                average.threads.tv_begin = t->tv_begin;
1683
1684        /* remember last tv_end */
1685        average.threads.tv_end = t->tv_end;
1686
1687        average.threads.tsc += t->tsc;
1688        average.threads.aperf += t->aperf;
1689        average.threads.mperf += t->mperf;
1690        average.threads.c1 += t->c1;
1691
1692        average.threads.instr_count += t->instr_count;
1693
1694        average.threads.irq_count += t->irq_count;
1695        average.threads.smi_count += t->smi_count;
1696
1697        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1698                if (mp->format == FORMAT_RAW)
1699                        continue;
1700                average.threads.counter[i] += t->counter[i];
1701        }
1702
1703        /* sum per-core values only for 1st thread in core */
1704        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1705                return 0;
1706
1707        average.cores.c3 += c->c3;
1708        average.cores.c6 += c->c6;
1709        average.cores.c7 += c->c7;
1710        average.cores.mc6_us += c->mc6_us;
1711
1712        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1713
1714        average.cores.core_energy += c->core_energy;
1715
1716        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1717                if (mp->format == FORMAT_RAW)
1718                        continue;
1719                average.cores.counter[i] += c->counter[i];
1720        }
1721
1722        /* sum per-pkg values only for 1st core in pkg */
1723        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1724                return 0;
1725
1726        if (DO_BIC(BIC_Totl_c0))
1727                average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1728        if (DO_BIC(BIC_Any_c0))
1729                average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1730        if (DO_BIC(BIC_GFX_c0))
1731                average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1732        if (DO_BIC(BIC_CPUGFX))
1733                average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1734
1735        average.packages.pc2 += p->pc2;
1736        if (DO_BIC(BIC_Pkgpc3))
1737                average.packages.pc3 += p->pc3;
1738        if (DO_BIC(BIC_Pkgpc6))
1739                average.packages.pc6 += p->pc6;
1740        if (DO_BIC(BIC_Pkgpc7))
1741                average.packages.pc7 += p->pc7;
1742        average.packages.pc8 += p->pc8;
1743        average.packages.pc9 += p->pc9;
1744        average.packages.pc10 += p->pc10;
1745
1746        average.packages.cpu_lpi = p->cpu_lpi;
1747        average.packages.sys_lpi = p->sys_lpi;
1748
1749        average.packages.energy_pkg += p->energy_pkg;
1750        average.packages.energy_dram += p->energy_dram;
1751        average.packages.energy_cores += p->energy_cores;
1752        average.packages.energy_gfx += p->energy_gfx;
1753
1754        average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1755        average.packages.gfx_mhz = p->gfx_mhz;
1756        average.packages.gfx_act_mhz = p->gfx_act_mhz;
1757
1758        average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1759
1760        average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1761        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1762
1763        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1764                if (mp->format == FORMAT_RAW)
1765                        continue;
1766                average.packages.counter[i] += p->counter[i];
1767        }
1768        return 0;
1769}
1770
1771/*
1772 * sum the counters for all cpus in the system
1773 * compute the weighted average
1774 */
1775void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1776{
1777        int i;
1778        struct msr_counter *mp;
1779
1780        clear_counters(&average.threads, &average.cores, &average.packages);
1781
1782        for_all_cpus(sum_counters, t, c, p);
1783
1784        /* Use the global time delta for the average. */
1785        average.threads.tv_delta = tv_delta;
1786
1787        average.threads.tsc /= topo.num_cpus;
1788        average.threads.aperf /= topo.num_cpus;
1789        average.threads.mperf /= topo.num_cpus;
1790        average.threads.instr_count /= topo.num_cpus;
1791        average.threads.c1 /= topo.num_cpus;
1792
1793        if (average.threads.irq_count > 9999999)
1794                sums_need_wide_columns = 1;
1795
1796        average.cores.c3 /= topo.num_cores;
1797        average.cores.c6 /= topo.num_cores;
1798        average.cores.c7 /= topo.num_cores;
1799        average.cores.mc6_us /= topo.num_cores;
1800
1801        if (DO_BIC(BIC_Totl_c0))
1802                average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1803        if (DO_BIC(BIC_Any_c0))
1804                average.packages.pkg_any_core_c0 /= topo.num_packages;
1805        if (DO_BIC(BIC_GFX_c0))
1806                average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1807        if (DO_BIC(BIC_CPUGFX))
1808                average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1809
1810        average.packages.pc2 /= topo.num_packages;
1811        if (DO_BIC(BIC_Pkgpc3))
1812                average.packages.pc3 /= topo.num_packages;
1813        if (DO_BIC(BIC_Pkgpc6))
1814                average.packages.pc6 /= topo.num_packages;
1815        if (DO_BIC(BIC_Pkgpc7))
1816                average.packages.pc7 /= topo.num_packages;
1817
1818        average.packages.pc8 /= topo.num_packages;
1819        average.packages.pc9 /= topo.num_packages;
1820        average.packages.pc10 /= topo.num_packages;
1821
1822        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1823                if (mp->format == FORMAT_RAW)
1824                        continue;
1825                if (mp->type == COUNTER_ITEMS) {
1826                        if (average.threads.counter[i] > 9999999)
1827                                sums_need_wide_columns = 1;
1828                        continue;
1829                }
1830                average.threads.counter[i] /= topo.num_cpus;
1831        }
1832        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1833                if (mp->format == FORMAT_RAW)
1834                        continue;
1835                if (mp->type == COUNTER_ITEMS) {
1836                        if (average.cores.counter[i] > 9999999)
1837                                sums_need_wide_columns = 1;
1838                }
1839                average.cores.counter[i] /= topo.num_cores;
1840        }
1841        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1842                if (mp->format == FORMAT_RAW)
1843                        continue;
1844                if (mp->type == COUNTER_ITEMS) {
1845                        if (average.packages.counter[i] > 9999999)
1846                                sums_need_wide_columns = 1;
1847                }
1848                average.packages.counter[i] /= topo.num_packages;
1849        }
1850}
1851
1852static unsigned long long rdtsc(void)
1853{
1854        unsigned int low, high;
1855
1856        asm volatile ("rdtsc":"=a" (low), "=d"(high));
1857
1858        return low | ((unsigned long long)high) << 32;
1859}
1860
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: when the open fails, the
 * program exits with status 1 via err(), which reports the path and errno.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
        FILE *stream;

        stream = fopen(path, mode);
        if (stream == NULL)
                err(1, "%s: open failed", path);

        return stream;
}
1872
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at "path", parse one unsigned decimal integer from it and
 * return that value.
 *
 * Exits with status 1 if the file cannot be opened (via fopen_or_die())
 * or if no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
        FILE *fp;
        int retval;
        unsigned long long counter = 0;

        fp = fopen_or_die(path, "r");

        /* %llu matches the unsigned destination; %lld expects a signed long long */
        retval = fscanf(fp, "%llu", &counter);
        if (retval != 1)
                err(1, "snapshot_sysfs_counter(%s)", path);

        fclose(fp);

        return counter;
}
1894
1895int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1896{
1897        if (mp->msr_num != 0) {
1898                if (get_msr(cpu, mp->msr_num, counterp))
1899                        return -1;
1900        } else {
1901                char path[128 + PATH_BYTES];
1902
1903                if (mp->flags & SYSFS_PERCPU) {
1904                        sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
1905
1906                        *counterp = snapshot_sysfs_counter(path);
1907                } else {
1908                        *counterp = snapshot_sysfs_counter(mp->path);
1909                }
1910        }
1911
1912        return 0;
1913}
1914
1915int get_epb(int cpu)
1916{
1917        char path[128 + PATH_BYTES];
1918        unsigned long long msr;
1919        int ret, epb = -1;
1920        FILE *fp;
1921
1922        sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
1923
1924        fp = fopen(path, "r");
1925        if (!fp)
1926                goto msr_fallback;
1927
1928        ret = fscanf(fp, "%d", &epb);
1929        if (ret != 1)
1930                err(1, "%s(%s)", __func__, path);
1931
1932        fclose(fp);
1933
1934        return epb;
1935
1936msr_fallback:
1937        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
1938
1939        return msr & 0xf;
1940}
1941
1942void get_apic_id(struct thread_data *t)
1943{
1944        unsigned int eax, ebx, ecx, edx;
1945
1946        if (DO_BIC(BIC_APIC)) {
1947                eax = ebx = ecx = edx = 0;
1948                __cpuid(1, eax, ebx, ecx, edx);
1949
1950                t->apic_id = (ebx >> 24) & 0xff;
1951        }
1952
1953        if (!DO_BIC(BIC_X2APIC))
1954                return;
1955
1956        if (authentic_amd || hygon_genuine) {
1957                unsigned int topology_extensions;
1958
1959                if (max_extended_level < 0x8000001e)
1960                        return;
1961
1962                eax = ebx = ecx = edx = 0;
1963                __cpuid(0x80000001, eax, ebx, ecx, edx);
1964                topology_extensions = ecx & (1 << 22);
1965
1966                if (topology_extensions == 0)
1967                        return;
1968
1969                eax = ebx = ecx = edx = 0;
1970                __cpuid(0x8000001e, eax, ebx, ecx, edx);
1971
1972                t->x2apic_id = eax;
1973                return;
1974        }
1975
1976        if (!genuine_intel)
1977                return;
1978
1979        if (max_level < 0xb)
1980                return;
1981
1982        ecx = 0;
1983        __cpuid(0xb, eax, ebx, ecx, edx);
1984        t->x2apic_id = edx;
1985
1986        if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1987                fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
1988}
1989
1990/*
1991 * get_counters(...)
1992 * migrate to cpu
1993 * acquire and record local counters for that cpu
1994 */
1995int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1996{
1997        int cpu = t->cpu_id;
1998        unsigned long long msr;
1999        int aperf_mperf_retry_count = 0;
2000        struct msr_counter *mp;
2001        int i;
2002
2003        if (cpu_migrate(cpu)) {
2004                fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
2005                return -1;
2006        }
2007
2008        gettimeofday(&t->tv_begin, (struct timezone *)NULL);
2009
2010        if (first_counter_read)
2011                get_apic_id(t);
2012retry:
2013        t->tsc = rdtsc();       /* we are running on local CPU of interest */
2014
2015        if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
2016                unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
2017
2018                /*
2019                 * The TSC, APERF and MPERF must be read together for
2020                 * APERF/MPERF and MPERF/TSC to give accurate results.
2021                 *
2022                 * Unfortunately, APERF and MPERF are read by
2023                 * individual system call, so delays may occur
2024                 * between them.  If the time to read them
2025                 * varies by a large amount, we re-read them.
2026                 */
2027
2028                /*
2029                 * This initial dummy APERF read has been seen to
2030                 * reduce jitter in the subsequent reads.
2031                 */
2032
2033                if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2034                        return -3;
2035
2036                t->tsc = rdtsc();       /* re-read close to APERF */
2037
2038                tsc_before = t->tsc;
2039
2040                if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2041                        return -3;
2042
2043                tsc_between = rdtsc();
2044
2045                if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
2046                        return -4;
2047
2048                tsc_after = rdtsc();
2049
2050                aperf_time = tsc_between - tsc_before;
2051                mperf_time = tsc_after - tsc_between;
2052
2053                /*
2054                 * If the system call latency to read APERF and MPERF
2055                 * differ by more than 2x, then try again.
2056                 */
2057                if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
2058                        aperf_mperf_retry_count++;
2059                        if (aperf_mperf_retry_count < 5)
2060                                goto retry;
2061                        else
2062                                warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
2063                }
2064                aperf_mperf_retry_count = 0;
2065
2066                t->aperf = t->aperf * aperf_mperf_multiplier;
2067                t->mperf = t->mperf * aperf_mperf_multiplier;
2068        }
2069
2070        if (DO_BIC(BIC_IPC))
2071                if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
2072                        return -4;
2073
2074        if (DO_BIC(BIC_IRQ))
2075                t->irq_count = irqs_per_cpu[cpu];
2076        if (DO_BIC(BIC_SMI)) {
2077                if (get_msr(cpu, MSR_SMI_COUNT, &msr))
2078                        return -5;
2079                t->smi_count = msr & 0xFFFFFFFF;
2080        }
2081        if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
2082                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
2083                        return -6;
2084        }
2085
2086        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2087                if (get_mp(cpu, mp, &t->counter[i]))
2088                        return -10;
2089        }
2090
2091        /* collect core counters only for 1st thread in core */
2092        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2093                goto done;
2094
2095        if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
2096                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
2097                        return -6;
2098        }
2099
2100        if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
2101                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
2102                        return -7;
2103        } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
2104                if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
2105                        return -7;
2106        }
2107
2108        if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
2109                if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
2110                        return -8;
2111                else if (t->is_atom) {
2112                        /*
2113                         * For Atom CPUs that has core cstate deeper than c6,
2114                         * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
2115                         * Minus CC7 (and deeper cstates) residency to get
2116                         * accturate cc6 residency.
2117                         */
2118                        c->c6 -= c->c7;
2119                }
2120        }
2121
2122        if (DO_BIC(BIC_Mod_c6))
2123                if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
2124                        return -8;
2125
2126        if (DO_BIC(BIC_CoreTmp)) {
2127                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2128                        return -9;
2129                c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
2130        }
2131
2132        if (do_rapl & RAPL_AMD_F17H) {
2133                if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
2134                        return -14;
2135                c->core_energy = msr & 0xFFFFFFFF;
2136        }
2137
2138        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2139                if (get_mp(cpu, mp, &c->counter[i]))
2140                        return -10;
2141        }
2142
2143        /* collect package counters only for 1st core in package */
2144        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2145                goto done;
2146
2147        if (DO_BIC(BIC_Totl_c0)) {
2148                if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
2149                        return -10;
2150        }
2151        if (DO_BIC(BIC_Any_c0)) {
2152                if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
2153                        return -11;
2154        }
2155        if (DO_BIC(BIC_GFX_c0)) {
2156                if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
2157                        return -12;
2158        }
2159        if (DO_BIC(BIC_CPUGFX)) {
2160                if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
2161                        return -13;
2162        }
2163        if (DO_BIC(BIC_Pkgpc3))
2164                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
2165                        return -9;
2166        if (DO_BIC(BIC_Pkgpc6)) {
2167                if (do_slm_cstates) {
2168                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
2169                                return -10;
2170                } else {
2171                        if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
2172                                return -10;
2173                }
2174        }
2175
2176        if (DO_BIC(BIC_Pkgpc2))
2177                if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
2178                        return -11;
2179        if (DO_BIC(BIC_Pkgpc7))
2180                if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
2181                        return -12;
2182        if (DO_BIC(BIC_Pkgpc8))
2183                if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
2184                        return -13;
2185        if (DO_BIC(BIC_Pkgpc9))
2186                if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
2187                        return -13;
2188        if (DO_BIC(BIC_Pkgpc10))
2189                if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
2190                        return -13;
2191
2192        if (DO_BIC(BIC_CPU_LPI))
2193                p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
2194        if (DO_BIC(BIC_SYS_LPI))
2195                p->sys_lpi = cpuidle_cur_sys_lpi_us;
2196
2197        if (do_rapl & RAPL_PKG) {
2198                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
2199                        return -13;
2200                p->energy_pkg = msr;
2201        }
2202        if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
2203                if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
2204                        return -14;
2205                p->energy_cores = msr;
2206        }
2207        if (do_rapl & RAPL_DRAM) {
2208                if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
2209                        return -15;
2210                p->energy_dram = msr;
2211        }
2212        if (do_rapl & RAPL_GFX) {
2213                if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
2214                        return -16;
2215                p->energy_gfx = msr;
2216        }
2217        if (do_rapl & RAPL_PKG_PERF_STATUS) {
2218                if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
2219                        return -16;
2220                p->rapl_pkg_perf_status = msr;
2221        }
2222        if (do_rapl & RAPL_DRAM_PERF_STATUS) {
2223                if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
2224                        return -16;
2225                p->rapl_dram_perf_status = msr;
2226        }
2227        if (do_rapl & RAPL_AMD_F17H) {
2228                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
2229                        return -13;
2230                p->energy_pkg = msr;
2231        }
2232        if (DO_BIC(BIC_PkgTmp)) {
2233                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2234                        return -17;
2235                p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
2236        }
2237
2238        if (DO_BIC(BIC_GFX_rc6))
2239                p->gfx_rc6_ms = gfx_cur_rc6_ms;
2240
2241        if (DO_BIC(BIC_GFXMHz))
2242                p->gfx_mhz = gfx_cur_mhz;
2243
2244        if (DO_BIC(BIC_GFXACTMHz))
2245                p->gfx_act_mhz = gfx_act_mhz;
2246
2247        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2248                if (get_mp(cpu, mp, &p->counter[i]))
2249                        return -10;
2250        }
2251done:
2252        gettimeofday(&t->tv_end, (struct timezone *)NULL);
2253
2254        return 0;
2255}
2256
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0                /* Unknown */
#define PCLRSV 1                /* Reserved */
#define PCL__0 2                /* PC0 */
#define PCL__1 3                /* PC1 */
#define PCL__2 4                /* PC2 */
#define PCL__3 5                /* PC3 */
#define PCL__4 6                /* PC4 */
#define PCL__6 7                /* PC6 */
#define PCL_6N 8                /* PC6 No Retention */
#define PCL_6R 9                /* PC6 Retention */
#define PCL__7 10               /* PC7 */
#define PCL_7S 11               /* PC7 Shrink */
#define PCL__8 12               /* PC8 */
#define PCL__9 13               /* PC9 */
#define PCL_10 14               /* PC10 */
#define PCLUNL 15               /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value.  Designated initializers tie every
 * string to its constant, so PCLUKN prints "unknown" and PCLRSV prints
 * "reserved" (the positional list had these two swapped).
 */
char *pkg_cstate_limit_strings[] = {
        [PCLUKN] = "unknown",
        [PCLRSV] = "reserved",
        [PCL__0] = "pc0",
        [PCL__1] = "pc1",
        [PCL__2] = "pc2",
        [PCL__3] = "pc3",
        [PCL__4] = "pc4",
        [PCL__6] = "pc6",
        [PCL_6N] = "pc6n",
        [PCL_6R] = "pc6r",
        [PCL__7] = "pc7",
        [PCL_7S] = "pc7s",
        [PCL__8] = "pc8",
        [PCL__9] = "pc9",
        [PCL_10] = "pc10",
        [PCLUNL] = "unlimited",
};
2284
2285int nhm_pkg_cstate_limits[16] =
2286    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2287        PCLRSV, PCLRSV
2288};
2289
2290int snb_pkg_cstate_limits[16] =
2291    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2292        PCLRSV, PCLRSV
2293};
2294
2295int hsw_pkg_cstate_limits[16] =
2296    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2297        PCLRSV, PCLRSV
2298};
2299
2300int slv_pkg_cstate_limits[16] =
2301    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2302        PCL__6, PCL__7
2303};
2304
2305int amt_pkg_cstate_limits[16] =
2306    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2307        PCLRSV, PCLRSV
2308};
2309
2310int phi_pkg_cstate_limits[16] =
2311    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2312        PCLRSV, PCLRSV
2313};
2314
2315int glm_pkg_cstate_limits[16] =
2316    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2317        PCLRSV, PCLRSV
2318};
2319
2320int skx_pkg_cstate_limits[16] =
2321    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2322        PCLRSV, PCLRSV
2323};
2324
2325int icx_pkg_cstate_limits[16] =
2326    { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2327        PCLRSV, PCLRSV
2328};
2329
2330static void calculate_tsc_tweak()
2331{
2332        tsc_tweak = base_hz / tsc_hz;
2333}
2334
2335void prewake_cstate_probe(unsigned int family, unsigned int model);
2336
2337static void dump_nhm_platform_info(void)
2338{
2339        unsigned long long msr;
2340        unsigned int ratio;
2341
2342        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2343
2344        fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2345
2346        ratio = (msr >> 40) & 0xFF;
2347        fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
2348
2349        ratio = (msr >> 8) & 0xFF;
2350        fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2351
2352        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2353        fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2354                base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2355
2356        /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
2357        if (dis_cstate_prewake)
2358                fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
2359
2360        return;
2361}
2362
2363static void dump_hsw_turbo_ratio_limits(void)
2364{
2365        unsigned long long msr;
2366        unsigned int ratio;
2367
2368        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2369
2370        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2371
2372        ratio = (msr >> 8) & 0xFF;
2373        if (ratio)
2374                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
2375
2376        ratio = (msr >> 0) & 0xFF;
2377        if (ratio)
2378                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
2379        return;
2380}
2381
2382static void dump_ivt_turbo_ratio_limits(void)
2383{
2384        unsigned long long msr;
2385        unsigned int ratio;
2386
2387        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2388
2389        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2390
2391        ratio = (msr >> 56) & 0xFF;
2392        if (ratio)
2393                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
2394
2395        ratio = (msr >> 48) & 0xFF;
2396        if (ratio)
2397                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
2398
2399        ratio = (msr >> 40) & 0xFF;
2400        if (ratio)
2401                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
2402
2403        ratio = (msr >> 32) & 0xFF;
2404        if (ratio)
2405                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
2406
2407        ratio = (msr >> 24) & 0xFF;
2408        if (ratio)
2409                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
2410
2411        ratio = (msr >> 16) & 0xFF;
2412        if (ratio)
2413                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
2414
2415        ratio = (msr >> 8) & 0xFF;
2416        if (ratio)
2417                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
2418
2419        ratio = (msr >> 0) & 0xFF;
2420        if (ratio)
2421                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
2422        return;
2423}
2424
2425int has_turbo_ratio_group_limits(int family, int model)
2426{
2427
2428        if (!genuine_intel)
2429                return 0;
2430
2431        switch (model) {
2432        case INTEL_FAM6_ATOM_GOLDMONT:
2433        case INTEL_FAM6_SKYLAKE_X:
2434        case INTEL_FAM6_ICELAKE_X:
2435        case INTEL_FAM6_ATOM_GOLDMONT_D:
2436        case INTEL_FAM6_ATOM_TREMONT_D:
2437                return 1;
2438        }
2439        return 0;
2440}
2441
2442static void dump_turbo_ratio_limits(int family, int model)
2443{
2444        unsigned long long msr, core_counts;
2445        unsigned int ratio, group_size;
2446
2447        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2448        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2449
2450        if (has_turbo_ratio_group_limits(family, model)) {
2451                get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2452                fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2453        } else {
2454                core_counts = 0x0807060504030201;
2455        }
2456
2457        ratio = (msr >> 56) & 0xFF;
2458        group_size = (core_counts >> 56) & 0xFF;
2459        if (ratio)
2460                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2461                        ratio, bclk, ratio * bclk, group_size);
2462
2463        ratio = (msr >> 48) & 0xFF;
2464        group_size = (core_counts >> 48) & 0xFF;
2465        if (ratio)
2466                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2467                        ratio, bclk, ratio * bclk, group_size);
2468
2469        ratio = (msr >> 40) & 0xFF;
2470        group_size = (core_counts >> 40) & 0xFF;
2471        if (ratio)
2472                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2473                        ratio, bclk, ratio * bclk, group_size);
2474
2475        ratio = (msr >> 32) & 0xFF;
2476        group_size = (core_counts >> 32) & 0xFF;
2477        if (ratio)
2478                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2479                        ratio, bclk, ratio * bclk, group_size);
2480
2481        ratio = (msr >> 24) & 0xFF;
2482        group_size = (core_counts >> 24) & 0xFF;
2483        if (ratio)
2484                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2485                        ratio, bclk, ratio * bclk, group_size);
2486
2487        ratio = (msr >> 16) & 0xFF;
2488        group_size = (core_counts >> 16) & 0xFF;
2489        if (ratio)
2490                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2491                        ratio, bclk, ratio * bclk, group_size);
2492
2493        ratio = (msr >> 8) & 0xFF;
2494        group_size = (core_counts >> 8) & 0xFF;
2495        if (ratio)
2496                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2497                        ratio, bclk, ratio * bclk, group_size);
2498
2499        ratio = (msr >> 0) & 0xFF;
2500        group_size = (core_counts >> 0) & 0xFF;
2501        if (ratio)
2502                fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2503                        ratio, bclk, ratio * bclk, group_size);
2504        return;
2505}
2506
2507static void dump_atom_turbo_ratio_limits(void)
2508{
2509        unsigned long long msr;
2510        unsigned int ratio;
2511
2512        get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2513        fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2514
2515        ratio = (msr >> 0) & 0x3F;
2516        if (ratio)
2517                fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
2518
2519        ratio = (msr >> 8) & 0x3F;
2520        if (ratio)
2521                fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
2522
2523        ratio = (msr >> 16) & 0x3F;
2524        if (ratio)
2525                fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2526
2527        get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2528        fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2529
2530        ratio = (msr >> 24) & 0x3F;
2531        if (ratio)
2532                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
2533
2534        ratio = (msr >> 16) & 0x3F;
2535        if (ratio)
2536                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
2537
2538        ratio = (msr >> 8) & 0x3F;
2539        if (ratio)
2540                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
2541
2542        ratio = (msr >> 0) & 0x3F;
2543        if (ratio)
2544                fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
2545}
2546
2547static void dump_knl_turbo_ratio_limits(void)
2548{
2549        const unsigned int buckets_no = 7;
2550
2551        unsigned long long msr;
2552        int delta_cores, delta_ratio;
2553        int i, b_nr;
2554        unsigned int cores[buckets_no];
2555        unsigned int ratio[buckets_no];
2556
2557        get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2558
2559        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2560
2561        /*
2562         * Turbo encoding in KNL is as follows:
2563         * [0] -- Reserved
2564         * [7:1] -- Base value of number of active cores of bucket 1.
2565         * [15:8] -- Base value of freq ratio of bucket 1.
2566         * [20:16] -- +ve delta of number of active cores of bucket 2.
2567         * i.e. active cores of bucket 2 =
2568         * active cores of bucket 1 + delta
2569         * [23:21] -- Negative delta of freq ratio of bucket 2.
2570         * i.e. freq ratio of bucket 2 =
2571         * freq ratio of bucket 1 - delta
2572         * [28:24]-- +ve delta of number of active cores of bucket 3.
2573         * [31:29]-- -ve delta of freq ratio of bucket 3.
2574         * [36:32]-- +ve delta of number of active cores of bucket 4.
2575         * [39:37]-- -ve delta of freq ratio of bucket 4.
2576         * [44:40]-- +ve delta of number of active cores of bucket 5.
2577         * [47:45]-- -ve delta of freq ratio of bucket 5.
2578         * [52:48]-- +ve delta of number of active cores of bucket 6.
2579         * [55:53]-- -ve delta of freq ratio of bucket 6.
2580         * [60:56]-- +ve delta of number of active cores of bucket 7.
2581         * [63:61]-- -ve delta of freq ratio of bucket 7.
2582         */
2583
2584        b_nr = 0;
2585        cores[b_nr] = (msr & 0xFF) >> 1;
2586        ratio[b_nr] = (msr >> 8) & 0xFF;
2587
2588        for (i = 16; i < 64; i += 8) {
2589                delta_cores = (msr >> i) & 0x1F;
2590                delta_ratio = (msr >> (i + 5)) & 0x7;
2591
2592                cores[b_nr + 1] = cores[b_nr] + delta_cores;
2593                ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2594                b_nr++;
2595        }
2596
2597        for (i = buckets_no - 1; i >= 0; i--)
2598                if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2599                        fprintf(outf,
2600                                "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2601                                ratio[i], bclk, ratio[i] * bclk, cores[i]);
2602}
2603
2604static void dump_nhm_cst_cfg(void)
2605{
2606        unsigned long long msr;
2607
2608        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2609
2610        fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2611
2612        fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2613                (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2614                (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2615                (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2616                (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2617                (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
2618
2619#define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2620        if (has_automatic_cstate_conversion) {
2621                fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2622        }
2623
2624        fprintf(outf, ")\n");
2625
2626        return;
2627}
2628
2629static void dump_config_tdp(void)
2630{
2631        unsigned long long msr;
2632
2633        get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2634        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2635        fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2636
2637        get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2638        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2639        if (msr) {
2640                fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2641                fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2642                fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2643                fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2644        }
2645        fprintf(outf, ")\n");
2646
2647        get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2648        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2649        if (msr) {
2650                fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2651                fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2652                fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2653                fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2654        }
2655        fprintf(outf, ")\n");
2656
2657        get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2658        fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2659        if ((msr) & 0x3)
2660                fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2661        fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2662        fprintf(outf, ")\n");
2663
2664        get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2665        fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2666        fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2667        fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2668        fprintf(outf, ")\n");
2669}
2670
2671unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2672
2673void print_irtl(void)
2674{
2675        unsigned long long msr;
2676
2677        get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2678        fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2679        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2680                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2681
2682        get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2683        fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2684        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2685                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2686
2687        get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2688        fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2689        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2690                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2691
2692        if (!do_irtl_hsw)
2693                return;
2694
2695        get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2696        fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2697        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2698                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2699
2700        get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2701        fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2702        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2703                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2704
2705        get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2706        fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2707        fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2708                (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2709
2710}
2711
2712void free_fd_percpu(void)
2713{
2714        int i;
2715
2716        for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2717                if (fd_percpu[i] != 0)
2718                        close(fd_percpu[i]);
2719        }
2720
2721        free(fd_percpu);
2722}
2723
2724void free_all_buffers(void)
2725{
2726        int i;
2727
2728        CPU_FREE(cpu_present_set);
2729        cpu_present_set = NULL;
2730        cpu_present_setsize = 0;
2731
2732        CPU_FREE(cpu_affinity_set);
2733        cpu_affinity_set = NULL;
2734        cpu_affinity_setsize = 0;
2735
2736        free(thread_even);
2737        free(core_even);
2738        free(package_even);
2739
2740        thread_even = NULL;
2741        core_even = NULL;
2742        package_even = NULL;
2743
2744        free(thread_odd);
2745        free(core_odd);
2746        free(package_odd);
2747
2748        thread_odd = NULL;
2749        core_odd = NULL;
2750        package_odd = NULL;
2751
2752        free(output_buffer);
2753        output_buffer = NULL;
2754        outp = NULL;
2755
2756        free_fd_percpu();
2757
2758        free(irq_column_2_cpu);
2759        free(irqs_per_cpu);
2760
2761        for (i = 0; i <= topo.max_cpu_num; ++i) {
2762                if (cpus[i].put_ids)
2763                        CPU_FREE(cpus[i].put_ids);
2764        }
2765        free(cpus);
2766}
2767
/*
 * Read one decimal int from the file whose path is built printf-style
 * from fmt and the following arguments.
 *
 * Returns 0 when the file cannot be opened, otherwise the parsed value.
 * A file that opens but does not start with an integer is fatal: the
 * process exits through err(1, ...).
 */
int parse_int_file(const char *fmt, ...)
{
        char path[PATH_MAX];
        va_list ap;
        FILE *fp;
        int parsed;

        va_start(ap, fmt);
        vsnprintf(path, sizeof(path), fmt, ap);
        va_end(ap);

        fp = fopen(path, "r");
        if (fp == NULL)
                return 0;

        if (fscanf(fp, "%d", &parsed) != 1)
                err(1, "%s: failed to parse number from file", path);

        fclose(fp);

        return parsed;
}
2791
/*
 * cpu_is_first_core_in_package(cpu)
 * Return 1 if cpu equals the leading integer of its own
 * topology/core_siblings_list file, else 0.
 */
int cpu_is_first_core_in_package(int cpu)
{
        int first_sibling;

        first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

        return first_sibling == cpu;
}
2800
/*
 * Return the integer stored in cpu's topology/physical_package_id file,
 * as parsed by parse_int_file() (0 if the file cannot be opened).
 */
int get_physical_package_id(int cpu)
{
        int pkg_id;

        pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

        return pkg_id;
}
2805
/*
 * Return the integer stored in cpu's topology/die_id file, as parsed
 * by parse_int_file() (0 if the file cannot be opened).
 */
int get_die_id(int cpu)
{
        int die_id;

        die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

        return die_id;
}
2810
/*
 * Return the integer stored in cpu's topology/core_id file, as parsed
 * by parse_int_file() (0 if the file cannot be opened).
 */
int get_core_id(int cpu)
{
        int core_id;

        core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

        return core_id;
}
2815
2816void set_node_data(void)
2817{
2818        int pkg, node, lnode, cpu, cpux;
2819        int cpu_count;
2820
2821        /* initialize logical_node_id */
2822        for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2823                cpus[cpu].logical_node_id = -1;
2824
2825        cpu_count = 0;
2826        for (pkg = 0; pkg < topo.num_packages; pkg++) {
2827                lnode = 0;
2828                for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2829                        if (cpus[cpu].physical_package_id != pkg)
2830                                continue;
2831                        /* find a cpu with an unset logical_node_id */
2832                        if (cpus[cpu].logical_node_id != -1)
2833                                continue;
2834                        cpus[cpu].logical_node_id = lnode;
2835                        node = cpus[cpu].physical_node_id;
2836                        cpu_count++;
2837                        /*
2838                         * find all matching cpus on this pkg and set
2839                         * the logical_node_id
2840                         */
2841                        for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2842                                if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
2843                                        cpus[cpux].logical_node_id = lnode;
2844                                        cpu_count++;
2845                                }
2846                        }
2847                        lnode++;
2848                        if (lnode > topo.nodes_per_pkg)
2849                                topo.nodes_per_pkg = lnode;
2850                }
2851                if (cpu_count >= topo.max_cpu_num)
2852                        break;
2853        }
2854}
2855
2856int get_physical_node_id(struct cpu_topology *thiscpu)
2857{
2858        char path[80];
2859        FILE *filep;
2860        int i;
2861        int cpu = thiscpu->logical_cpu_id;
2862
2863        for (i = 0; i <= topo.max_cpu_num; i++) {
2864                sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
2865                filep = fopen(path, "r");
2866                if (!filep)
2867                        continue;
2868                fclose(filep);
2869                return i;
2870        }
2871        return -1;
2872}
2873
2874int get_thread_siblings(struct cpu_topology *thiscpu)
2875{
2876        char path[80], character;
2877        FILE *filep;
2878        unsigned long map;
2879        int so, shift, sib_core;
2880        int cpu = thiscpu->logical_cpu_id;
2881        int offset = topo.max_cpu_num + 1;
2882        size_t size;
2883        int thread_id = 0;
2884
2885        thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2886        if (thiscpu->thread_id < 0)
2887                thiscpu->thread_id = thread_id++;
2888        if (!thiscpu->put_ids)
2889                return -1;
2890
2891        size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2892        CPU_ZERO_S(size, thiscpu->put_ids);
2893
2894        sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2895        filep = fopen(path, "r");
2896
2897        if (!filep) {
2898                warnx("%s: open failed", path);
2899                return -1;
2900        }
2901        do {
2902                offset -= BITMASK_SIZE;
2903                if (fscanf(filep, "%lx%c", &map, &character) != 2)
2904                        err(1, "%s: failed to parse file", path);
2905                for (shift = 0; shift < BITMASK_SIZE; shift++) {
2906                        if ((map >> shift) & 0x1) {
2907                                so = shift + offset;
2908                                sib_core = get_core_id(so);
2909                                if (sib_core == thiscpu->physical_core_id) {
2910                                        CPU_SET_S(so, size, thiscpu->put_ids);
2911                                        if ((so != cpu) && (cpus[so].thread_id < 0))
2912                                                cpus[so].thread_id = thread_id++;
2913                                }
2914                        }
2915                }
2916        } while (!strncmp(&character, ",", 1));
2917        fclose(filep);
2918
2919        return CPU_COUNT_S(size, thiscpu->put_ids);
2920}
2921
2922/*
2923 * run func(thread, core, package) in topology order
2924 * skip non-present cpus
2925 */
2926
2927int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
2928                               struct pkg_data *, struct thread_data *, struct core_data *,
2929                               struct pkg_data *), struct thread_data *thread_base,
2930                   struct core_data *core_base, struct pkg_data *pkg_base,
2931                   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
2932{
2933        int retval, pkg_no, node_no, core_no, thread_no;
2934
2935        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2936                for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2937                        for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2938                                for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2939                                        struct thread_data *t, *t2;
2940                                        struct core_data *c, *c2;
2941                                        struct pkg_data *p, *p2;
2942
2943                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2944
2945                                        if (cpu_is_not_present(t->cpu_id))
2946                                                continue;
2947
2948                                        t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
2949
2950                                        c = GET_CORE(core_base, core_no, node_no, pkg_no);
2951                                        c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
2952
2953                                        p = GET_PKG(pkg_base, pkg_no);
2954                                        p2 = GET_PKG(pkg_base2, pkg_no);
2955
2956                                        retval = func(t, c, p, t2, c2, p2);
2957                                        if (retval)
2958                                                return retval;
2959                                }
2960                        }
2961                }
2962        }
2963        return 0;
2964}
2965
2966/*
2967 * run func(cpu) on every cpu in /proc/stat
2968 * return max_cpu number
2969 */
2970int for_all_proc_cpus(int (func) (int))
2971{
2972        FILE *fp;
2973        int cpu_num;
2974        int retval;
2975
2976        fp = fopen_or_die(proc_stat, "r");
2977
2978        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2979        if (retval != 0)
2980                err(1, "%s: failed to parse format", proc_stat);
2981
2982        while (1) {
2983                retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2984                if (retval != 1)
2985                        break;
2986
2987                retval = func(cpu_num);
2988                if (retval) {
2989                        fclose(fp);
2990                        return (retval);
2991                }
2992        }
2993        fclose(fp);
2994        return 0;
2995}
2996
2997void re_initialize(void)
2998{
2999        free_all_buffers();
3000        setup_all_buffers();
3001        fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
3002}
3003
3004void set_max_cpu_num(void)
3005{
3006        FILE *filep;
3007        int base_cpu;
3008        unsigned long dummy;
3009        char pathname[64];
3010
3011        base_cpu = sched_getcpu();
3012        if (base_cpu < 0)
3013                err(1, "cannot find calling cpu ID");
3014        sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
3015
3016        filep = fopen_or_die(pathname, "r");
3017        topo.max_cpu_num = 0;
3018        while (fscanf(filep, "%lx,", &dummy) == 1)
3019                topo.max_cpu_num += BITMASK_SIZE;
3020        fclose(filep);
3021        topo.max_cpu_num--;     /* 0 based */
3022}
3023
3024/*
3025 * count_cpus()
3026 * remember the last one seen, it will be the max
3027 */
3028int count_cpus(int cpu)
3029{
3030        topo.num_cpus++;
3031        return 0;
3032}
3033
3034int mark_cpu_present(int cpu)
3035{
3036        CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
3037        return 0;
3038}
3039
3040int init_thread_id(int cpu)
3041{
3042        cpus[cpu].thread_id = -1;
3043        return 0;
3044}
3045
3046/*
3047 * snapshot_proc_interrupts()
3048 *
3049 * read and record summary of /proc/interrupts
3050 *
3051 * return 1 if config change requires a restart, else return 0
3052 */
3053int snapshot_proc_interrupts(void)
3054{
3055        static FILE *fp;
3056        int column, retval;
3057
3058        if (fp == NULL)
3059                fp = fopen_or_die("/proc/interrupts", "r");
3060        else
3061                rewind(fp);
3062
3063        /* read 1st line of /proc/interrupts to get cpu* name for each column */
3064        for (column = 0; column < topo.num_cpus; ++column) {
3065                int cpu_number;
3066
3067                retval = fscanf(fp, " CPU%d", &cpu_number);
3068                if (retval != 1)
3069                        break;
3070
3071                if (cpu_number > topo.max_cpu_num) {
3072                        warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
3073                        return 1;
3074                }
3075
3076                irq_column_2_cpu[column] = cpu_number;
3077                irqs_per_cpu[cpu_number] = 0;
3078        }
3079
3080        /* read /proc/interrupt count lines and sum up irqs per cpu */
3081        while (1) {
3082                int column;
3083                char buf[64];
3084
3085                retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
3086                if (retval != 1)
3087                        break;
3088
3089                /* read the count per cpu */
3090                for (column = 0; column < topo.num_cpus; ++column) {
3091
3092                        int cpu_number, irq_count;
3093
3094                        retval = fscanf(fp, " %d", &irq_count);
3095                        if (retval != 1)
3096                                break;
3097
3098                        cpu_number = irq_column_2_cpu[column];
3099                        irqs_per_cpu[cpu_number] += irq_count;
3100
3101                }
3102
3103                while (getc(fp) != '\n') ;      /* flush interrupt description */
3104
3105        }
3106        return 0;
3107}
3108
3109/*
3110 * snapshot_gfx_rc6_ms()
3111 *
3112 * record snapshot of
3113 * /sys/class/drm/card0/power/rc6_residency_ms
3114 *
3115 * return 1 if config change requires a restart, else return 0
3116 */
3117int snapshot_gfx_rc6_ms(void)
3118{
3119        FILE *fp;
3120        int retval;
3121
3122        fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
3123
3124        retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
3125        if (retval != 1)
3126                err(1, "GFX rc6");
3127
3128        fclose(fp);
3129
3130        return 0;
3131}
3132
3133/*
3134 * snapshot_gfx_mhz()
3135 *
3136 * record snapshot of
3137 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
3138 *
3139 * return 1 if config change requires a restart, else return 0
3140 */
3141int snapshot_gfx_mhz(void)
3142{
3143        static FILE *fp;
3144        int retval;
3145
3146        if (fp == NULL)
3147                fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
3148        else {
3149                rewind(fp);
3150                fflush(fp);
3151        }
3152
3153        retval = fscanf(fp, "%d", &gfx_cur_mhz);
3154        if (retval != 1)
3155                err(1, "GFX MHz");
3156
3157        return 0;
3158}
3159
3160/*
3161 * snapshot_gfx_cur_mhz()
3162 *
3163 * record snapshot of
3164 * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
3165 *
3166 * return 1 if config change requires a restart, else return 0
3167 */
3168int snapshot_gfx_act_mhz(void)
3169{
3170        static FILE *fp;
3171        int retval;
3172
3173        if (fp == NULL)
3174                fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
3175        else {
3176                rewind(fp);
3177                fflush(fp);
3178        }
3179
3180        retval = fscanf(fp, "%d", &gfx_act_mhz);
3181        if (retval != 1)
3182                err(1, "GFX ACT MHz");
3183
3184        return 0;
3185}
3186
3187/*
3188 * snapshot_cpu_lpi()
3189 *
3190 * record snapshot of
3191 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
3192 */
3193int snapshot_cpu_lpi_us(void)
3194{
3195        FILE *fp;
3196        int retval;
3197
3198        fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
3199
3200        retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
3201        if (retval != 1) {
3202                fprintf(stderr, "Disabling Low Power Idle CPU output\n");
3203                BIC_NOT_PRESENT(BIC_CPU_LPI);
3204                fclose(fp);
3205                return -1;
3206        }
3207
3208        fclose(fp);
3209
3210        return 0;
3211}
3212
3213/*
3214 * snapshot_sys_lpi()
3215 *
3216 * record snapshot of sys_lpi_file
3217 */
3218int snapshot_sys_lpi_us(void)
3219{
3220        FILE *fp;
3221        int retval;
3222
3223        fp = fopen_or_die(sys_lpi_file, "r");
3224
3225        retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
3226        if (retval != 1) {
3227                fprintf(stderr, "Disabling Low Power Idle System output\n");
3228                BIC_NOT_PRESENT(BIC_SYS_LPI);
3229                fclose(fp);
3230                return -1;
3231        }
3232        fclose(fp);
3233
3234        return 0;
3235}
3236
3237/*
3238 * snapshot /proc and /sys files
3239 *
3240 * return 1 if configuration restart needed, else return 0
3241 */
3242int snapshot_proc_sysfs_files(void)
3243{
3244        if (DO_BIC(BIC_IRQ))
3245                if (snapshot_proc_interrupts())
3246                        return 1;
3247
3248        if (DO_BIC(BIC_GFX_rc6))
3249                snapshot_gfx_rc6_ms();
3250
3251        if (DO_BIC(BIC_GFXMHz))
3252                snapshot_gfx_mhz();
3253
3254        if (DO_BIC(BIC_GFXACTMHz))
3255                snapshot_gfx_act_mhz();
3256
3257        if (DO_BIC(BIC_CPU_LPI))
3258                snapshot_cpu_lpi_us();
3259
3260        if (DO_BIC(BIC_SYS_LPI))
3261                snapshot_sys_lpi_us();
3262
3263        return 0;
3264}
3265
3266int exit_requested;
3267
3268static void signal_handler(int signal)
3269{
3270        switch (signal) {
3271        case SIGINT:
3272                exit_requested = 1;
3273                if (debug)
3274                        fprintf(stderr, " SIGINT\n");
3275                break;
3276        case SIGUSR1:
3277                if (debug > 1)
3278                        fprintf(stderr, "SIGUSR1\n");
3279                break;
3280        }
3281}
3282
3283void setup_signal_handler(void)
3284{
3285        struct sigaction sa;
3286
3287        memset(&sa, 0, sizeof(sa));
3288
3289        sa.sa_handler = &signal_handler;
3290
3291        if (sigaction(SIGINT, &sa, NULL) < 0)
3292                err(1, "sigaction SIGINT");
3293        if (sigaction(SIGUSR1, &sa, NULL) < 0)
3294                err(1, "sigaction SIGUSR1");
3295}
3296
3297void do_sleep(void)
3298{
3299        struct timeval tout;
3300        struct timespec rest;
3301        fd_set readfds;
3302        int retval;
3303
3304        FD_ZERO(&readfds);
3305        FD_SET(0, &readfds);
3306
3307        if (ignore_stdin) {
3308                nanosleep(&interval_ts, NULL);
3309                return;
3310        }
3311
3312        tout = interval_tv;
3313        retval = select(1, &readfds, NULL, NULL, &tout);
3314
3315        if (retval == 1) {
3316                switch (getc(stdin)) {
3317                case 'q':
3318                        exit_requested = 1;
3319                        break;
3320                case EOF:
3321                        /*
3322                         * 'stdin' is a pipe closed on the other end. There
3323                         * won't be any further input.
3324                         */
3325                        ignore_stdin = 1;
3326                        /* Sleep the rest of the time */
3327                        rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3328                        rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3329                        nanosleep(&rest, NULL);
3330                }
3331        }
3332}
3333
3334int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
3335{
3336        int ret, idx;
3337        unsigned long long msr_cur, msr_last;
3338
3339        if (!per_cpu_msr_sum)
3340                return 1;
3341
3342        idx = offset_to_idx(offset);
3343        if (idx < 0)
3344                return idx;
3345        /* get_msr_sum() = sum + (get_msr() - last) */
3346        ret = get_msr(cpu, offset, &msr_cur);
3347        if (ret)
3348                return ret;
3349        msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
3350        DELTA_WRAP32(msr_cur, msr_last);
3351        *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
3352
3353        return 0;
3354}
3355
3356timer_t timerid;
3357
3358/* Timer callback, update the sum of MSRs periodically. */
3359static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3360{
3361        int i, ret;
3362        int cpu = t->cpu_id;
3363
3364        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
3365                unsigned long long msr_cur, msr_last;
3366                off_t offset;
3367
3368                if (!idx_valid(i))
3369                        continue;
3370                offset = idx_to_offset(i);
3371                if (offset < 0)
3372                        continue;
3373                ret = get_msr(cpu, offset, &msr_cur);
3374                if (ret) {
3375                        fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
3376                        continue;
3377                }
3378
3379                msr_last = per_cpu_msr_sum[cpu].entries[i].last;
3380                per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
3381
3382                DELTA_WRAP32(msr_cur, msr_last);
3383                per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
3384        }
3385        return 0;
3386}
3387
3388static void msr_record_handler(union sigval v)
3389{
3390        for_all_cpus(update_msr_sum, EVEN_COUNTERS);
3391}
3392
3393void msr_sum_record(void)
3394{
3395        struct itimerspec its;
3396        struct sigevent sev;
3397
3398        per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
3399        if (!per_cpu_msr_sum) {
3400                fprintf(outf, "Can not allocate memory for long time MSR.\n");
3401                return;
3402        }
3403        /*
3404         * Signal handler might be restricted, so use thread notifier instead.
3405         */
3406        memset(&sev, 0, sizeof(struct sigevent));
3407        sev.sigev_notify = SIGEV_THREAD;
3408        sev.sigev_notify_function = msr_record_handler;
3409
3410        sev.sigev_value.sival_ptr = &timerid;
3411        if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
3412                fprintf(outf, "Can not create timer.\n");
3413                goto release_msr;
3414        }
3415
3416        its.it_value.tv_sec = 0;
3417        its.it_value.tv_nsec = 1;
3418        /*
3419         * A wraparound time has been calculated early.
3420         * Some sources state that the peak power for a
3421         * microprocessor is usually 1.5 times the TDP rating,
3422         * use 2 * TDP for safety.
3423         */
3424        its.it_interval.tv_sec = rapl_joule_counter_range / 2;
3425        its.it_interval.tv_nsec = 0;
3426
3427        if (timer_settime(timerid, 0, &its, NULL) == -1) {
3428                fprintf(outf, "Can not set timer.\n");
3429                goto release_timer;
3430        }
3431        return;
3432
3433release_timer:
3434        timer_delete(timerid);
3435release_msr:
3436        free(per_cpu_msr_sum);
3437}
3438
/*
 * set_my_sched_priority(priority)
 *
 * Change the nice value of the calling process to 'priority' and
 * verify that the change took effect.
 *
 * Returns the nice value that was in effect before the call.
 * Terminates the program via err() if the old value cannot be read,
 * if setpriority() fails, or if the value read back differs.
 */
int set_my_sched_priority(int priority)
{
        int previous, current, rc;

        /* -1 is a legal nice value, so errno must tell errors apart */
        errno = 0;
        previous = getpriority(PRIO_PROCESS, 0);
        if (previous == -1 && errno)
                err(errno, "getpriority");

        rc = setpriority(PRIO_PROCESS, 0, priority);
        if (rc)
                err(rc, "setpriority(%d)", priority);

        errno = 0;
        current = getpriority(PRIO_PROCESS, 0);
        if (current != priority)
                err(-1, "getpriority(%d) != setpriority(%d)", current, priority);

        return previous;
}
3464
3465void turbostat_loop()
3466{
3467        int retval;
3468        int restarted = 0;
3469        int done_iters = 0;
3470
3471        setup_signal_handler();
3472
3473        /*
3474         * elevate own priority for interval mode
3475         */
3476        set_my_sched_priority(-20);
3477
3478restart:
3479        restarted++;
3480
3481        snapshot_proc_sysfs_files();
3482        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3483        first_counter_read = 0;
3484        if (retval < -1) {
3485                exit(retval);
3486        } else if (retval == -1) {
3487                if (restarted > 10) {
3488                        exit(retval);
3489                }
3490                re_initialize();
3491                goto restart;
3492        }
3493        restarted = 0;
3494        done_iters = 0;
3495        gettimeofday(&tv_even, (struct timezone *)NULL);
3496
3497        while (1) {
3498                if (for_all_proc_cpus(cpu_is_not_present)) {
3499                        re_initialize();
3500                        goto restart;
3501                }
3502                do_sleep();
3503                if (snapshot_proc_sysfs_files())
3504                        goto restart;
3505                retval = for_all_cpus(get_counters, ODD_COUNTERS);
3506                if (retval < -1) {
3507                        exit(retval);
3508                } else if (retval == -1) {
3509                        re_initialize();
3510                        goto restart;
3511                }
3512                gettimeofday(&tv_odd, (struct timezone *)NULL);
3513                timersub(&tv_odd, &tv_even, &tv_delta);
3514                if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3515                        re_initialize();
3516                        goto restart;
3517                }
3518                compute_average(EVEN_COUNTERS);
3519                format_all_counters(EVEN_COUNTERS);
3520                flush_output_stdout();
3521                if (exit_requested)
3522                        break;
3523                if (num_iterations && ++done_iters >= num_iterations)
3524                        break;
3525                do_sleep();
3526                if (snapshot_proc_sysfs_files())
3527                        goto restart;
3528                retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3529                if (retval < -1) {
3530                        exit(retval);
3531                } else if (retval == -1) {
3532                        re_initialize();
3533                        goto restart;
3534                }
3535                gettimeofday(&tv_even, (struct timezone *)NULL);
3536                timersub(&tv_even, &tv_odd, &tv_delta);
3537                if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3538                        re_initialize();
3539                        goto restart;
3540                }
3541                compute_average(ODD_COUNTERS);
3542                format_all_counters(ODD_COUNTERS);
3543                flush_output_stdout();
3544                if (exit_requested)
3545                        break;
3546                if (num_iterations && ++done_iters >= num_iterations)
3547                        break;
3548        }
3549}
3550
3551void check_dev_msr()
3552{
3553        struct stat sb;
3554        char pathname[32];
3555
3556        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3557        if (stat(pathname, &sb))
3558                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3559                        err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3560}
3561
3562/*
3563 * check for CAP_SYS_RAWIO
3564 * return 0 on success
3565 * return 1 on fail
3566 */
3567int check_for_cap_sys_rawio(void)
3568{
3569        cap_t caps;
3570        cap_flag_value_t cap_flag_value;
3571
3572        caps = cap_get_proc();
3573        if (caps == NULL)
3574                err(-6, "cap_get_proc\n");
3575
3576        if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3577                err(-6, "cap_get\n");
3578
3579        if (cap_flag_value != CAP_SET) {
3580                warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3581                return 1;
3582        }
3583
3584        if (cap_free(caps) == -1)
3585                err(-6, "cap_free\n");
3586
3587        return 0;
3588}
3589
3590void check_permissions(void)
3591{
3592        int do_exit = 0;
3593        char pathname[32];
3594
3595        /* check for CAP_SYS_RAWIO */
3596        do_exit += check_for_cap_sys_rawio();
3597
3598        /* test file permissions */
3599        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3600        if (euidaccess(pathname, R_OK)) {
3601                do_exit++;
3602                warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3603        }
3604
3605        /* if all else fails, thell them to be root */
3606        if (do_exit)
3607                if (getuid() != 0)
3608                        warnx("... or simply run as root");
3609
3610        if (do_exit)
3611                exit(-6);
3612}
3613
3614/*
3615 * NHM adds support for additional MSRs:
3616 *
3617 * MSR_SMI_COUNT                   0x00000034
3618 *
3619 * MSR_PLATFORM_INFO               0x000000ce
3620 * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3621 *
3622 * MSR_MISC_PWR_MGMT               0x000001aa
3623 *
3624 * MSR_PKG_C3_RESIDENCY            0x000003f8
3625 * MSR_PKG_C6_RESIDENCY            0x000003f9
3626 * MSR_CORE_C3_RESIDENCY           0x000003fc
3627 * MSR_CORE_C6_RESIDENCY           0x000003fd
3628 *
3629 * Side effect:
3630 * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3631 * sets has_misc_feature_control
3632 */
3633int probe_nhm_msrs(unsigned int family, unsigned int model)
3634{
3635        unsigned long long msr;
3636        unsigned int base_ratio;
3637        int *pkg_cstate_limits;
3638
3639        if (!genuine_intel)
3640                return 0;
3641
3642        if (family != 6)
3643                return 0;
3644
3645        bclk = discover_bclk(family, model);
3646
3647        switch (model) {
3648        case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3649        case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3650                pkg_cstate_limits = nhm_pkg_cstate_limits;
3651                break;
3652        case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3653        case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3654        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3655        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3656                pkg_cstate_limits = snb_pkg_cstate_limits;
3657                has_misc_feature_control = 1;
3658                break;
3659        case INTEL_FAM6_HASWELL:        /* HSW */
3660        case INTEL_FAM6_HASWELL_G:      /* HSW */
3661        case INTEL_FAM6_HASWELL_X:      /* HSX */
3662        case INTEL_FAM6_HASWELL_L:      /* HSW */
3663        case INTEL_FAM6_BROADWELL:      /* BDW */
3664        case INTEL_FAM6_BROADWELL_G:    /* BDW */
3665        case INTEL_FAM6_BROADWELL_X:    /* BDX */
3666        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3667        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3668                pkg_cstate_limits = hsw_pkg_cstate_limits;
3669                has_misc_feature_control = 1;
3670                break;
3671        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3672                pkg_cstate_limits = skx_pkg_cstate_limits;
3673                has_misc_feature_control = 1;
3674                break;
3675        case INTEL_FAM6_ICELAKE_X:      /* ICX */
3676                pkg_cstate_limits = icx_pkg_cstate_limits;
3677                has_misc_feature_control = 1;
3678                break;
3679        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3680                no_MSR_MISC_PWR_MGMT = 1;
3681        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
3682                pkg_cstate_limits = slv_pkg_cstate_limits;
3683                break;
3684        case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3685                pkg_cstate_limits = amt_pkg_cstate_limits;
3686                no_MSR_MISC_PWR_MGMT = 1;
3687                break;
3688        case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3689                pkg_cstate_limits = phi_pkg_cstate_limits;
3690                break;
3691        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3692        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3693        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
3694        case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3695        case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
3696                pkg_cstate_limits = glm_pkg_cstate_limits;
3697                break;
3698        default:
3699                return 0;
3700        }
3701        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3702        pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3703
3704        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3705        base_ratio = (msr >> 8) & 0xFF;
3706
3707        base_hz = base_ratio * bclk * 1000000;
3708        has_base_hz = 1;
3709        return 1;
3710}
3711
3712/*
3713 * SLV client has support for unique MSRs:
3714 *
3715 * MSR_CC6_DEMOTION_POLICY_CONFIG
3716 * MSR_MC6_DEMOTION_POLICY_CONFIG
3717 */
3718
3719int has_slv_msrs(unsigned int family, unsigned int model)
3720{
3721        if (!genuine_intel)
3722                return 0;
3723
3724        switch (model) {
3725        case INTEL_FAM6_ATOM_SILVERMONT:
3726        case INTEL_FAM6_ATOM_SILVERMONT_MID:
3727        case INTEL_FAM6_ATOM_AIRMONT_MID:
3728                return 1;
3729        }
3730        return 0;
3731}
3732
3733int is_dnv(unsigned int family, unsigned int model)
3734{
3735
3736        if (!genuine_intel)
3737                return 0;
3738
3739        switch (model) {
3740        case INTEL_FAM6_ATOM_GOLDMONT_D:
3741                return 1;
3742        }
3743        return 0;
3744}
3745
3746int is_bdx(unsigned int family, unsigned int model)
3747{
3748
3749        if (!genuine_intel)
3750                return 0;
3751
3752        switch (model) {
3753        case INTEL_FAM6_BROADWELL_X:
3754                return 1;
3755        }
3756        return 0;
3757}
3758
3759int is_skx(unsigned int family, unsigned int model)
3760{
3761
3762        if (!genuine_intel)
3763                return 0;
3764
3765        switch (model) {
3766        case INTEL_FAM6_SKYLAKE_X:
3767                return 1;
3768        }
3769        return 0;
3770}
3771
3772int is_icx(unsigned int family, unsigned int model)
3773{
3774
3775        if (!genuine_intel)
3776                return 0;
3777
3778        switch (model) {
3779        case INTEL_FAM6_ICELAKE_X:
3780                return 1;
3781        }
3782        return 0;
3783}
3784
3785int is_ehl(unsigned int family, unsigned int model)
3786{
3787        if (!genuine_intel)
3788                return 0;
3789
3790        switch (model) {
3791        case INTEL_FAM6_ATOM_TREMONT:
3792                return 1;
3793        }
3794        return 0;
3795}
3796
3797int is_jvl(unsigned int family, unsigned int model)
3798{
3799        if (!genuine_intel)
3800                return 0;
3801
3802        switch (model) {
3803        case INTEL_FAM6_ATOM_TREMONT_D:
3804                return 1;
3805        }
3806        return 0;
3807}
3808
3809int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3810{
3811        if (has_slv_msrs(family, model))
3812                return 0;
3813
3814        switch (model) {
3815                /* Nehalem compatible, but do not include turbo-ratio limit support */
3816        case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3817        case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3818                return 0;
3819        default:
3820                return 1;
3821        }
3822}
3823
/*
 * Returns 1 exactly when has_slv_msrs() reports the CPU as having
 * the SLV-specific MSRs, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
        return has_slv_msrs(family, model) ? 1 : 0;
}
3831
3832int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3833{
3834        if (!genuine_intel)
3835                return 0;
3836
3837        if (family != 6)
3838                return 0;
3839
3840        switch (model) {
3841        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3842        case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3843                return 1;
3844        default:
3845                return 0;
3846        }
3847}
3848
3849int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3850{
3851        if (!genuine_intel)
3852                return 0;
3853
3854        if (family != 6)
3855                return 0;
3856
3857        switch (model) {
3858        case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3859                return 1;
3860        default:
3861                return 0;
3862        }
3863}
3864
3865int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3866{
3867        if (!genuine_intel)
3868                return 0;
3869
3870        if (family != 6)
3871                return 0;
3872
3873        switch (model) {
3874        case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3875                return 1;
3876        default:
3877                return 0;
3878        }
3879}
3880
3881int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3882{
3883        if (!genuine_intel)
3884                return 0;
3885
3886        if (family != 6)
3887                return 0;
3888
3889        switch (model) {
3890        case INTEL_FAM6_ATOM_GOLDMONT:
3891        case INTEL_FAM6_SKYLAKE_X:
3892        case INTEL_FAM6_ICELAKE_X:
3893                return 1;
3894        default:
3895                return 0;
3896        }
3897}
3898
3899int has_config_tdp(unsigned int family, unsigned int model)
3900{
3901        if (!genuine_intel)
3902                return 0;
3903
3904        if (family != 6)
3905                return 0;
3906
3907        switch (model) {
3908        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3909        case INTEL_FAM6_HASWELL:        /* HSW */
3910        case INTEL_FAM6_HASWELL_X:      /* HSX */
3911        case INTEL_FAM6_HASWELL_L:      /* HSW */
3912        case INTEL_FAM6_HASWELL_G:      /* HSW */
3913        case INTEL_FAM6_BROADWELL:      /* BDW */
3914        case INTEL_FAM6_BROADWELL_G:    /* BDW */
3915        case INTEL_FAM6_BROADWELL_X:    /* BDX */
3916        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3917        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3918        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3919        case INTEL_FAM6_ICELAKE_X:      /* ICX */
3920
3921        case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3922                return 1;
3923        default:
3924                return 0;
3925        }
3926}
3927
3928/*
3929 * tcc_offset_bits:
3930 * 0: Tcc Offset not supported (Default)
3931 * 6: Bit 29:24 of MSR_PLATFORM_INFO
3932 * 4: Bit 27:24 of MSR_PLATFORM_INFO
3933 */
3934void check_tcc_offset(int model)
3935{
3936        unsigned long long msr;
3937
3938        if (!genuine_intel)
3939                return;
3940
3941        switch (model) {
3942        case INTEL_FAM6_SKYLAKE_L:
3943        case INTEL_FAM6_SKYLAKE:
3944        case INTEL_FAM6_KABYLAKE_L:
3945        case INTEL_FAM6_KABYLAKE:
3946        case INTEL_FAM6_ICELAKE_L:
3947        case INTEL_FAM6_ICELAKE:
3948        case INTEL_FAM6_TIGERLAKE_L:
3949        case INTEL_FAM6_TIGERLAKE:
3950        case INTEL_FAM6_COMETLAKE:
3951                if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
3952                        msr = (msr >> 30) & 1;
3953                        if (msr)
3954                                tcc_offset_bits = 6;
3955                }
3956                return;
3957        default:
3958                return;
3959        }
3960}
3961
/*
 * remove_underbar()
 *
 * Delete every '_' character from the NUL-terminated string 's',
 * compacting the remaining characters in place.
 */
static void remove_underbar(char *s)
{
        size_t src;
        size_t dst = 0;

        for (src = 0; s[src] != '\0'; src++) {
                if (s[src] == '_')
                        continue;
                s[dst++] = s[src];
        }

        s[dst] = '\0';
}
3974
3975static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3976{
3977        if (!do_nhm_platform_info)
3978                return;
3979
3980        dump_nhm_platform_info();
3981
3982        if (has_hsw_turbo_ratio_limit(family, model))
3983                dump_hsw_turbo_ratio_limits();
3984
3985        if (has_ivt_turbo_ratio_limit(family, model))
3986                dump_ivt_turbo_ratio_limits();
3987
3988        if (has_turbo_ratio_limit(family, model))
3989                dump_turbo_ratio_limits(family, model);
3990
3991        if (has_atom_turbo_ratio_limit(family, model))
3992                dump_atom_turbo_ratio_limits();
3993
3994        if (has_knl_turbo_ratio_limit(family, model))
3995                dump_knl_turbo_ratio_limits();
3996
3997        if (has_config_tdp(family, model))
3998                dump_config_tdp();
3999
4000        dump_nhm_cst_cfg();
4001}
4002
4003static void dump_sysfs_file(char *path)
4004{
4005        FILE *input;
4006        char cpuidle_buf[64];
4007
4008        input = fopen(path, "r");
4009        if (input == NULL) {
4010                if (debug)
4011                        fprintf(outf, "NSFOD %s\n", path);
4012                return;
4013        }
4014        if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
4015                err(1, "%s: failed to read file", path);
4016        fclose(input);
4017
4018        fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
4019}
4020
4021static void dump_sysfs_cstate_config(void)
4022{
4023        char path[64];
4024        char name_buf[16];
4025        char desc[64];
4026        FILE *input;
4027        int state;
4028        char *sp;
4029
4030        if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
4031                fprintf(outf, "cpuidle not loaded\n");
4032                return;
4033        }
4034
4035        dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
4036        dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
4037        dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
4038
4039        for (state = 0; state < 10; ++state) {
4040
4041                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
4042                input = fopen(path, "r");
4043                if (input == NULL)
4044                        continue;
4045                if (!fgets(name_buf, sizeof(name_buf), input))
4046                        err(1, "%s: failed to read file", path);
4047
4048                /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4049                sp = strchr(name_buf, '-');
4050                if (!sp)
4051                        sp = strchrnul(name_buf, '\n');
4052                *sp = '\0';
4053                fclose(input);
4054
4055                remove_underbar(name_buf);
4056
4057                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
4058                input = fopen(path, "r");
4059                if (input == NULL)
4060                        continue;
4061                if (!fgets(desc, sizeof(desc), input))
4062                        err(1, "%s: failed to read file", path);
4063
4064                fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
4065                fclose(input);
4066        }
4067}
4068
4069static void dump_sysfs_pstate_config(void)
4070{
4071        char path[64];
4072        char driver_buf[64];
4073        char governor_buf[64];
4074        FILE *input;
4075        int turbo;
4076
4077        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
4078        input = fopen(path, "r");
4079        if (input == NULL) {
4080                fprintf(outf, "NSFOD %s\n", path);
4081                return;
4082        }
4083        if (!fgets(driver_buf, sizeof(driver_buf), input))
4084                err(1, "%s: failed to read file", path);
4085        fclose(input);
4086
4087        sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
4088        input = fopen(path, "r");
4089        if (input == NULL) {
4090                fprintf(outf, "NSFOD %s\n", path);
4091                return;
4092        }
4093        if (!fgets(governor_buf, sizeof(governor_buf), input))
4094                err(1, "%s: failed to read file", path);
4095        fclose(input);
4096
4097        fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
4098        fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
4099
4100        sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
4101        input = fopen(path, "r");
4102        if (input != NULL) {
4103                if (fscanf(input, "%d", &turbo) != 1)
4104                        err(1, "%s: failed to parse number from file", path);
4105                fprintf(outf, "cpufreq boost: %d\n", turbo);
4106                fclose(input);
4107        }
4108
4109        sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
4110        input = fopen(path, "r");
4111        if (input != NULL) {
4112                if (fscanf(input, "%d", &turbo) != 1)
4113                        err(1, "%s: failed to parse number from file", path);
4114                fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
4115                fclose(input);
4116        }
4117}
4118
4119/*
4120 * print_epb()
4121 * Decode the ENERGY_PERF_BIAS MSR
4122 */
4123int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4124{
4125        char *epb_string;
4126        int cpu, epb;
4127
4128        if (!has_epb)
4129                return 0;
4130
4131        cpu = t->cpu_id;
4132
4133        /* EPB is per-package */
4134        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4135                return 0;
4136
4137        if (cpu_migrate(cpu)) {
4138                fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
4139                return -1;
4140        }
4141
4142        epb = get_epb(cpu);
4143        if (epb < 0)
4144                return 0;
4145
4146        switch (epb) {
4147        case ENERGY_PERF_BIAS_PERFORMANCE:
4148                epb_string = "performance";
4149                break;
4150        case ENERGY_PERF_BIAS_NORMAL:
4151                epb_string = "balanced";
4152                break;
4153        case ENERGY_PERF_BIAS_POWERSAVE:
4154                epb_string = "powersave";
4155                break;
4156        default:
4157                epb_string = "custom";
4158                break;
4159        }
4160        fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
4161
4162        return 0;
4163}
4164
4165/*
4166 * print_hwp()
4167 * Decode the MSR_HWP_CAPABILITIES
4168 */
4169int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4170{
4171        unsigned long long msr;
4172        int cpu;
4173
4174        if (!has_hwp)
4175                return 0;
4176
4177        cpu = t->cpu_id;
4178
4179        /* MSR_HWP_CAPABILITIES is per-package */
4180        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4181                return 0;
4182
4183        if (cpu_migrate(cpu)) {
4184                fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
4185                return -1;
4186        }
4187
4188        if (get_msr(cpu, MSR_PM_ENABLE, &msr))
4189                return 0;
4190
4191        fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
4192
4193        /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
4194        if ((msr & (1 << 0)) == 0)
4195                return 0;
4196
4197        if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
4198                return 0;
4199
4200        fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
4201                "(high %d guar %d eff %d low %d)\n",
4202                cpu, msr,
4203                (unsigned int)HWP_HIGHEST_PERF(msr),
4204                (unsigned int)HWP_GUARANTEED_PERF(msr),
4205                (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
4206
4207        if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
4208                return 0;
4209
4210        fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
4211                "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
4212                cpu, msr,
4213                (unsigned int)(((msr) >> 0) & 0xff),
4214                (unsigned int)(((msr) >> 8) & 0xff),
4215                (unsigned int)(((msr) >> 16) & 0xff),
4216                (unsigned int)(((msr) >> 24) & 0xff),
4217                (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
4218
4219        if (has_hwp_pkg) {
4220                if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
4221                        return 0;
4222
4223                fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
4224                        "(min %d max %d des %d epp 0x%x window 0x%x)\n",
4225                        cpu, msr,
4226                        (unsigned int)(((msr) >> 0) & 0xff),
4227                        (unsigned int)(((msr) >> 8) & 0xff),
4228                        (unsigned int)(((msr) >> 16) & 0xff),
4229                        (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
4230        }
4231        if (has_hwp_notify) {
4232                if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
4233                        return 0;
4234
4235                fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
4236                        "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
4237                        cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
4238        }
4239        if (get_msr(cpu, MSR_HWP_STATUS, &msr))
4240                return 0;
4241
4242        fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
4243                "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
4244                cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
4245
4246        return 0;
4247}
4248
4249/*
4250 * print_perf_limit()
4251 */
4252int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4253{
4254        unsigned long long msr;
4255        int cpu;
4256
4257        cpu = t->cpu_id;
4258
4259        /* per-package */
4260        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4261                return 0;
4262
4263        if (cpu_migrate(cpu)) {
4264                fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
4265                return -1;
4266        }
4267
4268        if (do_core_perf_limit_reasons) {
4269                get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
4270                fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4271                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
4272                        (msr & 1 << 15) ? "bit15, " : "",
4273                        (msr & 1 << 14) ? "bit14, " : "",
4274                        (msr & 1 << 13) ? "Transitions, " : "",
4275                        (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
4276                        (msr & 1 << 11) ? "PkgPwrL2, " : "",
4277                        (msr & 1 << 10) ? "PkgPwrL1, " : "",
4278                        (msr & 1 << 9) ? "CorePwr, " : "",
4279                        (msr & 1 << 8) ? "Amps, " : "",
4280                        (msr & 1 << 6) ? "VR-Therm, " : "",
4281                        (msr & 1 << 5) ? "Auto-HWP, " : "",
4282                        (msr & 1 << 4) ? "Graphics, " : "",
4283                        (msr & 1 << 2) ? "bit2, " : "",
4284                        (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
4285                fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
4286                        (msr & 1 << 31) ? "bit31, " : "",
4287                        (msr & 1 << 30) ? "bit30, " : "",
4288                        (msr & 1 << 29) ? "Transitions, " : "",
4289                        (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
4290                        (msr & 1 << 27) ? "PkgPwrL2, " : "",
4291                        (msr & 1 << 26) ? "PkgPwrL1, " : "",
4292                        (msr & 1 << 25) ? "CorePwr, " : "",
4293                        (msr & 1 << 24) ? "Amps, " : "",
4294                        (msr & 1 << 22) ? "VR-Therm, " : "",
4295                        (msr & 1 << 21) ? "Auto-HWP, " : "",
4296                        (msr & 1 << 20) ? "Graphics, " : "",
4297                        (msr & 1 << 18) ? "bit18, " : "",
4298                        (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
4299
4300        }
4301        if (do_gfx_perf_limit_reasons) {
4302                get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
4303                fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4304                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
4305                        (msr & 1 << 0) ? "PROCHOT, " : "",
4306                        (msr & 1 << 1) ? "ThermStatus, " : "",
4307                        (msr & 1 << 4) ? "Graphics, " : "",
4308                        (msr & 1 << 6) ? "VR-Therm, " : "",
4309                        (msr & 1 << 8) ? "Amps, " : "",
4310                        (msr & 1 << 9) ? "GFXPwr, " : "",
4311                        (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4312                fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
4313                        (msr & 1 << 16) ? "PROCHOT, " : "",
4314                        (msr & 1 << 17) ? "ThermStatus, " : "",
4315                        (msr & 1 << 20) ? "Graphics, " : "",
4316                        (msr & 1 << 22) ? "VR-Therm, " : "",
4317                        (msr & 1 << 24) ? "Amps, " : "",
4318                        (msr & 1 << 25) ? "GFXPwr, " : "",
4319                        (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4320        }
4321        if (do_ring_perf_limit_reasons) {
4322                get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
4323                fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4324                fprintf(outf, " (Active: %s%s%s%s%s%s)",
4325                        (msr & 1 << 0) ? "PROCHOT, " : "",
4326                        (msr & 1 << 1) ? "ThermStatus, " : "",
4327                        (msr & 1 << 6) ? "VR-Therm, " : "",
4328                        (msr & 1 << 8) ? "Amps, " : "",
4329                        (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4330                fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
4331                        (msr & 1 << 16) ? "PROCHOT, " : "",
4332                        (msr & 1 << 17) ? "ThermStatus, " : "",
4333                        (msr & 1 << 22) ? "VR-Therm, " : "",
4334                        (msr & 1 << 24) ? "Amps, " : "",
4335                        (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4336        }
4337        return 0;
4338}
4339
/* Field masks applied to RAPL MSR values after shifting the field down to bit 0 */
#define RAPL_POWER_GRANULARITY	((1 << 15) - 1)	/* 15-bit power field (0x7FFF) */
#define RAPL_TIME_GRANULARITY	((1 << 6) - 1)	/* 6-bit time field (0x3F) */
4342
4343double get_tdp_intel(unsigned int model)
4344{
4345        unsigned long long msr;
4346
4347        if (do_rapl & RAPL_PKG_POWER_INFO)
4348                if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
4349                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
4350
4351        switch (model) {
4352        case INTEL_FAM6_ATOM_SILVERMONT:
4353        case INTEL_FAM6_ATOM_SILVERMONT_D:
4354                return 30.0;
4355        default:
4356                return 135.0;
4357        }
4358}
4359
/*
 * get_tdp_amd()
 * Return a fixed TDP figure in Watts; the family argument is ignored.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
	const double max_stock_tdp_watts = 280.0;

	(void)family;

	return max_stock_tdp_watts;
}
4365
4366/*
4367 * rapl_dram_energy_units_probe()
4368 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
4369 */
4370static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
4371{
4372        /* only called for genuine_intel, family 6 */
4373
4374        switch (model) {
4375        case INTEL_FAM6_HASWELL_X:      /* HSX */
4376        case INTEL_FAM6_BROADWELL_X:    /* BDX */
4377        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4378        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4379                return (rapl_dram_energy_units = 15.3 / 1000000);
4380        default:
4381                return (rapl_energy_units);
4382        }
4383}
4384
4385void rapl_probe_intel(unsigned int family, unsigned int model)
4386{
4387        unsigned long long msr;
4388        unsigned int time_unit;
4389        double tdp;
4390
4391        if (family != 6)
4392                return;
4393
4394        switch (model) {
4395        case INTEL_FAM6_SANDYBRIDGE:
4396        case INTEL_FAM6_IVYBRIDGE:
4397        case INTEL_FAM6_HASWELL:        /* HSW */
4398        case INTEL_FAM6_HASWELL_L:      /* HSW */
4399        case INTEL_FAM6_HASWELL_G:      /* HSW */
4400        case INTEL_FAM6_BROADWELL:      /* BDW */
4401        case INTEL_FAM6_BROADWELL_G:    /* BDW */
4402                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
4403                if (rapl_joules) {
4404                        BIC_PRESENT(BIC_Pkg_J);
4405                        BIC_PRESENT(BIC_Cor_J);
4406                        BIC_PRESENT(BIC_GFX_J);
4407                } else {
4408                        BIC_PRESENT(BIC_PkgWatt);
4409                        BIC_PRESENT(BIC_CorWatt);
4410                        BIC_PRESENT(BIC_GFXWatt);
4411                }
4412                break;
4413        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4414        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4415                do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
4416                if (rapl_joules)
4417                        BIC_PRESENT(BIC_Pkg_J);
4418                else
4419                        BIC_PRESENT(BIC_PkgWatt);
4420                break;
4421        case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4422                do_rapl =
4423                    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4424                    | RAPL_GFX | RAPL_PKG_POWER_INFO;
4425                if (rapl_joules) {
4426                        BIC_PRESENT(BIC_Pkg_J);
4427                        BIC_PRESENT(BIC_Cor_J);
4428                        BIC_PRESENT(BIC_RAM_J);
4429                        BIC_PRESENT(BIC_GFX_J);
4430                } else {
4431                        BIC_PRESENT(BIC_PkgWatt);
4432                        BIC_PRESENT(BIC_CorWatt);
4433                        BIC_PRESENT(BIC_RAMWatt);
4434                        BIC_PRESENT(BIC_GFXWatt);
4435                }
4436                break;
4437        case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
4438                do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4439                BIC_PRESENT(BIC_PKG__);
4440                if (rapl_joules)
4441                        BIC_PRESENT(BIC_Pkg_J);
4442                else
4443                        BIC_PRESENT(BIC_PkgWatt);
4444                break;
4445        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4446        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4447                do_rapl =
4448                    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4449                    | RAPL_GFX | RAPL_PKG_POWER_INFO;
4450                BIC_PRESENT(BIC_PKG__);
4451                BIC_PRESENT(BIC_RAM__);
4452                if (rapl_joules) {
4453                        BIC_PRESENT(BIC_Pkg_J);
4454                        BIC_PRESENT(BIC_Cor_J);
4455                        BIC_PRESENT(BIC_RAM_J);
4456                        BIC_PRESENT(BIC_GFX_J);
4457                } else {
4458                        BIC_PRESENT(BIC_PkgWatt);
4459                        BIC_PRESENT(BIC_CorWatt);
4460                        BIC_PRESENT(BIC_RAMWatt);
4461                        BIC_PRESENT(BIC_GFXWatt);
4462                }
4463                break;
4464        case INTEL_FAM6_HASWELL_X:      /* HSX */
4465        case INTEL_FAM6_BROADWELL_X:    /* BDX */
4466        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4467        case INTEL_FAM6_ICELAKE_X:      /* ICX */
4468        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4469                do_rapl =
4470                    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4471                    RAPL_PKG_POWER_INFO;
4472                BIC_PRESENT(BIC_PKG__);
4473                BIC_PRESENT(BIC_RAM__);
4474                if (rapl_joules) {
4475                        BIC_PRESENT(BIC_Pkg_J);
4476                        BIC_PRESENT(BIC_RAM_J);
4477                } else {
4478                        BIC_PRESENT(BIC_PkgWatt);
4479                        BIC_PRESENT(BIC_RAMWatt);
4480                }
4481                break;
4482        case INTEL_FAM6_SANDYBRIDGE_X:
4483        case INTEL_FAM6_IVYBRIDGE_X:
4484                do_rapl =
4485                    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
4486                    RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
4487                BIC_PRESENT(BIC_PKG__);
4488                BIC_PRESENT(BIC_RAM__);
4489                if (rapl_joules) {
4490                        BIC_PRESENT(BIC_Pkg_J);
4491                        BIC_PRESENT(BIC_Cor_J);
4492                        BIC_PRESENT(BIC_RAM_J);
4493                } else {
4494                        BIC_PRESENT(BIC_PkgWatt);
4495                        BIC_PRESENT(BIC_CorWatt);
4496                        BIC_PRESENT(BIC_RAMWatt);
4497                }
4498                break;
4499        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4500        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4501                do_rapl = RAPL_PKG | RAPL_CORES;
4502                if (rapl_joules) {
4503                        BIC_PRESENT(BIC_Pkg_J);
4504                        BIC_PRESENT(BIC_Cor_J);
4505                } else {
4506                        BIC_PRESENT(BIC_PkgWatt);
4507                        BIC_PRESENT(BIC_CorWatt);
4508                }
4509                break;
4510        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4511                do_rapl =
4512                    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4513                    RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4514                BIC_PRESENT(BIC_PKG__);
4515                BIC_PRESENT(BIC_RAM__);
4516                if (rapl_joules) {
4517                        BIC_PRESENT(BIC_Pkg_J);
4518                        BIC_PRESENT(BIC_Cor_J);
4519                        BIC_PRESENT(BIC_RAM_J);
4520                } else {
4521                        BIC_PRESENT(BIC_PkgWatt);
4522                        BIC_PRESENT(BIC_CorWatt);
4523                        BIC_PRESENT(BIC_RAMWatt);
4524                }
4525                break;
4526        default:
4527                return;
4528        }
4529
4530        /* units on package 0, verify later other packages match */
4531        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4532                return;
4533
4534        rapl_power_units = 1.0 / (1 << (msr & 0xF));
4535        if (model == INTEL_FAM6_ATOM_SILVERMONT)
4536                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4537        else
4538                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4539
4540        rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4541
4542        time_unit = msr >> 16 & 0xF;
4543        if (time_unit == 0)
4544                time_unit = 0xA;
4545
4546        rapl_time_units = 1.0 / (1 << (time_unit));
4547
4548        tdp = get_tdp_intel(model);
4549
4550        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4551        if (!quiet)
4552                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4553}
4554
4555void rapl_probe_amd(unsigned int family, unsigned int model)
4556{
4557        unsigned long long msr;
4558        unsigned int eax, ebx, ecx, edx;
4559        unsigned int has_rapl = 0;
4560        double tdp;
4561
4562        if (max_extended_level >= 0x80000007) {
4563                __cpuid(0x80000007, eax, ebx, ecx, edx);
4564                /* RAPL (Fam 17h+) */
4565                has_rapl = edx & (1 << 14);
4566        }
4567
4568        if (!has_rapl || family < 0x17)
4569                return;
4570
4571        do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4572        if (rapl_joules) {
4573                BIC_PRESENT(BIC_Pkg_J);
4574                BIC_PRESENT(BIC_Cor_J);
4575        } else {
4576                BIC_PRESENT(BIC_PkgWatt);
4577                BIC_PRESENT(BIC_CorWatt);
4578        }
4579
4580        if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4581                return;
4582
4583        rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4584        rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4585        rapl_power_units = ldexp(1.0, -(msr & 0xf));
4586
4587        tdp = get_tdp_amd(family);
4588
4589        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4590        if (!quiet)
4591                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4592}
4593
4594/*
4595 * rapl_probe()
4596 *
4597 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4598 */
4599void rapl_probe(unsigned int family, unsigned int model)
4600{
4601        if (genuine_intel)
4602                rapl_probe_intel(family, model);
4603        if (authentic_amd || hygon_genuine)
4604                rapl_probe_amd(family, model);
4605}
4606
4607void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4608{
4609        if (!genuine_intel)
4610                return;
4611
4612        if (family != 6)
4613                return;
4614
4615        switch (model) {
4616        case INTEL_FAM6_HASWELL:        /* HSW */
4617        case INTEL_FAM6_HASWELL_L:      /* HSW */
4618        case INTEL_FAM6_HASWELL_G:      /* HSW */
4619                do_gfx_perf_limit_reasons = 1;
4620        case INTEL_FAM6_HASWELL_X:      /* HSX */
4621                do_core_perf_limit_reasons = 1;
4622                do_ring_perf_limit_reasons = 1;
4623        default:
4624                return;
4625        }
4626}
4627
4628void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4629{
4630        if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
4631                has_automatic_cstate_conversion = 1;
4632}
4633
4634void prewake_cstate_probe(unsigned int family, unsigned int model)
4635{
4636        if (is_icx(family, model))
4637                dis_cstate_prewake = 1;
4638}
4639
4640int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4641{
4642        unsigned long long msr;
4643        unsigned int dts, dts2;
4644        int cpu;
4645
4646        if (!(do_dts || do_ptm))
4647                return 0;
4648
4649        cpu = t->cpu_id;
4650
4651        /* DTS is per-core, no need to print for each thread */
4652        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4653                return 0;
4654
4655        if (cpu_migrate(cpu)) {
4656                fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
4657                return -1;
4658        }
4659
4660        if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4661                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4662                        return 0;
4663
4664                dts = (msr >> 16) & 0x7F;
4665                fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
4666
4667                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4668                        return 0;
4669
4670                dts = (msr >> 16) & 0x7F;
4671                dts2 = (msr >> 8) & 0x7F;
4672                fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4673                        cpu, msr, tj_max - dts, tj_max - dts2);
4674        }
4675
4676        if (do_dts && debug) {
4677                unsigned int resolution;
4678
4679                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4680                        return 0;
4681
4682                dts = (msr >> 16) & 0x7F;
4683                resolution = (msr >> 27) & 0xF;
4684                fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4685                        cpu, msr, tj_max - dts, resolution);
4686
4687                if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4688                        return 0;
4689
4690                dts = (msr >> 16) & 0x7F;
4691                dts2 = (msr >> 8) & 0x7F;
4692                fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4693                        cpu, msr, tj_max - dts, tj_max - dts2);
4694        }
4695
4696        return 0;
4697}
4698
4699void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4700{
4701        fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4702                cpu, label,
4703                ((msr >> 15) & 1) ? "EN" : "DIS",
4704                ((msr >> 0) & 0x7FFF) * rapl_power_units,
4705                (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4706                (((msr >> 16) & 1) ? "EN" : "DIS"));
4707
4708        return;
4709}
4710
4711int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4712{
4713        unsigned long long msr;
4714        const char *msr_name;
4715        int cpu;
4716
4717        if (!do_rapl)
4718                return 0;
4719
4720        /* RAPL counters are per package, so print only for 1st thread/package */
4721        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4722                return 0;
4723
4724        cpu = t->cpu_id;
4725        if (cpu_migrate(cpu)) {
4726                fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
4727                return -1;
4728        }
4729
4730        if (do_rapl & RAPL_AMD_F17H) {
4731                msr_name = "MSR_RAPL_PWR_UNIT";
4732                if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4733                        return -1;
4734        } else {
4735                msr_name = "MSR_RAPL_POWER_UNIT";
4736                if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4737                        return -1;
4738        }
4739
4740        fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4741                rapl_power_units, rapl_energy_units, rapl_time_units);
4742
4743        if (do_rapl & RAPL_PKG_POWER_INFO) {
4744
4745                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4746                        return -5;
4747
4748                fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4749                        cpu, msr,
4750                        ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4751                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4752                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4753                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4754
4755        }
4756        if (do_rapl & RAPL_PKG) {
4757
4758                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4759                        return -9;
4760
4761                fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4762                        cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4763
4764                print_power_limit_msr(cpu, msr, "PKG Limit #1");
4765                fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4766                        cpu,
4767                        ((msr >> 47) & 1) ? "EN" : "DIS",
4768                        ((msr >> 32) & 0x7FFF) * rapl_power_units,
4769                        (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4770                        ((msr >> 48) & 1) ? "EN" : "DIS");
4771        }
4772
4773        if (do_rapl & RAPL_DRAM_POWER_INFO) {
4774                if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4775                        return -6;
4776
4777                fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4778                        cpu, msr,
4779                        ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4780                        ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4781                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4782                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4783        }
4784        if (do_rapl & RAPL_DRAM) {
4785                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4786                        return -9;
4787                fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4788                        cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4789
4790                print_power_limit_msr(cpu, msr, "DRAM Limit");
4791        }
4792        if (do_rapl & RAPL_CORE_POLICY) {
4793                if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4794                        return -7;
4795
4796                fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4797        }
4798        if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4799                if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4800                        return -9;
4801                fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4802                        cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4803                print_power_limit_msr(cpu, msr, "Cores Limit");
4804        }
4805        if (do_rapl & RAPL_GFX) {
4806                if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4807                        return -8;
4808
4809                fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4810
4811                if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4812                        return -9;
4813                fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4814                        cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4815                print_power_limit_msr(cpu, msr, "GFX Limit");
4816        }
4817        return 0;
4818}
4819
4820/*
4821 * SNB adds support for additional MSRs:
4822 *
4823 * MSR_PKG_C7_RESIDENCY            0x000003fa
4824 * MSR_CORE_C7_RESIDENCY           0x000003fe
4825 * MSR_PKG_C2_RESIDENCY            0x0000060d
4826 */
4827
4828int has_snb_msrs(unsigned int family, unsigned int model)
4829{
4830        if (!genuine_intel)
4831                return 0;
4832
4833        switch (model) {
4834        case INTEL_FAM6_SANDYBRIDGE:
4835        case INTEL_FAM6_SANDYBRIDGE_X:
4836        case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4837        case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4838        case INTEL_FAM6_HASWELL:        /* HSW */
4839        case INTEL_FAM6_HASWELL_X:      /* HSW */
4840        case INTEL_FAM6_HASWELL_L:      /* HSW */
4841        case INTEL_FAM6_HASWELL_G:      /* HSW */
4842        case INTEL_FAM6_BROADWELL:      /* BDW */
4843        case INTEL_FAM6_BROADWELL_G:    /* BDW */
4844        case INTEL_FAM6_BROADWELL_X:    /* BDX */
4845        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4846        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4847        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4848        case INTEL_FAM6_ICELAKE_X:      /* ICX */
4849        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4850        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4851        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4852        case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4853        case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
4854                return 1;
4855        }
4856        return 0;
4857}
4858
4859/*
4860 * HSW ULT added support for C8/C9/C10 MSRs:
4861 *
4862 * MSR_PKG_C8_RESIDENCY         0x00000630
4863 * MSR_PKG_C9_RESIDENCY         0x00000631
4864 * MSR_PKG_C10_RESIDENCY        0x00000632
4865 *
4866 * MSR_PKGC8_IRTL               0x00000633
4867 * MSR_PKGC9_IRTL               0x00000634
4868 * MSR_PKGC10_IRTL              0x00000635
4869 *
4870 */
4871int has_c8910_msrs(unsigned int family, unsigned int model)
4872{
4873        if (!genuine_intel)
4874                return 0;
4875
4876        switch (model) {
4877        case INTEL_FAM6_HASWELL_L:      /* HSW */
4878        case INTEL_FAM6_BROADWELL:      /* BDW */
4879        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4880        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4881        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4882        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4883        case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4884                return 1;
4885        }
4886        return 0;
4887}
4888
4889/*
4890 * SKL adds support for additional MSRS:
4891 *
4892 * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4893 * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4894 * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4895 * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4896 */
4897int has_skl_msrs(unsigned int family, unsigned int model)
4898{
4899        if (!genuine_intel)
4900                return 0;
4901
4902        switch (model) {
4903        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4904        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4905                return 1;
4906        }
4907        return 0;
4908}
4909
4910int is_slm(unsigned int family, unsigned int model)
4911{
4912        if (!genuine_intel)
4913                return 0;
4914        switch (model) {
4915        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4916        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4917                return 1;
4918        }
4919        return 0;
4920}
4921
4922int is_knl(unsigned int family, unsigned int model)
4923{
4924        if (!genuine_intel)
4925                return 0;
4926        switch (model) {
4927        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4928                return 1;
4929        }
4930        return 0;