linux/drivers/cpufreq/intel_pstate.c
/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#define BYT_RATIOS              0x66a
#define BYT_VIDS                0x66b
#define BYT_TURBO_RATIOS        0x66c
#define BYT_TURBO_VIDS          0x66d

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
        return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
        return div_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
        int mask, ret;

        ret = fp_toint(x);
        mask = (1 << FRAC_BITS) - 1;
        if (x & mask)
                ret += 1;
        return ret;
}
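
/*
 * Worked example of the signed 24.8 fixed-point format used throughout
 * this file (values are illustrative only): int_tofp(3) == 3 << 8 == 768.
 * With x = int_tofp(3) and y = int_tofp(2) == 512, mul_fp(x, y) computes
 * (768 * 512) >> 8 == 1536 == int_tofp(6), and div_fp(x, y) computes
 * (768 << 8) / 512 == 384 == int_tofp(1.5).  ceiling_fp() rounds any
 * nonzero fraction up, so ceiling_fp(384) == 2.
 */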

struct sample {
        int32_t core_pct_busy;
        u64 aperf;
        u64 mperf;
        int freq;
        ktime_t time;
};

struct pstate_data {
        int     current_pstate;
        int     min_pstate;
        int     max_pstate;
        int     scaling;
        int     turbo_pstate;
};

struct vid_data {
        int min;
        int max;
        int turbo;
        int32_t ratio;
};

struct _pid {
        int setpoint;
        int32_t integral;
        int32_t p_gain;
        int32_t i_gain;
        int32_t d_gain;
        int deadband;
        int32_t last_err;
};

struct cpudata {
        int cpu;

        struct timer_list timer;

        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;

        ktime_t last_sample_time;
        u64     prev_aperf;
        u64     prev_mperf;
        struct sample sample;
};

static struct cpudata **all_cpu_data;

struct pstate_adjust_policy {
        int sample_rate_ms;
        int deadband;
        int setpoint;
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
};

struct pstate_funcs {
        int (*get_max)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
        void (*set)(struct cpudata *, int pstate);
        void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;

struct perf_limits {
        int no_turbo;
        int turbo_disabled;
        int max_perf_pct;
        int min_perf_pct;
        int32_t max_perf;
        int32_t min_perf;
        int max_policy_pct;
        int max_sysfs_pct;
        int min_policy_pct;
        int min_sysfs_pct;
};

static struct perf_limits limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
        .min_sysfs_pct = 0,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral)
{
        pid->setpoint = setpoint;
        pid->deadband  = deadband;
        pid->integral  = int_tofp(integral);
        pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
        pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
        pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
        pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
        signed int result;
        int32_t pterm, dterm, fp_error;
        int32_t integral_limit;

        fp_error = int_tofp(pid->setpoint) - busy;

        if (abs(fp_error) <= int_tofp(pid->deadband))
                return 0;

        pterm = mul_fp(pid->p_gain, fp_error);

        pid->integral += fp_error;

        /*
         * We limit the integral here so that it will never
         * get higher than 30.  This prevents it from becoming
         * too large an input over long periods of time and allows
         * it to get factored out sooner.
         *
         * The value of 30 was chosen through experimentation.
         */
        integral_limit = int_tofp(30);
        if (pid->integral > integral_limit)
                pid->integral = integral_limit;
        if (pid->integral < -integral_limit)
                pid->integral = -integral_limit;

        dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
        pid->last_err = fp_error;

        result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
        result = result + (1 << (FRAC_BITS-1));
        return (signed int)fp_toint(result);
}
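
/*
 * Worked example of pid_calc() under the "core" tuning below
 * (setpoint = 97, p_gain_pct = 20, i_gain_pct = 0, d_gain_pct = 0);
 * the numbers are illustrative only.  A scaled busy value of 92%
 * gives fp_error = int_tofp(5) = 1280.  p_gain is stored as
 * div_fp(int_tofp(20), int_tofp(100)) = 51 (~0.2 in 24.8 format), so
 * pterm = mul_fp(51, 1280) = 255 (~1.0).  With the integral and
 * derivative terms at zero, result = 255 + 128 (the added half for
 * rounding) and fp_toint() of that is 1.  The caller subtracts this
 * from the current P state, stepping performance down one notch when
 * we run below the setpoint.
 */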

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
        pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
        pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
        pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

        pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
        unsigned int cpu;

        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu])
                        intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
        }
}

static inline void update_turbo_state(void)
{
        u64 misc_en;
        struct cpudata *cpu;

        cpu = all_cpu_data[0];
        rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
        limits.turbo_disabled =
                (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

#define PCT_TO_HWP(x) ((x) * 255 / 100)
static void intel_pstate_hwp_set(void)
{
        int min, max, cpu;
        u64 value, freq;

        get_online_cpus();

        for_each_online_cpu(cpu) {
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
                min = PCT_TO_HWP(limits.min_perf_pct);
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);

                max = PCT_TO_HWP(limits.max_perf_pct);
                if (limits.no_turbo) {
                        rdmsrl(MSR_HWP_CAPABILITIES, freq);
                        max = HWP_GUARANTEED_PERF(freq);
                }

                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }

        put_online_cpus();
}
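
/*
 * Example of the percent-to-HWP mapping above (arithmetic only): the
 * HWP_REQUEST performance fields are 8 bits wide, so a sysfs limit of
 * max_perf_pct = 80 becomes PCT_TO_HWP(80) = 80 * 255 / 100 = 204,
 * and min_perf_pct = 25 becomes 63.  When turbo is disabled, the max
 * field is clamped to the guaranteed-performance level reported by
 * MSR_HWP_CAPABILITIES instead.
 */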

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
        *(u32 *)data = val;
        intel_pstate_reset_all_pid();
        return 0;
}

static int pid_param_get(void *data, u64 *val)
{
        *val = *(u32 *)data;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
        char *name;
        void *value;
};

static struct pid_param pid_files[] = {
        {"sample_rate_ms", &pid_params.sample_rate_ms},
        {"d_gain_pct", &pid_params.d_gain_pct},
        {"i_gain_pct", &pid_params.i_gain_pct},
        {"deadband", &pid_params.deadband},
        {"setpoint", &pid_params.setpoint},
        {"p_gain_pct", &pid_params.p_gain_pct},
        {NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
        struct dentry *debugfs_parent;
        int i = 0;

        if (hwp_active)
                return;
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
        while (pid_files[i].name) {
                debugfs_create_file(pid_files[i].name, 0660,
                                    debugfs_parent, pid_files[i].value,
                                    &fops_pid_param);
                i++;
        }
}
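
/*
 * With debugfs mounted in the usual place, the PID parameters land in
 * /sys/kernel/debug/pstate_snb/.  A hypothetical shell session, for
 * illustration:
 *
 *      # cat /sys/kernel/debug/pstate_snb/setpoint
 *      97
 *      # echo 95 > /sys/kernel/debug/pstate_snb/setpoint
 *
 * Writes go through pid_param_set(), which also resets the PID state
 * on every online CPU so the new tuning takes effect immediately.
 */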

/************************** sysfs begin ************************/
#define show_one(file_name, object)                                     \
        static ssize_t show_##file_name                                 \
        (struct kobject *kobj, struct attribute *attr, char *buf)       \
        {                                                               \
                return sprintf(buf, "%u\n", limits.object);             \
        }

static ssize_t show_turbo_pct(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total, no_turbo, turbo_pct;
        uint32_t turbo_fp;

        cpu = all_cpu_data[0];

        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
        turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
        turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
        return sprintf(buf, "%u\n", turbo_pct);
}
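
/*
 * Worked example for show_turbo_pct() (hypothetical ratios): with
 * min_pstate = 8, max_pstate = 24 and turbo_pstate = 32 there are
 * total = 25 P states, of which no_turbo = 17 are non-turbo, so
 * turbo_fp = 17/25 = 0.68 and the file reports 100 - 68 = 32, i.e.
 * 32% of the available P states are turbo states.
 */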

static ssize_t show_num_pstates(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total;

        cpu = all_cpu_data[0];
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
                             struct attribute *attr, char *buf)
{
        ssize_t ret;

        update_turbo_state();
        if (limits.turbo_disabled)
                ret = sprintf(buf, "%u\n", limits.turbo_disabled);
        else
                ret = sprintf(buf, "%u\n", limits.no_turbo);

        return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
                              const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        update_turbo_state();
        if (limits.turbo_disabled) {
                pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                return -EPERM;
        }

        limits.no_turbo = clamp_t(int, input, 0, 1);

        if (hwp_active)
                intel_pstate_hwp_set();

        return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.min_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
        &max_perf_pct.attr,
        &min_perf_pct.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
        NULL
};

static struct attribute_group intel_pstate_attr_group = {
        .attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
        struct kobject *intel_pstate_kobject;
        int rc;

        intel_pstate_kobject = kobject_create_and_add("intel_pstate",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!intel_pstate_kobject);
        rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
        BUG_ON(rc);
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(void)
{
        hwp_active++;
        pr_info("intel_pstate HWP enabled\n");

        wrmsrl(MSR_PM_ENABLE, 0x1);
}

static int byt_get_min_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 8) & 0x7F;
}

static int byt_get_max_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 16) & 0x7F;
}

static int byt_get_turbo_pstate(void)
{
        u64 value;

        rdmsrl(BYT_TURBO_RATIOS, value);
        return value & 0x7F;
}

static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;
        int32_t vid_fp;
        u32 vid;

        val = pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        vid_fp = cpudata->vid.min + mul_fp(
                int_tofp(pstate - cpudata->pstate.min_pstate),
                cpudata->vid.ratio);

        vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
        vid = ceiling_fp(vid_fp);

        if (pstate > cpudata->pstate.max_pstate)
                vid = cpudata->vid.turbo;

        val |= vid;

        wrmsrl(MSR_IA32_PERF_CTL, val);
}
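
/*
 * Worked example of the VID interpolation above (hypothetical fused
 * values): with vid.min = int_tofp(40), vid.max = int_tofp(56),
 * min_pstate = 8 and max_pstate = 24, byt_get_vid() computes
 * vid.ratio = (56 - 40) / (24 - 8) = 1.0 VID step per P state.
 * Requesting pstate = 16 then gives vid_fp = 40 + 8 * 1.0 = 48, and
 * ceiling_fp() rounds any fractional voltage up so we never request
 * less than the interpolated voltage.  Turbo P states bypass the
 * interpolation and use the dedicated turbo VID.
 */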

#define BYT_BCLK_FREQS 5
static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800 };

static int byt_get_scaling(void)
{
        u64 value;
        int i;

        rdmsrl(MSR_FSB_FREQ, value);
        /* Bits 2:0 select the bus clock; only five encodings are defined. */
        i = value & 0x7;

        BUG_ON(i >= BYT_BCLK_FREQS);

        return byt_freq_table[i] * 100;
}
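
/*
 * Example (illustrative): the table entries are in 100 kHz units, so
 * 833 means an 83.3 MHz bus clock.  If MSR_FSB_FREQ reports 1, the
 * scaling factor is 1000 * 100 = 100000 kHz per ratio step, and a
 * P state ratio of 12 corresponds to 12 * 100000 = 1200000 kHz
 * (1.2 GHz), matching the pstate * scaling conversion used elsewhere
 * in this driver.
 */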

static void byt_get_vid(struct cpudata *cpudata)
{
        u64 value;

        rdmsrl(BYT_VIDS, value);
        cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
        cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
        cpudata->vid.ratio = div_fp(
                cpudata->vid.max - cpudata->vid.min,
                int_tofp(cpudata->pstate.max_pstate -
                        cpudata->pstate.min_pstate));

        rdmsrl(BYT_TURBO_VIDS, value);
        cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;

        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = value & 0xFF;
        if (ret <= nont)
                ret = nont;
        return ret;
}

static inline int core_get_scaling(void)
{
        return 100000;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;

        val = pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static struct cpu_defaults core_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};

static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
        },
        .funcs = {
                .get_max = byt_get_max_pstate,
                .get_min = byt_get_min_pstate,
                .get_turbo = byt_get_turbo_pstate,
                .set = byt_set_pstate,
                .get_scaling = byt_get_scaling,
                .get_vid = byt_get_vid,
        },
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;

        if (limits.no_turbo || limits.turbo_disabled)
                max_perf = cpu->pstate.max_pstate;

        /*
         * performance can be limited by user through sysfs, by cpufreq
         * policy, or by cpu specific default values determined through
         * experimentation.
         */
        max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

        min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
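
/*
 * Worked example (hypothetical limits): with min_pstate = 8,
 * max_pstate = 24, turbo_pstate = 32, turbo allowed, and sysfs limits
 * of max_perf_pct = 75 / min_perf_pct = 25, max_perf starts at 32;
 * *max becomes fp_toint(32 * 0.75) = 24 and *min becomes
 * fp_toint(32 * 0.25) = 8.  Note that both percentages scale the same
 * max_perf base, so the minimum tracks the turbo range rather than
 * the full frequency range.
 */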

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
        int max_perf, min_perf;

        update_turbo_state();

        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

        pstate = clamp_t(int, pstate, min_perf, max_perf);

        if (pstate == cpu->pstate.current_pstate)
                return;

        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

        cpu->pstate.current_pstate = pstate;

        pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate = pstate_funcs.get_max();
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
        cpu->pstate.scaling = pstate_funcs.get_scaling();

        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
        struct sample *sample = &cpu->sample;
        int64_t core_pct;

        core_pct = int_tofp(sample->aperf) * int_tofp(100);
        core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

        sample->freq = fp_toint(
                mul_fp(int_tofp(
                        cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
                        core_pct));

        sample->core_pct_busy = (int32_t)core_pct;
}
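
/*
 * Worked example (illustrative counter deltas): APERF counts actual
 * cycles while MPERF counts at the guaranteed (max non-turbo) rate,
 * so an aperf delta of 8000 against an mperf delta of 10000 gives
 * core_pct = 8000 * 100 / 10000 = 80 (held as int_tofp(80)).  With
 * max_pstate = 24 and scaling = 100000, the estimated frequency is
 * 24 * 100000 / 100 * 80 = 1920000 kHz (1.92 GHz).
 */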

static inline void intel_pstate_sample(struct cpudata *cpu)
{
        u64 aperf, mperf;
        unsigned long flags;

        local_irq_save(flags);
        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);
        local_irq_restore(flags);

        cpu->last_sample_time = cpu->sample.time;
        cpu->sample.time = ktime_get();
        cpu->sample.aperf = aperf;
        cpu->sample.mperf = mperf;
        cpu->sample.aperf -= cpu->prev_aperf;
        cpu->sample.mperf -= cpu->prev_mperf;

        intel_pstate_calc_busy(cpu);

        cpu->prev_aperf = aperf;
        cpu->prev_mperf = mperf;
}

static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(50);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(pid_params.sample_rate_ms);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
        int32_t core_busy, max_pstate, current_pstate, sample_ratio;
        u32 duration_us;
        u32 sample_time;

        /*
         * core_busy is the ratio of actual performance to max
         * max_pstate is the max non turbo pstate available
         * current_pstate was the pstate that was requested during
         *      the last sample period.
         *
         * We normalize core_busy, which was our actual percent
         * performance to what we requested during the last sample
         * period. The result will be a percentage of busy at a
         * specified pstate.
         */
        core_busy = cpu->sample.core_pct_busy;
        max_pstate = int_tofp(cpu->pstate.max_pstate);
        current_pstate = int_tofp(cpu->pstate.current_pstate);
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

        /*
         * Since we have a deferred timer, it will not fire unless
         * we are in C0.  So, determine if the actual elapsed time
         * is significantly greater (3x) than our sample interval.  If it
         * is, then we were idle for a long enough period of time
         * to adjust our busyness.
         */
        sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
        duration_us = (u32)ktime_us_delta(cpu->sample.time,
                                          cpu->last_sample_time);
        if (duration_us > sample_time * 3) {
                sample_ratio = div_fp(int_tofp(sample_time),
                                      int_tofp(duration_us));
                core_busy = mul_fp(core_busy, sample_ratio);
        }

        return core_busy;
}
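
/*
 * Worked example of the normalization (hypothetical numbers): if the
 * core was 50% busy (core_pct_busy = int_tofp(50)) while running at
 * current_pstate = 12 on a part whose max_pstate is 24, the scaled
 * busy value is 50 * (24 / 12) = 100, i.e. the load would fully
 * occupy the max non-turbo P state.  If the deferrable timer fired
 * late, say 40 ms after the last 10 ms sample, the value is further
 * scaled by 10/40 to credit the idle time: 100 * 0.25 = 25.
 */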

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
        int32_t busy_scaled;
        struct _pid *pid;
        signed int ctl;

        pid = &cpu->pid;
        busy_scaled = intel_pstate_get_scaled_busy(cpu);

        ctl = pid_calc(pid, busy_scaled);

        /* Negative values of ctl increase the pstate and vice versa */
        intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
}

static void intel_hwp_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;

        intel_pstate_sample(cpu);
        intel_hwp_set_sample_time(cpu);
}

static void intel_pstate_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;
        struct sample *sample;

        intel_pstate_sample(cpu);

        sample = &cpu->sample;

        intel_pstate_adjust_busy_pstate(cpu);

        trace_pstate_sample(fp_toint(sample->core_pct_busy),
                        fp_toint(intel_pstate_get_scaled_busy(cpu)),
                        cpu->pstate.current_pstate,
                        sample->mperf,
                        sample->aperf,
                        sample->freq);

        intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, \
                        (unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, core_params),
        ICPU(0x2d, core_params),
        ICPU(0x37, byt_params),
        ICPU(0x3a, core_params),
        ICPU(0x3c, core_params),
        ICPU(0x3d, core_params),
        ICPU(0x3e, core_params),
        ICPU(0x3f, core_params),
        ICPU(0x45, core_params),
        ICPU(0x46, core_params),
        ICPU(0x47, core_params),
        ICPU(0x4c, byt_params),
        ICPU(0x4e, core_params),
        ICPU(0x4f, core_params),
        ICPU(0x56, core_params),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
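
/*
 * The table above matches family 6 model numbers.  For reference
 * (best-effort identification, not taken from this file): 0x2a/0x2d
 * are Sandy Bridge, 0x3a/0x3e Ivy Bridge, 0x3c/0x3f/0x45/0x46 Haswell,
 * 0x3d/0x47/0x4f/0x56 Broadwell, 0x4e Skylake, while 0x37/0x4c are
 * Bay Trail/Cherry Trail Atoms, which take the byt_params tuning and
 * VID handling.
 */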

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
        ICPU(0x56, core_params),
        {}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
        struct cpudata *cpu;

        if (!all_cpu_data[cpunum])
                all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
                                               GFP_KERNEL);
        if (!all_cpu_data[cpunum])
                return -ENOMEM;

        cpu = all_cpu_data[cpunum];

        cpu->cpu = cpunum;
        intel_pstate_get_cpu_pstates(cpu);

        init_timer_deferrable(&cpu->timer);
        cpu->timer.data = (unsigned long)cpu;
        cpu->timer.expires = jiffies + HZ/100;

        if (!hwp_active)
                cpu->timer.function = intel_pstate_timer_func;
        else
                cpu->timer.function = intel_hwp_timer_func;

        intel_pstate_busy_pid_reset(cpu);
        intel_pstate_sample(cpu);

        add_timer_on(&cpu->timer, cpunum);

        pr_debug("Intel pstate controlling: cpu %d\n", cpunum);

        return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
        struct sample *sample;
        struct cpudata *cpu;

        cpu = all_cpu_data[cpu_num];
        if (!cpu)
                return 0;
        sample = &cpu->sample;
        return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;

        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
            policy->max >= policy->cpuinfo.max_freq) {
                limits.min_policy_pct = 100;
                limits.min_perf_pct = 100;
                limits.min_perf = int_tofp(1);
                limits.max_policy_pct = 100;
                limits.max_perf_pct = 100;
                limits.max_perf = int_tofp(1);
                limits.no_turbo = 0;
                return 0;
        }

        limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
        limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0, 100);
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
        limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();

        return 0;
}
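
/*
 * Example of the policy-to-percent conversion (hypothetical
 * frequencies): on a part with cpuinfo.max_freq = 3200000 kHz, a
 * cpufreq policy of min = 800000 and max = 2400000 yields
 * min_policy_pct = 800000 * 100 / 3200000 = 25 and max_policy_pct
 * = 75.  These are then combined with the sysfs limits, taking the
 * larger minimum and the smaller maximum, so the tighter constraint
 * always wins.
 */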

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
        cpufreq_verify_within_cpu_limits(policy);

        if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
            policy->policy != CPUFREQ_POLICY_PERFORMANCE)
                return -EINVAL;

        return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
        int cpu_num = policy->cpu;
        struct cpudata *cpu = all_cpu_data[cpu_num];

        pr_info("intel_pstate CPU %d exiting\n", cpu_num);

        del_timer_sync(&all_cpu_data[cpu_num]->timer);
        if (hwp_active)
                return;

        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
        struct cpudata *cpu;
        int rc;

        rc = intel_pstate_init_cpu(policy->cpu);
        if (rc)
                return rc;

        cpu = all_cpu_data[policy->cpu];

        if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;

        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->cpuinfo.max_freq =
                cpu->pstate.turbo_pstate * cpu->pstate.scaling;
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);

        return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
        .setpolicy      = intel_pstate_set_policy,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
        .stop_cpu       = intel_pstate_stop_cpu,
        .name           = "intel_pstate",
};

static int __initdata no_load;
static int __initdata no_hwp;
static int __initdata hwp_only;
static unsigned int force_load;

static int intel_pstate_msrs_not_valid(void)
{
        /* Check that all the MSRs we are using are valid. */
        u64 aperf, mperf, tmp;

        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);

        if (!pstate_funcs.get_max() ||
            !pstate_funcs.get_min() ||
            !pstate_funcs.get_turbo())
                return -ENODEV;

        rdmsrl(MSR_IA32_APERF, tmp);
        if (!(tmp - aperf))
                return -ENODEV;

        rdmsrl(MSR_IA32_MPERF, tmp);
        if (!(tmp - mperf))
                return -ENODEV;

        return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
        pid_params.sample_rate_ms = policy->sample_rate_ms;
        pid_params.p_gain_pct = policy->p_gain_pct;
        pid_params.i_gain_pct = policy->i_gain_pct;
        pid_params.d_gain_pct = policy->d_gain_pct;
        pid_params.deadband = policy->deadband;
        pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
        pstate_funcs.get_max   = funcs->get_max;
        pstate_funcs.get_min   = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.get_scaling = funcs->get_scaling;
        pstate_funcs.set       = funcs->set;
        pstate_funcs.get_vid   = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
        int i;

        for_each_possible_cpu(i) {
                acpi_status status;
                union acpi_object *pss;
                struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;

                status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
                if (ACPI_FAILURE(status))
                        continue;

                pss = buffer.pointer;
                if (pss && pss->type == ACPI_TYPE_PACKAGE) {
                        kfree(pss);
                        return false;
                }

                kfree(pss);
        }

        return true;
}

static bool intel_pstate_has_acpi_ppc(void)
{
        int i;

        for_each_possible_cpu(i) {
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;
                if (acpi_has_method(pr->handle, "_PPC"))
                        return true;
        }
        return false;
}

enum {
        PSS,
        PPC,
};

struct hw_vendor_info {
        u16  valid;
        char oem_id[ACPI_OEM_ID_SIZE];
        char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
        int  oem_pwr_table;
};

/* Hardware vendors whose platforms provide their own power management modes */
static struct hw_vendor_info vendor_info[] = {
        {1, "HP    ", "ProLiant", PSS},
        {1, "ORACLE", "X4-2    ", PPC},
        {1, "ORACLE", "X4-2L   ", PPC},
        {1, "ORACLE", "X4-2B   ", PPC},
        {1, "ORACLE", "X3-2    ", PPC},
        {1, "ORACLE", "X3-2L   ", PPC},
        {1, "ORACLE", "X3-2B   ", PPC},
        {1, "ORACLE", "X4470M2 ", PPC},
        {1, "ORACLE", "X4270M3 ", PPC},
        {1, "ORACLE", "X4270M2 ", PPC},
        {1, "ORACLE", "X4170M2 ", PPC},
        {0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
        struct acpi_table_header hdr;
        struct hw_vendor_info *v_info;
        const struct x86_cpu_id *id;
        u64 misc_pwr;

        id = x86_match_cpu(intel_pstate_cpu_oob_ids);
        if (id) {
                rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
                if (misc_pwr & (1 << 8))
                        return true;
        }

        if (acpi_disabled ||
            ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
                return false;

        for (v_info = vendor_info; v_info->valid; v_info++) {
                if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
                        !strncmp(hdr.oem_table_id, v_info->oem_table_id,
                                                ACPI_OEM_TABLE_ID_SIZE))
                        switch (v_info->oem_pwr_table) {
                        case PSS:
                                return intel_pstate_no_acpi_pss();
                        case PPC:
                                return intel_pstate_has_acpi_ppc() &&
                                        (!force_load);
                        }
        }

        return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
        int cpu, rc = 0;
        const struct x86_cpu_id *id;
        struct cpu_defaults *cpu_info;
        struct cpuinfo_x86 *c = &boot_cpu_data;

        if (no_load)
                return -ENODEV;

        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;

        /*
         * The Intel pstate driver will be ignored if the platform
         * firmware has its own power management modes.
         */
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;

        cpu_info = (struct cpu_defaults *)id->driver_data;

        copy_pid_params(&cpu_info->pid_policy);
        copy_cpu_funcs(&cpu_info->funcs);

        if (intel_pstate_msrs_not_valid())
                return -ENODEV;

        pr_info("Intel P-state driver initializing.\n");

        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;

        if (cpu_has(c, X86_FEATURE_HWP) && !no_hwp)
                intel_pstate_hwp_enable();

        if (!hwp_active && hwp_only)
                goto out;

        rc = cpufreq_register_driver(&intel_pstate_driver);
        if (rc)
                goto out;

        intel_pstate_debug_expose_params();
        intel_pstate_sysfs_expose_params();

        return rc;
out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
                        del_timer_sync(&all_cpu_data[cpu]->timer);
                        kfree(all_cpu_data[cpu]);
                }
        }

        put_online_cpus();
        vfree(all_cpu_data);
        return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "disable"))
                no_load = 1;
        if (!strcmp(str, "no_hwp"))
                no_hwp = 1;
        if (!strcmp(str, "force"))
                force_load = 1;
        if (!strcmp(str, "hwp_only"))
                hwp_only = 1;
        return 0;
}
early_param("intel_pstate", intel_pstate_setup);
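
/*
 * Kernel command line usage, as parsed above:
 *
 *      intel_pstate=disable    never load this driver
 *      intel_pstate=no_hwp     do not enable hardware P states (HWP)
 *      intel_pstate=force      load even when the ACPI _PPC check
 *                              suggests the platform manages power itself
 *      intel_pstate=hwp_only   register the driver only on HWP-capable
 *                              parts
 */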

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");