linux/drivers/cpufreq/intel_pstate.c
/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>

#define BYT_RATIOS              0x66a
#define BYT_VIDS                0x66b
#define BYT_TURBO_RATIOS        0x66c
#define BYT_TURBO_VIDS          0x66d

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

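/*
 * Fixed-point helpers. Values carry FRAC_BITS (8) fractional bits, so
 * 1.0 is represented as 256. For example, int_tofp(3) == 0x300 (768),
 * fp_toint(0x300) == 3, and mul_fp(int_tofp(3), int_tofp(2)) ==
 * (768 * 512) >> 8 == int_tofp(6).
 */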
static inline int32_t mul_fp(int32_t x, int32_t y)
{
        return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
        return div_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
        int mask, ret;

        ret = fp_toint(x);
        mask = (1 << FRAC_BITS) - 1;
        if (x & mask)
                ret += 1;
        return ret;
}

struct sample {
        int32_t core_pct_busy;
        u64 aperf;
        u64 mperf;
        int freq;
        ktime_t time;
};

struct pstate_data {
        int     current_pstate;
        int     min_pstate;
        int     max_pstate;
        int     scaling;
        int     turbo_pstate;
};

struct vid_data {
        int min;
        int max;
        int turbo;
        int32_t ratio;
};

struct _pid {
        int setpoint;
        int32_t integral;
        int32_t p_gain;
        int32_t i_gain;
        int32_t d_gain;
        int deadband;
        int32_t last_err;
};

struct cpudata {
        int cpu;

        struct timer_list timer;

        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;

        ktime_t last_sample_time;
        u64     prev_aperf;
        u64     prev_mperf;
        struct sample sample;
};

static struct cpudata **all_cpu_data;

struct pstate_adjust_policy {
        int sample_rate_ms;
        int deadband;
        int setpoint;
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
};

struct pstate_funcs {
        int (*get_max)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
        void (*set)(struct cpudata *, int pstate);
        void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;

struct perf_limits {
        int no_turbo;
        int turbo_disabled;
        int max_perf_pct;
        int min_perf_pct;
        int32_t max_perf;
        int32_t min_perf;
        int max_policy_pct;
        int max_sysfs_pct;
        int min_policy_pct;
        int min_sysfs_pct;
};

static struct perf_limits limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
        .min_sysfs_pct = 0,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral)
{
        pid->setpoint = setpoint;
        pid->deadband  = deadband;
        pid->integral  = int_tofp(integral);
        pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
        pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
        pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
        pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

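/*
 * One PID step, computed in fixed point throughout. For example, with
 * the core_params gains below (setpoint 97, p_gain_pct 20, i/d gains 0)
 * and busy_scaled == int_tofp(47): fp_error = int_tofp(50), pterm =
 * mul_fp(div_fp(int_tofp(20), int_tofp(100)), fp_error) ~= int_tofp(10),
 * so pid_calc() returns about 10 and the requested P state drops by 10.
 */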
static signed int pid_calc(struct _pid *pid, int32_t busy)
{
        signed int result;
        int32_t pterm, dterm, fp_error;
        int32_t integral_limit;

        fp_error = int_tofp(pid->setpoint) - busy;

        if (abs(fp_error) <= int_tofp(pid->deadband))
                return 0;

        pterm = mul_fp(pid->p_gain, fp_error);

        pid->integral += fp_error;

        /*
         * We limit the integral here so that it will never
         * get higher than 30.  This prevents it from becoming
         * too large an input over long periods of time and allows
         * it to get factored out sooner.
         *
         * The value of 30 was chosen through experimentation.
         */
        integral_limit = int_tofp(30);
        if (pid->integral > integral_limit)
                pid->integral = integral_limit;
        if (pid->integral < -integral_limit)
                pid->integral = -integral_limit;

        dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
        pid->last_err = fp_error;

        result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
        result = result + (1 << (FRAC_BITS - 1));
        return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
        pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
        pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
        pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

        pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
        unsigned int cpu;

        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu])
                        intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
        }
}

static inline void update_turbo_state(void)
{
        u64 misc_en;
        struct cpudata *cpu;

        cpu = all_cpu_data[0];
        rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
        limits.turbo_disabled =
                (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

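/*
 * HWP expresses performance on a 0-255 scale, so the percent limits are
 * rescaled before being written to MSR_HWP_REQUEST; e.g. a 50% limit
 * maps to PCT_TO_HWP(50) == 127.
 */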
#define PCT_TO_HWP(x) ((x) * 255 / 100)
static void intel_pstate_hwp_set(void)
{
        int min, max, cpu;
        u64 value, freq;

        get_online_cpus();

        for_each_online_cpu(cpu) {
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
                min = PCT_TO_HWP(limits.min_perf_pct);
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);

                max = PCT_TO_HWP(limits.max_perf_pct);
                if (limits.no_turbo) {
                        rdmsrl(MSR_HWP_CAPABILITIES, freq);
                        max = HWP_GUARANTEED_PERF(freq);
                }

                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }

        put_online_cpus();
}

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
        *(u32 *)data = val;
        intel_pstate_reset_all_pid();
        return 0;
}

static int pid_param_get(void *data, u64 *val)
{
        *val = *(u32 *)data;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
        char *name;
        void *value;
};

static struct pid_param pid_files[] = {
        {"sample_rate_ms", &pid_params.sample_rate_ms},
        {"d_gain_pct", &pid_params.d_gain_pct},
        {"i_gain_pct", &pid_params.i_gain_pct},
        {"deadband", &pid_params.deadband},
        {"setpoint", &pid_params.setpoint},
        {"p_gain_pct", &pid_params.p_gain_pct},
        {NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
        struct dentry *debugfs_parent;
        int i = 0;

        if (hwp_active)
                return;
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
        while (pid_files[i].name) {
                debugfs_create_file(pid_files[i].name, 0660,
                                    debugfs_parent, pid_files[i].value,
                                    &fops_pid_param);
                i++;
        }
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
#define show_one(file_name, object)                                     \
        static ssize_t show_##file_name                                 \
        (struct kobject *kobj, struct attribute *attr, char *buf)       \
        {                                                               \
                return sprintf(buf, "%u\n", limits.object);             \
        }

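/*
 * Report the share of the P state range that is turbo. For example, with
 * min_pstate 17, max_pstate 28 and turbo_pstate 32 there are 16 states
 * in total, 12 of them non-turbo (75%), so turbo_pct reads back as 25.
 */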
static ssize_t show_turbo_pct(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total, no_turbo, turbo_pct;
        uint32_t turbo_fp;

        cpu = all_cpu_data[0];

        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
        turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
        turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
        return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total;

        cpu = all_cpu_data[0];
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
                             struct attribute *attr, char *buf)
{
        ssize_t ret;

        update_turbo_state();
        if (limits.turbo_disabled)
                ret = sprintf(buf, "%u\n", limits.turbo_disabled);
        else
                ret = sprintf(buf, "%u\n", limits.no_turbo);

        return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
                              const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        update_turbo_state();
        if (limits.turbo_disabled) {
                pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                return -EPERM;
        }

        limits.no_turbo = clamp_t(int, input, 0, 1);

        if (hwp_active)
                intel_pstate_hwp_set();

        return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.min_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
        &max_perf_pct.attr,
        &min_perf_pct.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
        NULL
};

static struct attribute_group intel_pstate_attr_group = {
        .attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
        struct kobject *intel_pstate_kobject;
        int rc;

        intel_pstate_kobject = kobject_create_and_add("intel_pstate",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!intel_pstate_kobject);
        rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
        BUG_ON(rc);
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(void)
{
        hwp_active++;
        pr_info("intel_pstate HWP enabled\n");

        wrmsrl(MSR_PM_ENABLE, 0x1);
}

static int byt_get_min_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 8) & 0x7F;
}

static int byt_get_max_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 16) & 0x7F;
}

static int byt_get_turbo_pstate(void)
{
        u64 value;

        rdmsrl(BYT_TURBO_RATIOS, value);
        return value & 0x7F;
}

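/*
 * Baytrail must program a voltage (VID) along with the P state. The VID
 * is interpolated linearly between vid.min and vid.max over the
 * non-turbo P state range and rounded up, so e.g. a pstate halfway
 * between min_pstate and max_pstate gets a VID halfway between vid.min
 * and vid.max; turbo P states always use the dedicated turbo VID.
 */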
static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;
        int32_t vid_fp;
        u32 vid;

        val = pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        vid_fp = cpudata->vid.min + mul_fp(
                int_tofp(pstate - cpudata->pstate.min_pstate),
                cpudata->vid.ratio);

        vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
        vid = ceiling_fp(vid_fp);

        if (pstate > cpudata->pstate.max_pstate)
                vid = cpudata->vid.turbo;

        val |= vid;

        wrmsrl(MSR_IA32_PERF_CTL, val);
}

#define BYT_BCLK_FREQS 5
static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800 };

static int byt_get_scaling(void)
{
        u64 value;
        int i;

        /* The low three bits of MSR_FSB_FREQ select the bus frequency. */
        rdmsrl(MSR_FSB_FREQ, value);
        i = value & 0x7;

        BUG_ON(i >= BYT_BCLK_FREQS);

        return byt_freq_table[i] * 100;
}

static void byt_get_vid(struct cpudata *cpudata)
{
        u64 value;

        rdmsrl(BYT_VIDS, value);
        cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
        cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
        cpudata->vid.ratio = div_fp(
                cpudata->vid.max - cpudata->vid.min,
                int_tofp(cpudata->pstate.max_pstate -
                        cpudata->pstate.min_pstate));

        rdmsrl(BYT_TURBO_VIDS, value);
        cpudata->vid.turbo = value & 0x7f;
}

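/*
 * On Core parts the P state range comes from MSR_PLATFORM_INFO:
 * bits 47:40 hold the maximum efficiency ratio (lowest P state) and
 * bits 15:8 hold the maximum non-turbo ratio, matching the shifts used
 * below.
 */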
static int core_get_min_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;

        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = value & 0xFF;
        if (ret <= nont)
                ret = nont;
        return ret;
}

static inline int core_get_scaling(void)
{
        return 100000;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;

        val = pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static int knl_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;

        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = (value >> 8) & 0xFF;
        if (ret <= nont)
                ret = nont;
        return ret;
}

static struct cpu_defaults core_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};

static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 60,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
        },
        .funcs = {
                .get_max = byt_get_max_pstate,
                .get_min = byt_get_min_pstate,
                .get_turbo = byt_get_turbo_pstate,
                .set = byt_set_pstate,
                .get_scaling = byt_get_scaling,
                .get_vid = byt_get_vid,
        },
};

static struct cpu_defaults knl_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = knl_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};

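/*
 * Resolve the percent limits into a P state range. For example, with
 * turbo_pstate 32 and max_perf_pct 80, limits.max_perf is roughly
 * int_tofp(0.8) and *max comes out as fp_toint(32 * 0.8) = 25.
 */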
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;

        if (limits.no_turbo || limits.turbo_disabled)
                max_perf = cpu->pstate.max_pstate;

        /*
         * Performance can be limited by the user through sysfs, by the
         * cpufreq policy, or by CPU-specific default values determined
         * through experimentation.
         */
        max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

        min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
        int max_perf, min_perf;

        update_turbo_state();

        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

        pstate = clamp_t(int, pstate, min_perf, max_perf);

        if (pstate == cpu->pstate.current_pstate)
                return;

        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

        cpu->pstate.current_pstate = pstate;

        pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate = pstate_funcs.get_max();
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
        cpu->pstate.scaling = pstate_funcs.get_scaling();

        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

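/*
 * Derive the core's busy percentage and effective frequency from the
 * APERF/MPERF deltas of the last sample. For example, with an APERF
 * delta of 180000 and an MPERF delta of 200000, core_pct is 90 (in
 * fixed point); with max_pstate 24 and scaling 100000 the reported
 * frequency is 24 * 100000 / 100 * 90 = 2160000 kHz.
 */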
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
        struct sample *sample = &cpu->sample;
        int64_t core_pct;

        core_pct = int_tofp(sample->aperf) * int_tofp(100);
        core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

        sample->freq = fp_toint(
                mul_fp(int_tofp(
                        cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
                        core_pct));

        sample->core_pct_busy = (int32_t)core_pct;
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
        u64 aperf, mperf;
        unsigned long flags;

        local_irq_save(flags);
        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);
        local_irq_restore(flags);

        cpu->last_sample_time = cpu->sample.time;
        cpu->sample.time = ktime_get();
        cpu->sample.aperf = aperf;
        cpu->sample.mperf = mperf;
        cpu->sample.aperf -= cpu->prev_aperf;
        cpu->sample.mperf -= cpu->prev_mperf;

        intel_pstate_calc_busy(cpu);

        cpu->prev_aperf = aperf;
        cpu->prev_mperf = mperf;
}

static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(50);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(pid_params.sample_rate_ms);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

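/*
 * Example of the idle correction below: with a 10 ms sample rate,
 * sample_time is 10000 us. If the deferrable timer actually fired
 * 60000 us after the previous sample, the CPU must have been idle for
 * most of that window, so core_busy is scaled by 10000/60000 (about
 * one sixth) before being fed to the PID.
 */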
static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
        int32_t core_busy, max_pstate, current_pstate, sample_ratio;
        u32 duration_us;
        u32 sample_time;

        /*
         * core_busy is the ratio of actual performance to max.
         * max_pstate is the max non-turbo P state available.
         * current_pstate is the P state that was requested during
         *      the last sample period.
         *
         * We normalize core_busy, which was our actual percent
         * performance, to what we requested during the last sample
         * period. The result will be a percentage of busy at a
         * specified pstate.
         */
        core_busy = cpu->sample.core_pct_busy;
        max_pstate = int_tofp(cpu->pstate.max_pstate);
        current_pstate = int_tofp(cpu->pstate.current_pstate);
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

        /*
         * Since we have a deferred timer, it will not fire unless
         * we are in C0.  So, determine if the actual elapsed time
         * is significantly greater (3x) than our sample interval.  If it
         * is, then we were idle for a long enough period of time
         * to adjust our busyness.
         */
        sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
        duration_us = (u32)ktime_us_delta(cpu->sample.time,
                                          cpu->last_sample_time);
        if (duration_us > sample_time * 3) {
                sample_ratio = div_fp(int_tofp(sample_time),
                                      int_tofp(duration_us));
                core_busy = mul_fp(core_busy, sample_ratio);
        }

        return core_busy;
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
        int32_t busy_scaled;
        struct _pid *pid;
        signed int ctl;

        pid = &cpu->pid;
        busy_scaled = intel_pstate_get_scaled_busy(cpu);

        ctl = pid_calc(pid, busy_scaled);

        /* Negative values of ctl increase the pstate and vice versa */
        intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
}

static void intel_hwp_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;

        intel_pstate_sample(cpu);
        intel_hwp_set_sample_time(cpu);
}

static void intel_pstate_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;
        struct sample *sample;

        intel_pstate_sample(cpu);

        sample = &cpu->sample;

        intel_pstate_adjust_busy_pstate(cpu);

        trace_pstate_sample(fp_toint(sample->core_pct_busy),
                        fp_toint(intel_pstate_get_scaled_busy(cpu)),
                        cpu->pstate.current_pstate,
                        sample->mperf,
                        sample->aperf,
                        sample->freq);

        intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
                        (unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, core_params),
        ICPU(0x2d, core_params),
        ICPU(0x37, byt_params),
        ICPU(0x3a, core_params),
        ICPU(0x3c, core_params),
        ICPU(0x3d, core_params),
        ICPU(0x3e, core_params),
        ICPU(0x3f, core_params),
        ICPU(0x45, core_params),
        ICPU(0x46, core_params),
        ICPU(0x47, core_params),
        ICPU(0x4c, byt_params),
        ICPU(0x4e, core_params),
        ICPU(0x4f, core_params),
        ICPU(0x56, core_params),
        ICPU(0x57, knl_params),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
        ICPU(0x56, core_params),
        {}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
        struct cpudata *cpu;

        if (!all_cpu_data[cpunum])
                all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
                                               GFP_KERNEL);
        if (!all_cpu_data[cpunum])
                return -ENOMEM;

        cpu = all_cpu_data[cpunum];

        cpu->cpu = cpunum;
        intel_pstate_get_cpu_pstates(cpu);

        init_timer_deferrable(&cpu->timer);
        cpu->timer.data = (unsigned long)cpu;
        cpu->timer.expires = jiffies + HZ/100;

        if (!hwp_active)
                cpu->timer.function = intel_pstate_timer_func;
        else
                cpu->timer.function = intel_hwp_timer_func;

        intel_pstate_busy_pid_reset(cpu);
        intel_pstate_sample(cpu);

        add_timer_on(&cpu->timer, cpunum);

        pr_debug("Intel pstate controlling: cpu %d\n", cpunum);

        return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
        struct sample *sample;
        struct cpudata *cpu;

        cpu = all_cpu_data[cpu_num];
        if (!cpu)
                return 0;
        sample = &cpu->sample;
        return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;

        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
            policy->max >= policy->cpuinfo.max_freq) {
                limits.min_policy_pct = 100;
                limits.min_perf_pct = 100;
                limits.min_perf = int_tofp(1);
                limits.max_policy_pct = 100;
                limits.max_perf_pct = 100;
                limits.max_perf = int_tofp(1);
                limits.no_turbo = 0;
                return 0;
        }

        limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
        limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0, 100);
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
        limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();

        return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
        cpufreq_verify_within_cpu_limits(policy);

        if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
            policy->policy != CPUFREQ_POLICY_PERFORMANCE)
                return -EINVAL;

        return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
        int cpu_num = policy->cpu;
        struct cpudata *cpu = all_cpu_data[cpu_num];

        pr_info("intel_pstate CPU %d exiting\n", cpu_num);

        del_timer_sync(&all_cpu_data[cpu_num]->timer);
        if (hwp_active)
                return;

        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
        struct cpudata *cpu;
        int rc;

        rc = intel_pstate_init_cpu(policy->cpu);
        if (rc)
                return rc;

        cpu = all_cpu_data[policy->cpu];

        if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;

        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->cpuinfo.max_freq =
                cpu->pstate.turbo_pstate * cpu->pstate.scaling;
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);

        return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
        .setpolicy      = intel_pstate_set_policy,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
        .stop_cpu       = intel_pstate_stop_cpu,
        .name           = "intel_pstate",
};

static int __initdata no_load;
static int __initdata no_hwp;
static int __initdata hwp_only;
static unsigned int force_load;

static int intel_pstate_msrs_not_valid(void)
{
        if (!pstate_funcs.get_max() ||
            !pstate_funcs.get_min() ||
            !pstate_funcs.get_turbo())
                return -ENODEV;

        return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
        pid_params.sample_rate_ms = policy->sample_rate_ms;
        pid_params.p_gain_pct = policy->p_gain_pct;
        pid_params.i_gain_pct = policy->i_gain_pct;
        pid_params.d_gain_pct = policy->d_gain_pct;
        pid_params.deadband = policy->deadband;
        pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
        pstate_funcs.get_max   = funcs->get_max;
        pstate_funcs.get_min   = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.get_scaling = funcs->get_scaling;
        pstate_funcs.set       = funcs->set;
        pstate_funcs.get_vid   = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
        int i;

        for_each_possible_cpu(i) {
                acpi_status status;
                union acpi_object *pss;
                struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;

                status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
                if (ACPI_FAILURE(status))
                        continue;

                pss = buffer.pointer;
                if (pss && pss->type == ACPI_TYPE_PACKAGE) {
                        kfree(pss);
                        return false;
                }

                kfree(pss);
        }

        return true;
}

static bool intel_pstate_has_acpi_ppc(void)
{
        int i;

        for_each_possible_cpu(i) {
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;
                if (acpi_has_method(pr->handle, "_PPC"))
                        return true;
        }
        return false;
}

enum {
        PSS,
        PPC,
};

struct hw_vendor_info {
        u16  valid;
        char oem_id[ACPI_OEM_ID_SIZE];
        char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
        int  oem_pwr_table;
};

/* Hardware vendor platforms that have their own power management modes */
static struct hw_vendor_info vendor_info[] = {
        {1, "HP    ", "ProLiant", PSS},
        {1, "ORACLE", "X4-2    ", PPC},
        {1, "ORACLE", "X4-2L   ", PPC},
        {1, "ORACLE", "X4-2B   ", PPC},
        {1, "ORACLE", "X3-2    ", PPC},
        {1, "ORACLE", "X3-2L   ", PPC},
        {1, "ORACLE", "X3-2B   ", PPC},
        {1, "ORACLE", "X4470M2 ", PPC},
        {1, "ORACLE", "X4270M3 ", PPC},
        {1, "ORACLE", "X4270M2 ", PPC},
        {1, "ORACLE", "X4170M2 ", PPC},
        {0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
        struct acpi_table_header hdr;
        struct hw_vendor_info *v_info;
        const struct x86_cpu_id *id;
        u64 misc_pwr;

        id = x86_match_cpu(intel_pstate_cpu_oob_ids);
        if (id) {
                rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
                if (misc_pwr & (1 << 8))
                        return true;
        }

        if (acpi_disabled ||
            ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
                return false;

        for (v_info = vendor_info; v_info->valid; v_info++) {
                if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
                        !strncmp(hdr.oem_table_id, v_info->oem_table_id,
                                                ACPI_OEM_TABLE_ID_SIZE))
                        switch (v_info->oem_pwr_table) {
                        case PSS:
                                return intel_pstate_no_acpi_pss();
                        case PPC:
                                return intel_pstate_has_acpi_ppc() &&
                                        !force_load;
                        }
        }

        return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
        int cpu, rc = 0;
        const struct x86_cpu_id *id;
        struct cpu_defaults *cpu_def;

        if (no_load)
                return -ENODEV;

        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;

        /*
         * The Intel pstate driver will be ignored if the platform
         * firmware has its own power management modes.
         */
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;

        cpu_def = (struct cpu_defaults *)id->driver_data;

        copy_pid_params(&cpu_def->pid_policy);
        copy_cpu_funcs(&cpu_def->funcs);

        if (intel_pstate_msrs_not_valid())
                return -ENODEV;

        pr_info("Intel P-state driver initializing.\n");

        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;

        if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
                intel_pstate_hwp_enable();

        if (!hwp_active && hwp_only)
                goto out;

        rc = cpufreq_register_driver(&intel_pstate_driver);
        if (rc)
                goto out;

        intel_pstate_debug_expose_params();
        intel_pstate_sysfs_expose_params();

        return rc;

out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
                        del_timer_sync(&all_cpu_data[cpu]->timer);
                        kfree(all_cpu_data[cpu]);
                }
        }

        put_online_cpus();
        vfree(all_cpu_data);
        return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "disable"))
                no_load = 1;
        if (!strcmp(str, "no_hwp"))
                no_hwp = 1;
        if (!strcmp(str, "force"))
                force_load = 1;
        if (!strcmp(str, "hwp_only"))
                hwp_only = 1;
        return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");