1
2
3
4
5
6
7
8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/kernel_stat.h>
13#include <linux/module.h>
14#include <linux/ktime.h>
15#include <linux/hrtimer.h>
16#include <linux/tick.h>
17#include <linux/slab.h>
18#include <linux/sched/cpufreq.h>
19#include <linux/list.h>
20#include <linux/cpu.h>
21#include <linux/cpufreq.h>
22#include <linux/sysfs.h>
23#include <linux/types.h>
24#include <linux/fs.h>
25#include <linux/acpi.h>
26#include <linux/vmalloc.h>
27#include <trace/events/power.h>
28
29#include <asm/div64.h>
30#include <asm/msr.h>
31#include <asm/cpu_device_id.h>
32#include <asm/cpufeature.h>
33#include <asm/intel-family.h>
34
35#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
36
37#define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
38#define INTEL_CPUFREQ_TRANSITION_DELAY 500
39
40#ifdef CONFIG_ACPI
41#include <acpi/processor.h>
42#include <acpi/cppc_acpi.h>
43#endif
44
45#define FRAC_BITS 8
46#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
47#define fp_toint(X) ((X) >> FRAC_BITS)
48
49#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
50
51#define EXT_BITS 6
52#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
53#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
54#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
55
56static inline int32_t mul_fp(int32_t x, int32_t y)
57{
58 return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
59}
60
61static inline int32_t div_fp(s64 x, s64 y)
62{
63 return div64_s64((int64_t)x << FRAC_BITS, y);
64}
65
66static inline int ceiling_fp(int32_t x)
67{
68 int mask, ret;
69
70 ret = fp_toint(x);
71 mask = (1 << FRAC_BITS) - 1;
72 if (x & mask)
73 ret += 1;
74 return ret;
75}
76
77static inline int32_t percent_fp(int percent)
78{
79 return div_fp(percent, 100);
80}
81
82static inline u64 mul_ext_fp(u64 x, u64 y)
83{
84 return (x * y) >> EXT_FRAC_BITS;
85}
86
87static inline u64 div_ext_fp(u64 x, u64 y)
88{
89 return div64_u64(x << EXT_FRAC_BITS, y);
90}
91
92static inline int32_t percent_ext_fp(int percent)
93{
94 return div_ext_fp(percent, 100);
95}
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * struct sample - one performance-counter snapshot used to estimate CPU
 * busyness.  The fields are presumably deltas between consecutive counter
 * reads (the code that fills them is not visible in this chunk) — TODO
 * confirm against the sampling routine.
 */
struct sample {
	int32_t core_avg_perf;	/* average perf ratio, extended fixed-point */
	int32_t busy_scaled;	/* busyness estimate, fixed-point */
	u64 aperf;		/* APERF counter value/delta */
	u64 mperf;		/* MPERF counter value/delta */
	u64 tsc;		/* TSC value/delta over the same window */
	u64 time;		/* timestamp of the sample */
};
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 * struct pstate_data - per-CPU P-state limits (as ratios) plus the
 * frequency values derived from them via @scaling.
 */
struct pstate_data {
	int current_pstate;	/* last P-state ratio requested */
	int min_pstate;		/* minimum supported ratio */
	int max_pstate;		/* maximum non-turbo ratio */
	int max_pstate_physical; /* physical (unadjusted) max non-turbo ratio */
	int scaling;		/* ratio -> frequency multiplier (see *_get_scaling) */
	int turbo_pstate;	/* maximum turbo ratio */
	unsigned int max_freq;	/* max_pstate * scaling */
	unsigned int turbo_freq; /* turbo_pstate * scaling (or HWP highest perf) */
};
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/*
 * struct vid_data - voltage ID parameters for Atom P-state programming.
 * @min and @max are fixed-point (int_tofp); @ratio is the fixed-point VID
 * step per P-state computed in atom_get_vid().
 */
struct vid_data {
	int min;	/* VID at min_pstate, fixed-point */
	int max;	/* VID at max_pstate, fixed-point */
	int turbo;	/* raw VID used for pstates above max_pstate */
	int32_t ratio;	/* (max - min) / (max_pstate - min_pstate), fixed-point */
};
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*
 * struct global_params - driver-wide tunables and turbo availability,
 * shared by all CPUs and exposed via sysfs.
 */
struct global_params {
	bool no_turbo;		/* user asked for no turbo (sysfs "no_turbo") */
	bool turbo_disabled;	/* turbo unavailable; set by update_turbo_state() */
	bool turbo_disabled_mf;	/* turbo_disabled value last applied to max freqs */
	int max_perf_pct;	/* maximum allowed performance, percent */
	int min_perf_pct;	/* minimum allowed performance, percent */
};
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
 * struct cpudata - all per-CPU state of the driver.  One instance per
 * possible CPU, reached through all_cpu_data[].
 */
struct cpudata {
	int cpu;			/* CPU number this instance belongs to */

	unsigned int policy;		/* current CPUFREQ_POLICY_* value */
	struct update_util_data update_util; /* scheduler utilization hook */
	bool update_util_set;		/* whether the hook is registered */

	struct pstate_data pstate;	/* P-state limits and frequencies */
	struct vid_data vid;		/* Atom voltage data */

	u64 last_update;		/* timestamps/counters from the previous */
	u64 last_sample_time;		/* sample; used to form deltas — the */
	u64 aperf_mperf_shift;		/* sampling code is outside this chunk */
	u64 prev_aperf;
	u64 prev_mperf;
	u64 prev_tsc;
	u64 prev_cummulative_iowait;	/* NOTE(review): "cummulative" typo kept */
	struct sample sample;		/* most recent sample */
	int32_t min_perf_ratio;		/* clamped min perf (HWP request units) */
	int32_t max_perf_ratio;		/* clamped max perf (HWP request units) */
#ifdef CONFIG_ACPI
	struct acpi_processor_performance acpi_perf_data; /* _PSS data */
	bool valid_pss_table;		/* _PSS table registered and sane */
#endif
	unsigned int iowait_boost;	/* current iowait boost value */
	s16 epp_powersave;		/* EPP saved when switching to performance */
	s16 epp_policy;			/* policy for which EPP was last set */
	s16 epp_default;		/* power-on EPP value */
	s16 epp_saved;			/* EPP saved across suspend (save_state) */
	u64 hwp_req_cached;		/* last MSR_HWP_REQUEST value written */
	u64 hwp_cap_cached;		/* last MSR_HWP_CAPABILITIES value read */
	u64 last_io_update;		/* iowait-boost bookkeeping (not in chunk) */
	unsigned int sched_flags;	/* SCHED_CPUFREQ_* flags accumulated */
	u32 hwp_boost_min;		/* current dynamic HWP boost floor */
};
267
268static struct cpudata **all_cpu_data;
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/*
 * struct pstate_funcs - CPU-model-specific callbacks, filled in once at
 * init and used through the file (core_*, atom_*, knl_* implementations).
 */
struct pstate_funcs {
	int (*get_max)(void);		/* max non-turbo P-state ratio */
	int (*get_max_physical)(void);	/* physical max non-turbo ratio */
	int (*get_min)(void);		/* min P-state ratio */
	int (*get_turbo)(void);		/* max turbo ratio */
	int (*get_scaling)(void);	/* ratio -> frequency multiplier */
	int (*get_aperf_mperf_shift)(void); /* optional APERF/MPERF shift */
	u64 (*get_val)(struct cpudata*, int pstate); /* PERF_CTL value for pstate */
	void (*get_vid)(struct cpudata *);	/* optional: read VID data (Atom) */
};
293
294static struct pstate_funcs pstate_funcs __read_mostly;
295
296static int hwp_active __read_mostly;
297static int hwp_mode_bdw __read_mostly;
298static bool per_cpu_limits __read_mostly;
299static bool hwp_boost __read_mostly;
300
301static struct cpufreq_driver *intel_pstate_driver __read_mostly;
302
303#ifdef CONFIG_ACPI
304static bool acpi_ppc;
305#endif
306
307static struct global_params global;
308
309static DEFINE_MUTEX(intel_pstate_driver_lock);
310static DEFINE_MUTEX(intel_pstate_limits_lock);
311
312#ifdef CONFIG_ACPI
313
314static bool intel_pstate_acpi_pm_profile_server(void)
315{
316 if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
317 acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
318 return true;
319
320 return false;
321}
322
323static bool intel_pstate_get_ppc_enable_status(void)
324{
325 if (intel_pstate_acpi_pm_profile_server())
326 return true;
327
328 return acpi_ppc;
329}
330
331#ifdef CONFIG_ACPI_CPPC_LIB
332
333
/*
 * Enable scheduler ITMT support from workqueue context; presumably the
 * caller's context (CPU init path) does not allow calling
 * sched_set_itmt_support() directly — TODO confirm.
 */
static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

/* NOTE(review): "pstste" typo kept — the symbol name is part of the file. */
static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
340
/*
 * Tell the scheduler this CPU's ITMT priority (its CPPC highest_perf) and,
 * once CPUs with differing highest_perf have been seen, schedule enabling
 * of ITMT support.  Uses function-static min/max trackers, so it is meant
 * to be called once per CPU during bring-up.
 */
static void intel_pstate_set_itmt_prio(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support() has been called; asymmetry detection
	 * below decides when to actually enable ITMT.
	 */
	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);

	/* Once max > min (asymmetry found), stop updating the trackers. */
	if (max_highest_perf <= min_highest_perf) {
		if (cppc_perf.highest_perf > max_highest_perf)
			max_highest_perf = cppc_perf.highest_perf;

		if (cppc_perf.highest_perf < min_highest_perf)
			min_highest_perf = cppc_perf.highest_perf;

		if (max_highest_perf > min_highest_perf) {
			/*
			 * CPUs with different highest_perf exist: this is an
			 * ITMT-capable system.  Defer sched_set_itmt_support()
			 * to a workqueue (see sched_itmt_work above).
			 */
			schedule_work(&sched_itmt_work);
		}
	}
}
376
/*
 * Return the CPPC guaranteed performance level for @cpu, falling back to
 * nominal_perf when guaranteed_perf is not reported (zero); negative error
 * code if the CPPC capabilities cannot be read.
 * NOTE(review): "guranteed" typo kept — the stub under !CONFIG_ACPI_CPPC_LIB
 * uses the same spelling.
 */
static int intel_pstate_get_cppc_guranteed(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return ret;

	if (cppc_perf.guaranteed_perf)
		return cppc_perf.guaranteed_perf;

	return cppc_perf.nominal_perf;
}
391
392#else
/* Stub when CONFIG_ACPI_CPPC_LIB is not set: no ITMT priorities. */
static void intel_pstate_set_itmt_prio(int cpu)
{
}
396#endif
397
/*
 * Register ACPI _PSS performance data for @policy's CPU so that _PPC
 * limits can be enforced.  With HWP active only ITMT priorities are set
 * up; on failure or an unusable _PSS table the registration is undone
 * (see the err: path) and valid_pss_table stays false.
 */
static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active) {
		intel_pstate_set_itmt_prio(policy->cpu);
		return;
	}

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Only FIXED_HARDWARE control is usable here; any other address
	 * space means this driver cannot program the states.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
						ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry _PSS, simply ignore _PSS and continue as
	 * usual without taking _PSS into account.
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * If turbo is available, patch state 0 (by convention the highest
	 * one) with the real max frequency so _PPC percentages map onto the
	 * turbo range correctly.
	 */
	if (!global.turbo_disabled)
		cpu->acpi_perf_data.states[0].core_frequency =
					policy->cpuinfo.max_freq / 1000;
	cpu->valid_pss_table = true;
	pr_debug("_PPC limits will be enforced\n");

	return;

 err:
	cpu->valid_pss_table = false;
	acpi_processor_unregister_performance(policy->cpu);
}
467
468static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
469{
470 struct cpudata *cpu;
471
472 cpu = all_cpu_data[policy->cpu];
473 if (!cpu->valid_pss_table)
474 return;
475
476 acpi_processor_unregister_performance(policy->cpu);
477}
478#else
/* !CONFIG_ACPI stubs: no _PSS handling, never a server PM profile. */
static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}

static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}

static inline bool intel_pstate_acpi_pm_profile_server(void)
{
	return false;
}
491#endif
492
493#ifndef CONFIG_ACPI_CPPC_LIB
/* No CPPC library: guaranteed performance cannot be queried. */
static int intel_pstate_get_cppc_guranteed(int cpu)
{
	return -ENOTSUPP;
}
498#endif
499
/*
 * Refresh global.turbo_disabled: turbo is unavailable either when
 * MSR_IA32_MISC_ENABLE says so or when CPU0's max and turbo ratios are
 * equal (no turbo range).  CPU0 is used as representative for the package.
 */
static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	global.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
511
512static int min_perf_pct_min(void)
513{
514 struct cpudata *cpu = all_cpu_data[0];
515 int turbo_pstate = cpu->pstate.turbo_pstate;
516
517 return turbo_pstate ?
518 (cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
519}
520
521static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
522{
523 u64 epb;
524 int ret;
525
526 if (!boot_cpu_has(X86_FEATURE_EPB))
527 return -ENXIO;
528
529 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
530 if (ret)
531 return (s16)ret;
532
533 return (s16)(epb & 0x0f);
534}
535
/*
 * Return the current Energy Performance Preference: bits 31:24 of
 * MSR_HWP_REQUEST when HWP EPP is supported, otherwise the legacy EPB
 * value.  @hwp_req_data may carry an already-read MSR_HWP_REQUEST value
 * (pass 0 to have it read here).  Negative return means error.
 */
static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
{
	s16 epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * When hwp_req_data is 0, means that caller didn't read
		 * MSR_HWP_REQUEST, so read it now; note that "epp" briefly
		 * holds the rdmsrl_on_cpu() error code here.
		 */
		if (!hwp_req_data) {
			epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
					    &hwp_req_data);
			if (epp)
				return epp;
		}
		epp = (hwp_req_data >> 24) & 0xff;
	} else {
		/* When there is no EPP present, HWP uses EPB settings */
		epp = intel_pstate_get_epb(cpu_data);
	}

	return epp;
}
559
560static int intel_pstate_set_epb(int cpu, s16 pref)
561{
562 u64 epb;
563 int ret;
564
565 if (!boot_cpu_has(X86_FEATURE_EPB))
566 return -ENXIO;
567
568 ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
569 if (ret)
570 return ret;
571
572 epb = (epb & ~0x0f) | pref;
573 wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
574
575 return 0;
576}
577
578
579
580
581
582
583
584
585
586
587
588
/*
 * sysfs names for the EPP/EPB preferences.  Index 0 ("default") means
 * "use the firmware default"; indexes 1..4 map to epp_values[index - 1].
 */
static const char * const energy_perf_strings[] = {
	"default",
	"performance",
	"balance_performance",
	"balance_power",
	"power",
	NULL
};
/* HWP EPP register values corresponding to energy_perf_strings[1..4]. */
static const unsigned int epp_values[] = {
	HWP_EPP_PERFORMANCE,
	HWP_EPP_BALANCE_PERFORMANCE,
	HWP_EPP_BALANCE_POWERSAVE,
	HWP_EPP_POWERSAVE
};
603
/*
 * Map the CPU's current EPP/EPB value onto an index into
 * energy_perf_strings[] (1..4); negative on error.  HWP EPP values are
 * bucketed by range, EPB values by dividing the 0..15 scale into four.
 */
static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
{
	s16 epp;
	int index = -EINVAL;

	epp = intel_pstate_get_epp(cpu_data, 0);
	if (epp < 0)
		return epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (epp == HWP_EPP_PERFORMANCE)
			return 1;
		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
			return 2;
		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
			return 3;
		else
			return 4;
	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
		/*
		 * Range:
		 *	0x00-0x03	:	Performance
		 *	0x04-0x07	:	Balance performance
		 *	0x08-0x0B	:	Balance power
		 *	0x0C-0x0F	:	Power
		 * The EPB is a 4 bit value, but our ranges restrict the
		 * value which can be set. Here only using top two bits
		 * effectively.
		 */
		index = (epp >> 2) + 1;
	}

	return index;
}
638
/*
 * Apply the preference selected by @pref_index (an index into
 * energy_perf_strings[]).  Index 0 restores the firmware-default EPP;
 * otherwise the matching epp_values[] entry (HWP) or a derived EPB value
 * is written.  Serialized under intel_pstate_limits_lock; returns the
 * MSR/EPB write status.
 */
static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
					      int pref_index)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index)
		epp = cpu_data->epp_default;

	mutex_lock(&intel_pstate_limits_lock);

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		u64 value;

		ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
		if (ret)
			goto return_pref;

		/* Clear the EPP field (bits 31:24) before inserting. */
		value &= ~GENMASK_ULL(31, 24);

		if (epp == -EINVAL)
			epp = epp_values[pref_index - 1];

		value |= (u64)epp << 24;
		ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
	} else {
		if (epp == -EINVAL)
			epp = (pref_index - 1) << 2;
		ret = intel_pstate_set_epb(cpu_data->cpu, epp);
	}
return_pref:
	mutex_unlock(&intel_pstate_limits_lock);

	return ret;
}
674
675static ssize_t show_energy_performance_available_preferences(
676 struct cpufreq_policy *policy, char *buf)
677{
678 int i = 0;
679 int ret = 0;
680
681 while (energy_perf_strings[i] != NULL)
682 ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
683
684 ret += sprintf(&buf[ret], "\n");
685
686 return ret;
687}
688
689cpufreq_freq_attr_ro(energy_performance_available_preferences);
690
691static ssize_t store_energy_performance_preference(
692 struct cpufreq_policy *policy, const char *buf, size_t count)
693{
694 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
695 char str_preference[21];
696 int ret;
697
698 ret = sscanf(buf, "%20s", str_preference);
699 if (ret != 1)
700 return -EINVAL;
701
702 ret = match_string(energy_perf_strings, -1, str_preference);
703 if (ret < 0)
704 return ret;
705
706 intel_pstate_set_energy_pref_index(cpu_data, ret);
707 return count;
708}
709
710static ssize_t show_energy_performance_preference(
711 struct cpufreq_policy *policy, char *buf)
712{
713 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
714 int preference;
715
716 preference = intel_pstate_get_energy_pref_index(cpu_data);
717 if (preference < 0)
718 return preference;
719
720 return sprintf(buf, "%s\n", energy_perf_strings[preference]);
721}
722
723cpufreq_freq_attr_rw(energy_performance_preference);
724
/*
 * sysfs: report the base (guaranteed) frequency, preferring the CPPC
 * guaranteed performance level and falling back to the HWP capabilities
 * MSR when CPPC is unavailable or reports nothing.
 */
static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu;
	u64 cap;
	int ratio;

	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
	if (ratio <= 0) {
		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
		ratio = HWP_GUARANTEED_PERF(cap);
	}

	cpu = all_cpu_data[policy->cpu];

	return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
}
741
742cpufreq_freq_attr_ro(base_frequency);
743
/* Extra per-policy sysfs attributes exposed only when HWP is in use. */
static struct freq_attr *hwp_cpufreq_attrs[] = {
	&energy_performance_preference,
	&energy_performance_available_preferences,
	&base_frequency,
	NULL,
};
750
/*
 * Read MSR_HWP_CAPABILITIES for @cpu, cache it, and report the physical
 * maximum (*phy_max) and the currently usable maximum (*current_max —
 * guaranteed perf when turbo is off, highest perf otherwise).
 */
static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
				     int *current_max)
{
	u64 cap;

	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
	if (global.no_turbo)
		*current_max = HWP_GUARANTEED_PERF(cap);
	else
		*current_max = HWP_HIGHEST_PERF(cap);

	*phy_max = HWP_HIGHEST_PERF(cap);
}
765
/*
 * Program MSR_HWP_REQUEST for @cpu: min/max perf from the cached ratios
 * (min forced up to max for the performance policy) and, when the policy
 * changed, a matching EPP/EPB value.  The EPP transitions preserve the
 * previous powersave EPP so it can be restored when leaving performance
 * mode.  The final value is cached in hwp_req_cached.
 */
static void intel_pstate_hwp_set(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];
	int max, min;
	u64 value;
	s16 epp;

	max = cpu_data->max_perf_ratio;
	min = cpu_data->min_perf_ratio;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
		min = max;

	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);

	value &= ~HWP_MIN_PERF(~0L);
	value |= HWP_MIN_PERF(min);

	value &= ~HWP_MAX_PERF(~0L);
	value |= HWP_MAX_PERF(max);

	/* EPP only needs updating when the policy actually changed. */
	if (cpu_data->epp_policy == cpu_data->policy)
		goto skip_epp;

	cpu_data->epp_policy = cpu_data->policy;

	/* An EPP saved across suspend takes precedence (one-shot). */
	if (cpu_data->epp_saved >= 0) {
		epp = cpu_data->epp_saved;
		cpu_data->epp_saved = -EINVAL;
		goto update_epp;
	}

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
		/* Remember the current EPP so it can be restored later. */
		epp = intel_pstate_get_epp(cpu_data, value);
		cpu_data->epp_powersave = epp;
		/* If EPP read was failed, then don't try to write */
		if (epp < 0)
			goto skip_epp;

		epp = 0;
	} else {
		/* skip setting EPP, when saved value is invalid */
		if (cpu_data->epp_powersave < 0)
			goto skip_epp;

		/*
		 * No need to restore EPP when it is not zero. This
		 * means:
		 *  - Policy is not changed
		 *  - user has manually changed
		 *  - Error reading EPB
		 */
		epp = intel_pstate_get_epp(cpu_data, value);
		if (epp)
			goto skip_epp;

		epp = cpu_data->epp_powersave;
	}
update_epp:
	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	} else {
		intel_pstate_set_epb(cpu, epp);
	}
skip_epp:
	WRITE_ONCE(cpu_data->hwp_req_cached, value);
	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
835
/*
 * Pin @cpu to its lowest HWP performance level (min == max == lowest
 * perf from the cached capabilities) with a power-saving EPP/EPB —
 * presumably used when taking the CPU out of service; the caller is not
 * visible in this chunk.
 */
static void intel_pstate_hwp_force_min_perf(int cpu)
{
	u64 value;
	int min_perf;

	value = all_cpu_data[cpu]->hwp_req_cached;
	value &= ~GENMASK_ULL(31, 0);
	min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);

	/* Set hwp_max = hwp_min */
	value |= HWP_MAX_PERF(min_perf);
	value |= HWP_MIN_PERF(min_perf);

	/* Set EPP to min */
	if (boot_cpu_has(X86_FEATURE_HWP_EPP))
		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
	else
		intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);

	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
857
858static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
859{
860 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
861
862 if (!hwp_active)
863 return 0;
864
865 cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
866
867 return 0;
868}
869
870static void intel_pstate_hwp_enable(struct cpudata *cpudata);
871
/*
 * Resume hook: re-enable HWP (once, when called for CPU0) and reprogram
 * MSR_HWP_REQUEST; epp_policy is reset so intel_pstate_hwp_set() does not
 * skip the EPP update.  No-op without HWP.
 */
static int intel_pstate_resume(struct cpufreq_policy *policy)
{
	if (!hwp_active)
		return 0;

	mutex_lock(&intel_pstate_limits_lock);

	if (policy->cpu == 0)
		intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);

	all_cpu_data[policy->cpu]->epp_policy = 0;
	intel_pstate_hwp_set(policy->cpu);

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}
889
890static void intel_pstate_update_policies(void)
891{
892 int cpu;
893
894 for_each_possible_cpu(cpu)
895 cpufreq_update_policy(cpu);
896}
897
/*
 * Recompute @cpu's cpuinfo.max_freq after a turbo availability change
 * (turbo_disabled_mf) and push the new limits; silently returns when the
 * policy cannot be acquired.
 */
static void intel_pstate_update_max_freq(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
	struct cpudata *cpudata;

	if (!policy)
		return;

	cpudata = all_cpu_data[cpu];
	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;

	refresh_frequency_limits(policy);

	cpufreq_cpu_release(policy);
}
914
/*
 * cpufreq ->update_limits() hook.  If turbo availability flipped since
 * the last check, refresh the max frequency of every CPU; otherwise just
 * re-evaluate the policy of @cpu.  Note the parameter is reused as the
 * iterator in the for_each_possible_cpu() loop.
 */
static void intel_pstate_update_limits(unsigned int cpu)
{
	mutex_lock(&intel_pstate_driver_lock);

	update_turbo_state();
	/*
	 * If turbo has been turned on or off globally, policy limits for
	 * all CPUs need to be updated to reflect that.
	 */
	if (global.turbo_disabled_mf != global.turbo_disabled) {
		global.turbo_disabled_mf = global.turbo_disabled;
		for_each_possible_cpu(cpu)
			intel_pstate_update_max_freq(cpu);
	} else {
		cpufreq_update_policy(cpu);
	}

	mutex_unlock(&intel_pstate_driver_lock);
}
934
935
/* Generate a sysfs show callback printing a field of the global params. */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct kobj_attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", global.object);		\
	}
942
943static ssize_t intel_pstate_show_status(char *buf);
944static int intel_pstate_update_status(const char *buf, size_t size);
945
946static ssize_t show_status(struct kobject *kobj,
947 struct kobj_attribute *attr, char *buf)
948{
949 ssize_t ret;
950
951 mutex_lock(&intel_pstate_driver_lock);
952 ret = intel_pstate_show_status(buf);
953 mutex_unlock(&intel_pstate_driver_lock);
954
955 return ret;
956}
957
958static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
959 const char *buf, size_t count)
960{
961 char *p = memchr(buf, '\n', count);
962 int ret;
963
964 mutex_lock(&intel_pstate_driver_lock);
965 ret = intel_pstate_update_status(buf, p ? p - buf : count);
966 mutex_unlock(&intel_pstate_driver_lock);
967
968 return ret < 0 ? ret : count;
969}
970
/*
 * sysfs: percentage of the total P-state range (CPU0) that is turbo-only,
 * computed in fixed point as 100 - non_turbo_states/total_states * 100.
 * Returns -EAGAIN when no driver instance is registered.
 */
static ssize_t show_turbo_pct(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(no_turbo, total);
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));

	mutex_unlock(&intel_pstate_driver_lock);

	return sprintf(buf, "%u\n", turbo_pct);
}
996
997static ssize_t show_num_pstates(struct kobject *kobj,
998 struct kobj_attribute *attr, char *buf)
999{
1000 struct cpudata *cpu;
1001 int total;
1002
1003 mutex_lock(&intel_pstate_driver_lock);
1004
1005 if (!intel_pstate_driver) {
1006 mutex_unlock(&intel_pstate_driver_lock);
1007 return -EAGAIN;
1008 }
1009
1010 cpu = all_cpu_data[0];
1011 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
1012
1013 mutex_unlock(&intel_pstate_driver_lock);
1014
1015 return sprintf(buf, "%u\n", total);
1016}
1017
/*
 * sysfs: report whether turbo is off — either forced off by hardware/BIOS
 * (turbo_disabled, which wins) or requested off by the user (no_turbo).
 * Returns -EAGAIN when no driver instance is registered.
 */
static ssize_t show_no_turbo(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	update_turbo_state();
	if (global.turbo_disabled)
		ret = sprintf(buf, "%u\n", global.turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", global.no_turbo);

	mutex_unlock(&intel_pstate_driver_lock);

	return ret;
}
1040
/*
 * sysfs: set global.no_turbo.  Rejected with -EPERM when turbo is
 * disabled by the BIOS/processor; when turbo gets switched off,
 * min_perf_pct is clamped down so it stays within the non-turbo range.
 * All policies are re-evaluated afterwards.
 */
static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	update_turbo_state();
	if (global.turbo_disabled) {
		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
		mutex_unlock(&intel_pstate_limits_lock);
		mutex_unlock(&intel_pstate_driver_lock);
		return -EPERM;
	}

	global.no_turbo = clamp_t(int, input, 0, 1);

	if (global.no_turbo) {
		struct cpudata *cpu = all_cpu_data[0];
		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;

		/* Squash the global minimum into the permitted range. */
		if (global.min_perf_pct > pct)
			global.min_perf_pct = pct;
	}

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}
1087
/*
 * sysfs: set global.max_perf_pct, clamped to [min_perf_pct, 100], then
 * re-evaluate all policies.  -EAGAIN when no driver instance is registered.
 */
static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}
1117
/*
 * sysfs: set global.min_perf_pct, clamped between the hardware minimum
 * (min_perf_pct_min()) and max_perf_pct, then re-evaluate all policies.
 */
static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.min_perf_pct = clamp_t(int, input,
				      min_perf_pct_min(), global.max_perf_pct);

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}
1148
1149static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
1150 struct kobj_attribute *attr, char *buf)
1151{
1152 return sprintf(buf, "%u\n", hwp_boost);
1153}
1154
1155static ssize_t store_hwp_dynamic_boost(struct kobject *a,
1156 struct kobj_attribute *b,
1157 const char *buf, size_t count)
1158{
1159 unsigned int input;
1160 int ret;
1161
1162 ret = kstrtouint(buf, 10, &input);
1163 if (ret)
1164 return ret;
1165
1166 mutex_lock(&intel_pstate_driver_lock);
1167 hwp_boost = !!input;
1168 intel_pstate_update_policies();
1169 mutex_unlock(&intel_pstate_driver_lock);
1170
1171 return count;
1172}
1173
/* Generate show callbacks for the two percentage knobs (see show_one). */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Global sysfs attributes; *_perf_pct and hwp_dynamic_boost are added
 * conditionally in intel_pstate_sysfs_expose_params(). */
define_one_global_rw(status);
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
define_one_global_rw(hwp_dynamic_boost);

/* Attributes always present under /sys/devices/system/cpu/intel_pstate. */
static struct attribute *intel_pstate_attributes[] = {
	&status.attr,
	&no_turbo.attr,
	&turbo_pct.attr,
	&num_pstates.attr,
	NULL
};

static const struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
1196
/*
 * Create the global intel_pstate sysfs directory and attributes.  The
 * global percentage limits are skipped with per-CPU limits enabled, and
 * hwp_dynamic_boost is only exposed when HWP is active.
 */
static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	if (WARN_ON(!intel_pstate_kobject))
		return;

	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	if (WARN_ON(rc))
		return;

	/*
	 * If per cpu limits are enforced there are no global limits, so
	 * return without creating max/min_perf_pct attributes
	 */
	if (per_cpu_limits)
		return;

	rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
	WARN_ON(rc);

	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
	WARN_ON(rc);

	if (hwp_active) {
		rc = sysfs_create_file(intel_pstate_kobject,
				       &hwp_dynamic_boost.attr);
		WARN_ON(rc);
	}
}
1230
1231
/*
 * Enable HWP for @cpudata's CPU: mask HWP interrupts where supported,
 * set MSR_PM_ENABLE, and latch the firmware-default EPP on first call.
 */
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	/* First disable HWP notification interrupt as we don't process them */
	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
	cpudata->epp_policy = 0;
	if (cpudata->epp_default == -EINVAL)
		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}
1243
/* Bit in MSR_IA32_POWER_CTL which, when set, disables the energy
 * efficiency optimization. */
#define MSR_IA32_POWER_CTL_BIT_EE	19

/*
 * Disable the hardware energy-efficiency optimization on @cpu by setting
 * the EE-disable bit; skipped (and nothing logged) when the bit is
 * already set.
 */
static void intel_pstate_disable_ee(int cpu)
{
	u64 power_ctl;
	int ret;

	ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl);
	if (ret)
		return;

	if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) {
		pr_info("Disabling energy efficiency optimization\n");
		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
		wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl);
	}
}
1262
1263static int atom_get_min_pstate(void)
1264{
1265 u64 value;
1266
1267 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1268 return (value >> 8) & 0x7F;
1269}
1270
1271static int atom_get_max_pstate(void)
1272{
1273 u64 value;
1274
1275 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1276 return (value >> 16) & 0x7F;
1277}
1278
1279static int atom_get_turbo_pstate(void)
1280{
1281 u64 value;
1282
1283 rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
1284 return value & 0x7F;
1285}
1286
/*
 * Build the PERF_CTL value for @pstate on Atom: ratio in bits 15:8, the
 * turbo-disable bit (32) when the user disabled turbo, and an
 * interpolated VID in the low byte (turbo VID above max_pstate).
 */
static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (global.no_turbo && !global.turbo_disabled)
		val |= (u64)1 << 32;

	/* Linear interpolation between vid.min and vid.max (fixed-point). */
	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	/* Turbo range uses a dedicated VID instead of the interpolation. */
	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	return val | vid;
}
1309
1310static int silvermont_get_scaling(void)
1311{
1312 u64 value;
1313 int i;
1314
1315 static int silvermont_freq_table[] = {
1316 83300, 100000, 133300, 116700, 80000};
1317
1318 rdmsrl(MSR_FSB_FREQ, value);
1319 i = value & 0x7;
1320 WARN_ON(i > 4);
1321
1322 return silvermont_freq_table[i];
1323}
1324
1325static int airmont_get_scaling(void)
1326{
1327 u64 value;
1328 int i;
1329
1330 static int airmont_freq_table[] = {
1331 83300, 100000, 133300, 116700, 80000,
1332 93300, 90000, 88900, 87500};
1333
1334 rdmsrl(MSR_FSB_FREQ, value);
1335 i = value & 0xF;
1336 WARN_ON(i > 8);
1337
1338 return airmont_freq_table[i];
1339}
1340
/*
 * Read Atom VID parameters: min/max VID (fixed-point) from
 * MSR_ATOM_CORE_VIDS, the per-P-state interpolation slope, and the raw
 * turbo VID from MSR_ATOM_CORE_TURBO_VIDS.  Requires pstate.min/max to
 * be filled in already (see intel_pstate_get_cpu_pstates()).
 */
static void atom_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));

	rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}
1356
1357static int core_get_min_pstate(void)
1358{
1359 u64 value;
1360
1361 rdmsrl(MSR_PLATFORM_INFO, value);
1362 return (value >> 40) & 0xFF;
1363}
1364
1365static int core_get_max_pstate_physical(void)
1366{
1367 u64 value;
1368
1369 rdmsrl(MSR_PLATFORM_INFO, value);
1370 return (value >> 8) & 0xFF;
1371}
1372
/*
 * Return the ratio of the currently selected configurable-TDP level, or
 * -ENXIO when the platform (per @plat_info bits 33/34) does not support
 * cTDP, or an rdmsrl_safe() error.
 */
static int core_get_tdp_ratio(u64 plat_info)
{
	/* Check how many TDP levels present */
	if (plat_info & 0x600000000) {
		u64 tdp_ctrl;
		u64 tdp_ratio;
		int tdp_msr;
		int err;

		/* Get the TDP level (0, 1, 2) to get ratios */
		err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
		if (err)
			return err;

		/* TDP MSR are continuous starting at 0x648 */
		tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
		err = rdmsrl_safe(tdp_msr, &tdp_ratio);
		if (err)
			return err;

		/* For level 1 and 2, bits[23:16] contain the ratio */
		if (tdp_ctrl & 0x03)
			tdp_ratio >>= 16;

		tdp_ratio &= 0xff; /* ratios are only 8 bits long */
		pr_debug("tdp_ratio %x\n", (int)tdp_ratio);

		return (int)tdp_ratio;
	}

	return -ENXIO;
}
1405
/*
 * Core: effective max non-turbo ratio.  Starts from MSR_PLATFORM_INFO
 * bits 15:8, then prefers the configurable-TDP ratio when available
 * (returned directly with HWP active); otherwise the TDP ratio is only
 * used when it is consistent with MSR_TURBO_ACTIVATION_RATIO.
 */
static int core_get_max_pstate(void)
{
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int tdp_ratio;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	tdp_ratio = core_get_tdp_ratio(plat_info);
	if (tdp_ratio <= 0)
		return max_pstate;

	if (hwp_active) {
		/* Turbo activation ratio is not used on HWP platforms */
		return tdp_ratio;
	}

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		int tar_levels;

		/* Do some sanity checking for safety */
		tar_levels = tar & 0xff;
		if (tdp_ratio - 1 == tar_levels) {
			max_pstate = tar_levels;
			pr_debug("max_pstate=TAC %x\n", max_pstate);
		}
	}

	return max_pstate;
}
1440
1441static int core_get_turbo_pstate(void)
1442{
1443 u64 value;
1444 int nont, ret;
1445
1446 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1447 nont = core_get_max_pstate();
1448 ret = (value) & 255;
1449 if (ret <= nont)
1450 ret = nont;
1451 return ret;
1452}
1453
/* Core: each P-state ratio step corresponds to 100 MHz (100000 kHz). */
static inline int core_get_scaling(void)
{
	return 100000;
}
1458
1459static u64 core_get_val(struct cpudata *cpudata, int pstate)
1460{
1461 u64 val;
1462
1463 val = (u64)pstate << 8;
1464 if (global.no_turbo && !global.turbo_disabled)
1465 val |= (u64)1 << 32;
1466
1467 return val;
1468}
1469
/* Knights Landing: APERF/MPERF deltas must be shifted right by 10 bits. */
static int knl_get_aperf_mperf_shift(void)
{
	return 10;
}
1474
1475static int knl_get_turbo_pstate(void)
1476{
1477 u64 value;
1478 int nont, ret;
1479
1480 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1481 nont = core_get_max_pstate();
1482 ret = (((value) >> 8) & 0xFF);
1483 if (ret <= nont)
1484 ret = nont;
1485 return ret;
1486}
1487
/*
 * Write @pstate into MSR_IA32_PERF_CTL for @cpu and record it; also
 * emits the cpu_frequency tracepoint.
 */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	cpu->pstate.current_pstate = pstate;
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
}
1500
/* Drop @cpu to its minimum P-state. */
static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}
1505
/*
 * Raise @cpu to the highest P-state allowed by the current limits
 * (max_perf_ratio, but never below the hardware minimum), refreshing the
 * turbo state first.
 */
static void intel_pstate_max_within_limits(struct cpudata *cpu)
{
	int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);

	update_turbo_state();
	intel_pstate_set_pstate(cpu, pstate);
}
1513
1514static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1515{
1516 cpu->pstate.min_pstate = pstate_funcs.get_min();
1517 cpu->pstate.max_pstate = pstate_funcs.get_max();
1518 cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
1519 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
1520 cpu->pstate.scaling = pstate_funcs.get_scaling();
1521 cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
1522
1523 if (hwp_active && !hwp_mode_bdw) {
1524 unsigned int phy_max, current_max;
1525
1526 intel_pstate_get_hwp_max(cpu->cpu, &phy_max, ¤t_max);
1527 cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
1528 } else {
1529 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1530 }
1531
1532 if (pstate_funcs.get_aperf_mperf_shift)
1533 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
1534
1535 if (pstate_funcs.get_vid)
1536 pstate_funcs.get_vid(cpu);
1537
1538 intel_pstate_set_min_pstate(cpu);
1539}
1540
1541
1542
1543
1544
1545
1546
/* How long an HWP min-perf boost is held before being unwound (3 ms). */
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
1548
static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
{
	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
	u32 max_limit = (hwp_req & 0xff00) >> 8;
	u32 min_limit = (hwp_req & 0xff);
	u32 boost_level1;

	/*
	 * Raise the HWP minimum-performance hint one step per call:
	 *   current min -> halfway to guaranteed -> guaranteed -> max.
	 * intel_pstate_hwp_boost_down() restores the cached request after
	 * the boost hold time expires.
	 */

	/* Nothing to boost if min == max or we already reached the max. */
	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
		return;

	if (!cpu->hwp_boost_min)
		cpu->hwp_boost_min = min_limit;

	/* level1 is halfway between the min limit and the guaranteed perf. */
	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;

	if (cpu->hwp_boost_min < boost_level1)
		cpu->hwp_boost_min = boost_level1;
	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
		cpu->hwp_boost_min = max_limit;
	else
		return;

	/* Rewrite only the minimum-performance field (bits 7:0). */
	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
	wrmsrl(MSR_HWP_REQUEST, hwp_req);
	cpu->last_update = cpu->sample.time;
}
1594
1595static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
1596{
1597 if (cpu->hwp_boost_min) {
1598 bool expired;
1599
1600
1601 expired = time_after64(cpu->sample.time, cpu->last_update +
1602 hwp_boost_hold_time_ns);
1603 if (expired) {
1604 wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
1605 cpu->hwp_boost_min = 0;
1606 }
1607 }
1608 cpu->last_update = cpu->sample.time;
1609}
1610
/* Per-CPU part of the HWP scheduler callback: drive the boost state machine. */
static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
						      u64 time)
{
	cpu->sample.time = time;

	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
		bool do_io = false;

		cpu->sched_flags = 0;
		/*
		 * A single IOWAIT wakeup is not enough to conclude that IO
		 * bound work is running here: only boost when at least two
		 * IOWAIT wakeups arrive within two consecutive ticks.
		 */
		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
			do_io = true;

		cpu->last_io_update = time;

		if (do_io)
			intel_pstate_hwp_boost_up(cpu);

	} else {
		intel_pstate_hwp_boost_down(cpu);
	}
}
1639
1640static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
1641 u64 time, unsigned int flags)
1642{
1643 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1644
1645 cpu->sched_flags |= flags;
1646
1647 if (smp_processor_id() == cpu->cpu)
1648 intel_pstate_update_util_hwp_local(cpu, time);
1649}
1650
/* Average performance over the last sample window as APERF/MPERF (ext fixed point). */
static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;

	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
}
1657
/*
 * Take an APERF/MPERF/TSC sample and compute the deltas since the previous
 * one.  Returns true when a valid sample was recorded, false when the
 * counters did not advance or this is the very first sample.
 */
static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	/* Read all three counters atomically w.r.t. local interrupts. */
	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
		/* No progress since last time: would divide by zero below. */
		local_irq_restore(flags);
		return false;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;

	/*
	 * The first time this runs the previous-sample fields are zero or
	 * stale; they must be populated anyway, so treat the first
	 * invocation as "not valid".
	 */
	if (cpu->last_sample_time) {
		intel_pstate_calc_avg_perf(cpu);
		return true;
	}
	return false;
}
1699
/* Average frequency over the last sample window, in kHz. */
static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
}
1704
/* Average P-state actually run over the last sample window. */
static inline int32_t get_avg_pstate(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->pstate.max_pstate_physical,
			  cpu->sample.core_avg_perf);
}
1710
/*
 * Compute the next target P-state from the measured busy fraction
 * (MPERF/TSC), boosted for iowait wakeups.
 */
static inline int32_t get_target_pstate(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int32_t busy_frac;
	int target, avg_pstate;

	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
			   sample->tsc);

	/* Never let the busy estimate fall below the iowait boost floor. */
	if (busy_frac < cpu->iowait_boost)
		busy_frac = cpu->iowait_boost;

	sample->busy_scaled = busy_frac * 100;

	target = global.no_turbo || global.turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	/* Add 25% headroom so a fully busy CPU can still ramp up. */
	target += target >> 2;
	target = mul_fp(target, busy_frac);
	if (target < cpu->pstate.min_pstate)
		target = cpu->pstate.min_pstate;

	/*
	 * If the average P-state during the previous cycle was higher than
	 * the current target, add 50% of the difference to the target to
	 * damp performance oscillations and offset possible loss when a
	 * workload migrates between CPUs.
	 */
	avg_pstate = get_avg_pstate(cpu);
	if (avg_pstate > target)
		target += (avg_pstate - target) >> 1;

	return target;
}
1745
1746static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
1747{
1748 int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
1749 int max_pstate = max(min_pstate, cpu->max_perf_ratio);
1750
1751 return clamp_t(int, pstate, min_pstate, max_pstate);
1752}
1753
/*
 * Write @pstate to the local CPU's PERF_CTL MSR, skipping the write when
 * nothing changed.  Unlike intel_pstate_set_pstate() this must run on the
 * target CPU (it uses a plain wrmsrl()).
 */
static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
	if (pstate == cpu->pstate.current_pstate)
		return;

	cpu->pstate.current_pstate = pstate;
	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
1762
/* Recompute the target P-state from the latest sample, apply and trace it. */
static void intel_pstate_adjust_pstate(struct cpudata *cpu)
{
	int from = cpu->pstate.current_pstate;
	struct sample *sample;
	int target_pstate;

	update_turbo_state();

	target_pstate = get_target_pstate(cpu);
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
	intel_pstate_update_pstate(cpu, target_pstate);

	sample = &cpu->sample;
	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
		fp_toint(sample->busy_scaled),
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}
1787
/*
 * Scheduler update_util hook for the non-HWP algorithm: maintain the iowait
 * boost and resample/adjust the P-state at most once per sampling interval.
 */
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns;

	/* Don't allow remote callbacks. */
	if (smp_processor_id() != cpu->cpu)
		return;

	delta_ns = time - cpu->last_update;
	if (flags & SCHED_CPUFREQ_IOWAIT) {
		/* Start over if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC) {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		} else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
			/* Double the boost on consecutive iowait wakeups, capped at 1. */
			cpu->iowait_boost <<= 1;
			if (cpu->iowait_boost > int_tofp(1))
				cpu->iowait_boost = int_tofp(1);
		} else {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		}
	} else if (cpu->iowait_boost) {
		/* Clear iowait_boost if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC)
			cpu->iowait_boost = 0;
		else
			cpu->iowait_boost >>= 1;
	}
	cpu->last_update = time;
	delta_ns = time - cpu->sample.time;
	if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
		return;

	if (intel_pstate_sample(cpu, time))
		intel_pstate_adjust_pstate(cpu);
}
1825
/* Callback set for mainstream Core/Xeon parts. */
static struct pstate_funcs core_funcs = {
	.get_max = core_get_max_pstate,
	.get_max_physical = core_get_max_pstate_physical,
	.get_min = core_get_min_pstate,
	.get_turbo = core_get_turbo_pstate,
	.get_scaling = core_get_scaling,
	.get_val = core_get_val,
};
1834
/* Callback set for Atom Silvermont (needs VID handling and its own scaling). */
static const struct pstate_funcs silvermont_funcs = {
	.get_max = atom_get_max_pstate,
	.get_max_physical = atom_get_max_pstate,
	.get_min = atom_get_min_pstate,
	.get_turbo = atom_get_turbo_pstate,
	.get_val = atom_get_val,
	.get_scaling = silvermont_get_scaling,
	.get_vid = atom_get_vid,
};
1844
/* Callback set for Atom Airmont — like Silvermont but with Airmont scaling. */
static const struct pstate_funcs airmont_funcs = {
	.get_max = atom_get_max_pstate,
	.get_max_physical = atom_get_max_pstate,
	.get_min = atom_get_min_pstate,
	.get_turbo = atom_get_turbo_pstate,
	.get_val = atom_get_val,
	.get_scaling = airmont_get_scaling,
	.get_vid = atom_get_vid,
};
1854
/* Callback set for Xeon Phi (KNL/KNM): different turbo MSR layout and APERF shift. */
static const struct pstate_funcs knl_funcs = {
	.get_max = core_get_max_pstate,
	.get_max_physical = core_get_max_pstate_physical,
	.get_min = core_get_min_pstate,
	.get_turbo = knl_get_turbo_pstate,
	.get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
	.get_scaling = core_get_scaling,
	.get_val = core_get_val,
};
1864
/* x86_cpu_id entry for a family-6 Intel model with APERF/MPERF support. */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }
1868
/* CPU models supported without HWP, each mapped to its callback set. */
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(INTEL_FAM6_SANDYBRIDGE,		core_funcs),
	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_SILVERMONT,	silvermont_funcs),
	ICPU(INTEL_FAM6_IVYBRIDGE,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_CORE,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_funcs),
	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_X,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_ULT,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS,	core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_X,		core_funcs),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1894
/* Models checked for out-of-band (platform firmware) P-state control. */
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
	{}
};
1901
/* Models on which intel_pstate_disable_ee() is applied (see intel_pstate_init_cpu). */
static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
	{}
};
1906
/* Models where HWP dynamic boost is enabled by default on server profiles. */
static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
	{}
};
1912
1913static int intel_pstate_init_cpu(unsigned int cpunum)
1914{
1915 struct cpudata *cpu;
1916
1917 cpu = all_cpu_data[cpunum];
1918
1919 if (!cpu) {
1920 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
1921 if (!cpu)
1922 return -ENOMEM;
1923
1924 all_cpu_data[cpunum] = cpu;
1925
1926 cpu->epp_default = -EINVAL;
1927 cpu->epp_powersave = -EINVAL;
1928 cpu->epp_saved = -EINVAL;
1929 }
1930
1931 cpu = all_cpu_data[cpunum];
1932
1933 cpu->cpu = cpunum;
1934
1935 if (hwp_active) {
1936 const struct x86_cpu_id *id;
1937
1938 id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
1939 if (id)
1940 intel_pstate_disable_ee(cpunum);
1941
1942 intel_pstate_hwp_enable(cpu);
1943
1944 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
1945 if (id && intel_pstate_acpi_pm_profile_server())
1946 hwp_boost = true;
1947 }
1948
1949 intel_pstate_get_cpu_pstates(cpu);
1950
1951 pr_debug("controlling: cpu %d\n", cpunum);
1952
1953 return 0;
1954}
1955
/*
 * Register the scheduler update_util callback for @cpu_num, choosing the
 * HWP or non-HWP variant.  No-op with HWP active but boost disabled, or if
 * the hook is already installed.
 */
static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	if (hwp_active && !hwp_boost)
		return;

	if (cpu->update_util_set)
		return;

	/* Prevent intel_pstate_update_util() from using stale data. */
	cpu->sample.time = 0;
	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
				     (hwp_active ?
				      intel_pstate_update_util_hwp :
				      intel_pstate_update_util));
	cpu->update_util_set = true;
}
1974
/* Unregister the update_util callback and wait for in-flight callers to finish. */
static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];

	if (!cpu_data->update_util_set)
		return;

	cpufreq_remove_update_util_hook(cpu);
	cpu_data->update_util_set = false;
	/* Ensure no CPU is still executing the removed callback. */
	synchronize_rcu();
}
1986
1987static int intel_pstate_get_max_freq(struct cpudata *cpu)
1988{
1989 return global.turbo_disabled || global.no_turbo ?
1990 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
1991}
1992
/*
 * Translate the cpufreq policy min/max frequencies (combined with the
 * global sysfs percentage limits unless per-CPU limits are in force) into
 * min/max performance ratios stored in @cpu.
 */
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
					    struct cpudata *cpu)
{
	int max_freq = intel_pstate_get_max_freq(cpu);
	int32_t max_policy_perf, min_policy_perf;
	int max_state, turbo_max;

	/*
	 * HWP needs some special consideration, because the HWP request
	 * register may use abstract performance values rather than pure
	 * ratios, so derive the range from the HWP capabilities.
	 */
	if (hwp_active) {
		intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
	} else {
		max_state = global.no_turbo || global.turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
		turbo_max = cpu->pstate.turbo_pstate;
	}

	/* Scale the policy frequencies into performance units. */
	max_policy_perf = max_state * policy->max / max_freq;
	if (policy->max == policy->min) {
		min_policy_perf = max_policy_perf;
	} else {
		min_policy_perf = max_state * policy->min / max_freq;
		min_policy_perf = clamp_t(int32_t, min_policy_perf,
					  0, max_policy_perf);
	}

	pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
		 policy->cpu, max_state,
		 min_policy_perf, max_policy_perf);

	/* Normalize user input to [min_perf, max_perf] */
	if (per_cpu_limits) {
		cpu->min_perf_ratio = min_policy_perf;
		cpu->max_perf_ratio = max_policy_perf;
	} else {
		int32_t global_min, global_max;

		/* Global limits are in percent of the maximum turbo P-state. */
		global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
		global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
		global_min = clamp_t(int32_t, global_min, 0, global_max);

		pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu,
			 global_min, global_max);

		cpu->min_perf_ratio = max(min_policy_perf, global_min);
		cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
		cpu->max_perf_ratio = min(max_policy_perf, global_max);
		cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);

		/* Make sure min_perf <= max_perf */
		cpu->min_perf_ratio = min(cpu->min_perf_ratio,
					  cpu->max_perf_ratio);

	}
	pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu,
		 cpu->max_perf_ratio,
		 cpu->min_perf_ratio);
}
2055
/* cpufreq ->setpolicy callback for active mode. */
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpu = all_cpu_data[policy->cpu];
	cpu->policy = policy->policy;

	mutex_lock(&intel_pstate_limits_lock);

	intel_pstate_update_perf_limits(policy, cpu);

	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
		/*
		 * NOHZ_FULL CPUs need this as the governor callback may not
		 * be invoked on them, so pin the P-state here directly.
		 */
		intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_max_within_limits(cpu);
	} else {
		intel_pstate_set_update_util_hook(policy->cpu);
	}

	if (hwp_active) {
		/*
		 * When hwp_boost was active before and dynamically it
		 * was turned off, in that case we need to clear the
		 * update util hook.
		 */
		if (!hwp_boost)
			intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_hwp_set(policy->cpu);
	}

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}
2099
/*
 * Without HWP, when the physical max P-state exceeds the reported max and a
 * policy max falls between the non-turbo and turbo limits, widen it to
 * cpuinfo.max_freq so the turbo range stays reachable.
 */
static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
					   struct cpudata *cpu)
{
	if (!hwp_active &&
	    cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
	    policy->max < policy->cpuinfo.max_freq &&
	    policy->max > cpu->pstate.max_freq) {
		pr_debug("policy->max > max non turbo frequency\n");
		policy->max = policy->cpuinfo.max_freq;
	}
}
2111
/* cpufreq ->verify callback for active mode: only two governors are valid. */
static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	update_turbo_state();
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
				     intel_pstate_get_max_freq(cpu));

	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
		return -EINVAL;

	intel_pstate_adjust_policy_max(policy, cpu);

	return 0;
}
2128
/* Passive-mode ->stop_cpu: park the CPU at its minimum P-state. */
static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
	intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
}
2133
/* Active-mode ->stop_cpu: unhook the governor callback and quiesce the CPU. */
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	pr_debug("CPU %d exiting\n", policy->cpu);

	intel_pstate_clear_update_util_hook(policy->cpu);
	if (hwp_active) {
		/* Preserve HWP state across offline, then force minimum perf. */
		intel_pstate_hwp_save_state(policy);
		intel_pstate_hwp_force_min_perf(policy->cpu);
	} else {
		intel_cpufreq_stop_cpu(policy);
	}
}
2146
/* ->exit callback shared by both drivers: release ACPI limits state. */
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	intel_pstate_exit_perf_limits(policy);

	policy->fast_switch_possible = false;

	return 0;
}
2155
/* Common ->init work for both active and passive mode drivers. */
static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	/* Start with the widest possible limits; set_policy narrows them. */
	cpu->max_perf_ratio = 0xFF;
	cpu->min_perf_ratio = 0;

	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	update_turbo_state();
	global.turbo_disabled_mf = global.turbo_disabled;
	policy->cpuinfo.max_freq = global.turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	policy->cpuinfo.max_freq *= cpu->pstate.scaling;

	if (hwp_active) {
		unsigned int max_freq;

		/* HWP may report a lower achievable maximum; honor it. */
		max_freq = global.turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
		if (max_freq < policy->cpuinfo.max_freq)
			policy->cpuinfo.max_freq = max_freq;
	}

	intel_pstate_init_acpi_perf_limits(policy);

	policy->fast_switch_possible = true;

	return 0;
}
2196
2197static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
2198{
2199 int ret = __intel_pstate_cpu_init(policy);
2200
2201 if (ret)
2202 return ret;
2203
2204 if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
2205 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
2206 else
2207 policy->policy = CPUFREQ_POLICY_POWERSAVE;
2208
2209 return 0;
2210}
2211
/* Active-mode cpufreq driver: the driver itself selects P-states. */
static struct cpufreq_driver intel_pstate = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.suspend	= intel_pstate_hwp_save_state,
	.resume		= intel_pstate_resume,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_pstate_stop_cpu,
	.update_limits	= intel_pstate_update_limits,
	.name		= "intel_pstate",
};
2224
/* Passive-mode ->verify: clamp the policy and refresh the perf limits. */
static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	update_turbo_state();
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
				     intel_pstate_get_max_freq(cpu));

	intel_pstate_adjust_policy_max(policy, cpu);

	intel_pstate_update_perf_limits(policy, cpu);

	return 0;
}
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
/* Markers passed to trace_pstate_sample() identifying the requesting path. */
#define INTEL_PSTATE_TRACE_TARGET 10
#define INTEL_PSTATE_TRACE_FAST_SWITCH 90
2255
/* Emit a pstate_sample tracepoint for passive-mode transitions, if enabled. */
static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
{
	struct sample *sample;

	if (!trace_pstate_sample_enabled())
		return;

	/* Need a fresh, valid sample; bail if the counters did not advance. */
	if (!intel_pstate_sample(cpu, ktime_get()))
		return;

	sample = &cpu->sample;
	trace_pstate_sample(trace_type,
		0,
		old_pstate,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}
2277
/* Passive-mode ->target: convert the requested frequency to a P-state and apply it. */
static int intel_cpufreq_target(struct cpufreq_policy *policy,
				unsigned int target_freq,
				unsigned int relation)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct cpufreq_freqs freqs;
	int target_pstate, old_pstate;

	update_turbo_state();

	freqs.old = policy->cur;
	freqs.new = target_freq;

	cpufreq_freq_transition_begin(policy, &freqs);
	/* Round per the cpufreq relation: L = lowest >=, H = highest <=. */
	switch (relation) {
	case CPUFREQ_RELATION_L:
		target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
		break;
	case CPUFREQ_RELATION_H:
		target_pstate = freqs.new / cpu->pstate.scaling;
		break;
	default:
		target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
		break;
	}
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	old_pstate = cpu->pstate.current_pstate;
	if (target_pstate != cpu->pstate.current_pstate) {
		cpu->pstate.current_pstate = target_pstate;
		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
			      pstate_funcs.get_val(cpu, target_pstate));
	}
	/* Report the frequency actually granted after clamping. */
	freqs.new = target_pstate * cpu->pstate.scaling;
	intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate);
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}
2316
/*
 * Passive-mode ->fast_switch: like intel_cpufreq_target() but without the
 * transition notifiers; returns the frequency actually set.
 */
static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
					      unsigned int target_freq)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int target_pstate, old_pstate;

	update_turbo_state();

	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	old_pstate = cpu->pstate.current_pstate;
	intel_pstate_update_pstate(cpu, target_pstate);
	intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
	return target_pstate * cpu->pstate.scaling;
}
2332
/* Passive-mode ->init: common init plus transition timing parameters. */
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
	policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
	policy->cur = policy->cpuinfo.min_freq;

	return 0;
}
2347
/* Passive-mode cpufreq driver: an external governor picks frequencies. */
static struct cpufreq_driver intel_cpufreq = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_cpufreq_verify_policy,
	.target		= intel_cpufreq_target,
	.fast_switch	= intel_cpufreq_fast_switch,
	.init		= intel_cpufreq_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_cpufreq_stop_cpu,
	.update_limits	= intel_pstate_update_limits,
	.name		= "intel_cpufreq",
};
2359
/* Driver registered at boot; intel_pstate=passive switches to intel_cpufreq. */
static struct cpufreq_driver *default_driver = &intel_pstate;
2361
/* Tear down per-CPU state after the driver is (or failed to be) unregistered. */
static void intel_pstate_driver_cleanup(void)
{
	unsigned int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			/* Only active mode installs update_util hooks. */
			if (intel_pstate_driver == &intel_pstate)
				intel_pstate_clear_update_util_hook(cpu);

			kfree(all_cpu_data[cpu]);
			all_cpu_data[cpu] = NULL;
		}
	}
	put_online_cpus();
	intel_pstate_driver = NULL;
}
2379
/* Reset the global limits and register @driver with the cpufreq core. */
static int intel_pstate_register_driver(struct cpufreq_driver *driver)
{
	int ret;

	memset(&global, 0, sizeof(global));
	global.max_perf_pct = 100;

	intel_pstate_driver = driver;
	ret = cpufreq_register_driver(intel_pstate_driver);
	if (ret) {
		intel_pstate_driver_cleanup();
		return ret;
	}

	/* min_perf_pct depends on per-CPU state set up during registration. */
	global.min_perf_pct = min_perf_pct_min();

	return 0;
}
2398
/* Unregister the current driver; refused while HWP is active. */
static int intel_pstate_unregister_driver(void)
{
	if (hwp_active)
		return -EBUSY;

	cpufreq_unregister_driver(intel_pstate_driver);
	intel_pstate_driver_cleanup();

	return 0;
}
2409
2410static ssize_t intel_pstate_show_status(char *buf)
2411{
2412 if (!intel_pstate_driver)
2413 return sprintf(buf, "off\n");
2414
2415 return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
2416 "active" : "passive");
2417}
2418
2419static int intel_pstate_update_status(const char *buf, size_t size)
2420{
2421 int ret;
2422
2423 if (size == 3 && !strncmp(buf, "off", size))
2424 return intel_pstate_driver ?
2425 intel_pstate_unregister_driver() : -EINVAL;
2426
2427 if (size == 6 && !strncmp(buf, "active", size)) {
2428 if (intel_pstate_driver) {
2429 if (intel_pstate_driver == &intel_pstate)
2430 return 0;
2431
2432 ret = intel_pstate_unregister_driver();
2433 if (ret)
2434 return ret;
2435 }
2436
2437 return intel_pstate_register_driver(&intel_pstate);
2438 }
2439
2440 if (size == 7 && !strncmp(buf, "passive", size)) {
2441 if (intel_pstate_driver) {
2442 if (intel_pstate_driver == &intel_cpufreq)
2443 return 0;
2444
2445 ret = intel_pstate_unregister_driver();
2446 if (ret)
2447 return ret;
2448 }
2449
2450 return intel_pstate_register_driver(&intel_cpufreq);
2451 }
2452
2453 return -EINVAL;
2454}
2455
/* Boot-time flags set by intel_pstate_setup() (see early_param below). */
static int no_load __initdata;		/* intel_pstate=disable */
static int no_hwp __initdata;		/* intel_pstate=no_hwp or passive */
static int hwp_only __initdata;		/* intel_pstate=hwp_only */
static unsigned int force_load __initdata;	/* intel_pstate=force */
2460
2461static int __init intel_pstate_msrs_not_valid(void)
2462{
2463 if (!pstate_funcs.get_max() ||
2464 !pstate_funcs.get_min() ||
2465 !pstate_funcs.get_turbo())
2466 return -ENODEV;
2467
2468 return 0;
2469}
2470
2471static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
2472{
2473 pstate_funcs.get_max = funcs->get_max;
2474 pstate_funcs.get_max_physical = funcs->get_max_physical;
2475 pstate_funcs.get_min = funcs->get_min;
2476 pstate_funcs.get_turbo = funcs->get_turbo;
2477 pstate_funcs.get_scaling = funcs->get_scaling;
2478 pstate_funcs.get_val = funcs->get_val;
2479 pstate_funcs.get_vid = funcs->get_vid;
2480 pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
2481}
2482
2483#ifdef CONFIG_ACPI
2484
/* True if no CPU exposes a valid ACPI _PSS (P-state supported states) package. */
static bool __init intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			/* A well-formed _PSS package was found. */
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	pr_debug("ACPI _PSS not found\n");
	return true;
}
2514
2515static bool __init intel_pstate_no_acpi_pcch(void)
2516{
2517 acpi_status status;
2518 acpi_handle handle;
2519
2520 status = acpi_get_handle(NULL, "\\_SB", &handle);
2521 if (ACPI_FAILURE(status))
2522 goto not_found;
2523
2524 if (acpi_has_method(handle, "PCCH"))
2525 return false;
2526
2527not_found:
2528 pr_debug("ACPI PCCH not found\n");
2529 return true;
2530}
2531
/* True if any CPU exposes an ACPI _PPC (performance limit) method. */
static bool __init intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	pr_debug("ACPI _PPC not found\n");
	return false;
}
2547
/* Which ACPI mechanism a plat_info[] entry keys its decision on. */
enum {
	PSS,
	PPC,
};
2552
2553
/* Platforms known to do their own power management via _PSS or _PPC. */
static struct acpi_platform_list plat_info[] __initdata = {
	{"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, 0, PSS},
	{"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
	{ } /* End */
};
2572
/*
 * True when the platform firmware manages P-states itself (out-of-band MSR
 * bit on known server parts, or a listed platform using _PSS/_PPC), in
 * which case this driver must not load.
 */
static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
{
	const struct x86_cpu_id *id;
	u64 misc_pwr;
	int idx;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if (misc_pwr & (1 << 8)) {
			/* Bit 8 set means out-of-band P-state control. */
			pr_debug("Bit 8 in the MISC_PWR_MGMT MSR set\n");
			return true;
		}
	}

	idx = acpi_match_platform_list(plat_info);
	if (idx < 0)
		return false;

	switch (plat_info[idx].data) {
	case PSS:
		if (!intel_pstate_no_acpi_pss())
			return false;

		return intel_pstate_no_acpi_pcch();
	case PPC:
		/* intel_pstate=force overrides the _PPC check. */
		return intel_pstate_has_acpi_ppc() && !force_load;
	}

	return false;
}
2604
static void intel_pstate_request_control_from_smm(void)
{
	/*
	 * It may be unsafe to request P-state control from SMM if _PPC
	 * support has not been enabled, so only do it when the user asked
	 * for ACPI _PPC support (acpi_ppc).
	 */
	if (acpi_ppc)
		acpi_processor_pstate_control();
}
2614#else
/* !CONFIG_ACPI stubs: no platform firmware coordination is possible. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
static inline void intel_pstate_request_control_from_smm(void) {}
2618#endif
2619
/* driver_data flag marking Broadwell-style HWP (see hwp_mode_bdw). */
#define INTEL_PSTATE_HWP_BROADWELL	0x01

/* x86_cpu_id entry for a family-6 Intel model advertising HWP. */
#define ICPU_HWP(model, hwp_mode) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
2624
/* Any model with X86_FEATURE_HWP matches; Broadwell parts get special handling. */
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
	ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
	ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
	ICPU_HWP(X86_MODEL_ANY, 0),
	{}
};
2631
/* Module entry point: detect the CPU, pick callbacks, register the driver. */
static int __init intel_pstate_init(void)
{
	const struct x86_cpu_id *id;
	int rc;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (no_load)
		return -ENODEV;

	/* Prefer HWP whenever available and not disabled on the command line. */
	id = x86_match_cpu(hwp_support_ids);
	if (id) {
		copy_cpu_funcs(&core_funcs);
		if (!no_hwp) {
			hwp_active++;
			hwp_mode_bdw = id->driver_data;
			intel_pstate.attr = hwp_cpufreq_attrs;
			/* With HWP the MSR validity check is not needed. */
			goto hwp_cpu_matched;
		}
	} else {
		id = x86_match_cpu(intel_pstate_cpu_ids);
		if (!id) {
			pr_info("CPU model not supported\n");
			return -ENODEV;
		}

		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
	}

	if (intel_pstate_msrs_not_valid()) {
		pr_info("Invalid MSRs\n");
		return -ENODEV;
	}

hwp_cpu_matched:
	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists()) {
		pr_info("P-states controlled by the platform\n");
		return -ENODEV;
	}

	if (!hwp_active && hwp_only)
		return -ENOTSUPP;

	pr_info("Intel P-state driver initializing\n");

	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
	if (!all_cpu_data)
		return -ENOMEM;

	intel_pstate_request_control_from_smm();

	intel_pstate_sysfs_expose_params();

	mutex_lock(&intel_pstate_driver_lock);
	rc = intel_pstate_register_driver(default_driver);
	mutex_unlock(&intel_pstate_driver_lock);
	if (rc)
		return rc;

	if (hwp_active)
		pr_info("HWP enabled\n");

	return 0;
}
device_initcall(intel_pstate_init);
2702
/* Parse the intel_pstate= kernel command line options. */
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "passive")) {
		/* Passive mode implies no HWP. */
		pr_info("Passive mode enabled\n");
		default_driver = &intel_cpufreq;
		no_hwp = 1;
	}
	if (!strcmp(str, "no_hwp")) {
		pr_info("HWP disabled\n");
		no_hwp = 1;
	}
	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	if (!strcmp(str, "per_cpu_perf_limits"))
		per_cpu_limits = true;

#ifdef CONFIG_ACPI
	if (!strcmp(str, "support_acpi_ppc"))
		acpi_ppc = true;
#endif

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);
2734
2735MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
2736MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
2737MODULE_LICENSE("GPL");
2738