1
2
3
4
5
6
7
8
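/*
 * intel_pstate: CPU performance scaling ("P-state") driver for modern
 * Intel processors.  It can drive P-states directly through
 * MSR_IA32_PERF_CTL or hand control to the hardware via HWP
 * (Hardware-Managed P-states) when that feature is available.
 */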
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/kernel_stat.h>
13#include <linux/module.h>
14#include <linux/ktime.h>
15#include <linux/hrtimer.h>
16#include <linux/tick.h>
17#include <linux/slab.h>
18#include <linux/sched/cpufreq.h>
19#include <linux/list.h>
20#include <linux/cpu.h>
21#include <linux/cpufreq.h>
22#include <linux/sysfs.h>
23#include <linux/types.h>
24#include <linux/fs.h>
25#include <linux/acpi.h>
26#include <linux/vmalloc.h>
27#include <linux/pm_qos.h>
28#include <trace/events/power.h>
29
30#include <asm/div64.h>
31#include <asm/msr.h>
32#include <asm/cpu_device_id.h>
33#include <asm/cpufeature.h>
34#include <asm/intel-family.h>
35
36#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
37
38#define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
39#define INTEL_CPUFREQ_TRANSITION_DELAY 500
40
41#ifdef CONFIG_ACPI
42#include <acpi/processor.h>
43#include <acpi/cppc_acpi.h>
44#endif
45
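/*
 * Two fixed-point formats are used below: a plain format with an 8-bit
 * fraction (FRAC_BITS) for most ratios, and an "extended" format with a
 * 14-bit fraction (EXT_FRAC_BITS = EXT_BITS + FRAC_BITS) for the
 * APERF/MPERF based average performance, which needs extra precision.
 * For example, int_tofp(1) == 256 and percent_fp(50) == 128 (i.e. 0.5).
 */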
46#define FRAC_BITS 8
47#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
48#define fp_toint(X) ((X) >> FRAC_BITS)
49
50#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
51
52#define EXT_BITS 6
53#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
54#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
55#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
56
57static inline int32_t mul_fp(int32_t x, int32_t y)
58{
59 return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
60}
61
62static inline int32_t div_fp(s64 x, s64 y)
63{
64 return div64_s64((int64_t)x << FRAC_BITS, y);
65}
66
67static inline int ceiling_fp(int32_t x)
68{
69 int mask, ret;
70
71 ret = fp_toint(x);
72 mask = (1 << FRAC_BITS) - 1;
73 if (x & mask)
74 ret += 1;
75 return ret;
76}
77
78static inline int32_t percent_fp(int percent)
79{
80 return div_fp(percent, 100);
81}
82
83static inline u64 mul_ext_fp(u64 x, u64 y)
84{
85 return (x * y) >> EXT_FRAC_BITS;
86}
87
88static inline u64 div_ext_fp(u64 x, u64 y)
89{
90 return div64_u64(x << EXT_FRAC_BITS, y);
91}
92
93static inline int32_t percent_ext_fp(int percent)
94{
95 return div_ext_fp(percent, 100);
96}
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
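/*
 * struct sample - performance sample taken by intel_pstate_sample()
 * @core_avg_perf:	Ratio of APERF to MPERF over the sampling interval
 *			(EXT_FRAC_BITS fixed point), i.e. the average
 *			performance delivered by the core.
 * @busy_scaled:	Scaled busy fraction used for P-state selection.
 * @aperf:		APERF delta since the previous sample.
 * @mperf:		MPERF delta since the previous sample.
 * @tsc:		TSC delta since the previous sample.
 * @time:		Timestamp of the sample.
 */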
116struct sample {
117 int32_t core_avg_perf;
118 int32_t busy_scaled;
119 u64 aperf;
120 u64 mperf;
121 u64 tsc;
122 u64 time;
123};
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
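/*
 * struct pstate_data - P-state boundaries known for a CPU
 * @current_pstate:	Most recently requested P-state.
 * @min_pstate:		Minimum supported P-state.
 * @max_pstate:		Maximum non-turbo P-state (possibly TDP limited).
 * @max_pstate_physical: Maximum non-turbo P-state reported by the hardware.
 * @scaling:		Frequency step (in kHz) corresponding to one P-state.
 * @turbo_pstate:	Maximum turbo P-state.
 * @max_freq:		@max_pstate frequency in kHz.
 * @turbo_freq:		@turbo_pstate frequency in kHz.
 */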
141struct pstate_data {
142 int current_pstate;
143 int min_pstate;
144 int max_pstate;
145 int max_pstate_physical;
146 int scaling;
147 int turbo_pstate;
148 unsigned int max_freq;
149 unsigned int turbo_freq;
150};
151
152
153
154
155
156
157
158
159
160
161
162
163
164
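/*
 * struct vid_data - voltage identifier (VID) data for Atom CPUs that need
 * the VID programmed along with the target P-state.
 * @min:	VID for the minimum P-state (fixed point).
 * @max:	VID for the maximum non-turbo P-state (fixed point).
 * @turbo:	VID used for turbo P-states.
 * @ratio:	Slope used to interpolate the VID between @min and @max.
 */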
165struct vid_data {
166 int min;
167 int max;
168 int turbo;
169 int32_t ratio;
170};
171
172
173
174
175
176
177
178
179
180
181
182
183
184
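/*
 * struct global_params - driver-wide tunables and state
 * @no_turbo:		Whether the user has disabled turbo via sysfs.
 * @turbo_disabled:	Whether turbo is disabled by the BIOS/hardware.
 * @turbo_disabled_mf:	Copy of @turbo_disabled used by the ->update_limits()
 *			callback to detect changes.
 * @max_perf_pct:	Global maximum performance limit in percent.
 * @min_perf_pct:	Global minimum performance limit in percent.
 */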
185struct global_params {
186 bool no_turbo;
187 bool turbo_disabled;
188 bool turbo_disabled_mf;
189 int max_perf_pct;
190 int min_perf_pct;
191};
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
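/*
 * struct cpudata - per-CPU instance data kept by the driver, covering the
 * current policy, the last APERF/MPERF/TSC sample, the P-state and VID
 * limits discovered for the CPU, cached HWP request/capability MSR values,
 * energy-performance preference (EPP/EPB) bookkeeping and the I/O-wait
 * boost state.
 */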
233struct cpudata {
234 int cpu;
235
236 unsigned int policy;
237 struct update_util_data update_util;
238 bool update_util_set;
239
240 struct pstate_data pstate;
241 struct vid_data vid;
242
243 u64 last_update;
244 u64 last_sample_time;
245 u64 aperf_mperf_shift;
246 u64 prev_aperf;
247 u64 prev_mperf;
248 u64 prev_tsc;
	u64 prev_cumulative_iowait;
250 struct sample sample;
251 int32_t min_perf_ratio;
252 int32_t max_perf_ratio;
253#ifdef CONFIG_ACPI
254 struct acpi_processor_performance acpi_perf_data;
255 bool valid_pss_table;
256#endif
257 unsigned int iowait_boost;
258 s16 epp_powersave;
259 s16 epp_policy;
260 s16 epp_default;
261 s16 epp_saved;
262 u64 hwp_req_cached;
263 u64 hwp_cap_cached;
264 u64 last_io_update;
265 unsigned int sched_flags;
266 u32 hwp_boost_min;
267};
268
269static struct cpudata **all_cpu_data;
270
271
272
273
274
275
276
277
278
279
280
281
282
283
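/*
 * struct pstate_funcs - per-CPU-model callbacks used to read the P-state
 * limits, compute the PERF_CTL value for a given P-state and, on Atom,
 * look up the VID data.  Selected through the x86_cpu_id match data.
 */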
284struct pstate_funcs {
285 int (*get_max)(void);
286 int (*get_max_physical)(void);
287 int (*get_min)(void);
288 int (*get_turbo)(void);
289 int (*get_scaling)(void);
290 int (*get_aperf_mperf_shift)(void);
291 u64 (*get_val)(struct cpudata*, int pstate);
292 void (*get_vid)(struct cpudata *);
293};
294
295static struct pstate_funcs pstate_funcs __read_mostly;
296
297static int hwp_active __read_mostly;
298static int hwp_mode_bdw __read_mostly;
299static bool per_cpu_limits __read_mostly;
300static bool hwp_boost __read_mostly;
301
302static struct cpufreq_driver *intel_pstate_driver __read_mostly;
303
304#ifdef CONFIG_ACPI
305static bool acpi_ppc;
306#endif
307
308static struct global_params global;
309
310static DEFINE_MUTEX(intel_pstate_driver_lock);
311static DEFINE_MUTEX(intel_pstate_limits_lock);
312
313#ifdef CONFIG_ACPI
314
315static bool intel_pstate_acpi_pm_profile_server(void)
316{
317 if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
318 acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
319 return true;
320
321 return false;
322}
323
324static bool intel_pstate_get_ppc_enable_status(void)
325{
326 if (intel_pstate_acpi_pm_profile_server())
327 return true;
328
329 return acpi_ppc;
330}
331
332#ifdef CONFIG_ACPI_CPPC_LIB
333
334
static void intel_pstate_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_itmt_work, intel_pstate_sched_itmt_work_fn);
341
342static void intel_pstate_set_itmt_prio(int cpu)
343{
344 struct cppc_perf_caps cppc_perf;
345 static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
346 int ret;
347
348 ret = cppc_get_perf_caps(cpu, &cppc_perf);
349 if (ret)
350 return;
351
352
353
354
355
356
357 sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
358
359 if (max_highest_perf <= min_highest_perf) {
360 if (cppc_perf.highest_perf > max_highest_perf)
361 max_highest_perf = cppc_perf.highest_perf;
362
363 if (cppc_perf.highest_perf < min_highest_perf)
364 min_highest_perf = cppc_perf.highest_perf;
365
366 if (max_highest_perf > min_highest_perf) {
367
368
369
370
371
372
373 schedule_work(&sched_itmt_work);
374 }
375 }
376}
377
static int intel_pstate_get_cppc_guaranteed(int cpu)
379{
380 struct cppc_perf_caps cppc_perf;
381 int ret;
382
383 ret = cppc_get_perf_caps(cpu, &cppc_perf);
384 if (ret)
385 return ret;
386
387 if (cppc_perf.guaranteed_perf)
388 return cppc_perf.guaranteed_perf;
389
390 return cppc_perf.nominal_perf;
391}
392
393#else
394static void intel_pstate_set_itmt_prio(int cpu)
395{
396}
397#endif
398
399static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
400{
401 struct cpudata *cpu;
402 int ret;
403 int i;
404
405 if (hwp_active) {
406 intel_pstate_set_itmt_prio(policy->cpu);
407 return;
408 }
409
410 if (!intel_pstate_get_ppc_enable_status())
411 return;
412
413 cpu = all_cpu_data[policy->cpu];
414
415 ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
416 policy->cpu);
417 if (ret)
418 return;
419
420
421
422
423
424
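	/*
	 * Only use the ACPI _PSS data if the control values are in the
	 * standard PERF_CTL (fixed hardware) format; otherwise fall back
	 * to the defaults and ignore _PPC limits.
	 */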
425 if (cpu->acpi_perf_data.control_register.space_id !=
426 ACPI_ADR_SPACE_FIXED_HARDWARE)
427 goto err;
428
429
430
431
432
433 if (cpu->acpi_perf_data.state_count < 2)
434 goto err;
435
436 pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
437 for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
438 pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
439 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
440 (u32) cpu->acpi_perf_data.states[i].core_frequency,
441 (u32) cpu->acpi_perf_data.states[i].power,
442 (u32) cpu->acpi_perf_data.states[i].control);
443 }
444
445
446
447
448
449
450
451
452
453
454
455
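	/*
	 * The first _PSS entry normally holds the max non-turbo frequency
	 * plus 1 MHz rather than the full turbo range, so when turbo is
	 * available replace it with the real maximum frequency.
	 */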
456 if (!global.turbo_disabled)
457 cpu->acpi_perf_data.states[0].core_frequency =
458 policy->cpuinfo.max_freq / 1000;
459 cpu->valid_pss_table = true;
460 pr_debug("_PPC limits will be enforced\n");
461
462 return;
463
464 err:
465 cpu->valid_pss_table = false;
466 acpi_processor_unregister_performance(policy->cpu);
467}
468
469static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
470{
471 struct cpudata *cpu;
472
473 cpu = all_cpu_data[policy->cpu];
474 if (!cpu->valid_pss_table)
475 return;
476
477 acpi_processor_unregister_performance(policy->cpu);
478}
479#else
480static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
481{
482}
483
484static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
485{
486}
487
488static inline bool intel_pstate_acpi_pm_profile_server(void)
489{
490 return false;
491}
492#endif
493
494#ifndef CONFIG_ACPI_CPPC_LIB
static int intel_pstate_get_cppc_guaranteed(int cpu)
496{
497 return -ENOTSUPP;
498}
499#endif
500
501static inline void update_turbo_state(void)
502{
503 u64 misc_en;
504 struct cpudata *cpu;
505
506 cpu = all_cpu_data[0];
507 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
508 global.turbo_disabled =
509 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
510 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
511}
512
513static int min_perf_pct_min(void)
514{
515 struct cpudata *cpu = all_cpu_data[0];
516 int turbo_pstate = cpu->pstate.turbo_pstate;
517
518 return turbo_pstate ?
519 (cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
520}
521
522static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
523{
524 u64 epb;
525 int ret;
526
527 if (!boot_cpu_has(X86_FEATURE_EPB))
528 return -ENXIO;
529
530 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
531 if (ret)
532 return (s16)ret;
533
534 return (s16)(epb & 0x0f);
535}
536
537static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
538{
539 s16 epp;
540
541 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
542
543
544
545
546 if (!hwp_req_data) {
547 epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
548 &hwp_req_data);
549 if (epp)
550 return epp;
551 }
552 epp = (hwp_req_data >> 24) & 0xff;
553 } else {
554
555 epp = intel_pstate_get_epb(cpu_data);
556 }
557
558 return epp;
559}
560
561static int intel_pstate_set_epb(int cpu, s16 pref)
562{
563 u64 epb;
564 int ret;
565
566 if (!boot_cpu_has(X86_FEATURE_EPB))
567 return -ENXIO;
568
569 ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
570 if (ret)
571 return ret;
572
573 epb = (epb & ~0x0f) | pref;
574 wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
575
576 return 0;
577}
578
579
580
581
582
583
584
585
586
587
588
589
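/*
 * EPP (performance preference on HWP systems) and EPB (energy/performance
 * bias on older ones) are exposed through a common set of preference
 * strings.  Index 0 is "default" (the firmware-provided value); indexes
 * 1-4 map to the epp_values[] entries below, from most performance
 * oriented to most power oriented.
 */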
590static const char * const energy_perf_strings[] = {
591 "default",
592 "performance",
593 "balance_performance",
594 "balance_power",
595 "power",
596 NULL
597};
598static const unsigned int epp_values[] = {
599 HWP_EPP_PERFORMANCE,
600 HWP_EPP_BALANCE_PERFORMANCE,
601 HWP_EPP_BALANCE_POWERSAVE,
602 HWP_EPP_POWERSAVE
603};
604
605static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
606{
607 s16 epp;
608 int index = -EINVAL;
609
610 epp = intel_pstate_get_epp(cpu_data, 0);
611 if (epp < 0)
612 return epp;
613
614 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
615 if (epp == HWP_EPP_PERFORMANCE)
616 return 1;
617 if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
618 return 2;
619 if (epp <= HWP_EPP_BALANCE_POWERSAVE)
620 return 3;
621 else
622 return 4;
623 } else if (boot_cpu_has(X86_FEATURE_EPB)) {
624
625
626
627
628
629
630
631
632
633
634 index = (epp >> 2) + 1;
635 }
636
637 return index;
638}
639
640static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
641 int pref_index)
642{
643 int epp = -EINVAL;
644 int ret;
645
646 if (!pref_index)
647 epp = cpu_data->epp_default;
648
649 mutex_lock(&intel_pstate_limits_lock);
650
651 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
652 u64 value;
653
654 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
655 if (ret)
656 goto return_pref;
657
658 value &= ~GENMASK_ULL(31, 24);
659
660 if (epp == -EINVAL)
661 epp = epp_values[pref_index - 1];
662
663 value |= (u64)epp << 24;
664 ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
665 } else {
666 if (epp == -EINVAL)
667 epp = (pref_index - 1) << 2;
668 ret = intel_pstate_set_epb(cpu_data->cpu, epp);
669 }
670return_pref:
671 mutex_unlock(&intel_pstate_limits_lock);
672
673 return ret;
674}
675
676static ssize_t show_energy_performance_available_preferences(
677 struct cpufreq_policy *policy, char *buf)
678{
679 int i = 0;
680 int ret = 0;
681
682 while (energy_perf_strings[i] != NULL)
683 ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
684
685 ret += sprintf(&buf[ret], "\n");
686
687 return ret;
688}
689
690cpufreq_freq_attr_ro(energy_performance_available_preferences);
691
692static ssize_t store_energy_performance_preference(
693 struct cpufreq_policy *policy, const char *buf, size_t count)
694{
695 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
696 char str_preference[21];
697 int ret;
698
699 ret = sscanf(buf, "%20s", str_preference);
700 if (ret != 1)
701 return -EINVAL;
702
703 ret = match_string(energy_perf_strings, -1, str_preference);
704 if (ret < 0)
705 return ret;
706
707 intel_pstate_set_energy_pref_index(cpu_data, ret);
708 return count;
709}
710
711static ssize_t show_energy_performance_preference(
712 struct cpufreq_policy *policy, char *buf)
713{
714 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
715 int preference;
716
717 preference = intel_pstate_get_energy_pref_index(cpu_data);
718 if (preference < 0)
719 return preference;
720
721 return sprintf(buf, "%s\n", energy_perf_strings[preference]);
722}
723
724cpufreq_freq_attr_rw(energy_performance_preference);
725
726static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
727{
728 struct cpudata *cpu;
729 u64 cap;
730 int ratio;
731
	ratio = intel_pstate_get_cppc_guaranteed(policy->cpu);
733 if (ratio <= 0) {
734 rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
735 ratio = HWP_GUARANTEED_PERF(cap);
736 }
737
738 cpu = all_cpu_data[policy->cpu];
739
740 return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
741}
742
743cpufreq_freq_attr_ro(base_frequency);
744
745static struct freq_attr *hwp_cpufreq_attrs[] = {
746 &energy_performance_preference,
747 &energy_performance_available_preferences,
748 &base_frequency,
749 NULL,
750};
751
752static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
753 int *current_max)
754{
755 u64 cap;
756
757 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
758 WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
759 if (global.no_turbo)
760 *current_max = HWP_GUARANTEED_PERF(cap);
761 else
762 *current_max = HWP_HIGHEST_PERF(cap);
763
764 *phy_max = HWP_HIGHEST_PERF(cap);
765}
766
767static void intel_pstate_hwp_set(unsigned int cpu)
768{
769 struct cpudata *cpu_data = all_cpu_data[cpu];
770 int max, min;
771 u64 value;
772 s16 epp;
773
774 max = cpu_data->max_perf_ratio;
775 min = cpu_data->min_perf_ratio;
776
777 if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
778 min = max;
779
780 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
781
782 value &= ~HWP_MIN_PERF(~0L);
783 value |= HWP_MIN_PERF(min);
784
785 value &= ~HWP_MAX_PERF(~0L);
786 value |= HWP_MAX_PERF(max);
787
788 if (cpu_data->epp_policy == cpu_data->policy)
789 goto skip_epp;
790
791 cpu_data->epp_policy = cpu_data->policy;
792
793 if (cpu_data->epp_saved >= 0) {
794 epp = cpu_data->epp_saved;
795 cpu_data->epp_saved = -EINVAL;
796 goto update_epp;
797 }
798
799 if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
800 epp = intel_pstate_get_epp(cpu_data, value);
801 cpu_data->epp_powersave = epp;
802
803 if (epp < 0)
804 goto skip_epp;
805
806 epp = 0;
807 } else {
808
809 if (cpu_data->epp_powersave < 0)
810 goto skip_epp;
811
812
813
814
815
816
817
818
819 epp = intel_pstate_get_epp(cpu_data, value);
820 if (epp)
821 goto skip_epp;
822
823 epp = cpu_data->epp_powersave;
824 }
825update_epp:
826 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
827 value &= ~GENMASK_ULL(31, 24);
828 value |= (u64)epp << 24;
829 } else {
830 intel_pstate_set_epb(cpu, epp);
831 }
832skip_epp:
833 WRITE_ONCE(cpu_data->hwp_req_cached, value);
834 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
835}
836
837static void intel_pstate_hwp_force_min_perf(int cpu)
838{
839 u64 value;
840 int min_perf;
841
842 value = all_cpu_data[cpu]->hwp_req_cached;
843 value &= ~GENMASK_ULL(31, 0);
844 min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);
845
846
847 value |= HWP_MAX_PERF(min_perf);
848 value |= HWP_MIN_PERF(min_perf);
849
850
851 if (boot_cpu_has(X86_FEATURE_HWP_EPP))
852 value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
853
854 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
855}
856
857static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
858{
859 struct cpudata *cpu_data = all_cpu_data[policy->cpu];
860
861 if (!hwp_active)
862 return 0;
863
864 cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
865
866 return 0;
867}
868
869static void intel_pstate_hwp_enable(struct cpudata *cpudata);
870
871static int intel_pstate_resume(struct cpufreq_policy *policy)
872{
873 if (!hwp_active)
874 return 0;
875
876 mutex_lock(&intel_pstate_limits_lock);
877
878 if (policy->cpu == 0)
879 intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);
880
881 all_cpu_data[policy->cpu]->epp_policy = 0;
882 intel_pstate_hwp_set(policy->cpu);
883
884 mutex_unlock(&intel_pstate_limits_lock);
885
886 return 0;
887}
888
889static void intel_pstate_update_policies(void)
890{
891 int cpu;
892
893 for_each_possible_cpu(cpu)
894 cpufreq_update_policy(cpu);
895}
896
897static void intel_pstate_update_max_freq(unsigned int cpu)
898{
899 struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
900 struct cpudata *cpudata;
901
902 if (!policy)
903 return;
904
905 cpudata = all_cpu_data[cpu];
906 policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
907 cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
908
909 refresh_frequency_limits(policy);
910
911 cpufreq_cpu_release(policy);
912}
913
914static void intel_pstate_update_limits(unsigned int cpu)
915{
916 mutex_lock(&intel_pstate_driver_lock);
917
918 update_turbo_state();
919
920
921
922
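	/*
	 * If turbo has been turned on or off globally, the limits of all
	 * policies need to be updated to reflect that.
	 */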
923 if (global.turbo_disabled_mf != global.turbo_disabled) {
924 global.turbo_disabled_mf = global.turbo_disabled;
925 for_each_possible_cpu(cpu)
926 intel_pstate_update_max_freq(cpu);
927 } else {
928 cpufreq_update_policy(cpu);
929 }
930
931 mutex_unlock(&intel_pstate_driver_lock);
932}
933
934
935#define show_one(file_name, object) \
936 static ssize_t show_##file_name \
937 (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
938 { \
939 return sprintf(buf, "%u\n", global.object); \
940 }
941
942static ssize_t intel_pstate_show_status(char *buf);
943static int intel_pstate_update_status(const char *buf, size_t size);
944
945static ssize_t show_status(struct kobject *kobj,
946 struct kobj_attribute *attr, char *buf)
947{
948 ssize_t ret;
949
950 mutex_lock(&intel_pstate_driver_lock);
951 ret = intel_pstate_show_status(buf);
952 mutex_unlock(&intel_pstate_driver_lock);
953
954 return ret;
955}
956
957static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
958 const char *buf, size_t count)
959{
960 char *p = memchr(buf, '\n', count);
961 int ret;
962
963 mutex_lock(&intel_pstate_driver_lock);
964 ret = intel_pstate_update_status(buf, p ? p - buf : count);
965 mutex_unlock(&intel_pstate_driver_lock);
966
967 return ret < 0 ? ret : count;
968}
969
970static ssize_t show_turbo_pct(struct kobject *kobj,
971 struct kobj_attribute *attr, char *buf)
972{
973 struct cpudata *cpu;
974 int total, no_turbo, turbo_pct;
975 uint32_t turbo_fp;
976
977 mutex_lock(&intel_pstate_driver_lock);
978
979 if (!intel_pstate_driver) {
980 mutex_unlock(&intel_pstate_driver_lock);
981 return -EAGAIN;
982 }
983
984 cpu = all_cpu_data[0];
985
986 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
987 no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
988 turbo_fp = div_fp(no_turbo, total);
989 turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
990
991 mutex_unlock(&intel_pstate_driver_lock);
992
993 return sprintf(buf, "%u\n", turbo_pct);
994}
995
996static ssize_t show_num_pstates(struct kobject *kobj,
997 struct kobj_attribute *attr, char *buf)
998{
999 struct cpudata *cpu;
1000 int total;
1001
1002 mutex_lock(&intel_pstate_driver_lock);
1003
1004 if (!intel_pstate_driver) {
1005 mutex_unlock(&intel_pstate_driver_lock);
1006 return -EAGAIN;
1007 }
1008
1009 cpu = all_cpu_data[0];
1010 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
1011
1012 mutex_unlock(&intel_pstate_driver_lock);
1013
1014 return sprintf(buf, "%u\n", total);
1015}
1016
1017static ssize_t show_no_turbo(struct kobject *kobj,
1018 struct kobj_attribute *attr, char *buf)
1019{
1020 ssize_t ret;
1021
1022 mutex_lock(&intel_pstate_driver_lock);
1023
1024 if (!intel_pstate_driver) {
1025 mutex_unlock(&intel_pstate_driver_lock);
1026 return -EAGAIN;
1027 }
1028
1029 update_turbo_state();
1030 if (global.turbo_disabled)
1031 ret = sprintf(buf, "%u\n", global.turbo_disabled);
1032 else
1033 ret = sprintf(buf, "%u\n", global.no_turbo);
1034
1035 mutex_unlock(&intel_pstate_driver_lock);
1036
1037 return ret;
1038}
1039
1040static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
1041 const char *buf, size_t count)
1042{
1043 unsigned int input;
1044 int ret;
1045
1046 ret = sscanf(buf, "%u", &input);
1047 if (ret != 1)
1048 return -EINVAL;
1049
1050 mutex_lock(&intel_pstate_driver_lock);
1051
1052 if (!intel_pstate_driver) {
1053 mutex_unlock(&intel_pstate_driver_lock);
1054 return -EAGAIN;
1055 }
1056
1057 mutex_lock(&intel_pstate_limits_lock);
1058
1059 update_turbo_state();
1060 if (global.turbo_disabled) {
1061 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
1062 mutex_unlock(&intel_pstate_limits_lock);
1063 mutex_unlock(&intel_pstate_driver_lock);
1064 return -EPERM;
1065 }
1066
1067 global.no_turbo = clamp_t(int, input, 0, 1);
1068
1069 if (global.no_turbo) {
1070 struct cpudata *cpu = all_cpu_data[0];
1071 int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
1072
1073
1074 if (global.min_perf_pct > pct)
1075 global.min_perf_pct = pct;
1076 }
1077
1078 mutex_unlock(&intel_pstate_limits_lock);
1079
1080 intel_pstate_update_policies();
1081
1082 mutex_unlock(&intel_pstate_driver_lock);
1083
1084 return count;
1085}
1086
1087static struct cpufreq_driver intel_pstate;
1088
1089static void update_qos_request(enum freq_qos_req_type type)
1090{
1091 int max_state, turbo_max, freq, i, perf_pct;
1092 struct freq_qos_request *req;
1093 struct cpufreq_policy *policy;
1094
1095 for_each_possible_cpu(i) {
1096 struct cpudata *cpu = all_cpu_data[i];
1097
1098 policy = cpufreq_cpu_get(i);
1099 if (!policy)
1100 continue;
1101
1102 req = policy->driver_data;
1103 cpufreq_cpu_put(policy);
1104
1105 if (!req)
1106 continue;
1107
1108 if (hwp_active)
1109 intel_pstate_get_hwp_max(i, &turbo_max, &max_state);
1110 else
1111 turbo_max = cpu->pstate.turbo_pstate;
1112
1113 if (type == FREQ_QOS_MIN) {
1114 perf_pct = global.min_perf_pct;
1115 } else {
1116 req++;
1117 perf_pct = global.max_perf_pct;
1118 }
1119
1120 freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
1121 freq *= cpu->pstate.scaling;
1122
1123 if (freq_qos_update_request(req, freq) < 0)
1124 pr_warn("Failed to update freq constraint: CPU%d\n", i);
1125 }
1126}
1127
1128static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
1129 const char *buf, size_t count)
1130{
1131 unsigned int input;
1132 int ret;
1133
1134 ret = sscanf(buf, "%u", &input);
1135 if (ret != 1)
1136 return -EINVAL;
1137
1138 mutex_lock(&intel_pstate_driver_lock);
1139
1140 if (!intel_pstate_driver) {
1141 mutex_unlock(&intel_pstate_driver_lock);
1142 return -EAGAIN;
1143 }
1144
1145 mutex_lock(&intel_pstate_limits_lock);
1146
1147 global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
1148
1149 mutex_unlock(&intel_pstate_limits_lock);
1150
1151 if (intel_pstate_driver == &intel_pstate)
1152 intel_pstate_update_policies();
1153 else
1154 update_qos_request(FREQ_QOS_MAX);
1155
1156 mutex_unlock(&intel_pstate_driver_lock);
1157
1158 return count;
1159}
1160
1161static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
1162 const char *buf, size_t count)
1163{
1164 unsigned int input;
1165 int ret;
1166
1167 ret = sscanf(buf, "%u", &input);
1168 if (ret != 1)
1169 return -EINVAL;
1170
1171 mutex_lock(&intel_pstate_driver_lock);
1172
1173 if (!intel_pstate_driver) {
1174 mutex_unlock(&intel_pstate_driver_lock);
1175 return -EAGAIN;
1176 }
1177
1178 mutex_lock(&intel_pstate_limits_lock);
1179
1180 global.min_perf_pct = clamp_t(int, input,
1181 min_perf_pct_min(), global.max_perf_pct);
1182
1183 mutex_unlock(&intel_pstate_limits_lock);
1184
1185 if (intel_pstate_driver == &intel_pstate)
1186 intel_pstate_update_policies();
1187 else
1188 update_qos_request(FREQ_QOS_MIN);
1189
1190 mutex_unlock(&intel_pstate_driver_lock);
1191
1192 return count;
1193}
1194
1195static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
1196 struct kobj_attribute *attr, char *buf)
1197{
1198 return sprintf(buf, "%u\n", hwp_boost);
1199}
1200
1201static ssize_t store_hwp_dynamic_boost(struct kobject *a,
1202 struct kobj_attribute *b,
1203 const char *buf, size_t count)
1204{
1205 unsigned int input;
1206 int ret;
1207
1208 ret = kstrtouint(buf, 10, &input);
1209 if (ret)
1210 return ret;
1211
1212 mutex_lock(&intel_pstate_driver_lock);
1213 hwp_boost = !!input;
1214 intel_pstate_update_policies();
1215 mutex_unlock(&intel_pstate_driver_lock);
1216
1217 return count;
1218}
1219
1220show_one(max_perf_pct, max_perf_pct);
1221show_one(min_perf_pct, min_perf_pct);
1222
1223define_one_global_rw(status);
1224define_one_global_rw(no_turbo);
1225define_one_global_rw(max_perf_pct);
1226define_one_global_rw(min_perf_pct);
1227define_one_global_ro(turbo_pct);
1228define_one_global_ro(num_pstates);
1229define_one_global_rw(hwp_dynamic_boost);
1230
1231static struct attribute *intel_pstate_attributes[] = {
1232 &status.attr,
1233 &no_turbo.attr,
1234 &turbo_pct.attr,
1235 &num_pstates.attr,
1236 NULL
1237};
1238
1239static const struct attribute_group intel_pstate_attr_group = {
1240 .attrs = intel_pstate_attributes,
1241};
1242
1243static void __init intel_pstate_sysfs_expose_params(void)
1244{
1245 struct kobject *intel_pstate_kobject;
1246 int rc;
1247
1248 intel_pstate_kobject = kobject_create_and_add("intel_pstate",
1249 &cpu_subsys.dev_root->kobj);
1250 if (WARN_ON(!intel_pstate_kobject))
1251 return;
1252
1253 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
1254 if (WARN_ON(rc))
1255 return;
1256
1257
1258
1259
1260
1261 if (per_cpu_limits)
1262 return;
1263
1264 rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
1265 WARN_ON(rc);
1266
1267 rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
1268 WARN_ON(rc);
1269
1270 if (hwp_active) {
1271 rc = sysfs_create_file(intel_pstate_kobject,
1272 &hwp_dynamic_boost.attr);
1273 WARN_ON(rc);
1274 }
1275}
1276
1277
1278static void intel_pstate_hwp_enable(struct cpudata *cpudata)
1279{
1280
1281 if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
1282 wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
1283
1284 wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
1285 cpudata->epp_policy = 0;
1286 if (cpudata->epp_default == -EINVAL)
1287 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
1288}
1289
1290#define MSR_IA32_POWER_CTL_BIT_EE 19
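/* Setting bit 19 of MSR_IA32_POWER_CTL disables the energy-efficiency optimization. */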
1291
1292
1293static void intel_pstate_disable_ee(int cpu)
1294{
1295 u64 power_ctl;
1296 int ret;
1297
1298 ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl);
1299 if (ret)
1300 return;
1301
1302 if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) {
1303 pr_info("Disabling energy efficiency optimization\n");
1304 power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
1305 wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl);
1306 }
1307}
1308
1309static int atom_get_min_pstate(void)
1310{
1311 u64 value;
1312
1313 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1314 return (value >> 8) & 0x7F;
1315}
1316
1317static int atom_get_max_pstate(void)
1318{
1319 u64 value;
1320
1321 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1322 return (value >> 16) & 0x7F;
1323}
1324
1325static int atom_get_turbo_pstate(void)
1326{
1327 u64 value;
1328
1329 rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
1330 return value & 0x7F;
1331}
1332
1333static u64 atom_get_val(struct cpudata *cpudata, int pstate)
1334{
1335 u64 val;
1336 int32_t vid_fp;
1337 u32 vid;
1338
1339 val = (u64)pstate << 8;
1340 if (global.no_turbo && !global.turbo_disabled)
1341 val |= (u64)1 << 32;
1342
1343 vid_fp = cpudata->vid.min + mul_fp(
1344 int_tofp(pstate - cpudata->pstate.min_pstate),
1345 cpudata->vid.ratio);
1346
1347 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
1348 vid = ceiling_fp(vid_fp);
1349
1350 if (pstate > cpudata->pstate.max_pstate)
1351 vid = cpudata->vid.turbo;
1352
1353 return val | vid;
1354}
1355
1356static int silvermont_get_scaling(void)
1357{
1358 u64 value;
1359 int i;
1360
1361 static int silvermont_freq_table[] = {
1362 83300, 100000, 133300, 116700, 80000};
1363
1364 rdmsrl(MSR_FSB_FREQ, value);
1365 i = value & 0x7;
1366 WARN_ON(i > 4);
1367
1368 return silvermont_freq_table[i];
1369}
1370
1371static int airmont_get_scaling(void)
1372{
1373 u64 value;
1374 int i;
1375
1376 static int airmont_freq_table[] = {
1377 83300, 100000, 133300, 116700, 80000,
1378 93300, 90000, 88900, 87500};
1379
1380 rdmsrl(MSR_FSB_FREQ, value);
1381 i = value & 0xF;
1382 WARN_ON(i > 8);
1383
1384 return airmont_freq_table[i];
1385}
1386
1387static void atom_get_vid(struct cpudata *cpudata)
1388{
1389 u64 value;
1390
1391 rdmsrl(MSR_ATOM_CORE_VIDS, value);
1392 cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
1393 cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
1394 cpudata->vid.ratio = div_fp(
1395 cpudata->vid.max - cpudata->vid.min,
1396 int_tofp(cpudata->pstate.max_pstate -
1397 cpudata->pstate.min_pstate));
1398
1399 rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
1400 cpudata->vid.turbo = value & 0x7f;
1401}
1402
1403static int core_get_min_pstate(void)
1404{
1405 u64 value;
1406
1407 rdmsrl(MSR_PLATFORM_INFO, value);
1408 return (value >> 40) & 0xFF;
1409}
1410
1411static int core_get_max_pstate_physical(void)
1412{
1413 u64 value;
1414
1415 rdmsrl(MSR_PLATFORM_INFO, value);
1416 return (value >> 8) & 0xFF;
1417}
1418
1419static int core_get_tdp_ratio(u64 plat_info)
1420{
1421
1422 if (plat_info & 0x600000000) {
1423 u64 tdp_ctrl;
1424 u64 tdp_ratio;
1425 int tdp_msr;
1426 int err;
1427
1428
1429 err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
1430 if (err)
1431 return err;
1432
1433
1434 tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
1435 err = rdmsrl_safe(tdp_msr, &tdp_ratio);
1436 if (err)
1437 return err;
1438
1439
1440 if (tdp_ctrl & 0x03)
1441 tdp_ratio >>= 16;
1442
1443 tdp_ratio &= 0xff;
1444 pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
1445
1446 return (int)tdp_ratio;
1447 }
1448
1449 return -ENXIO;
1450}
1451
1452static int core_get_max_pstate(void)
1453{
1454 u64 tar;
1455 u64 plat_info;
1456 int max_pstate;
1457 int tdp_ratio;
1458 int err;
1459
1460 rdmsrl(MSR_PLATFORM_INFO, plat_info);
1461 max_pstate = (plat_info >> 8) & 0xFF;
1462
1463 tdp_ratio = core_get_tdp_ratio(plat_info);
1464 if (tdp_ratio <= 0)
1465 return max_pstate;
1466
1467 if (hwp_active) {
1468
1469 return tdp_ratio;
1470 }
1471
1472 err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
1473 if (!err) {
1474 int tar_levels;
1475
1476
1477 tar_levels = tar & 0xff;
1478 if (tdp_ratio - 1 == tar_levels) {
1479 max_pstate = tar_levels;
1480 pr_debug("max_pstate=TAC %x\n", max_pstate);
1481 }
1482 }
1483
1484 return max_pstate;
1485}
1486
1487static int core_get_turbo_pstate(void)
1488{
1489 u64 value;
1490 int nont, ret;
1491
1492 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1493 nont = core_get_max_pstate();
1494 ret = (value) & 255;
1495 if (ret <= nont)
1496 ret = nont;
1497 return ret;
1498}
1499
1500static inline int core_get_scaling(void)
1501{
1502 return 100000;
1503}
1504
1505static u64 core_get_val(struct cpudata *cpudata, int pstate)
1506{
1507 u64 val;
1508
1509 val = (u64)pstate << 8;
1510 if (global.no_turbo && !global.turbo_disabled)
1511 val |= (u64)1 << 32;
1512
1513 return val;
1514}
1515
1516static int knl_get_aperf_mperf_shift(void)
1517{
1518 return 10;
1519}
1520
1521static int knl_get_turbo_pstate(void)
1522{
1523 u64 value;
1524 int nont, ret;
1525
1526 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1527 nont = core_get_max_pstate();
1528 ret = (((value) >> 8) & 0xFF);
1529 if (ret <= nont)
1530 ret = nont;
1531 return ret;
1532}
1533
1534static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
1535{
1536 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
1537 cpu->pstate.current_pstate = pstate;
1538
1539
1540
1541
1542
1543 wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
1544 pstate_funcs.get_val(cpu, pstate));
1545}
1546
1547static void intel_pstate_set_min_pstate(struct cpudata *cpu)
1548{
1549 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
1550}
1551
1552static void intel_pstate_max_within_limits(struct cpudata *cpu)
1553{
1554 int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
1555
1556 update_turbo_state();
1557 intel_pstate_set_pstate(cpu, pstate);
1558}
1559
1560static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1561{
1562 cpu->pstate.min_pstate = pstate_funcs.get_min();
1563 cpu->pstate.max_pstate = pstate_funcs.get_max();
1564 cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
1565 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
1566 cpu->pstate.scaling = pstate_funcs.get_scaling();
1567 cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
1568
1569 if (hwp_active && !hwp_mode_bdw) {
1570 unsigned int phy_max, current_max;
1571
		intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
1573 cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
1574 } else {
1575 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1576 }
1577
1578 if (pstate_funcs.get_aperf_mperf_shift)
1579 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
1580
1581 if (pstate_funcs.get_vid)
1582 pstate_funcs.get_vid(cpu);
1583
1584 intel_pstate_set_min_pstate(cpu);
1585}
1586
1587
1588
1589
1590
1591
1592
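/*
 * Dynamic HWP boost: on I/O-wait wakeups the HWP minimum performance is
 * temporarily raised and then restored once no further I/O wakeups have
 * been seen for the hold time below (long enough to cover typical
 * request-completion latencies without keeping the floor raised forever).
 */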
1593static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
1594
1595static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
1596{
1597 u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
1598 u32 max_limit = (hwp_req & 0xff00) >> 8;
1599 u32 min_limit = (hwp_req & 0xff);
1600 u32 boost_level1;
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
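	/*
	 * The boost is stepped up gradually on each qualifying I/O wakeup:
	 * from the current min limit, to the midpoint between the min limit
	 * and the guaranteed performance level, then to the guaranteed
	 * level, and finally to the max limit.  Nothing is done once the
	 * max limit has been reached or when min and max are already equal.
	 */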
1617 if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
1618 return;
1619
1620 if (!cpu->hwp_boost_min)
1621 cpu->hwp_boost_min = min_limit;
1622
1623
1624 boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
1625
1626 if (cpu->hwp_boost_min < boost_level1)
1627 cpu->hwp_boost_min = boost_level1;
1628 else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1629 cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
1630 else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
1631 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1632 cpu->hwp_boost_min = max_limit;
1633 else
1634 return;
1635
1636 hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
1637 wrmsrl(MSR_HWP_REQUEST, hwp_req);
1638 cpu->last_update = cpu->sample.time;
1639}
1640
1641static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
1642{
1643 if (cpu->hwp_boost_min) {
1644 bool expired;
1645
1646
1647 expired = time_after64(cpu->sample.time, cpu->last_update +
1648 hwp_boost_hold_time_ns);
1649 if (expired) {
1650 wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
1651 cpu->hwp_boost_min = 0;
1652 }
1653 }
1654 cpu->last_update = cpu->sample.time;
1655}
1656
1657static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
1658 u64 time)
1659{
1660 cpu->sample.time = time;
1661
1662 if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
1663 bool do_io = false;
1664
1665 cpu->sched_flags = 0;
1666
1667
1668
1669
1670
1671
1672
1673 if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
1674 do_io = true;
1675
1676 cpu->last_io_update = time;
1677
1678 if (do_io)
1679 intel_pstate_hwp_boost_up(cpu);
1680
1681 } else {
1682 intel_pstate_hwp_boost_down(cpu);
1683 }
1684}
1685
1686static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
1687 u64 time, unsigned int flags)
1688{
1689 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1690
1691 cpu->sched_flags |= flags;
1692
1693 if (smp_processor_id() == cpu->cpu)
1694 intel_pstate_update_util_hwp_local(cpu, time);
1695}
1696
1697static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
1698{
1699 struct sample *sample = &cpu->sample;
1700
1701 sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
1702}
1703
1704static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
1705{
1706 u64 aperf, mperf;
1707 unsigned long flags;
1708 u64 tsc;
1709
1710 local_irq_save(flags);
1711 rdmsrl(MSR_IA32_APERF, aperf);
1712 rdmsrl(MSR_IA32_MPERF, mperf);
1713 tsc = rdtsc();
1714 if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
1715 local_irq_restore(flags);
1716 return false;
1717 }
1718 local_irq_restore(flags);
1719
1720 cpu->last_sample_time = cpu->sample.time;
1721 cpu->sample.time = time;
1722 cpu->sample.aperf = aperf;
1723 cpu->sample.mperf = mperf;
1724 cpu->sample.tsc = tsc;
1725 cpu->sample.aperf -= cpu->prev_aperf;
1726 cpu->sample.mperf -= cpu->prev_mperf;
1727 cpu->sample.tsc -= cpu->prev_tsc;
1728
1729 cpu->prev_aperf = aperf;
1730 cpu->prev_mperf = mperf;
1731 cpu->prev_tsc = tsc;
1732
1733
1734
1735
1736
1737
1738
1739 if (cpu->last_sample_time) {
1740 intel_pstate_calc_avg_perf(cpu);
1741 return true;
1742 }
1743 return false;
1744}
1745
1746static inline int32_t get_avg_frequency(struct cpudata *cpu)
1747{
1748 return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
1749}
1750
1751static inline int32_t get_avg_pstate(struct cpudata *cpu)
1752{
1753 return mul_ext_fp(cpu->pstate.max_pstate_physical,
1754 cpu->sample.core_avg_perf);
1755}
1756
1757static inline int32_t get_target_pstate(struct cpudata *cpu)
1758{
1759 struct sample *sample = &cpu->sample;
1760 int32_t busy_frac;
1761 int target, avg_pstate;
1762
1763 busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
1764 sample->tsc);
1765
1766 if (busy_frac < cpu->iowait_boost)
1767 busy_frac = cpu->iowait_boost;
1768
1769 sample->busy_scaled = busy_frac * 100;
1770
1771 target = global.no_turbo || global.turbo_disabled ?
1772 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
1773 target += target >> 2;
1774 target = mul_fp(target, busy_frac);
1775 if (target < cpu->pstate.min_pstate)
1776 target = cpu->pstate.min_pstate;
1777
1778
1779
1780
1781
1782
1783
1784
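	/*
	 * If the average P-state during the previous cycle was higher than
	 * the current target, add 50% of the difference to the target to
	 * smooth out oscillations when a load migrates between CPUs.
	 */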
1785 avg_pstate = get_avg_pstate(cpu);
1786 if (avg_pstate > target)
1787 target += (avg_pstate - target) >> 1;
1788
1789 return target;
1790}
1791
1792static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
1793{
1794 int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
1795 int max_pstate = max(min_pstate, cpu->max_perf_ratio);
1796
1797 return clamp_t(int, pstate, min_pstate, max_pstate);
1798}
1799
1800static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
1801{
1802 if (pstate == cpu->pstate.current_pstate)
1803 return;
1804
1805 cpu->pstate.current_pstate = pstate;
1806 wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
1807}
1808
1809static void intel_pstate_adjust_pstate(struct cpudata *cpu)
1810{
1811 int from = cpu->pstate.current_pstate;
1812 struct sample *sample;
1813 int target_pstate;
1814
1815 update_turbo_state();
1816
1817 target_pstate = get_target_pstate(cpu);
1818 target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
1819 trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
1820 intel_pstate_update_pstate(cpu, target_pstate);
1821
1822 sample = &cpu->sample;
1823 trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
1824 fp_toint(sample->busy_scaled),
1825 from,
1826 cpu->pstate.current_pstate,
1827 sample->mperf,
1828 sample->aperf,
1829 sample->tsc,
1830 get_avg_frequency(cpu),
1831 fp_toint(cpu->iowait_boost * 100));
1832}
1833
1834static void intel_pstate_update_util(struct update_util_data *data, u64 time,
1835 unsigned int flags)
1836{
1837 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1838 u64 delta_ns;
1839
1840
1841 if (smp_processor_id() != cpu->cpu)
1842 return;
1843
1844 delta_ns = time - cpu->last_update;
1845 if (flags & SCHED_CPUFREQ_IOWAIT) {
1846
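		/*
		 * If the last update was more than a tick ago, (re)start the
		 * boost at 1/8; otherwise double it on back-to-back I/O
		 * wakeups, capping it at 1.0.
		 */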
1847 if (delta_ns > TICK_NSEC) {
1848 cpu->iowait_boost = ONE_EIGHTH_FP;
1849 } else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
1850 cpu->iowait_boost <<= 1;
1851 if (cpu->iowait_boost > int_tofp(1))
1852 cpu->iowait_boost = int_tofp(1);
1853 } else {
1854 cpu->iowait_boost = ONE_EIGHTH_FP;
1855 }
1856 } else if (cpu->iowait_boost) {
1857
1858 if (delta_ns > TICK_NSEC)
1859 cpu->iowait_boost = 0;
1860 else
1861 cpu->iowait_boost >>= 1;
1862 }
1863 cpu->last_update = time;
1864 delta_ns = time - cpu->sample.time;
1865 if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
1866 return;
1867
1868 if (intel_pstate_sample(cpu, time))
1869 intel_pstate_adjust_pstate(cpu);
1870}
1871
1872static struct pstate_funcs core_funcs = {
1873 .get_max = core_get_max_pstate,
1874 .get_max_physical = core_get_max_pstate_physical,
1875 .get_min = core_get_min_pstate,
1876 .get_turbo = core_get_turbo_pstate,
1877 .get_scaling = core_get_scaling,
1878 .get_val = core_get_val,
1879};
1880
1881static const struct pstate_funcs silvermont_funcs = {
1882 .get_max = atom_get_max_pstate,
1883 .get_max_physical = atom_get_max_pstate,
1884 .get_min = atom_get_min_pstate,
1885 .get_turbo = atom_get_turbo_pstate,
1886 .get_val = atom_get_val,
1887 .get_scaling = silvermont_get_scaling,
1888 .get_vid = atom_get_vid,
1889};
1890
1891static const struct pstate_funcs airmont_funcs = {
1892 .get_max = atom_get_max_pstate,
1893 .get_max_physical = atom_get_max_pstate,
1894 .get_min = atom_get_min_pstate,
1895 .get_turbo = atom_get_turbo_pstate,
1896 .get_val = atom_get_val,
1897 .get_scaling = airmont_get_scaling,
1898 .get_vid = atom_get_vid,
1899};
1900
1901static const struct pstate_funcs knl_funcs = {
1902 .get_max = core_get_max_pstate,
1903 .get_max_physical = core_get_max_pstate_physical,
1904 .get_min = core_get_min_pstate,
1905 .get_turbo = knl_get_turbo_pstate,
1906 .get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
1907 .get_scaling = core_get_scaling,
1908 .get_val = core_get_val,
1909};
1910
1911#define ICPU(model, policy) \
1912 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
1913 (unsigned long)&policy }
1914
1915static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
1916 ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs),
1917 ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs),
1918 ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs),
1919 ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs),
1920 ICPU(INTEL_FAM6_HASWELL, core_funcs),
1921 ICPU(INTEL_FAM6_BROADWELL, core_funcs),
1922 ICPU(INTEL_FAM6_IVYBRIDGE_X, core_funcs),
1923 ICPU(INTEL_FAM6_HASWELL_X, core_funcs),
1924 ICPU(INTEL_FAM6_HASWELL_L, core_funcs),
1925 ICPU(INTEL_FAM6_HASWELL_G, core_funcs),
1926 ICPU(INTEL_FAM6_BROADWELL_G, core_funcs),
1927 ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs),
1928 ICPU(INTEL_FAM6_SKYLAKE_L, core_funcs),
1929 ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
1930 ICPU(INTEL_FAM6_SKYLAKE, core_funcs),
1931 ICPU(INTEL_FAM6_BROADWELL_D, core_funcs),
1932 ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs),
1933 ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs),
1934 ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs),
1935 ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs),
1936 ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
1937 {}
1938};
1939MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1940
1941static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
1942 ICPU(INTEL_FAM6_BROADWELL_D, core_funcs),
1943 ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
1944 ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
1945 {}
1946};
1947
1948static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
1949 ICPU(INTEL_FAM6_KABYLAKE, core_funcs),
1950 {}
1951};
1952
1953static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
1954 ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
1955 ICPU(INTEL_FAM6_SKYLAKE, core_funcs),
1956 {}
1957};
1958
1959static int intel_pstate_init_cpu(unsigned int cpunum)
1960{
1961 struct cpudata *cpu;
1962
1963 cpu = all_cpu_data[cpunum];
1964
1965 if (!cpu) {
1966 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
1967 if (!cpu)
1968 return -ENOMEM;
1969
1970 all_cpu_data[cpunum] = cpu;
1971
1972 cpu->epp_default = -EINVAL;
1973 cpu->epp_powersave = -EINVAL;
1974 cpu->epp_saved = -EINVAL;
1975 }
1976
1977 cpu = all_cpu_data[cpunum];
1978
1979 cpu->cpu = cpunum;
1980
1981 if (hwp_active) {
1982 const struct x86_cpu_id *id;
1983
1984 id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
1985 if (id)
1986 intel_pstate_disable_ee(cpunum);
1987
1988 intel_pstate_hwp_enable(cpu);
1989
1990 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
1991 if (id && intel_pstate_acpi_pm_profile_server())
1992 hwp_boost = true;
1993 }
1994
1995 intel_pstate_get_cpu_pstates(cpu);
1996
1997 pr_debug("controlling: cpu %d\n", cpunum);
1998
1999 return 0;
2000}
2001
2002static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
2003{
2004 struct cpudata *cpu = all_cpu_data[cpu_num];
2005
2006 if (hwp_active && !hwp_boost)
2007 return;
2008
2009 if (cpu->update_util_set)
2010 return;
2011
2012
2013 cpu->sample.time = 0;
2014 cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
2015 (hwp_active ?
2016 intel_pstate_update_util_hwp :
2017 intel_pstate_update_util));
2018 cpu->update_util_set = true;
2019}
2020
2021static void intel_pstate_clear_update_util_hook(unsigned int cpu)
2022{
2023 struct cpudata *cpu_data = all_cpu_data[cpu];
2024
2025 if (!cpu_data->update_util_set)
2026 return;
2027
2028 cpufreq_remove_update_util_hook(cpu);
2029 cpu_data->update_util_set = false;
2030 synchronize_rcu();
2031}
2032
2033static int intel_pstate_get_max_freq(struct cpudata *cpu)
2034{
2035 return global.turbo_disabled || global.no_turbo ?
2036 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2037}
2038
2039static void intel_pstate_update_perf_limits(struct cpudata *cpu,
2040 unsigned int policy_min,
2041 unsigned int policy_max)
2042{
2043 int max_freq = intel_pstate_get_max_freq(cpu);
2044 int32_t max_policy_perf, min_policy_perf;
2045 int max_state, turbo_max;
2046
2047
2048
2049
2050
2051
2052 if (hwp_active) {
2053 intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
2054 } else {
2055 max_state = global.no_turbo || global.turbo_disabled ?
2056 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
2057 turbo_max = cpu->pstate.turbo_pstate;
2058 }
2059
2060 max_policy_perf = max_state * policy_max / max_freq;
2061 if (policy_max == policy_min) {
2062 min_policy_perf = max_policy_perf;
2063 } else {
2064 min_policy_perf = max_state * policy_min / max_freq;
2065 min_policy_perf = clamp_t(int32_t, min_policy_perf,
2066 0, max_policy_perf);
2067 }
2068
2069 pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
2070 cpu->cpu, max_state, min_policy_perf, max_policy_perf);
2071
2072
2073 if (per_cpu_limits) {
2074 cpu->min_perf_ratio = min_policy_perf;
2075 cpu->max_perf_ratio = max_policy_perf;
2076 } else {
2077 int32_t global_min, global_max;
2078
2079
2080 global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
2081 global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
2082 global_min = clamp_t(int32_t, global_min, 0, global_max);
2083
2084 pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu,
2085 global_min, global_max);
2086
2087 cpu->min_perf_ratio = max(min_policy_perf, global_min);
2088 cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
2089 cpu->max_perf_ratio = min(max_policy_perf, global_max);
2090 cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
2091
2092
2093 cpu->min_perf_ratio = min(cpu->min_perf_ratio,
2094 cpu->max_perf_ratio);
2095
2096 }
2097 pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu,
2098 cpu->max_perf_ratio,
2099 cpu->min_perf_ratio);
2100}
2101
2102static int intel_pstate_set_policy(struct cpufreq_policy *policy)
2103{
2104 struct cpudata *cpu;
2105
2106 if (!policy->cpuinfo.max_freq)
2107 return -ENODEV;
2108
2109 pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
2110 policy->cpuinfo.max_freq, policy->max);
2111
2112 cpu = all_cpu_data[policy->cpu];
2113 cpu->policy = policy->policy;
2114
2115 mutex_lock(&intel_pstate_limits_lock);
2116
2117 intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
2118
2119 if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
2120
2121
2122
2123
2124 intel_pstate_clear_update_util_hook(policy->cpu);
2125 intel_pstate_max_within_limits(cpu);
2126 } else {
2127 intel_pstate_set_update_util_hook(policy->cpu);
2128 }
2129
2130 if (hwp_active) {
2131
2132
2133
2134
2135
2136 if (!hwp_boost)
2137 intel_pstate_clear_update_util_hook(policy->cpu);
2138 intel_pstate_hwp_set(policy->cpu);
2139 }
2140
2141 mutex_unlock(&intel_pstate_limits_lock);
2142
2143 return 0;
2144}
2145
2146static void intel_pstate_adjust_policy_max(struct cpudata *cpu,
2147 struct cpufreq_policy_data *policy)
2148{
2149 if (!hwp_active &&
2150 cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
2151 policy->max < policy->cpuinfo.max_freq &&
2152 policy->max > cpu->pstate.max_freq) {
2153 pr_debug("policy->max > max non turbo frequency\n");
2154 policy->max = policy->cpuinfo.max_freq;
2155 }
2156}
2157
2158static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
2159{
2160 struct cpudata *cpu = all_cpu_data[policy->cpu];
2161
2162 update_turbo_state();
2163 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
2164 intel_pstate_get_max_freq(cpu));
2165
2166 intel_pstate_adjust_policy_max(cpu, policy);
2167
2168 return 0;
2169}
2170
2171static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
2172{
2173 intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
2174}
2175
2176static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
2177{
2178 pr_debug("CPU %d exiting\n", policy->cpu);
2179
2180 intel_pstate_clear_update_util_hook(policy->cpu);
2181 if (hwp_active) {
2182 intel_pstate_hwp_save_state(policy);
2183 intel_pstate_hwp_force_min_perf(policy->cpu);
2184 } else {
2185 intel_cpufreq_stop_cpu(policy);
2186 }
2187}
2188
2189static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
2190{
2191 intel_pstate_exit_perf_limits(policy);
2192
2193 policy->fast_switch_possible = false;
2194
2195 return 0;
2196}
2197
2198static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
2199{
2200 struct cpudata *cpu;
2201 int rc;
2202
2203 rc = intel_pstate_init_cpu(policy->cpu);
2204 if (rc)
2205 return rc;
2206
2207 cpu = all_cpu_data[policy->cpu];
2208
2209 cpu->max_perf_ratio = 0xFF;
2210 cpu->min_perf_ratio = 0;
2211
2212 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
2213 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
2214
2215
2216 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
2217 update_turbo_state();
2218 global.turbo_disabled_mf = global.turbo_disabled;
2219 policy->cpuinfo.max_freq = global.turbo_disabled ?
2220 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
2221 policy->cpuinfo.max_freq *= cpu->pstate.scaling;
2222
2223 if (hwp_active) {
2224 unsigned int max_freq;
2225
2226 max_freq = global.turbo_disabled ?
2227 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2228 if (max_freq < policy->cpuinfo.max_freq)
2229 policy->cpuinfo.max_freq = max_freq;
2230 }
2231
2232 intel_pstate_init_acpi_perf_limits(policy);
2233
2234 policy->fast_switch_possible = true;
2235
2236 return 0;
2237}
2238
2239static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
2240{
2241 int ret = __intel_pstate_cpu_init(policy);
2242
2243 if (ret)
2244 return ret;
2245
2246 if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
2247 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
2248 else
2249 policy->policy = CPUFREQ_POLICY_POWERSAVE;
2250
2251 return 0;
2252}
2253
2254static struct cpufreq_driver intel_pstate = {
2255 .flags = CPUFREQ_CONST_LOOPS,
2256 .verify = intel_pstate_verify_policy,
2257 .setpolicy = intel_pstate_set_policy,
2258 .suspend = intel_pstate_hwp_save_state,
2259 .resume = intel_pstate_resume,
2260 .init = intel_pstate_cpu_init,
2261 .exit = intel_pstate_cpu_exit,
2262 .stop_cpu = intel_pstate_stop_cpu,
2263 .update_limits = intel_pstate_update_limits,
2264 .name = "intel_pstate",
2265};
2266
2267static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
2268{
2269 struct cpudata *cpu = all_cpu_data[policy->cpu];
2270
2271 update_turbo_state();
2272 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
2273 intel_pstate_get_max_freq(cpu));
2274
2275 intel_pstate_adjust_policy_max(cpu, policy);
2276
2277 intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
2278
2279 return 0;
2280}
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
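/*
 * In passive ("intel_cpufreq") mode the frequency is requested by a generic
 * cpufreq governor, so the pstate_sample tracepoint is reused with a
 * trace_type marker distinguishing regular ->target() requests from
 * fast-switch requests.
 */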
2295#define INTEL_PSTATE_TRACE_TARGET 10
2296#define INTEL_PSTATE_TRACE_FAST_SWITCH 90
2297
2298static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
2299{
2300 struct sample *sample;
2301
2302 if (!trace_pstate_sample_enabled())
2303 return;
2304
2305 if (!intel_pstate_sample(cpu, ktime_get()))
2306 return;
2307
2308 sample = &cpu->sample;
2309 trace_pstate_sample(trace_type,
2310 0,
2311 old_pstate,
2312 cpu->pstate.current_pstate,
2313 sample->mperf,
2314 sample->aperf,
2315 sample->tsc,
2316 get_avg_frequency(cpu),
2317 fp_toint(cpu->iowait_boost * 100));
2318}
2319
2320static int intel_cpufreq_target(struct cpufreq_policy *policy,
2321 unsigned int target_freq,
2322 unsigned int relation)
2323{
2324 struct cpudata *cpu = all_cpu_data[policy->cpu];
2325 struct cpufreq_freqs freqs;
2326 int target_pstate, old_pstate;
2327
2328 update_turbo_state();
2329
2330 freqs.old = policy->cur;
2331 freqs.new = target_freq;
2332
2333 cpufreq_freq_transition_begin(policy, &freqs);
2334 switch (relation) {
2335 case CPUFREQ_RELATION_L:
2336 target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
2337 break;
2338 case CPUFREQ_RELATION_H:
2339 target_pstate = freqs.new / cpu->pstate.scaling;
2340 break;
2341 default:
2342 target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
2343 break;
2344 }
2345 target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2346 old_pstate = cpu->pstate.current_pstate;
2347 if (target_pstate != cpu->pstate.current_pstate) {
2348 cpu->pstate.current_pstate = target_pstate;
2349 wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
2350 pstate_funcs.get_val(cpu, target_pstate));
2351 }
2352 freqs.new = target_pstate * cpu->pstate.scaling;
2353 intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate);
2354 cpufreq_freq_transition_end(policy, &freqs, false);
2355
2356 return 0;
2357}
2358
2359static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
2360 unsigned int target_freq)
2361{
2362 struct cpudata *cpu = all_cpu_data[policy->cpu];
2363 int target_pstate, old_pstate;
2364
2365 update_turbo_state();
2366
2367 target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
2368 target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2369 old_pstate = cpu->pstate.current_pstate;
2370 intel_pstate_update_pstate(cpu, target_pstate);
2371 intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
2372 return target_pstate * cpu->pstate.scaling;
2373}
2374
2375static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
2376{
2377 int max_state, turbo_max, min_freq, max_freq, ret;
2378 struct freq_qos_request *req;
2379 struct cpudata *cpu;
2380 struct device *dev;
2381
2382 dev = get_cpu_device(policy->cpu);
2383 if (!dev)
2384 return -ENODEV;
2385
2386 ret = __intel_pstate_cpu_init(policy);
2387 if (ret)
2388 return ret;
2389
2390 policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
2391 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
2392
	policy->cur = policy->cpuinfo.min_freq;

	req = kcalloc(2, sizeof(*req), GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto pstate_exit;
	}

	cpu = all_cpu_data[policy->cpu];

	if (hwp_active)
		intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state);
	else
		turbo_max = cpu->pstate.turbo_pstate;

	min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
	min_freq *= cpu->pstate.scaling;
	max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
	max_freq *= cpu->pstate.scaling;

	ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
				   min_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_req;
	}

	ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
				   max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto remove_min_req;
	}

	policy->driver_data = req;

	return 0;

remove_min_req:
	freq_qos_remove_request(req);
free_req:
	kfree(req);
pstate_exit:
	intel_pstate_exit_perf_limits(policy);

	return ret;
}

static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct freq_qos_request *req;

	req = policy->driver_data;

	freq_qos_remove_request(req + 1);
	freq_qos_remove_request(req);
	kfree(req);

	return intel_pstate_cpu_exit(policy);
}

static struct cpufreq_driver intel_cpufreq = {
	.flags = CPUFREQ_CONST_LOOPS,
	.verify = intel_cpufreq_verify_policy,
	.target = intel_cpufreq_target,
	.fast_switch = intel_cpufreq_fast_switch,
	.init = intel_cpufreq_cpu_init,
	.exit = intel_cpufreq_cpu_exit,
	.stop_cpu = intel_cpufreq_stop_cpu,
	.update_limits = intel_pstate_update_limits,
	.name = "intel_cpufreq",
};

static struct cpufreq_driver *default_driver = &intel_pstate;

static void intel_pstate_driver_cleanup(void)
{
	unsigned int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			if (intel_pstate_driver == &intel_pstate)
				intel_pstate_clear_update_util_hook(cpu);

			kfree(all_cpu_data[cpu]);
			all_cpu_data[cpu] = NULL;
		}
	}
	put_online_cpus();
	intel_pstate_driver = NULL;
}

static int intel_pstate_register_driver(struct cpufreq_driver *driver)
{
	int ret;

	memset(&global, 0, sizeof(global));
	global.max_perf_pct = 100;

	intel_pstate_driver = driver;
	ret = cpufreq_register_driver(intel_pstate_driver);
	if (ret) {
		intel_pstate_driver_cleanup();
		return ret;
	}

	global.min_perf_pct = min_perf_pct_min();

	return 0;
}

static int intel_pstate_unregister_driver(void)
{
	if (hwp_active)
		return -EBUSY;

	cpufreq_unregister_driver(intel_pstate_driver);
	intel_pstate_driver_cleanup();

	return 0;
}

static ssize_t intel_pstate_show_status(char *buf)
{
	if (!intel_pstate_driver)
		return sprintf(buf, "off\n");

	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
					"active" : "passive");
}

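/*
 * Handle writes to the sysfs "status" attribute: "off" unregisters the
 * current driver, while "active" and "passive" switch to the intel_pstate or
 * intel_cpufreq driver respectively, re-registering only when the requested
 * mode differs from the current one.
 */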
static int intel_pstate_update_status(const char *buf, size_t size)
{
	int ret;

	if (size == 3 && !strncmp(buf, "off", size))
		return intel_pstate_driver ?
			intel_pstate_unregister_driver() : -EINVAL;

	if (size == 6 && !strncmp(buf, "active", size)) {
		if (intel_pstate_driver) {
			if (intel_pstate_driver == &intel_pstate)
				return 0;

			ret = intel_pstate_unregister_driver();
			if (ret)
				return ret;
		}

		return intel_pstate_register_driver(&intel_pstate);
	}

	if (size == 7 && !strncmp(buf, "passive", size)) {
		if (intel_pstate_driver) {
			if (intel_pstate_driver == &intel_cpufreq)
				return 0;

			ret = intel_pstate_unregister_driver();
			if (ret)
				return ret;
		}

		return intel_pstate_register_driver(&intel_cpufreq);
	}

	return -EINVAL;
}

static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
static unsigned int force_load __initdata;

static int __init intel_pstate_msrs_not_valid(void)
{
	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	return 0;
}

static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max = funcs->get_max;
	pstate_funcs.get_max_physical = funcs->get_max_physical;
	pstate_funcs.get_min = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.get_scaling = funcs->get_scaling;
	pstate_funcs.get_val = funcs->get_val;
	pstate_funcs.get_vid = funcs->get_vid;
	pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
}

#ifdef CONFIG_ACPI

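/* Return true if none of the possible CPUs advertises an ACPI _PSS package. */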
static bool __init intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	pr_debug("ACPI _PSS not found\n");
	return true;
}

static bool __init intel_pstate_no_acpi_pcch(void)
{
	acpi_status status;
	acpi_handle handle;

	status = acpi_get_handle(NULL, "\\_SB", &handle);
	if (ACPI_FAILURE(status))
		goto not_found;

	if (acpi_has_method(handle, "PCCH"))
		return false;

not_found:
	pr_debug("ACPI PCCH not found\n");
	return true;
}

static bool __init intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	pr_debug("ACPI _PPC not found\n");
	return false;
}

enum {
	PSS,
	PPC,
};

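/* Platforms whose vendor firmware implements its own power management. */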
static struct acpi_platform_list plat_info[] __initdata = {
	{"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, NULL, PSS},
	{"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{ }
};

static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
{
	const struct x86_cpu_id *id;
	u64 misc_pwr;
	int idx;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if (misc_pwr & (1 << 8)) {
			pr_debug("Bit 8 in the MISC_PWR_MGMT MSR set\n");
			return true;
		}
	}

	idx = acpi_match_platform_list(plat_info);
	if (idx < 0)
		return false;

	switch (plat_info[idx].data) {
	case PSS:
		if (!intel_pstate_no_acpi_pss())
			return false;

		return intel_pstate_no_acpi_pcch();
	case PPC:
		return intel_pstate_has_acpi_ppc() && !force_load;
	}

	return false;
}

static void intel_pstate_request_control_from_smm(void)
{
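	/*
	 * Requesting P-states control from SMM may be unsafe when _PPC
	 * support has not been enabled, so only do it when the
	 * "support_acpi_ppc" option was given on the command line.
	 */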
	if (acpi_ppc)
		acpi_processor_pstate_control();
}
#else
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
static inline void intel_pstate_request_control_from_smm(void) {}
#endif

#define INTEL_PSTATE_HWP_BROADWELL	0x01

#define ICPU_HWP(model, hwp_mode) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }

static const struct x86_cpu_id hwp_support_ids[] __initconst = {
	ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
	ICPU_HWP(INTEL_FAM6_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
	ICPU_HWP(X86_MODEL_ANY, 0),
	{}
};

static int __init intel_pstate_init(void)
{
	const struct x86_cpu_id *id;
	int rc;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(hwp_support_ids);
	if (id) {
		copy_cpu_funcs(&core_funcs);
		if (!no_hwp) {
			hwp_active++;
			hwp_mode_bdw = id->driver_data;
			intel_pstate.attr = hwp_cpufreq_attrs;
			goto hwp_cpu_matched;
		}
	} else {
		id = x86_match_cpu(intel_pstate_cpu_ids);
		if (!id) {
			pr_info("CPU model not supported\n");
			return -ENODEV;
		}

		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
	}

	if (intel_pstate_msrs_not_valid()) {
		pr_info("Invalid MSRs\n");
		return -ENODEV;
	}

hwp_cpu_matched:
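	/*
	 * Bail out if the platform firmware provides its own power management
	 * (out-of-band mode or a vendor-specific ACPI interface).
	 */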
	if (intel_pstate_platform_pwr_mgmt_exists()) {
		pr_info("P-states controlled by the platform\n");
		return -ENODEV;
	}

	if (!hwp_active && hwp_only)
		return -ENOTSUPP;

	pr_info("Intel P-state driver initializing\n");

	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
	if (!all_cpu_data)
		return -ENOMEM;

	intel_pstate_request_control_from_smm();

	intel_pstate_sysfs_expose_params();

	mutex_lock(&intel_pstate_driver_lock);
	rc = intel_pstate_register_driver(default_driver);
	mutex_unlock(&intel_pstate_driver_lock);
	if (rc)
		return rc;

	if (hwp_active)
		pr_info("HWP enabled\n");

	return 0;
}
device_initcall(intel_pstate_init);

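/*
 * Parse the "intel_pstate=" early kernel parameter: "disable", "passive",
 * "no_hwp", "force", "hwp_only", "per_cpu_perf_limits" and (with ACPI)
 * "support_acpi_ppc" are recognized.
 */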
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "passive")) {
		pr_info("Passive mode enabled\n");
		default_driver = &intel_cpufreq;
		no_hwp = 1;
	}
	if (!strcmp(str, "no_hwp")) {
		pr_info("HWP disabled\n");
		no_hwp = 1;
	}
	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	if (!strcmp(str, "per_cpu_perf_limits"))
		per_cpu_limits = true;

#ifdef CONFIG_ACPI
	if (!strcmp(str, "support_acpi_ppc"))
		acpi_ppc = true;
#endif

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");