// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pstate.c: Native P state management for Intel processors
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched/cpufreq.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <linux/pm_qos.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>

#define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)

#define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
#define INTEL_CPUFREQ_TRANSITION_DELAY_HWP	5000
#define INTEL_CPUFREQ_TRANSITION_DELAY		500

#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
#endif

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))

#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)

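/*
 * Fixed-point helpers: plain values carry FRAC_BITS (8) fractional bits,
 * "extended" values carry EXT_FRAC_BITS (14) fractional bits.  For example,
 * int_tofp(1) == 256 and mul_fp(int_tofp(3), int_tofp(2)) == int_tofp(6).
 */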
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

static inline u64 mul_ext_fp(u64 x, u64 y)
{
	return (x * y) >> EXT_FRAC_BITS;
}

static inline u64 div_ext_fp(u64 x, u64 y)
{
	return div64_u64(x << EXT_FRAC_BITS, y);
}
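
/*
 * struct sample - performance sample taken every sampling interval
 * @core_avg_perf:	APERF/MPERF ratio (extended fixed point), i.e. the
 *			average performance delivered during the last period
 * @busy_scaled:	scaled busy value used to pick the next P-state
 * @aperf:		APERF MSR delta since the previous sample
 * @mperf:		MPERF MSR delta since the previous sample
 * @tsc:		TSC delta since the previous sample
 * @time:		scheduler timestamp of this sample
 */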
struct sample {
	int32_t core_avg_perf;
	int32_t busy_scaled;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	u64 time;
};
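
/*
 * struct pstate_data - P-state limits and scaling factors for one CPU
 * @current_pstate:	P-state most recently requested for this CPU
 * @min_pstate:		lowest supported P-state
 * @max_pstate:		highest non-turbo P-state (HWP guaranteed when HWP is used)
 * @max_pstate_physical: highest physical non-turbo P-state (PERF_CTL view)
 * @perf_ctl_scaling:	frequency (kHz) represented by one PERF_CTL P-state step
 * @scaling:		frequency (kHz) represented by one P-state step
 * @turbo_pstate:	highest turbo P-state
 * @min_freq, @max_freq, @turbo_freq: corresponding frequencies in kHz
 */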
struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	max_pstate_physical;
	int	perf_ctl_scaling;
	int	scaling;
	int	turbo_pstate;
	unsigned int min_freq;
	unsigned int max_freq;
	unsigned int turbo_freq;
};
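
/*
 * struct vid_data - voltage ID parameters for Atom cores
 * @min:	VID for the minimum P-state (fixed point)
 * @max:	VID for the maximum non-turbo P-state (fixed point)
 * @turbo:	VID to use for turbo P-states
 * @ratio:	VID increment per P-state step between min and max
 */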
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};
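
/*
 * struct global_params - driver-wide user and platform limits
 * @no_turbo:		turbo disabled via sysfs (intel_pstate/no_turbo)
 * @turbo_disabled:	turbo disabled by the BIOS or not supported
 * @turbo_disabled_mf:	cached @turbo_disabled value used for max_freq updates
 * @max_perf_pct:	maximum allowed performance as a percentage of turbo
 * @min_perf_pct:	minimum allowed performance as a percentage of turbo
 */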
struct global_params {
	bool no_turbo;
	bool turbo_disabled;
	bool turbo_disabled_mf;
	int max_perf_pct;
	int min_perf_pct;
};
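
/*
 * struct cpudata - per-CPU instance data for the driver
 *
 * Holds the CPU's P-state and VID parameters, the cpufreq utilization update
 * hook, the most recent APERF/MPERF/TSC sample, cached HWP request/capability
 * MSR values, the energy-performance preference (EPP/EPB) settings and the
 * per-CPU min/max performance ratio limits.
 */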
struct cpudata {
	int cpu;

	unsigned int policy;
	struct update_util_data update_util;
	bool update_util_set;

	struct pstate_data pstate;
	struct vid_data vid;

	u64 last_update;
	u64 last_sample_time;
	u64 aperf_mperf_shift;
	u64 prev_aperf;
	u64 prev_mperf;
	u64 prev_tsc;
	u64 prev_cummulative_iowait;
	struct sample sample;
	int32_t min_perf_ratio;
	int32_t max_perf_ratio;
#ifdef CONFIG_ACPI
	struct acpi_processor_performance acpi_perf_data;
	bool valid_pss_table;
#endif
	unsigned int iowait_boost;
	s16 epp_powersave;
	s16 epp_policy;
	s16 epp_default;
	s16 epp_cached;
	u64 hwp_req_cached;
	u64 hwp_cap_cached;
	u64 last_io_update;
	unsigned int sched_flags;
	u32 hwp_boost_min;
	bool suspended;
};

static struct cpudata **all_cpu_data;
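
/*
 * struct pstate_funcs - CPU-model-specific callbacks
 *
 * Per-model hooks for reading the min/max/turbo P-states, the frequency
 * scaling factor, the APERF/MPERF shift, the PERF_CTL value to write for a
 * given P-state and (on Atom) the VID data.
 */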
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	int (*get_cpu_scaling)(int cpu);
	int (*get_aperf_mperf_shift)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

static struct pstate_funcs pstate_funcs __read_mostly;

static int hwp_active __read_mostly;
static int hwp_mode_bdw __read_mostly;
static bool per_cpu_limits __read_mostly;
static bool hwp_boost __read_mostly;

static struct cpufreq_driver *intel_pstate_driver __read_mostly;

#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif

static struct global_params global;

static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);

#ifdef CONFIG_ACPI

static bool intel_pstate_acpi_pm_profile_server(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
		return true;

	return false;
}

static bool intel_pstate_get_ppc_enable_status(void)
{
	if (intel_pstate_acpi_pm_profile_server())
		return true;

	return acpi_ppc;
}

#ifdef CONFIG_ACPI_CPPC_LIB
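
/* The work item is needed to avoid CPU hotplug locking issues */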
static void intel_pstate_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_itmt_work, intel_pstate_sched_itmt_work_fn);

static void intel_pstate_set_itmt_prio(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return;

	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);

	if (max_highest_perf <= min_highest_perf) {
		if (cppc_perf.highest_perf > max_highest_perf)
			max_highest_perf = cppc_perf.highest_perf;

		if (cppc_perf.highest_perf < min_highest_perf)
			min_highest_perf = cppc_perf.highest_perf;

		if (max_highest_perf > min_highest_perf) {
			schedule_work(&sched_itmt_work);
		}
	}
}

static int intel_pstate_get_cppc_guaranteed(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return ret;

	if (cppc_perf.guaranteed_perf)
		return cppc_perf.guaranteed_perf;

	return cppc_perf.nominal_perf;
}

static u32 intel_pstate_cppc_nominal(int cpu)
{
	u64 nominal_perf;

	if (cppc_get_nominal_perf(cpu, &nominal_perf))
		return 0;

	return nominal_perf;
}
#else
static inline void intel_pstate_set_itmt_prio(int cpu)
{
}
#endif

static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active) {
		intel_pstate_set_itmt_prio(policy->cpu);
		return;
	}

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
	 * guarantee that the states returned by it map to the states in our
	 * list directly.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
						ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry in _PSS, simply ignore _PSS and continue
	 * as usual without taking it into account.
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * The _PSS table doesn't cover the whole turbo frequency range; it
	 * only contains +1 MHz above the max non-turbo frequency with a
	 * control value corresponding to the max turbo ratio.  This driver
	 * uses the real max turbo frequency as the maximum, so correct the
	 * first _PSS entry accordingly (note that _PSS frequencies are in
	 * MHz).
	 */
	if (!global.turbo_disabled)
		cpu->acpi_perf_data.states[0].core_frequency =
					policy->cpuinfo.max_freq / 1000;
	cpu->valid_pss_table = true;
	pr_debug("_PPC limits will be enforced\n");

	return;

 err:
	cpu->valid_pss_table = false;
	acpi_processor_unregister_performance(policy->cpu);
}

static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[policy->cpu];
	if (!cpu->valid_pss_table)
		return;

	acpi_processor_unregister_performance(policy->cpu);
}
#else
static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}

static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}

static inline bool intel_pstate_acpi_pm_profile_server(void)
{
	return false;
}
#endif

#ifndef CONFIG_ACPI_CPPC_LIB
static inline int intel_pstate_get_cppc_guaranteed(int cpu)
{
	return -ENOTSUPP;
}
#endif
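
/*
 * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
 *
 * On hybrid processors, HWP may expose more performance levels than there are
 * P-states accessible through the PERF_CTL interface, so the HWP-to-frequency
 * scaling factor can differ from the PERF_CTL one.  Refine the per-CPU scaling
 * factor and recompute the derived frequency limits accordingly.
 */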
static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
{
	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
	int perf_ctl_turbo = pstate_funcs.get_turbo();
	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
	int scaling = cpu->pstate.scaling;

	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);

	/*
	 * If the HWP_CAP highest performance multiplied by the current scaling
	 * factor exceeds the maximum turbo frequency reported via PERF_CTL,
	 * the scaling factor is too high, so refine it such that the HWP_CAP
	 * highest performance corresponds to the maximum turbo frequency.
	 */
	if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
		cpu->pstate.turbo_freq = turbo_freq;
		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
		cpu->pstate.scaling = scaling;

		pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
			 cpu->cpu, scaling);
	}

	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
					 perf_ctl_scaling);

	cpu->pstate.max_pstate_physical =
			DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
				     scaling);

	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;

	/*
	 * Cast the min P-state value obtained via PERF_CTL scaling into the
	 * HWP performance level range.
	 */
	cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
}

static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	global.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

static int min_perf_pct_min(void)
{
	struct cpudata *cpu = all_cpu_data[0];
	int turbo_pstate = cpu->pstate.turbo_pstate;

	return turbo_pstate ?
		(cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
}

static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
{
	u64 epb;
	int ret;

	if (!boot_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return (s16)ret;

	return (s16)(epb & 0x0f);
}

static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
{
	s16 epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * A zero hwp_req_data means the caller did not read
		 * MSR_HWP_REQUEST, so read it here to get the EPP.
		 */
		if (!hwp_req_data) {
			epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
					    &hwp_req_data);
			if (epp)
				return epp;
		}
		epp = (hwp_req_data >> 24) & 0xff;
	} else {
		/* When there is no EPP present, HWP uses EPB settings */
		epp = intel_pstate_get_epb(cpu_data);
	}

	return epp;
}

static int intel_pstate_set_epb(int cpu, s16 pref)
{
	u64 epb;
	int ret;

	if (!boot_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return ret;

	epb = (epb & ~0x0f) | pref;
	wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);

	return 0;
}
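
/*
 * EPP/EPB display strings corresponding to the EPP index in
 * energy_perf_strings[]:
 *	index		string
 *	-----------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */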
static const char * const energy_perf_strings[] = {
	"default",
	"performance",
	"balance_performance",
	"balance_power",
	"power",
	NULL
};
static const unsigned int epp_values[] = {
	HWP_EPP_PERFORMANCE,
	HWP_EPP_BALANCE_PERFORMANCE,
	HWP_EPP_BALANCE_POWERSAVE,
	HWP_EPP_POWERSAVE
};

static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp)
{
	s16 epp;
	int index = -EINVAL;

	*raw_epp = 0;
	epp = intel_pstate_get_epp(cpu_data, 0);
	if (epp < 0)
		return epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (epp == HWP_EPP_PERFORMANCE)
			return 1;
		if (epp == HWP_EPP_BALANCE_PERFORMANCE)
			return 2;
		if (epp == HWP_EPP_BALANCE_POWERSAVE)
			return 3;
		if (epp == HWP_EPP_POWERSAVE)
			return 4;
		*raw_epp = epp;
		return 0;
	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
		/*
		 * EPB ranges:
		 *	0x00-0x03	:	performance
		 *	0x04-0x07	:	balance_performance
		 *	0x08-0x0B	:	balance_power
		 *	0x0C-0x0F	:	power
		 * The EPB is a 4-bit value, but only the top two bits are
		 * effectively used here to map it to a preference index.
		 */
		index = (epp >> 2) + 1;
	}

	return index;
}

static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
{
	int ret;

	/*
	 * Use the cached HWP Request MSR value instead of reading the MSR:
	 * all writers of MSR_HWP_REQUEST keep hwp_req_cached in sync and a
	 * cross-CPU MSR read would be expensive.
	 */
	u64 value = READ_ONCE(cpu->hwp_req_cached);

	value &= ~GENMASK_ULL(31, 24);
	value |= (u64)epp << 24;

	/*
	 * Update the cached value first, so that readers of hwp_req_cached
	 * see a value consistent with what is written to MSR_HWP_REQUEST
	 * below.
	 */
	WRITE_ONCE(cpu->hwp_req_cached, value);
	ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
	if (!ret)
		cpu->epp_cached = epp;

	return ret;
}

static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
					      int pref_index, bool use_raw,
					      u32 raw_epp)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index)
		epp = cpu_data->epp_default;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (use_raw)
			epp = raw_epp;
		else if (epp == -EINVAL)
			epp = epp_values[pref_index - 1];

		/*
		 * To avoid confusion, refuse to set EPP to any values
		 * different from 0 (performance) if the current policy is
		 * "performance", because those values would be overridden.
		 */
		if (epp > 0 && cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
			return -EBUSY;

		ret = intel_pstate_set_epp(cpu_data, epp);
	} else {
		if (epp == -EINVAL)
			epp = (pref_index - 1) << 2;
		ret = intel_pstate_set_epb(cpu_data->cpu, epp);
	}

	return ret;
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int ret = 0;

	while (energy_perf_strings[i] != NULL)
		ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);

	ret += sprintf(&buf[ret], "\n");

	return ret;
}

cpufreq_freq_attr_ro(energy_performance_available_preferences);

static struct cpufreq_driver intel_pstate;

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	char str_preference[21];
	bool raw = false;
	ssize_t ret;
	u32 epp = 0;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0) {
		if (!boot_cpu_has(X86_FEATURE_HWP_EPP))
			return ret;

		ret = kstrtouint(buf, 10, &epp);
		if (ret)
			return ret;

		if (epp > 255)
			return -EINVAL;

		raw = true;
	}

	/*
	 * This function runs with the policy R/W semaphore held, which
	 * guarantees that the driver pointer will not change while it is
	 * running.
	 */
	if (!intel_pstate_driver)
		return -EAGAIN;

	mutex_lock(&intel_pstate_limits_lock);

	if (intel_pstate_driver == &intel_pstate) {
		ret = intel_pstate_set_energy_pref_index(cpu, ret, raw, epp);
	} else {
		/*
		 * In the passive mode the governor needs to be stopped on the
		 * target CPU before the EPP update and restarted after it,
		 * which is super-heavy-weight, so make sure it is worth doing
		 * upfront.
		 */
		if (!raw)
			epp = ret ? epp_values[ret - 1] : cpu->epp_default;

		if (cpu->epp_cached != epp) {
			int err;

			cpufreq_stop_governor(policy);
			ret = intel_pstate_set_epp(cpu, epp);
			err = cpufreq_start_governor(policy);
			if (!ret)
				ret = err;
		}
	}

	mutex_unlock(&intel_pstate_limits_lock);

	return ret ?: count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
	int preference, raw_epp;

	preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp);
	if (preference < 0)
		return preference;

	if (raw_epp)
		return sprintf(buf, "%d\n", raw_epp);
	else
		return sprintf(buf, "%s\n", energy_perf_strings[preference]);
}

cpufreq_freq_attr_rw(energy_performance_preference);

static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int ratio, freq;

	ratio = intel_pstate_get_cppc_guaranteed(policy->cpu);
	if (ratio <= 0) {
		u64 cap;

		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
		ratio = HWP_GUARANTEED_PERF(cap);
	}

	freq = ratio * cpu->pstate.scaling;
	if (cpu->pstate.scaling != cpu->pstate.perf_ctl_scaling)
		freq = rounddown(freq, cpu->pstate.perf_ctl_scaling);

	return sprintf(buf, "%d\n", freq);
}

cpufreq_freq_attr_ro(base_frequency);

static struct freq_attr *hwp_cpufreq_attrs[] = {
	&energy_performance_preference,
	&energy_performance_available_preferences,
	&base_frequency,
	NULL,
};

static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
	u64 cap;

	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
	WRITE_ONCE(cpu->hwp_cap_cached, cap);
	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
}

static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
	int scaling = cpu->pstate.scaling;

	__intel_pstate_get_hwp_cap(cpu);

	cpu->pstate.max_freq = cpu->pstate.max_pstate * scaling;
	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling;
	if (scaling != cpu->pstate.perf_ctl_scaling) {
		int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;

		cpu->pstate.max_freq = rounddown(cpu->pstate.max_freq,
						 perf_ctl_scaling);
		cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_freq,
						   perf_ctl_scaling);
	}
}

static void intel_pstate_hwp_set(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];
	int max, min;
	u64 value;
	s16 epp;

	max = cpu_data->max_perf_ratio;
	min = cpu_data->min_perf_ratio;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
		min = max;

	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);

	value &= ~HWP_MIN_PERF(~0L);
	value |= HWP_MIN_PERF(min);

	value &= ~HWP_MAX_PERF(~0L);
	value |= HWP_MAX_PERF(max);

	if (cpu_data->epp_policy == cpu_data->policy)
		goto skip_epp;

	cpu_data->epp_policy = cpu_data->policy;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
		epp = intel_pstate_get_epp(cpu_data, value);
		cpu_data->epp_powersave = epp;
		/* If the EPP read failed, don't try to write it */
		if (epp < 0)
			goto skip_epp;

		epp = 0;
	} else {
		/* Skip setting EPP when the saved value is invalid */
		if (cpu_data->epp_powersave < 0)
			goto skip_epp;

		/*
		 * No need to restore EPP when it is not zero.  This means:
		 *  - the policy is not changed,
		 *  - the user has not changed it, or
		 *  - there was an error reading EPB.
		 */
		epp = intel_pstate_get_epp(cpu_data, value);
		if (epp)
			goto skip_epp;

		epp = cpu_data->epp_powersave;
	}
	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	} else {
		intel_pstate_set_epb(cpu, epp);
	}
skip_epp:
	WRITE_ONCE(cpu_data->hwp_req_cached, value);
	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}

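/*
 * Called when a CPU goes offline: clamp the HWP request to the lowest
 * performance level and set EPP to "powersave" so the CPU draws as little
 * power as possible while it is offline.
 */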
static void intel_pstate_hwp_offline(struct cpudata *cpu)
{
	u64 value = READ_ONCE(cpu->hwp_req_cached);
	int min_perf;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * In case the EPP has been set to "performance" by the
		 * active mode "performance" scaling algorithm, replace that
		 * temporary value with the cached EPP one.
		 */
		value &= ~GENMASK_ULL(31, 24);
		value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
		WRITE_ONCE(cpu->hwp_req_cached, value);
	}

	value &= ~GENMASK_ULL(31, 0);
	min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));

	/* Set hwp_max = hwp_min */
	value |= HWP_MAX_PERF(min_perf);
	value |= HWP_MIN_PERF(min_perf);

	/* Set EPP to min */
	if (boot_cpu_has(X86_FEATURE_HWP_EPP))
		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);

	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}

#define POWER_CTL_EE_ENABLE	1
#define POWER_CTL_EE_DISABLE	2

static int power_ctl_ee_state;

static void set_power_ctl_ee_state(bool input)
{
	u64 power_ctl;

	mutex_lock(&intel_pstate_driver_lock);
	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
	if (input) {
		power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
		power_ctl_ee_state = POWER_CTL_EE_ENABLE;
	} else {
		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
		power_ctl_ee_state = POWER_CTL_EE_DISABLE;
	}
	wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
	mutex_unlock(&intel_pstate_driver_lock);
}

static void intel_pstate_hwp_enable(struct cpudata *cpudata);

static void intel_pstate_hwp_reenable(struct cpudata *cpu)
{
	intel_pstate_hwp_enable(cpu);
	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
}

static int intel_pstate_suspend(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d suspending\n", cpu->cpu);

	cpu->suspended = true;

	return 0;
}

static int intel_pstate_resume(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d resuming\n", cpu->cpu);

	/* Only restore if the system default has been changed */
	if (power_ctl_ee_state == POWER_CTL_EE_ENABLE)
		set_power_ctl_ee_state(true);
	else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE)
		set_power_ctl_ee_state(false);

	if (cpu->suspended && hwp_active) {
		mutex_lock(&intel_pstate_limits_lock);

		/* Re-enable HWP, because "online" has not done that. */
		intel_pstate_hwp_reenable(cpu);

		mutex_unlock(&intel_pstate_limits_lock);
	}

	cpu->suspended = false;

	return 0;
}

static void intel_pstate_update_policies(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpufreq_update_policy(cpu);
}

static void intel_pstate_update_max_freq(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
	struct cpudata *cpudata;

	if (!policy)
		return;

	cpudata = all_cpu_data[cpu];
	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;

	refresh_frequency_limits(policy);

	cpufreq_cpu_release(policy);
}

static void intel_pstate_update_limits(unsigned int cpu)
{
	mutex_lock(&intel_pstate_driver_lock);

	update_turbo_state();
	/*
	 * If turbo has been turned on or off globally, policy limits for
	 * all CPUs need to be updated to reflect that.
	 */
	if (global.turbo_disabled_mf != global.turbo_disabled) {
		global.turbo_disabled_mf = global.turbo_disabled;
		arch_set_max_freq_ratio(global.turbo_disabled);
		for_each_possible_cpu(cpu)
			intel_pstate_update_max_freq(cpu);
	} else {
		cpufreq_update_policy(cpu);
	}

	mutex_unlock(&intel_pstate_driver_lock);
}
1130
1131
1132#define show_one(file_name, object) \
1133 static ssize_t show_##file_name \
1134 (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
1135 { \
1136 return sprintf(buf, "%u\n", global.object); \
1137 }
1138
1139static ssize_t intel_pstate_show_status(char *buf);
1140static int intel_pstate_update_status(const char *buf, size_t size);
1141
1142static ssize_t show_status(struct kobject *kobj,
1143 struct kobj_attribute *attr, char *buf)
1144{
1145 ssize_t ret;
1146
1147 mutex_lock(&intel_pstate_driver_lock);
1148 ret = intel_pstate_show_status(buf);
1149 mutex_unlock(&intel_pstate_driver_lock);
1150
1151 return ret;
1152}
1153
1154static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
1155 const char *buf, size_t count)
1156{
1157 char *p = memchr(buf, '\n', count);
1158 int ret;
1159
1160 mutex_lock(&intel_pstate_driver_lock);
1161 ret = intel_pstate_update_status(buf, p ? p - buf : count);
1162 mutex_unlock(&intel_pstate_driver_lock);
1163
1164 return ret < 0 ? ret : count;
1165}
1166
1167static ssize_t show_turbo_pct(struct kobject *kobj,
1168 struct kobj_attribute *attr, char *buf)
1169{
1170 struct cpudata *cpu;
1171 int total, no_turbo, turbo_pct;
1172 uint32_t turbo_fp;
1173
1174 mutex_lock(&intel_pstate_driver_lock);
1175
1176 if (!intel_pstate_driver) {
1177 mutex_unlock(&intel_pstate_driver_lock);
1178 return -EAGAIN;
1179 }
1180
1181 cpu = all_cpu_data[0];
1182
1183 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
1184 no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
1185 turbo_fp = div_fp(no_turbo, total);
1186 turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
1187
1188 mutex_unlock(&intel_pstate_driver_lock);
1189
1190 return sprintf(buf, "%u\n", turbo_pct);
1191}
1192
1193static ssize_t show_num_pstates(struct kobject *kobj,
1194 struct kobj_attribute *attr, char *buf)
1195{
1196 struct cpudata *cpu;
1197 int total;
1198
1199 mutex_lock(&intel_pstate_driver_lock);
1200
1201 if (!intel_pstate_driver) {
1202 mutex_unlock(&intel_pstate_driver_lock);
1203 return -EAGAIN;
1204 }
1205
1206 cpu = all_cpu_data[0];
1207 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
1208
1209 mutex_unlock(&intel_pstate_driver_lock);
1210
1211 return sprintf(buf, "%u\n", total);
1212}
1213
1214static ssize_t show_no_turbo(struct kobject *kobj,
1215 struct kobj_attribute *attr, char *buf)
1216{
1217 ssize_t ret;
1218
1219 mutex_lock(&intel_pstate_driver_lock);
1220
1221 if (!intel_pstate_driver) {
1222 mutex_unlock(&intel_pstate_driver_lock);
1223 return -EAGAIN;
1224 }
1225
1226 update_turbo_state();
1227 if (global.turbo_disabled)
1228 ret = sprintf(buf, "%u\n", global.turbo_disabled);
1229 else
1230 ret = sprintf(buf, "%u\n", global.no_turbo);
1231
1232 mutex_unlock(&intel_pstate_driver_lock);
1233
1234 return ret;
1235}
1236
1237static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
1238 const char *buf, size_t count)
1239{
1240 unsigned int input;
1241 int ret;
1242
1243 ret = sscanf(buf, "%u", &input);
1244 if (ret != 1)
1245 return -EINVAL;
1246
1247 mutex_lock(&intel_pstate_driver_lock);
1248
1249 if (!intel_pstate_driver) {
1250 mutex_unlock(&intel_pstate_driver_lock);
1251 return -EAGAIN;
1252 }
1253
1254 mutex_lock(&intel_pstate_limits_lock);
1255
1256 update_turbo_state();
1257 if (global.turbo_disabled) {
1258 pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n");
1259 mutex_unlock(&intel_pstate_limits_lock);
1260 mutex_unlock(&intel_pstate_driver_lock);
1261 return -EPERM;
1262 }
1263
1264 global.no_turbo = clamp_t(int, input, 0, 1);
1265
1266 if (global.no_turbo) {
1267 struct cpudata *cpu = all_cpu_data[0];
1268 int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
1269
1270
1271 if (global.min_perf_pct > pct)
1272 global.min_perf_pct = pct;
1273 }
1274
1275 mutex_unlock(&intel_pstate_limits_lock);
1276
1277 intel_pstate_update_policies();
1278
1279 mutex_unlock(&intel_pstate_driver_lock);
1280
1281 return count;
1282}
1283
1284static void update_qos_request(enum freq_qos_req_type type)
1285{
1286 struct freq_qos_request *req;
1287 struct cpufreq_policy *policy;
1288 int i;
1289
1290 for_each_possible_cpu(i) {
1291 struct cpudata *cpu = all_cpu_data[i];
1292 unsigned int freq, perf_pct;
1293
1294 policy = cpufreq_cpu_get(i);
1295 if (!policy)
1296 continue;
1297
1298 req = policy->driver_data;
1299 cpufreq_cpu_put(policy);
1300
1301 if (!req)
1302 continue;
1303
1304 if (hwp_active)
1305 intel_pstate_get_hwp_cap(cpu);
1306
1307 if (type == FREQ_QOS_MIN) {
1308 perf_pct = global.min_perf_pct;
1309 } else {
1310 req++;
1311 perf_pct = global.max_perf_pct;
1312 }
1313
1314 freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100);
1315
1316 if (freq_qos_update_request(req, freq) < 0)
1317 pr_warn("Failed to update freq constraint: CPU%d\n", i);
1318 }
1319}
1320
1321static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
1322 const char *buf, size_t count)
1323{
1324 unsigned int input;
1325 int ret;
1326
1327 ret = sscanf(buf, "%u", &input);
1328 if (ret != 1)
1329 return -EINVAL;
1330
1331 mutex_lock(&intel_pstate_driver_lock);
1332
1333 if (!intel_pstate_driver) {
1334 mutex_unlock(&intel_pstate_driver_lock);
1335 return -EAGAIN;
1336 }
1337
1338 mutex_lock(&intel_pstate_limits_lock);
1339
1340 global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
1341
1342 mutex_unlock(&intel_pstate_limits_lock);
1343
1344 if (intel_pstate_driver == &intel_pstate)
1345 intel_pstate_update_policies();
1346 else
1347 update_qos_request(FREQ_QOS_MAX);
1348
1349 mutex_unlock(&intel_pstate_driver_lock);
1350
1351 return count;
1352}
1353
1354static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
1355 const char *buf, size_t count)
1356{
1357 unsigned int input;
1358 int ret;
1359
1360 ret = sscanf(buf, "%u", &input);
1361 if (ret != 1)
1362 return -EINVAL;
1363
1364 mutex_lock(&intel_pstate_driver_lock);
1365
1366 if (!intel_pstate_driver) {
1367 mutex_unlock(&intel_pstate_driver_lock);
1368 return -EAGAIN;
1369 }
1370
1371 mutex_lock(&intel_pstate_limits_lock);
1372
1373 global.min_perf_pct = clamp_t(int, input,
1374 min_perf_pct_min(), global.max_perf_pct);
1375
1376 mutex_unlock(&intel_pstate_limits_lock);
1377
1378 if (intel_pstate_driver == &intel_pstate)
1379 intel_pstate_update_policies();
1380 else
1381 update_qos_request(FREQ_QOS_MIN);
1382
1383 mutex_unlock(&intel_pstate_driver_lock);
1384
1385 return count;
1386}
1387
1388static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
1389 struct kobj_attribute *attr, char *buf)
1390{
1391 return sprintf(buf, "%u\n", hwp_boost);
1392}
1393
1394static ssize_t store_hwp_dynamic_boost(struct kobject *a,
1395 struct kobj_attribute *b,
1396 const char *buf, size_t count)
1397{
1398 unsigned int input;
1399 int ret;
1400
1401 ret = kstrtouint(buf, 10, &input);
1402 if (ret)
1403 return ret;
1404
1405 mutex_lock(&intel_pstate_driver_lock);
1406 hwp_boost = !!input;
1407 intel_pstate_update_policies();
1408 mutex_unlock(&intel_pstate_driver_lock);
1409
1410 return count;
1411}
1412
1413static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribute *attr,
1414 char *buf)
1415{
1416 u64 power_ctl;
1417 int enable;
1418
1419 rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
1420 enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE));
1421 return sprintf(buf, "%d\n", !enable);
1422}
1423
1424static ssize_t store_energy_efficiency(struct kobject *a, struct kobj_attribute *b,
1425 const char *buf, size_t count)
1426{
1427 bool input;
1428 int ret;
1429
1430 ret = kstrtobool(buf, &input);
1431 if (ret)
1432 return ret;
1433
1434 set_power_ctl_ee_state(input);
1435
1436 return count;
1437}
1438
1439show_one(max_perf_pct, max_perf_pct);
1440show_one(min_perf_pct, min_perf_pct);
1441
1442define_one_global_rw(status);
1443define_one_global_rw(no_turbo);
1444define_one_global_rw(max_perf_pct);
1445define_one_global_rw(min_perf_pct);
1446define_one_global_ro(turbo_pct);
1447define_one_global_ro(num_pstates);
1448define_one_global_rw(hwp_dynamic_boost);
1449define_one_global_rw(energy_efficiency);
1450
1451static struct attribute *intel_pstate_attributes[] = {
1452 &status.attr,
1453 &no_turbo.attr,
1454 NULL
1455};
1456
1457static const struct attribute_group intel_pstate_attr_group = {
1458 .attrs = intel_pstate_attributes,
1459};
1460
1461static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[];
1462
1463static struct kobject *intel_pstate_kobject;
1464
1465static void __init intel_pstate_sysfs_expose_params(void)
1466{
1467 int rc;
1468
1469 intel_pstate_kobject = kobject_create_and_add("intel_pstate",
1470 &cpu_subsys.dev_root->kobj);
1471 if (WARN_ON(!intel_pstate_kobject))
1472 return;
1473
1474 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
1475 if (WARN_ON(rc))
1476 return;
1477
1478 if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
1479 rc = sysfs_create_file(intel_pstate_kobject, &turbo_pct.attr);
1480 WARN_ON(rc);
1481
1482 rc = sysfs_create_file(intel_pstate_kobject, &num_pstates.attr);
1483 WARN_ON(rc);
1484 }
1485
1486
1487
1488
1489
1490 if (per_cpu_limits)
1491 return;
1492
1493 rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
1494 WARN_ON(rc);
1495
1496 rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
1497 WARN_ON(rc);
1498
1499 if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) {
1500 rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr);
1501 WARN_ON(rc);
1502 }
1503}
1504
1505static void __init intel_pstate_sysfs_remove(void)
1506{
1507 if (!intel_pstate_kobject)
1508 return;
1509
1510 sysfs_remove_group(intel_pstate_kobject, &intel_pstate_attr_group);
1511
1512 if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
1513 sysfs_remove_file(intel_pstate_kobject, &num_pstates.attr);
1514 sysfs_remove_file(intel_pstate_kobject, &turbo_pct.attr);
1515 }
1516
1517 if (!per_cpu_limits) {
1518 sysfs_remove_file(intel_pstate_kobject, &max_perf_pct.attr);
1519 sysfs_remove_file(intel_pstate_kobject, &min_perf_pct.attr);
1520
1521 if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids))
1522 sysfs_remove_file(intel_pstate_kobject, &energy_efficiency.attr);
1523 }
1524
1525 kobject_put(intel_pstate_kobject);
1526}
1527
1528static void intel_pstate_sysfs_expose_hwp_dynamic_boost(void)
1529{
1530 int rc;
1531
1532 if (!hwp_active)
1533 return;
1534
1535 rc = sysfs_create_file(intel_pstate_kobject, &hwp_dynamic_boost.attr);
1536 WARN_ON_ONCE(rc);
1537}
1538
1539static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
1540{
1541 if (!hwp_active)
1542 return;
1543
1544 sysfs_remove_file(intel_pstate_kobject, &hwp_dynamic_boost.attr);
1545}
1546
1547
1548
1549static void intel_pstate_hwp_enable(struct cpudata *cpudata)
1550{
1551
1552 if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
1553 wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
1554
1555 wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
1556 if (cpudata->epp_default == -EINVAL)
1557 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
1558}
1559
1560static int atom_get_min_pstate(void)
1561{
1562 u64 value;
1563
1564 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1565 return (value >> 8) & 0x7F;
1566}
1567
1568static int atom_get_max_pstate(void)
1569{
1570 u64 value;
1571
1572 rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1573 return (value >> 16) & 0x7F;
1574}
1575
1576static int atom_get_turbo_pstate(void)
1577{
1578 u64 value;
1579
1580 rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
1581 return value & 0x7F;
1582}
1583
1584static u64 atom_get_val(struct cpudata *cpudata, int pstate)
1585{
1586 u64 val;
1587 int32_t vid_fp;
1588 u32 vid;
1589
1590 val = (u64)pstate << 8;
1591 if (global.no_turbo && !global.turbo_disabled)
1592 val |= (u64)1 << 32;
1593
1594 vid_fp = cpudata->vid.min + mul_fp(
1595 int_tofp(pstate - cpudata->pstate.min_pstate),
1596 cpudata->vid.ratio);
1597
1598 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
1599 vid = ceiling_fp(vid_fp);
1600
1601 if (pstate > cpudata->pstate.max_pstate)
1602 vid = cpudata->vid.turbo;
1603
1604 return val | vid;
1605}
1606
1607static int silvermont_get_scaling(void)
1608{
1609 u64 value;
1610 int i;
1611
1612 static int silvermont_freq_table[] = {
1613 83300, 100000, 133300, 116700, 80000};
1614
1615 rdmsrl(MSR_FSB_FREQ, value);
1616 i = value & 0x7;
1617 WARN_ON(i > 4);
1618
1619 return silvermont_freq_table[i];
1620}
1621
1622static int airmont_get_scaling(void)
1623{
1624 u64 value;
1625 int i;
1626
1627 static int airmont_freq_table[] = {
1628 83300, 100000, 133300, 116700, 80000,
1629 93300, 90000, 88900, 87500};
1630
1631 rdmsrl(MSR_FSB_FREQ, value);
1632 i = value & 0xF;
1633 WARN_ON(i > 8);
1634
1635 return airmont_freq_table[i];
1636}
1637
1638static void atom_get_vid(struct cpudata *cpudata)
1639{
1640 u64 value;
1641
1642 rdmsrl(MSR_ATOM_CORE_VIDS, value);
1643 cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
1644 cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
1645 cpudata->vid.ratio = div_fp(
1646 cpudata->vid.max - cpudata->vid.min,
1647 int_tofp(cpudata->pstate.max_pstate -
1648 cpudata->pstate.min_pstate));
1649
1650 rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
1651 cpudata->vid.turbo = value & 0x7f;
1652}
1653
1654static int core_get_min_pstate(void)
1655{
1656 u64 value;
1657
1658 rdmsrl(MSR_PLATFORM_INFO, value);
1659 return (value >> 40) & 0xFF;
1660}
1661
1662static int core_get_max_pstate_physical(void)
1663{
1664 u64 value;
1665
1666 rdmsrl(MSR_PLATFORM_INFO, value);
1667 return (value >> 8) & 0xFF;
1668}
1669
1670static int core_get_tdp_ratio(u64 plat_info)
1671{
1672
1673 if (plat_info & 0x600000000) {
1674 u64 tdp_ctrl;
1675 u64 tdp_ratio;
1676 int tdp_msr;
1677 int err;
1678
1679
1680 err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
1681 if (err)
1682 return err;
1683
1684
1685 tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
1686 err = rdmsrl_safe(tdp_msr, &tdp_ratio);
1687 if (err)
1688 return err;
1689
1690
1691 if (tdp_ctrl & 0x03)
1692 tdp_ratio >>= 16;
1693
1694 tdp_ratio &= 0xff;
1695 pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
1696
1697 return (int)tdp_ratio;
1698 }
1699
1700 return -ENXIO;
1701}
1702
1703static int core_get_max_pstate(void)
1704{
1705 u64 tar;
1706 u64 plat_info;
1707 int max_pstate;
1708 int tdp_ratio;
1709 int err;
1710
1711 rdmsrl(MSR_PLATFORM_INFO, plat_info);
1712 max_pstate = (plat_info >> 8) & 0xFF;
1713
1714 tdp_ratio = core_get_tdp_ratio(plat_info);
1715 if (tdp_ratio <= 0)
1716 return max_pstate;
1717
1718 if (hwp_active) {
1719
1720 return tdp_ratio;
1721 }
1722
1723 err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
1724 if (!err) {
1725 int tar_levels;
1726
1727
1728 tar_levels = tar & 0xff;
1729 if (tdp_ratio - 1 == tar_levels) {
1730 max_pstate = tar_levels;
1731 pr_debug("max_pstate=TAC %x\n", max_pstate);
1732 }
1733 }
1734
1735 return max_pstate;
1736}
1737
1738static int core_get_turbo_pstate(void)
1739{
1740 u64 value;
1741 int nont, ret;
1742
1743 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1744 nont = core_get_max_pstate();
1745 ret = (value) & 255;
1746 if (ret <= nont)
1747 ret = nont;
1748 return ret;
1749}
1750
1751static inline int core_get_scaling(void)
1752{
1753 return 100000;
1754}
1755
1756static u64 core_get_val(struct cpudata *cpudata, int pstate)
1757{
1758 u64 val;
1759
1760 val = (u64)pstate << 8;
1761 if (global.no_turbo && !global.turbo_disabled)
1762 val |= (u64)1 << 32;
1763
1764 return val;
1765}
1766
1767static int knl_get_aperf_mperf_shift(void)
1768{
1769 return 10;
1770}
1771
1772static int knl_get_turbo_pstate(void)
1773{
1774 u64 value;
1775 int nont, ret;
1776
1777 rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1778 nont = core_get_max_pstate();
1779 ret = (((value) >> 8) & 0xFF);
1780 if (ret <= nont)
1781 ret = nont;
1782 return ret;
1783}
1784
1785#ifdef CONFIG_ACPI_CPPC_LIB
1786static u32 hybrid_ref_perf;
1787
1788static int hybrid_get_cpu_scaling(int cpu)
1789{
1790 return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
1791 intel_pstate_cppc_nominal(cpu));
1792}
1793
1794static void intel_pstate_cppc_set_cpu_scaling(void)
1795{
1796 u32 min_nominal_perf = U32_MAX;
1797 int cpu;
1798
1799 for_each_present_cpu(cpu) {
1800 u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
1801
1802 if (nominal_perf && nominal_perf < min_nominal_perf)
1803 min_nominal_perf = nominal_perf;
1804 }
1805
1806 if (min_nominal_perf < U32_MAX) {
1807 hybrid_ref_perf = min_nominal_perf;
1808 pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
1809 }
1810}
1811#else
1812static inline void intel_pstate_cppc_set_cpu_scaling(void)
1813{
1814}
1815#endif
1816
1817static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
1818{
1819 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
1820 cpu->pstate.current_pstate = pstate;
1821
1822
1823
1824
1825
1826 wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
1827 pstate_funcs.get_val(cpu, pstate));
1828}
1829
1830static void intel_pstate_set_min_pstate(struct cpudata *cpu)
1831{
1832 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
1833}
1834
1835static void intel_pstate_max_within_limits(struct cpudata *cpu)
1836{
1837 int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
1838
1839 update_turbo_state();
1840 intel_pstate_set_pstate(cpu, pstate);
1841}
1842
1843static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1844{
1845 int perf_ctl_max_phys = pstate_funcs.get_max_physical();
1846 int perf_ctl_scaling = pstate_funcs.get_scaling();
1847
1848 cpu->pstate.min_pstate = pstate_funcs.get_min();
1849 cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
1850 cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
1851
1852 if (hwp_active && !hwp_mode_bdw) {
1853 __intel_pstate_get_hwp_cap(cpu);
1854
1855 if (pstate_funcs.get_cpu_scaling) {
1856 cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
1857 if (cpu->pstate.scaling != perf_ctl_scaling)
1858 intel_pstate_hybrid_hwp_adjust(cpu);
1859 } else {
1860 cpu->pstate.scaling = perf_ctl_scaling;
1861 }
1862 } else {
1863 cpu->pstate.scaling = perf_ctl_scaling;
1864 cpu->pstate.max_pstate = pstate_funcs.get_max();
1865 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
1866 }
1867
1868 if (cpu->pstate.scaling == perf_ctl_scaling) {
1869 cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
1870 cpu->pstate.max_freq = cpu->pstate.max_pstate * perf_ctl_scaling;
1871 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * perf_ctl_scaling;
1872 }
1873
1874 if (pstate_funcs.get_aperf_mperf_shift)
1875 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
1876
1877 if (pstate_funcs.get_vid)
1878 pstate_funcs.get_vid(cpu);
1879
1880 intel_pstate_set_min_pstate(cpu);
1881}
1882
1883
1884
1885
1886
1887
1888
1889static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
1890
1891static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
1892{
1893 u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
1894 u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
1895 u32 max_limit = (hwp_req & 0xff00) >> 8;
1896 u32 min_limit = (hwp_req & 0xff);
1897 u32 boost_level1;
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914 if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
1915 return;
1916
1917 if (!cpu->hwp_boost_min)
1918 cpu->hwp_boost_min = min_limit;
1919
1920
1921 boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1;
1922
1923 if (cpu->hwp_boost_min < boost_level1)
1924 cpu->hwp_boost_min = boost_level1;
1925 else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(hwp_cap))
1926 cpu->hwp_boost_min = HWP_GUARANTEED_PERF(hwp_cap);
1927 else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(hwp_cap) &&
1928 max_limit != HWP_GUARANTEED_PERF(hwp_cap))
1929 cpu->hwp_boost_min = max_limit;
1930 else
1931 return;
1932
1933 hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
1934 wrmsrl(MSR_HWP_REQUEST, hwp_req);
1935 cpu->last_update = cpu->sample.time;
1936}
1937
1938static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
1939{
1940 if (cpu->hwp_boost_min) {
1941 bool expired;
1942
1943
1944 expired = time_after64(cpu->sample.time, cpu->last_update +
1945 hwp_boost_hold_time_ns);
1946 if (expired) {
1947 wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
1948 cpu->hwp_boost_min = 0;
1949 }
1950 }
1951 cpu->last_update = cpu->sample.time;
1952}
1953
1954static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
1955 u64 time)
1956{
1957 cpu->sample.time = time;
1958
1959 if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
1960 bool do_io = false;
1961
1962 cpu->sched_flags = 0;
1963
1964
1965
1966
1967
1968
1969
1970 if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
1971 do_io = true;
1972
1973 cpu->last_io_update = time;
1974
1975 if (do_io)
1976 intel_pstate_hwp_boost_up(cpu);
1977
1978 } else {
1979 intel_pstate_hwp_boost_down(cpu);
1980 }
1981}
1982
1983static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
1984 u64 time, unsigned int flags)
1985{
1986 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1987
1988 cpu->sched_flags |= flags;
1989
1990 if (smp_processor_id() == cpu->cpu)
1991 intel_pstate_update_util_hwp_local(cpu, time);
1992}
1993
1994static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
1995{
1996 struct sample *sample = &cpu->sample;
1997
1998 sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
1999}
2000
2001static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
2002{
2003 u64 aperf, mperf;
2004 unsigned long flags;
2005 u64 tsc;
2006
2007 local_irq_save(flags);
2008 rdmsrl(MSR_IA32_APERF, aperf);
2009 rdmsrl(MSR_IA32_MPERF, mperf);
2010 tsc = rdtsc();
2011 if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
2012 local_irq_restore(flags);
2013 return false;
2014 }
2015 local_irq_restore(flags);
2016
2017 cpu->last_sample_time = cpu->sample.time;
2018 cpu->sample.time = time;
2019 cpu->sample.aperf = aperf;
2020 cpu->sample.mperf = mperf;
2021 cpu->sample.tsc = tsc;
2022 cpu->sample.aperf -= cpu->prev_aperf;
2023 cpu->sample.mperf -= cpu->prev_mperf;
2024 cpu->sample.tsc -= cpu->prev_tsc;
2025
2026 cpu->prev_aperf = aperf;
2027 cpu->prev_mperf = mperf;
2028 cpu->prev_tsc = tsc;
2029
2030
2031
2032
2033
2034
2035
2036 if (cpu->last_sample_time) {
2037 intel_pstate_calc_avg_perf(cpu);
2038 return true;
2039 }
2040 return false;
2041}
2042
2043static inline int32_t get_avg_frequency(struct cpudata *cpu)
2044{
2045 return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
2046}
2047
2048static inline int32_t get_avg_pstate(struct cpudata *cpu)
2049{
2050 return mul_ext_fp(cpu->pstate.max_pstate_physical,
2051 cpu->sample.core_avg_perf);
2052}
2053
2054static inline int32_t get_target_pstate(struct cpudata *cpu)
2055{
2056 struct sample *sample = &cpu->sample;
2057 int32_t busy_frac;
2058 int target, avg_pstate;
2059
2060 busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
2061 sample->tsc);
2062
2063 if (busy_frac < cpu->iowait_boost)
2064 busy_frac = cpu->iowait_boost;
2065
2066 sample->busy_scaled = busy_frac * 100;
2067
2068 target = global.no_turbo || global.turbo_disabled ?
2069 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
2070 target += target >> 2;
2071 target = mul_fp(target, busy_frac);
2072 if (target < cpu->pstate.min_pstate)
2073 target = cpu->pstate.min_pstate;
2074
2075
2076
2077
2078
2079
2080
2081
2082 avg_pstate = get_avg_pstate(cpu);
2083 if (avg_pstate > target)
2084 target += (avg_pstate - target) >> 1;
2085
2086 return target;
2087}
2088
2089static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
2090{
2091 int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
2092 int max_pstate = max(min_pstate, cpu->max_perf_ratio);
2093
2094 return clamp_t(int, pstate, min_pstate, max_pstate);
2095}
2096
2097static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
2098{
2099 if (pstate == cpu->pstate.current_pstate)
2100 return;
2101
2102 cpu->pstate.current_pstate = pstate;
2103 wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
2104}
2105
2106static void intel_pstate_adjust_pstate(struct cpudata *cpu)
2107{
2108 int from = cpu->pstate.current_pstate;
2109 struct sample *sample;
2110 int target_pstate;
2111
2112 update_turbo_state();
2113
2114 target_pstate = get_target_pstate(cpu);
2115 target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2116 trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
2117 intel_pstate_update_pstate(cpu, target_pstate);
2118
2119 sample = &cpu->sample;
2120 trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
2121 fp_toint(sample->busy_scaled),
2122 from,
2123 cpu->pstate.current_pstate,
2124 sample->mperf,
2125 sample->aperf,
2126 sample->tsc,
2127 get_avg_frequency(cpu),
2128 fp_toint(cpu->iowait_boost * 100));
2129}
2130
2131static void intel_pstate_update_util(struct update_util_data *data, u64 time,
2132 unsigned int flags)
2133{
2134 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
2135 u64 delta_ns;
2136
2137
2138 if (smp_processor_id() != cpu->cpu)
2139 return;
2140
2141 delta_ns = time - cpu->last_update;
2142 if (flags & SCHED_CPUFREQ_IOWAIT) {
2143
2144 if (delta_ns > TICK_NSEC) {
2145 cpu->iowait_boost = ONE_EIGHTH_FP;
2146 } else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
2147 cpu->iowait_boost <<= 1;
2148 if (cpu->iowait_boost > int_tofp(1))
2149 cpu->iowait_boost = int_tofp(1);
2150 } else {
2151 cpu->iowait_boost = ONE_EIGHTH_FP;
2152 }
2153 } else if (cpu->iowait_boost) {
2154
2155 if (delta_ns > TICK_NSEC)
2156 cpu->iowait_boost = 0;
2157 else
2158 cpu->iowait_boost >>= 1;
2159 }
2160 cpu->last_update = time;
2161 delta_ns = time - cpu->sample.time;
2162 if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
2163 return;
2164
2165 if (intel_pstate_sample(cpu, time))
2166 intel_pstate_adjust_pstate(cpu);
2167}
2168
2169static struct pstate_funcs core_funcs = {
2170 .get_max = core_get_max_pstate,
2171 .get_max_physical = core_get_max_pstate_physical,
2172 .get_min = core_get_min_pstate,
2173 .get_turbo = core_get_turbo_pstate,
2174 .get_scaling = core_get_scaling,
2175 .get_val = core_get_val,
2176};
2177
2178static const struct pstate_funcs silvermont_funcs = {
2179 .get_max = atom_get_max_pstate,
2180 .get_max_physical = atom_get_max_pstate,
2181 .get_min = atom_get_min_pstate,
2182 .get_turbo = atom_get_turbo_pstate,
2183 .get_val = atom_get_val,
2184 .get_scaling = silvermont_get_scaling,
2185 .get_vid = atom_get_vid,
2186};
2187
2188static const struct pstate_funcs airmont_funcs = {
2189 .get_max = atom_get_max_pstate,
2190 .get_max_physical = atom_get_max_pstate,
2191 .get_min = atom_get_min_pstate,
2192 .get_turbo = atom_get_turbo_pstate,
2193 .get_val = atom_get_val,
2194 .get_scaling = airmont_get_scaling,
2195 .get_vid = atom_get_vid,
2196};
2197
2198static const struct pstate_funcs knl_funcs = {
2199 .get_max = core_get_max_pstate,
2200 .get_max_physical = core_get_max_pstate_physical,
2201 .get_min = core_get_min_pstate,
2202 .get_turbo = knl_get_turbo_pstate,
2203 .get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
2204 .get_scaling = core_get_scaling,
2205 .get_val = core_get_val,
2206};
2207
2208#define X86_MATCH(model, policy) \
2209 X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
2210 X86_FEATURE_APERFMPERF, &policy)
2211
2212static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
2213 X86_MATCH(SANDYBRIDGE, core_funcs),
2214 X86_MATCH(SANDYBRIDGE_X, core_funcs),
2215 X86_MATCH(ATOM_SILVERMONT, silvermont_funcs),
2216 X86_MATCH(IVYBRIDGE, core_funcs),
2217 X86_MATCH(HASWELL, core_funcs),
2218 X86_MATCH(BROADWELL, core_funcs),
2219 X86_MATCH(IVYBRIDGE_X, core_funcs),
2220 X86_MATCH(HASWELL_X, core_funcs),
2221 X86_MATCH(HASWELL_L, core_funcs),
2222 X86_MATCH(HASWELL_G, core_funcs),
2223 X86_MATCH(BROADWELL_G, core_funcs),
2224 X86_MATCH(ATOM_AIRMONT, airmont_funcs),
2225 X86_MATCH(SKYLAKE_L, core_funcs),
2226 X86_MATCH(BROADWELL_X, core_funcs),
2227 X86_MATCH(SKYLAKE, core_funcs),
2228 X86_MATCH(BROADWELL_D, core_funcs),
2229 X86_MATCH(XEON_PHI_KNL, knl_funcs),
2230 X86_MATCH(XEON_PHI_KNM, knl_funcs),
2231 X86_MATCH(ATOM_GOLDMONT, core_funcs),
2232 X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs),
2233 X86_MATCH(SKYLAKE_X, core_funcs),
2234 X86_MATCH(COMETLAKE, core_funcs),
2235 X86_MATCH(ICELAKE_X, core_funcs),
2236 {}
2237};
2238MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
2239
2240static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
2241 X86_MATCH(BROADWELL_D, core_funcs),
2242 X86_MATCH(BROADWELL_X, core_funcs),
2243 X86_MATCH(SKYLAKE_X, core_funcs),
2244 {}
2245};
2246
2247static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
2248 X86_MATCH(KABYLAKE, core_funcs),
2249 {}
2250};
2251
2252static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
2253 X86_MATCH(SKYLAKE_X, core_funcs),
2254 X86_MATCH(SKYLAKE, core_funcs),
2255 {}
2256};
2257
2258static int intel_pstate_init_cpu(unsigned int cpunum)
2259{
2260 struct cpudata *cpu;
2261
2262 cpu = all_cpu_data[cpunum];
2263
2264 if (!cpu) {
2265 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
2266 if (!cpu)
2267 return -ENOMEM;
2268
2269 all_cpu_data[cpunum] = cpu;
2270
2271 cpu->cpu = cpunum;
2272
2273 cpu->epp_default = -EINVAL;
2274
2275 if (hwp_active) {
2276 const struct x86_cpu_id *id;
2277
2278 intel_pstate_hwp_enable(cpu);
2279
2280 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
2281 if (id && intel_pstate_acpi_pm_profile_server())
2282 hwp_boost = true;
2283 }
2284 } else if (hwp_active) {
2285
2286
2287
2288
2289
2290 intel_pstate_hwp_reenable(cpu);
2291 }
2292
2293 cpu->epp_powersave = -EINVAL;
2294 cpu->epp_policy = 0;
2295
2296 intel_pstate_get_cpu_pstates(cpu);
2297
2298 pr_debug("controlling: cpu %d\n", cpunum);
2299
2300 return 0;
2301}
2302
2303static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
2304{
2305 struct cpudata *cpu = all_cpu_data[cpu_num];
2306
2307 if (hwp_active && !hwp_boost)
2308 return;
2309
2310 if (cpu->update_util_set)
2311 return;
2312
2313
2314 cpu->sample.time = 0;
2315 cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
2316 (hwp_active ?
2317 intel_pstate_update_util_hwp :
2318 intel_pstate_update_util));
2319 cpu->update_util_set = true;
2320}
2321
2322static void intel_pstate_clear_update_util_hook(unsigned int cpu)
2323{
2324 struct cpudata *cpu_data = all_cpu_data[cpu];
2325
2326 if (!cpu_data->update_util_set)
2327 return;
2328
2329 cpufreq_remove_update_util_hook(cpu);
2330 cpu_data->update_util_set = false;
2331 synchronize_rcu();
2332}
2333
2334static int intel_pstate_get_max_freq(struct cpudata *cpu)
2335{
2336 return global.turbo_disabled || global.no_turbo ?
2337 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2338}
2339
2340static void intel_pstate_update_perf_limits(struct cpudata *cpu,
2341 unsigned int policy_min,
2342 unsigned int policy_max)
2343{
2344 int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
2345 int32_t max_policy_perf, min_policy_perf;
2346
2347 max_policy_perf = policy_max / perf_ctl_scaling;
2348 if (policy_max == policy_min) {
2349 min_policy_perf = max_policy_perf;
2350 } else {
2351 min_policy_perf = policy_min / perf_ctl_scaling;
2352 min_policy_perf = clamp_t(int32_t, min_policy_perf,
2353 0, max_policy_perf);
2354 }
2355
2356
2357
2358
2359
2360 if (hwp_active) {
2361 intel_pstate_get_hwp_cap(cpu);
2362
2363 if (cpu->pstate.scaling != perf_ctl_scaling) {
2364 int scaling = cpu->pstate.scaling;
2365 int freq;
2366
2367 freq = max_policy_perf * perf_ctl_scaling;
2368 max_policy_perf = DIV_ROUND_UP(freq, scaling);
2369 freq = min_policy_perf * perf_ctl_scaling;
2370 min_policy_perf = DIV_ROUND_UP(freq, scaling);
2371 }
2372 }
2373
2374 pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
2375 cpu->cpu, min_policy_perf, max_policy_perf);
2376
2377
2378 if (per_cpu_limits) {
2379 cpu->min_perf_ratio = min_policy_perf;
2380 cpu->max_perf_ratio = max_policy_perf;
2381 } else {
2382 int turbo_max = cpu->pstate.turbo_pstate;
2383 int32_t global_min, global_max;
2384
2385
2386 global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
2387 global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
2388 global_min = clamp_t(int32_t, global_min, 0, global_max);
2389
2390 pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu,
2391 global_min, global_max);
2392
2393 cpu->min_perf_ratio = max(min_policy_perf, global_min);
2394 cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
2395 cpu->max_perf_ratio = min(max_policy_perf, global_max);
2396 cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
2397
2398
2399 cpu->min_perf_ratio = min(cpu->min_perf_ratio,
2400 cpu->max_perf_ratio);
2401
2402 }
2403 pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu,
2404 cpu->max_perf_ratio,
2405 cpu->min_perf_ratio);
2406}
2407
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpu = all_cpu_data[policy->cpu];
	cpu->policy = policy->policy;

	mutex_lock(&intel_pstate_limits_lock);

	intel_pstate_update_perf_limits(cpu, policy->min, policy->max);

	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
		/*
		 * NOHZ_FULL CPUs need this as the governor callback may not
		 * be invoked on them.
		 */
		intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_max_within_limits(cpu);
	} else {
		intel_pstate_set_update_util_hook(policy->cpu);
	}

	if (hwp_active) {
		/*
		 * When hwp_boost was active before and dynamically it
		 * can be turned on and off. In that case, we need to clear the
		 * update util hook.
		 */
		if (!hwp_boost)
			intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_hwp_set(policy->cpu);
	}

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}

static void intel_pstate_adjust_policy_max(struct cpudata *cpu,
					   struct cpufreq_policy_data *policy)
{
	if (!hwp_active &&
	    cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
	    policy->max < policy->cpuinfo.max_freq &&
	    policy->max > cpu->pstate.max_freq) {
		pr_debug("policy->max > max non turbo frequency\n");
		policy->max = policy->cpuinfo.max_freq;
	}
}

static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
					   struct cpufreq_policy_data *policy)
{
	int max_freq;

	update_turbo_state();
	if (hwp_active) {
		intel_pstate_get_hwp_cap(cpu);
		max_freq = global.no_turbo || global.turbo_disabled ?
				cpu->pstate.max_freq : cpu->pstate.turbo_freq;
	} else {
		max_freq = intel_pstate_get_max_freq(cpu);
	}
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq);

	intel_pstate_adjust_policy_max(cpu, policy);
}

static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
{
	intel_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);

	return 0;
}

static int intel_cpufreq_cpu_offline(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d going offline\n", cpu->cpu);

	if (cpu->suspended)
		return 0;

	/*
	 * If the CPU is an SMT thread and it goes offline with the performance
	 * settings different from the minimum, it will prevent its sibling
	 * from getting to lower performance levels, so force the minimum
	 * performance on CPU offline to prevent that from happening.
	 */
	if (hwp_active)
		intel_pstate_hwp_offline(cpu);
	else
		intel_pstate_set_min_pstate(cpu);

	intel_pstate_exit_perf_limits(policy);

	return 0;
}

static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d going online\n", cpu->cpu);

	intel_pstate_init_acpi_perf_limits(policy);

	if (hwp_active) {
		/*
		 * Re-enable HWP and clear the "suspended" flag to let "resume"
		 * know that it need not do that.
		 */
		intel_pstate_hwp_reenable(cpu);
		cpu->suspended = false;
	}

	return 0;
}

static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
{
	intel_pstate_clear_update_util_hook(policy->cpu);

	return intel_cpufreq_cpu_offline(policy);
}

static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	pr_debug("CPU %d exiting\n", policy->cpu);

	policy->fast_switch_possible = false;

	return 0;
}

static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	cpu->max_perf_ratio = 0xFF;
	cpu->min_perf_ratio = 0;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_freq;
	update_turbo_state();
	global.turbo_disabled_mf = global.turbo_disabled;
	policy->cpuinfo.max_freq = global.turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	intel_pstate_init_acpi_perf_limits(policy);

	policy->fast_switch_possible = true;

	return 0;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	/*
	 * Set the policy to powersave to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	policy->policy = CPUFREQ_POLICY_POWERSAVE;

	if (hwp_active) {
		struct cpudata *cpu = all_cpu_data[policy->cpu];

		cpu->epp_cached = intel_pstate_get_epp(cpu, 0);
	}

	return 0;
}

static struct cpufreq_driver intel_pstate = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.suspend	= intel_pstate_suspend,
	.resume		= intel_pstate_resume,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.offline	= intel_pstate_cpu_offline,
	.online		= intel_pstate_cpu_online,
	.update_limits	= intel_pstate_update_limits,
	.name		= "intel_pstate",
};

static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	intel_pstate_verify_cpu_policy(cpu, policy);
	intel_pstate_update_perf_limits(cpu, policy->min, policy->max);

	return 0;
}

/*
 * Use of the pstate_sample tracepoint in passive mode:
 *
 * In passive mode the core_busy (core_avg_perf) field of the tracepoint is
 * not needed, so it is repurposed to record which driver path requested the
 * frequency change: INTEL_PSTATE_TRACE_TARGET for the regular ->target()
 * path and INTEL_PSTATE_TRACE_FAST_SWITCH for the ->fast_switch() path.
 */
#define INTEL_PSTATE_TRACE_TARGET 10
#define INTEL_PSTATE_TRACE_FAST_SWITCH 90

static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
{
	struct sample *sample;

	if (!trace_pstate_sample_enabled())
		return;

	if (!intel_pstate_sample(cpu, ktime_get()))
		return;

	sample = &cpu->sample;
	trace_pstate_sample(trace_type,
		0,
		old_pstate,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}

static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
				     u32 desired, bool fast_switch)
{
	u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev;

	value &= ~HWP_MIN_PERF(~0L);
	value |= HWP_MIN_PERF(min);

	value &= ~HWP_MAX_PERF(~0L);
	value |= HWP_MAX_PERF(max);

	value &= ~HWP_DESIRED_PERF(~0L);
	value |= HWP_DESIRED_PERF(desired);

	if (value == prev)
		return;

	WRITE_ONCE(cpu->hwp_req_cached, value);
	if (fast_switch)
		wrmsrl(MSR_HWP_REQUEST, value);
	else
		wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}

static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu,
					  u32 target_pstate, bool fast_switch)
{
	if (fast_switch)
		wrmsrl(MSR_IA32_PERF_CTL,
		       pstate_funcs.get_val(cpu, target_pstate));
	else
		wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
			      pstate_funcs.get_val(cpu, target_pstate));
}

static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy,
				       int target_pstate, bool fast_switch)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int old_pstate = cpu->pstate.current_pstate;

	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	if (hwp_active) {
		int max_pstate = policy->strict_target ?
					target_pstate : cpu->max_perf_ratio;

		intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0,
					 fast_switch);
	} else if (target_pstate != old_pstate) {
		intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch);
	}

	cpu->pstate.current_pstate = target_pstate;

	intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH :
			    INTEL_PSTATE_TRACE_TARGET, old_pstate);

	return target_pstate;
}

static int intel_cpufreq_target(struct cpufreq_policy *policy,
				unsigned int target_freq,
				unsigned int relation)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct cpufreq_freqs freqs;
	int target_pstate;

	update_turbo_state();

	freqs.old = policy->cur;
	freqs.new = target_freq;

	cpufreq_freq_transition_begin(policy, &freqs);

	switch (relation) {
	case CPUFREQ_RELATION_L:
		target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
		break;
	case CPUFREQ_RELATION_H:
		target_pstate = freqs.new / cpu->pstate.scaling;
		break;
	default:
		target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
		break;
	}

	target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);

	freqs.new = target_pstate * cpu->pstate.scaling;

	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
					      unsigned int target_freq)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int target_pstate;

	update_turbo_state();

	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);

	target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);

	return target_pstate * cpu->pstate.scaling;
}

static void intel_cpufreq_adjust_perf(unsigned int cpunum,
				      unsigned long min_perf,
				      unsigned long target_perf,
				      unsigned long capacity)
{
	struct cpudata *cpu = all_cpu_data[cpunum];
	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
	int old_pstate = cpu->pstate.current_pstate;
	int cap_pstate, min_pstate, max_pstate, target_pstate;

	update_turbo_state();
	cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) :
					     HWP_HIGHEST_PERF(hwp_cap);

	/* Optimization: Avoid unnecessary divisions. */

	target_pstate = cap_pstate;
	if (target_perf < capacity)
		target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity);

	min_pstate = cap_pstate;
	if (min_perf < capacity)
		min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity);

	if (min_pstate < cpu->pstate.min_pstate)
		min_pstate = cpu->pstate.min_pstate;

	if (min_pstate < cpu->min_perf_ratio)
		min_pstate = cpu->min_perf_ratio;

	max_pstate = min(cap_pstate, cpu->max_perf_ratio);
	if (max_pstate < min_pstate)
		max_pstate = min_pstate;

	target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate);

	intel_cpufreq_hwp_update(cpu, min_pstate, max_pstate, target_pstate, true);

	cpu->pstate.current_pstate = target_pstate;
	intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
}

static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	struct freq_qos_request *req;
	struct cpudata *cpu;
	struct device *dev;
	int ret, freq;

	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	ret = __intel_pstate_cpu_init(policy);
	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;

	policy->cur = policy->cpuinfo.min_freq;

	req = kcalloc(2, sizeof(*req), GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto pstate_exit;
	}

	cpu = all_cpu_data[policy->cpu];

	if (hwp_active) {
		u64 value;

		policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;

		intel_pstate_get_hwp_cap(cpu);

		rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
		WRITE_ONCE(cpu->hwp_req_cached, value);

		cpu->epp_cached = intel_pstate_get_epp(cpu, value);
	} else {
		policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
	}

	freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100);

	ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
				   freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_req;
	}

	freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100);

	ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
				   freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto remove_min_req;
	}

	policy->driver_data = req;

	return 0;

remove_min_req:
	freq_qos_remove_request(req);
free_req:
	kfree(req);
pstate_exit:
	intel_pstate_exit_perf_limits(policy);

	return ret;
}

static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct freq_qos_request *req;

	req = policy->driver_data;

	freq_qos_remove_request(req + 1);
	freq_qos_remove_request(req);
	kfree(req);

	return intel_pstate_cpu_exit(policy);
}

static struct cpufreq_driver intel_cpufreq = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_cpufreq_verify_policy,
	.target		= intel_cpufreq_target,
	.fast_switch	= intel_cpufreq_fast_switch,
	.init		= intel_cpufreq_cpu_init,
	.exit		= intel_cpufreq_cpu_exit,
	.offline	= intel_cpufreq_cpu_offline,
	.online		= intel_pstate_cpu_online,
	.suspend	= intel_pstate_suspend,
	.resume		= intel_pstate_resume,
	.update_limits	= intel_pstate_update_limits,
	.name		= "intel_cpufreq",
};

static struct cpufreq_driver *default_driver;

static void intel_pstate_driver_cleanup(void)
{
	unsigned int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			if (intel_pstate_driver == &intel_pstate)
				intel_pstate_clear_update_util_hook(cpu);

			kfree(all_cpu_data[cpu]);
			all_cpu_data[cpu] = NULL;
		}
	}
	cpus_read_unlock();

	intel_pstate_driver = NULL;
}

static int intel_pstate_register_driver(struct cpufreq_driver *driver)
{
	int ret;

	if (driver == &intel_pstate)
		intel_pstate_sysfs_expose_hwp_dynamic_boost();

	memset(&global, 0, sizeof(global));
	global.max_perf_pct = 100;

	intel_pstate_driver = driver;
	ret = cpufreq_register_driver(intel_pstate_driver);
	if (ret) {
		intel_pstate_driver_cleanup();
		return ret;
	}

	global.min_perf_pct = min_perf_pct_min();

	return 0;
}

static ssize_t intel_pstate_show_status(char *buf)
{
	if (!intel_pstate_driver)
		return sprintf(buf, "off\n");

	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
					"active" : "passive");
}

static int intel_pstate_update_status(const char *buf, size_t size)
{
	if (size == 3 && !strncmp(buf, "off", size)) {
		if (!intel_pstate_driver)
			return -EINVAL;

		if (hwp_active)
			return -EBUSY;

		cpufreq_unregister_driver(intel_pstate_driver);
		intel_pstate_driver_cleanup();
		return 0;
	}

	if (size == 6 && !strncmp(buf, "active", size)) {
		if (intel_pstate_driver) {
			if (intel_pstate_driver == &intel_pstate)
				return 0;

			cpufreq_unregister_driver(intel_pstate_driver);
		}

		return intel_pstate_register_driver(&intel_pstate);
	}

	if (size == 7 && !strncmp(buf, "passive", size)) {
		if (intel_pstate_driver) {
			if (intel_pstate_driver == &intel_cpufreq)
				return 0;

			cpufreq_unregister_driver(intel_pstate_driver);
			intel_pstate_sysfs_hide_hwp_dynamic_boost();
		}

		return intel_pstate_register_driver(&intel_cpufreq);
	}

	return -EINVAL;
}

static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
static unsigned int force_load __initdata;

static int __init intel_pstate_msrs_not_valid(void)
{
	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	return 0;
}

static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max   = funcs->get_max;
	pstate_funcs.get_max_physical = funcs->get_max_physical;
	pstate_funcs.get_min   = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.get_scaling = funcs->get_scaling;
	pstate_funcs.get_val   = funcs->get_val;
	pstate_funcs.get_vid   = funcs->get_vid;
	pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
}

#ifdef CONFIG_ACPI

static bool __init intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	pr_debug("ACPI _PSS not found\n");
	return true;
}

static bool __init intel_pstate_no_acpi_pcch(void)
{
	acpi_status status;
	acpi_handle handle;

	status = acpi_get_handle(NULL, "\\_SB", &handle);
	if (ACPI_FAILURE(status))
		goto not_found;

	if (acpi_has_method(handle, "PCCH"))
		return false;

not_found:
	pr_debug("ACPI PCCH not found\n");
	return true;
}

static bool __init intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	pr_debug("ACPI _PPC not found\n");
	return false;
}

enum {
	PSS,
	PPC,
};

/* Hardware vendor-specific info that has its own power management modes */
static struct acpi_platform_list plat_info[] __initdata = {
	{"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, NULL, PSS},
	{"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
	{ } /* End */
};

#define BITMASK_OOB	(BIT(8) | BIT(18))

static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
{
	const struct x86_cpu_id *id;
	u64 misc_pwr;
	int idx;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if (misc_pwr & BITMASK_OOB) {
			pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n");
			pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n");
			return true;
		}
	}

	idx = acpi_match_platform_list(plat_info);
	if (idx < 0)
		return false;

	switch (plat_info[idx].data) {
	case PSS:
		if (!intel_pstate_no_acpi_pss())
			return false;

		return intel_pstate_no_acpi_pcch();
	case PPC:
		return intel_pstate_has_acpi_ppc() && !force_load;
	}

	return false;
}

static void intel_pstate_request_control_from_smm(void)
{
	/*
	 * It may be unsafe to request P-states control from SMM if _PPC support
	 * has not been enabled.
	 */
	if (acpi_ppc)
		acpi_processor_pstate_control();
}
#else
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
static inline void intel_pstate_request_control_from_smm(void) {}
#endif /* CONFIG_ACPI */

#define INTEL_PSTATE_HWP_BROADWELL	0x01

#define X86_MATCH_HWP(model, hwp_mode)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
					   X86_FEATURE_HWP, hwp_mode)

static const struct x86_cpu_id hwp_support_ids[] __initconst = {
	X86_MATCH_HWP(BROADWELL_X,	INTEL_PSTATE_HWP_BROADWELL),
	X86_MATCH_HWP(BROADWELL_D,	INTEL_PSTATE_HWP_BROADWELL),
	X86_MATCH_HWP(ANY,		0),
	{}
};

static bool intel_pstate_hwp_is_enabled(void)
{
	u64 value;

	rdmsrl(MSR_PM_ENABLE, value);
	return !!(value & 0x1);
}

static int __init intel_pstate_init(void)
{
	const struct x86_cpu_id *id;
	int rc;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	id = x86_match_cpu(hwp_support_ids);
	if (id) {
		bool hwp_forced = intel_pstate_hwp_is_enabled();

		if (hwp_forced)
			pr_info("HWP enabled by BIOS\n");
		else if (no_load)
			return -ENODEV;

		copy_cpu_funcs(&core_funcs);
		/*
		 * Avoid enabling HWP for processors without EPP support,
		 * because that means incomplete HWP implementation which is a
		 * corner case and supporting it is generally problematic.
		 *
		 * If HWP is enabled already, though, there is no choice but to
		 * deal with it.
		 */
		if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
			hwp_active++;
			hwp_mode_bdw = id->driver_data;
			intel_pstate.attr = hwp_cpufreq_attrs;
			intel_cpufreq.attr = hwp_cpufreq_attrs;
			intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
			intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf;
			if (!default_driver)
				default_driver = &intel_pstate;

			if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
				intel_pstate_cppc_set_cpu_scaling();

			goto hwp_cpu_matched;
		}
		pr_info("HWP not enabled\n");
	} else {
		if (no_load)
			return -ENODEV;

		id = x86_match_cpu(intel_pstate_cpu_ids);
		if (!id) {
			pr_info("CPU model not supported\n");
			return -ENODEV;
		}

		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
	}

	if (intel_pstate_msrs_not_valid()) {
		pr_info("Invalid MSRs\n");
		return -ENODEV;
	}

	if (!default_driver)
		default_driver = &intel_cpufreq;

hwp_cpu_matched:
	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists()) {
		pr_info("P-states controlled by the platform\n");
		return -ENODEV;
	}

	if (!hwp_active && hwp_only)
		return -ENOTSUPP;

	pr_info("Intel P-state driver initializing\n");

	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
	if (!all_cpu_data)
		return -ENOMEM;

	intel_pstate_request_control_from_smm();

	intel_pstate_sysfs_expose_params();

	mutex_lock(&intel_pstate_driver_lock);
	rc = intel_pstate_register_driver(default_driver);
	mutex_unlock(&intel_pstate_driver_lock);
	if (rc) {
		intel_pstate_sysfs_remove();
		return rc;
	}

	if (hwp_active) {
		const struct x86_cpu_id *id;

		id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
		if (id) {
			set_power_ctl_ee_state(false);
			pr_info("Disabling energy efficiency optimization\n");
		}

		pr_info("HWP enabled\n");
	} else if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
		pr_warn("Problematic setup: Hybrid processor with disabled HWP\n");
	}

	return 0;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable"))
		no_load = 1;
	else if (!strcmp(str, "active"))
		default_driver = &intel_pstate;
	else if (!strcmp(str, "passive"))
		default_driver = &intel_cpufreq;

	if (!strcmp(str, "no_hwp"))
		no_hwp = 1;

	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	if (!strcmp(str, "per_cpu_perf_limits"))
		per_cpu_limits = true;

#ifdef CONFIG_ACPI
	if (!strcmp(str, "support_acpi_ppc"))
		acpi_ppc = true;
#endif

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");