linux/arch/x86/kernel/itmt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
   4 *
   5 * (C) Copyright 2016 Intel Corporation
   6 * Author: Tim Chen <tim.c.chen@linux.intel.com>
   7 *
   8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
   9 * the maximum turbo frequencies of some cores in a CPU package may be
  10 * higher than for the other cores in the same package.  In that case,
  11 * better performance can be achieved by making the scheduler prefer
  12 * to run tasks on the CPUs with higher max turbo frequencies.
  13 *
  14 * This file provides functions and data structures for enabling the
  15 * scheduler to favor scheduling on cores that can be boosted to a
  16 * higher frequency under ITMT.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/cpumask.h>
  21#include <linux/cpuset.h>
  22#include <linux/mutex.h>
  23#include <linux/sysctl.h>
  24#include <linux/nodemask.h>
  25
  26static DEFINE_MUTEX(itmt_update_mutex);
  27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
  28
  29/* Boolean to track if system has ITMT capabilities */
  30static bool __read_mostly sched_itmt_capable;
  31
  32/*
  33 * Boolean to control whether we want to move processes to cpu capable
  34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
  35 * Technology 3.0.
  36 *
  37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
  38 */
  39unsigned int __read_mostly sysctl_sched_itmt_enabled;
  40
  41static int sched_itmt_update_handler(struct ctl_table *table, int write,
  42                                     void __user *buffer, size_t *lenp,
  43                                     loff_t *ppos)
  44{
  45        unsigned int old_sysctl;
  46        int ret;
  47
  48        mutex_lock(&itmt_update_mutex);
  49
  50        if (!sched_itmt_capable) {
  51                mutex_unlock(&itmt_update_mutex);
  52                return -EINVAL;
  53        }
  54
  55        old_sysctl = sysctl_sched_itmt_enabled;
  56        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  57
  58        if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
  59                x86_topology_update = true;
  60                rebuild_sched_domains();
  61        }
  62
  63        mutex_unlock(&itmt_update_mutex);
  64
  65        return ret;
  66}
  67
  68static unsigned int zero;
  69static unsigned int one = 1;
  70static struct ctl_table itmt_kern_table[] = {
  71        {
  72                .procname       = "sched_itmt_enabled",
  73                .data           = &sysctl_sched_itmt_enabled,
  74                .maxlen         = sizeof(unsigned int),
  75                .mode           = 0644,
  76                .proc_handler   = sched_itmt_update_handler,
  77                .extra1         = &zero,
  78                .extra2         = &one,
  79        },
  80        {}
  81};
  82
  83static struct ctl_table itmt_root_table[] = {
  84        {
  85                .procname       = "kernel",
  86                .mode           = 0555,
  87                .child          = itmt_kern_table,
  88        },
  89        {}
  90};
  91
  92static struct ctl_table_header *itmt_sysctl_header;
  93
  94/**
  95 * sched_set_itmt_support() - Indicate platform supports ITMT
  96 *
  97 * This function is used by the OS to indicate to scheduler that the platform
  98 * is capable of supporting the ITMT feature.
  99 *
 100 * The current scheme has the pstate driver detects if the system
 101 * is ITMT capable and call sched_set_itmt_support.
 102 *
 103 * This must be done only after sched_set_itmt_core_prio
 104 * has been called to set the cpus' priorities.
 105 * It must not be called with cpu hot plug lock
 106 * held as we need to acquire the lock to rebuild sched domains
 107 * later.
 108 *
 109 * Return: 0 on success
 110 */
 111int sched_set_itmt_support(void)
 112{
 113        mutex_lock(&itmt_update_mutex);
 114
 115        if (sched_itmt_capable) {
 116                mutex_unlock(&itmt_update_mutex);
 117                return 0;
 118        }
 119
 120        itmt_sysctl_header = register_sysctl_table(itmt_root_table);
 121        if (!itmt_sysctl_header) {
 122                mutex_unlock(&itmt_update_mutex);
 123                return -ENOMEM;
 124        }
 125
 126        sched_itmt_capable = true;
 127
 128        sysctl_sched_itmt_enabled = 1;
 129
 130        x86_topology_update = true;
 131        rebuild_sched_domains();
 132
 133        mutex_unlock(&itmt_update_mutex);
 134
 135        return 0;
 136}
 137
 138/**
 139 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 140 *
 141 * This function is used by the OS to indicate that it has
 142 * revoked the platform's support of ITMT feature.
 143 *
 144 * It must not be called with cpu hot plug lock
 145 * held as we need to acquire the lock to rebuild sched domains
 146 * later.
 147 */
 148void sched_clear_itmt_support(void)
 149{
 150        mutex_lock(&itmt_update_mutex);
 151
 152        if (!sched_itmt_capable) {
 153                mutex_unlock(&itmt_update_mutex);
 154                return;
 155        }
 156        sched_itmt_capable = false;
 157
 158        if (itmt_sysctl_header) {
 159                unregister_sysctl_table(itmt_sysctl_header);
 160                itmt_sysctl_header = NULL;
 161        }
 162
 163        if (sysctl_sched_itmt_enabled) {
 164                /* disable sched_itmt if we are no longer ITMT capable */
 165                sysctl_sched_itmt_enabled = 0;
 166                x86_topology_update = true;
 167                rebuild_sched_domains();
 168        }
 169
 170        mutex_unlock(&itmt_update_mutex);
 171}
 172
 173int arch_asym_cpu_priority(int cpu)
 174{
 175        return per_cpu(sched_core_priority, cpu);
 176}
 177
 178/**
 179 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 180 * @prio:       Priority of cpu core
 181 * @core_cpu:   The cpu number associated with the core
 182 *
 183 * The pstate driver will find out the max boost frequency
 184 * and call this function to set a priority proportional
 185 * to the max boost frequency. CPU with higher boost
 186 * frequency will receive higher priority.
 187 *
 188 * No need to rebuild sched domain after updating
 189 * the CPU priorities. The sched domains have no
 190 * dependency on CPU priorities.
 191 */
 192void sched_set_itmt_core_prio(int prio, int core_cpu)
 193{
 194        int cpu, i = 1;
 195
 196        for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
 197                int smt_prio;
 198
 199                /*
 200                 * Ensure that the siblings are moved to the end
 201                 * of the priority chain and only used when
 202                 * all other high priority cpus are out of capacity.
 203                 */
 204                smt_prio = prio * smp_num_siblings / i;
 205                per_cpu(sched_core_priority, cpu) = smt_prio;
 206                i++;
 207        }
 208}
 209