linux/arch/x86/kernel/itmt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
   4 *
   5 * (C) Copyright 2016 Intel Corporation
   6 * Author: Tim Chen <tim.c.chen@linux.intel.com>
   7 *
   8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
   9 * the maximum turbo frequencies of some cores in a CPU package may be
  10 * higher than for the other cores in the same package.  In that case,
  11 * better performance can be achieved by making the scheduler prefer
  12 * to run tasks on the CPUs with higher max turbo frequencies.
  13 *
  14 * This file provides functions and data structures for enabling the
  15 * scheduler to favor scheduling on cores that can be boosted to a higher
  16 * frequency under ITMT.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/cpumask.h>
  21#include <linux/cpuset.h>
  22#include <linux/mutex.h>
  23#include <linux/sysctl.h>
  24#include <linux/nodemask.h>
  25
  26static DEFINE_MUTEX(itmt_update_mutex);
  27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
  28
  29/* Boolean to track if system has ITMT capabilities */
  30static bool __read_mostly sched_itmt_capable;
  31
  32/*
  33 * Boolean to control whether we want to move processes to cpu capable
  34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
  35 * Technology 3.0.
  36 *
  37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
  38 */
  39unsigned int __read_mostly sysctl_sched_itmt_enabled;
  40
  41static int sched_itmt_update_handler(struct ctl_table *table, int write,
  42                                     void *buffer, size_t *lenp, loff_t *ppos)
  43{
  44        unsigned int old_sysctl;
  45        int ret;
  46
  47        mutex_lock(&itmt_update_mutex);
  48
  49        if (!sched_itmt_capable) {
  50                mutex_unlock(&itmt_update_mutex);
  51                return -EINVAL;
  52        }
  53
  54        old_sysctl = sysctl_sched_itmt_enabled;
  55        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  56
  57        if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
  58                x86_topology_update = true;
  59                rebuild_sched_domains();
  60        }
  61
  62        mutex_unlock(&itmt_update_mutex);
  63
  64        return ret;
  65}
  66
  67static struct ctl_table itmt_kern_table[] = {
  68        {
  69                .procname       = "sched_itmt_enabled",
  70                .data           = &sysctl_sched_itmt_enabled,
  71                .maxlen         = sizeof(unsigned int),
  72                .mode           = 0644,
  73                .proc_handler   = sched_itmt_update_handler,
  74                .extra1         = SYSCTL_ZERO,
  75                .extra2         = SYSCTL_ONE,
  76        },
  77        {}
  78};
  79
  80static struct ctl_table itmt_root_table[] = {
  81        {
  82                .procname       = "kernel",
  83                .mode           = 0555,
  84                .child          = itmt_kern_table,
  85        },
  86        {}
  87};
  88
  89static struct ctl_table_header *itmt_sysctl_header;
  90
  91/**
  92 * sched_set_itmt_support() - Indicate platform supports ITMT
  93 *
  94 * This function is used by the OS to indicate to scheduler that the platform
  95 * is capable of supporting the ITMT feature.
  96 *
  97 * The current scheme has the pstate driver detects if the system
  98 * is ITMT capable and call sched_set_itmt_support.
  99 *
 100 * This must be done only after sched_set_itmt_core_prio
 101 * has been called to set the cpus' priorities.
 102 * It must not be called with cpu hot plug lock
 103 * held as we need to acquire the lock to rebuild sched domains
 104 * later.
 105 *
 106 * Return: 0 on success
 107 */
 108int sched_set_itmt_support(void)
 109{
 110        mutex_lock(&itmt_update_mutex);
 111
 112        if (sched_itmt_capable) {
 113                mutex_unlock(&itmt_update_mutex);
 114                return 0;
 115        }
 116
 117        itmt_sysctl_header = register_sysctl_table(itmt_root_table);
 118        if (!itmt_sysctl_header) {
 119                mutex_unlock(&itmt_update_mutex);
 120                return -ENOMEM;
 121        }
 122
 123        sched_itmt_capable = true;
 124
 125        sysctl_sched_itmt_enabled = 1;
 126
 127        x86_topology_update = true;
 128        rebuild_sched_domains();
 129
 130        mutex_unlock(&itmt_update_mutex);
 131
 132        return 0;
 133}
 134
 135/**
 136 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 137 *
 138 * This function is used by the OS to indicate that it has
 139 * revoked the platform's support of ITMT feature.
 140 *
 141 * It must not be called with cpu hot plug lock
 142 * held as we need to acquire the lock to rebuild sched domains
 143 * later.
 144 */
 145void sched_clear_itmt_support(void)
 146{
 147        mutex_lock(&itmt_update_mutex);
 148
 149        if (!sched_itmt_capable) {
 150                mutex_unlock(&itmt_update_mutex);
 151                return;
 152        }
 153        sched_itmt_capable = false;
 154
 155        if (itmt_sysctl_header) {
 156                unregister_sysctl_table(itmt_sysctl_header);
 157                itmt_sysctl_header = NULL;
 158        }
 159
 160        if (sysctl_sched_itmt_enabled) {
 161                /* disable sched_itmt if we are no longer ITMT capable */
 162                sysctl_sched_itmt_enabled = 0;
 163                x86_topology_update = true;
 164                rebuild_sched_domains();
 165        }
 166
 167        mutex_unlock(&itmt_update_mutex);
 168}
 169
 170int arch_asym_cpu_priority(int cpu)
 171{
 172        return per_cpu(sched_core_priority, cpu);
 173}
 174
 175/**
 176 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 177 * @prio:       Priority of cpu core
 178 * @core_cpu:   The cpu number associated with the core
 179 *
 180 * The pstate driver will find out the max boost frequency
 181 * and call this function to set a priority proportional
 182 * to the max boost frequency. CPU with higher boost
 183 * frequency will receive higher priority.
 184 *
 185 * No need to rebuild sched domain after updating
 186 * the CPU priorities. The sched domains have no
 187 * dependency on CPU priorities.
 188 */
 189void sched_set_itmt_core_prio(int prio, int core_cpu)
 190{
 191        int cpu, i = 1;
 192
 193        for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
 194                int smt_prio;
 195
 196                /*
 197                 * Ensure that the siblings are moved to the end
 198                 * of the priority chain and only used when
 199                 * all other high priority cpus are out of capacity.
 200                 */
 201                smt_prio = prio * smp_num_siblings / i;
 202                per_cpu(sched_core_priority, cpu) = smt_prio;
 203                i++;
 204        }
 205}
 206