linux/arch/x86/kernel/itmt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
/*
 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
 *
 * (C) Copyright 2016 Intel Corporation
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
 * the maximum turbo frequencies of some cores in a CPU package may be
 * higher than for the other cores in the same package.  In that case,
 * better performance can be achieved by making the scheduler prefer
 * to run tasks on the CPUs with higher max turbo frequencies.
 *
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
 */
  18
  19#include <linux/sched.h>
  20#include <linux/cpumask.h>
  21#include <linux/cpuset.h>
  22#include <linux/mutex.h>
  23#include <linux/sysctl.h>
  24#include <linux/nodemask.h>
  25
  26static DEFINE_MUTEX(itmt_update_mutex);
  27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
  28
  29/* Boolean to track if system has ITMT capabilities */
  30static bool __read_mostly sched_itmt_capable;
  31
  32/*
  33 * Boolean to control whether we want to move processes to cpu capable
  34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
  35 * Technology 3.0.
  36 *
  37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
  38 */
  39unsigned int __read_mostly sysctl_sched_itmt_enabled;
  40
  41static int sched_itmt_update_handler(struct ctl_table *table, int write,
  42                                     void __user *buffer, size_t *lenp,
  43                                     loff_t *ppos)
  44{
  45        unsigned int old_sysctl;
  46        int ret;
  47
  48        mutex_lock(&itmt_update_mutex);
  49
  50        if (!sched_itmt_capable) {
  51                mutex_unlock(&itmt_update_mutex);
  52                return -EINVAL;
  53        }
  54
  55        old_sysctl = sysctl_sched_itmt_enabled;
  56        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  57
  58        if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
  59                x86_topology_update = true;
  60                rebuild_sched_domains();
  61        }
  62
  63        mutex_unlock(&itmt_update_mutex);
  64
  65        return ret;
  66}
  67
  68static struct ctl_table itmt_kern_table[] = {
  69        {
  70                .procname       = "sched_itmt_enabled",
  71                .data           = &sysctl_sched_itmt_enabled,
  72                .maxlen         = sizeof(unsigned int),
  73                .mode           = 0644,
  74                .proc_handler   = sched_itmt_update_handler,
  75                .extra1         = SYSCTL_ZERO,
  76                .extra2         = SYSCTL_ONE,
  77        },
  78        {}
  79};
  80
  81static struct ctl_table itmt_root_table[] = {
  82        {
  83                .procname       = "kernel",
  84                .mode           = 0555,
  85                .child          = itmt_kern_table,
  86        },
  87        {}
  88};
  89
  90static struct ctl_table_header *itmt_sysctl_header;
  91
  92/**
  93 * sched_set_itmt_support() - Indicate platform supports ITMT
  94 *
  95 * This function is used by the OS to indicate to scheduler that the platform
  96 * is capable of supporting the ITMT feature.
  97 *
  98 * The current scheme has the pstate driver detects if the system
  99 * is ITMT capable and call sched_set_itmt_support.
 100 *
 101 * This must be done only after sched_set_itmt_core_prio
 102 * has been called to set the cpus' priorities.
 103 * It must not be called with cpu hot plug lock
 104 * held as we need to acquire the lock to rebuild sched domains
 105 * later.
 106 *
 107 * Return: 0 on success
 108 */
 109int sched_set_itmt_support(void)
 110{
 111        mutex_lock(&itmt_update_mutex);
 112
 113        if (sched_itmt_capable) {
 114                mutex_unlock(&itmt_update_mutex);
 115                return 0;
 116        }
 117
 118        itmt_sysctl_header = register_sysctl_table(itmt_root_table);
 119        if (!itmt_sysctl_header) {
 120                mutex_unlock(&itmt_update_mutex);
 121                return -ENOMEM;
 122        }
 123
 124        sched_itmt_capable = true;
 125
 126        sysctl_sched_itmt_enabled = 1;
 127
 128        x86_topology_update = true;
 129        rebuild_sched_domains();
 130
 131        mutex_unlock(&itmt_update_mutex);
 132
 133        return 0;
 134}
 135
 136/**
 137 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 138 *
 139 * This function is used by the OS to indicate that it has
 140 * revoked the platform's support of ITMT feature.
 141 *
 142 * It must not be called with cpu hot plug lock
 143 * held as we need to acquire the lock to rebuild sched domains
 144 * later.
 145 */
 146void sched_clear_itmt_support(void)
 147{
 148        mutex_lock(&itmt_update_mutex);
 149
 150        if (!sched_itmt_capable) {
 151                mutex_unlock(&itmt_update_mutex);
 152                return;
 153        }
 154        sched_itmt_capable = false;
 155
 156        if (itmt_sysctl_header) {
 157                unregister_sysctl_table(itmt_sysctl_header);
 158                itmt_sysctl_header = NULL;
 159        }
 160
 161        if (sysctl_sched_itmt_enabled) {
 162                /* disable sched_itmt if we are no longer ITMT capable */
 163                sysctl_sched_itmt_enabled = 0;
 164                x86_topology_update = true;
 165                rebuild_sched_domains();
 166        }
 167
 168        mutex_unlock(&itmt_update_mutex);
 169}
 170
 171int arch_asym_cpu_priority(int cpu)
 172{
 173        return per_cpu(sched_core_priority, cpu);
 174}
 175
 176/**
 177 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 178 * @prio:       Priority of cpu core
 179 * @core_cpu:   The cpu number associated with the core
 180 *
 181 * The pstate driver will find out the max boost frequency
 182 * and call this function to set a priority proportional
 183 * to the max boost frequency. CPU with higher boost
 184 * frequency will receive higher priority.
 185 *
 186 * No need to rebuild sched domain after updating
 187 * the CPU priorities. The sched domains have no
 188 * dependency on CPU priorities.
 189 */
 190void sched_set_itmt_core_prio(int prio, int core_cpu)
 191{
 192        int cpu, i = 1;
 193
 194        for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
 195                int smt_prio;
 196
 197                /*
 198                 * Ensure that the siblings are moved to the end
 199                 * of the priority chain and only used when
 200                 * all other high priority cpus are out of capacity.
 201                 */
 202                smt_prio = prio * smp_num_siblings / i;
 203                per_cpu(sched_core_priority, cpu) = smt_prio;
 204                i++;
 205        }
 206}
 207