linux/arch/x86/kernel/itmt.c
<<
>>
Prefs
   1/*
   2 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
   3 *
   4 * (C) Copyright 2016 Intel Corporation
   5 * Author: Tim Chen <tim.c.chen@linux.intel.com>
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License
   9 * as published by the Free Software Foundation; version 2
  10 * of the License.
  11 *
  12 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
  13 * the maximum turbo frequencies of some cores in a CPU package may be
  14 * higher than for the other cores in the same package.  In that case,
  15 * better performance can be achieved by making the scheduler prefer
  16 * to run tasks on the CPUs with higher max turbo frequencies.
  17 *
  18 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
  20 * frequency under ITMT.
  21 */
  22
  23#include <linux/sched.h>
  24#include <linux/cpumask.h>
  25#include <linux/cpuset.h>
  26#include <linux/mutex.h>
  27#include <linux/sysctl.h>
  28#include <linux/nodemask.h>
  29
/*
 * Held by the sysctl handler and by sched_set_itmt_support() /
 * sched_clear_itmt_support() while they inspect or change the ITMT state.
 */
static DEFINE_MUTEX(itmt_update_mutex);
/*
 * Per-CPU priority value: written by sched_set_itmt_core_prio() and
 * returned by arch_asym_cpu_priority().
 */
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);

/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;

/*
 * Boolean to control whether we want to move processes to CPUs capable
 * of a higher turbo frequency on systems supporting Intel Turbo Boost Max
 * Technology 3.0.
 *
 * It can be set via /proc/sys/kernel/sched_itmt_enabled
 */
unsigned int __read_mostly sysctl_sched_itmt_enabled;
  44
  45static int sched_itmt_update_handler(struct ctl_table *table, int write,
  46                                     void __user *buffer, size_t *lenp,
  47                                     loff_t *ppos)
  48{
  49        unsigned int old_sysctl;
  50        int ret;
  51
  52        mutex_lock(&itmt_update_mutex);
  53
  54        if (!sched_itmt_capable) {
  55                mutex_unlock(&itmt_update_mutex);
  56                return -EINVAL;
  57        }
  58
  59        old_sysctl = sysctl_sched_itmt_enabled;
  60        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  61
  62        if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
  63                x86_topology_update = true;
  64                rebuild_sched_domains();
  65        }
  66
  67        mutex_unlock(&itmt_update_mutex);
  68
  69        return ret;
  70}
  71
/* Bounds referenced by the sched_itmt_enabled entry via extra1/extra2 */
static unsigned int zero;
static unsigned int one = 1;
/*
 * sysctl entry "sched_itmt_enabled", backed by sysctl_sched_itmt_enabled
 * and serviced by sched_itmt_update_handler().
 */
static struct ctl_table itmt_kern_table[] = {
        {
                .procname       = "sched_itmt_enabled",
                .data           = &sysctl_sched_itmt_enabled,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = sched_itmt_update_handler,
                .extra1         = &zero,
                .extra2         = &one,
        },
        {}
};

/* Parent "kernel" directory entry whose child is the table above */
static struct ctl_table itmt_root_table[] = {
        {
                .procname       = "kernel",
                .mode           = 0555,
                .child          = itmt_kern_table,
        },
        {}
};

/*
 * Handle returned by register_sysctl_table() in sched_set_itmt_support();
 * reset to NULL once unregistered in sched_clear_itmt_support().
 */
static struct ctl_table_header *itmt_sysctl_header;
  97
  98/**
  99 * sched_set_itmt_support() - Indicate platform supports ITMT
 100 *
 101 * This function is used by the OS to indicate to scheduler that the platform
 102 * is capable of supporting the ITMT feature.
 103 *
 104 * The current scheme has the pstate driver detects if the system
 105 * is ITMT capable and call sched_set_itmt_support.
 106 *
 107 * This must be done only after sched_set_itmt_core_prio
 108 * has been called to set the cpus' priorities.
 109 * It must not be called with cpu hot plug lock
 110 * held as we need to acquire the lock to rebuild sched domains
 111 * later.
 112 *
 113 * Return: 0 on success
 114 */
 115int sched_set_itmt_support(void)
 116{
 117        mutex_lock(&itmt_update_mutex);
 118
 119        if (sched_itmt_capable) {
 120                mutex_unlock(&itmt_update_mutex);
 121                return 0;
 122        }
 123
 124        itmt_sysctl_header = register_sysctl_table(itmt_root_table);
 125        if (!itmt_sysctl_header) {
 126                mutex_unlock(&itmt_update_mutex);
 127                return -ENOMEM;
 128        }
 129
 130        sched_itmt_capable = true;
 131
 132        sysctl_sched_itmt_enabled = 1;
 133
 134        x86_topology_update = true;
 135        rebuild_sched_domains();
 136
 137        mutex_unlock(&itmt_update_mutex);
 138
 139        return 0;
 140}
 141
 142/**
 143 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 144 *
 145 * This function is used by the OS to indicate that it has
 146 * revoked the platform's support of ITMT feature.
 147 *
 148 * It must not be called with cpu hot plug lock
 149 * held as we need to acquire the lock to rebuild sched domains
 150 * later.
 151 */
 152void sched_clear_itmt_support(void)
 153{
 154        mutex_lock(&itmt_update_mutex);
 155
 156        if (!sched_itmt_capable) {
 157                mutex_unlock(&itmt_update_mutex);
 158                return;
 159        }
 160        sched_itmt_capable = false;
 161
 162        if (itmt_sysctl_header) {
 163                unregister_sysctl_table(itmt_sysctl_header);
 164                itmt_sysctl_header = NULL;
 165        }
 166
 167        if (sysctl_sched_itmt_enabled) {
 168                /* disable sched_itmt if we are no longer ITMT capable */
 169                sysctl_sched_itmt_enabled = 0;
 170                x86_topology_update = true;
 171                rebuild_sched_domains();
 172        }
 173
 174        mutex_unlock(&itmt_update_mutex);
 175}
 176
 177int arch_asym_cpu_priority(int cpu)
 178{
 179        return per_cpu(sched_core_priority, cpu);
 180}
 181
 182/**
 183 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 184 * @prio:       Priority of cpu core
 185 * @core_cpu:   The cpu number associated with the core
 186 *
 187 * The pstate driver will find out the max boost frequency
 188 * and call this function to set a priority proportional
 189 * to the max boost frequency. CPU with higher boost
 190 * frequency will receive higher priority.
 191 *
 192 * No need to rebuild sched domain after updating
 193 * the CPU priorities. The sched domains have no
 194 * dependency on CPU priorities.
 195 */
 196void sched_set_itmt_core_prio(int prio, int core_cpu)
 197{
 198        int cpu, i = 1;
 199
 200        for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
 201                int smt_prio;
 202
 203                /*
 204                 * Ensure that the siblings are moved to the end
 205                 * of the priority chain and only used when
 206                 * all other high priority cpus are out of capacity.
 207                 */
 208                smt_prio = prio * smp_num_siblings / i;
 209                per_cpu(sched_core_priority, cpu) = smt_prio;
 210                i++;
 211        }
 212}
 213