linux/drivers/thermal/devfreq_cooling.c
<<
>>
Prefs
   1/*
   2 * devfreq_cooling: Thermal cooling device implementation for devices using
   3 *                  devfreq
   4 *
   5 * Copyright (C) 2014-2015 ARM Limited
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
  12 * kind, whether express or implied; without even the implied warranty
  13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * TODO:
  17 *    - If OPPs are added or removed after devfreq cooling has
  18 *      registered, the devfreq cooling won't react to it.
  19 */
  20
#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/pm_opp.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/thermal.h>

#include <trace/events/thermal.h>
  31
  32#define HZ_PER_KHZ              1000
  33#define SCALE_ERROR_MITIGATION  100
  34
  35static DEFINE_IDA(devfreq_ida);
  36
  37/**
  38 * struct devfreq_cooling_device - Devfreq cooling device
  39 * @id:         unique integer value corresponding to each
  40 *              devfreq_cooling_device registered.
  41 * @cdev:       Pointer to associated thermal cooling device.
  42 * @devfreq:    Pointer to associated devfreq device.
  43 * @cooling_state:      Current cooling state.
  44 * @power_table:        Pointer to table with maximum power draw for each
  45 *                      cooling state. State is the index into the table, and
  46 *                      the power is in mW.
  47 * @freq_table: Pointer to a table with the frequencies sorted in descending
  48 *              order.  You can index the table by cooling device state
  49 * @freq_table_size:    Size of the @freq_table and @power_table
  50 * @power_ops:  Pointer to devfreq_cooling_power, used to generate the
  51 *              @power_table.
  52 * @res_util:   Resource utilization scaling factor for the power.
  53 *              It is multiplied by 100 to minimize the error. It is used
  54 *              for estimation of the power budget instead of using
  55 *              'utilization' (which is 'busy_time / 'total_time').
  56 *              The 'res_util' range is from 100 to (power_table[state] * 100)
  57 *              for the corresponding 'state'.
  58 * @capped_state:       index to cooling state with in dynamic power budget
  59 * @req_max_freq:       PM QoS request for limiting the maximum frequency
  60 *                      of the devfreq device.
  61 */
  62struct devfreq_cooling_device {
  63        int id;
  64        struct thermal_cooling_device *cdev;
  65        struct devfreq *devfreq;
  66        unsigned long cooling_state;
  67        u32 *power_table;
  68        u32 *freq_table;
  69        size_t freq_table_size;
  70        struct devfreq_cooling_power *power_ops;
  71        u32 res_util;
  72        int capped_state;
  73        struct dev_pm_qos_request req_max_freq;
  74};
  75
  76static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
  77                                         unsigned long *state)
  78{
  79        struct devfreq_cooling_device *dfc = cdev->devdata;
  80
  81        *state = dfc->freq_table_size - 1;
  82
  83        return 0;
  84}
  85
  86static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
  87                                         unsigned long *state)
  88{
  89        struct devfreq_cooling_device *dfc = cdev->devdata;
  90
  91        *state = dfc->cooling_state;
  92
  93        return 0;
  94}
  95
  96static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
  97                                         unsigned long state)
  98{
  99        struct devfreq_cooling_device *dfc = cdev->devdata;
 100        struct devfreq *df = dfc->devfreq;
 101        struct device *dev = df->dev.parent;
 102        unsigned long freq;
 103
 104        if (state == dfc->cooling_state)
 105                return 0;
 106
 107        dev_dbg(dev, "Setting cooling state %lu\n", state);
 108
 109        if (state >= dfc->freq_table_size)
 110                return -EINVAL;
 111
 112        freq = dfc->freq_table[state];
 113
 114        dev_pm_qos_update_request(&dfc->req_max_freq,
 115                                  DIV_ROUND_UP(freq, HZ_PER_KHZ));
 116
 117        dfc->cooling_state = state;
 118
 119        return 0;
 120}
 121
 122/**
 123 * freq_get_state() - get the cooling state corresponding to a frequency
 124 * @dfc:        Pointer to devfreq cooling device
 125 * @freq:       frequency in Hz
 126 *
 127 * Return: the cooling state associated with the @freq, or
 128 * THERMAL_CSTATE_INVALID if it wasn't found.
 129 */
 130static unsigned long
 131freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
 132{
 133        int i;
 134
 135        for (i = 0; i < dfc->freq_table_size; i++) {
 136                if (dfc->freq_table[i] == freq)
 137                        return i;
 138        }
 139
 140        return THERMAL_CSTATE_INVALID;
 141}
 142
 143static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
 144{
 145        struct device *dev = df->dev.parent;
 146        unsigned long voltage;
 147        struct dev_pm_opp *opp;
 148
 149        opp = dev_pm_opp_find_freq_exact(dev, freq, true);
 150        if (PTR_ERR(opp) == -ERANGE)
 151                opp = dev_pm_opp_find_freq_exact(dev, freq, false);
 152
 153        if (IS_ERR(opp)) {
 154                dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
 155                                    freq, PTR_ERR(opp));
 156                return 0;
 157        }
 158
 159        voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 160        dev_pm_opp_put(opp);
 161
 162        if (voltage == 0) {
 163                dev_err_ratelimited(dev,
 164                                    "Failed to get voltage for frequency %lu\n",
 165                                    freq);
 166        }
 167
 168        return voltage;
 169}
 170
 171/**
 172 * get_static_power() - calculate the static power
 173 * @dfc:        Pointer to devfreq cooling device
 174 * @freq:       Frequency in Hz
 175 *
 176 * Calculate the static power in milliwatts using the supplied
 177 * get_static_power().  The current voltage is calculated using the
 178 * OPP library.  If no get_static_power() was supplied, assume the
 179 * static power is negligible.
 180 */
 181static unsigned long
 182get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 183{
 184        struct devfreq *df = dfc->devfreq;
 185        unsigned long voltage;
 186
 187        if (!dfc->power_ops->get_static_power)
 188                return 0;
 189
 190        voltage = get_voltage(df, freq);
 191
 192        if (voltage == 0)
 193                return 0;
 194
 195        return dfc->power_ops->get_static_power(df, voltage);
 196}
 197
 198/**
 199 * get_dynamic_power - calculate the dynamic power
 200 * @dfc:        Pointer to devfreq cooling device
 201 * @freq:       Frequency in Hz
 202 * @voltage:    Voltage in millivolts
 203 *
 204 * Calculate the dynamic power in milliwatts consumed by the device at
 205 * frequency @freq and voltage @voltage.  If the get_dynamic_power()
 206 * was supplied as part of the devfreq_cooling_power struct, then that
 207 * function is used.  Otherwise, a simple power model (Pdyn = Coeff *
 208 * Voltage^2 * Frequency) is used.
 209 */
 210static unsigned long
 211get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 212                  unsigned long voltage)
 213{
 214        u64 power;
 215        u32 freq_mhz;
 216        struct devfreq_cooling_power *dfc_power = dfc->power_ops;
 217
 218        if (dfc_power->get_dynamic_power)
 219                return dfc_power->get_dynamic_power(dfc->devfreq, freq,
 220                                                    voltage);
 221
 222        freq_mhz = freq / 1000000;
 223        power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;
 224        do_div(power, 1000000000);
 225
 226        return power;
 227}
 228
 229
 230static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc,
 231                                            unsigned long freq,
 232                                            unsigned long voltage)
 233{
 234        return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq,
 235                                                               voltage);
 236}
 237
 238
 239static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 240                                               struct thermal_zone_device *tz,
 241                                               u32 *power)
 242{
 243        struct devfreq_cooling_device *dfc = cdev->devdata;
 244        struct devfreq *df = dfc->devfreq;
 245        struct devfreq_dev_status *status = &df->last_status;
 246        unsigned long state;
 247        unsigned long freq = status->current_frequency;
 248        unsigned long voltage;
 249        u32 dyn_power = 0;
 250        u32 static_power = 0;
 251        int res;
 252
 253        state = freq_get_state(dfc, freq);
 254        if (state == THERMAL_CSTATE_INVALID) {
 255                res = -EAGAIN;
 256                goto fail;
 257        }
 258
 259        if (dfc->power_ops->get_real_power) {
 260                voltage = get_voltage(df, freq);
 261                if (voltage == 0) {
 262                        res = -EINVAL;
 263                        goto fail;
 264                }
 265
 266                res = dfc->power_ops->get_real_power(df, power, freq, voltage);
 267                if (!res) {
 268                        state = dfc->capped_state;
 269                        dfc->res_util = dfc->power_table[state];
 270                        dfc->res_util *= SCALE_ERROR_MITIGATION;
 271
 272                        if (*power > 1)
 273                                dfc->res_util /= *power;
 274                } else {
 275                        goto fail;
 276                }
 277        } else {
 278                dyn_power = dfc->power_table[state];
 279
 280                /* Scale dynamic power for utilization */
 281                dyn_power *= status->busy_time;
 282                dyn_power /= status->total_time;
 283                /* Get static power */
 284                static_power = get_static_power(dfc, freq);
 285
 286                *power = dyn_power + static_power;
 287        }
 288
 289        trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
 290                                              static_power, *power);
 291
 292        return 0;
 293fail:
 294        /* It is safe to set max in this case */
 295        dfc->res_util = SCALE_ERROR_MITIGATION;
 296        return res;
 297}
 298
 299static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
 300                                       struct thermal_zone_device *tz,
 301                                       unsigned long state,
 302                                       u32 *power)
 303{
 304        struct devfreq_cooling_device *dfc = cdev->devdata;
 305        unsigned long freq;
 306        u32 static_power;
 307
 308        if (state >= dfc->freq_table_size)
 309                return -EINVAL;
 310
 311        freq = dfc->freq_table[state];
 312        static_power = get_static_power(dfc, freq);
 313
 314        *power = dfc->power_table[state] + static_power;
 315        return 0;
 316}
 317
 318static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 319                                       struct thermal_zone_device *tz,
 320                                       u32 power, unsigned long *state)
 321{
 322        struct devfreq_cooling_device *dfc = cdev->devdata;
 323        struct devfreq *df = dfc->devfreq;
 324        struct devfreq_dev_status *status = &df->last_status;
 325        unsigned long freq = status->current_frequency;
 326        unsigned long busy_time;
 327        s32 dyn_power;
 328        u32 static_power;
 329        s32 est_power;
 330        int i;
 331
 332        if (dfc->power_ops->get_real_power) {
 333                /* Scale for resource utilization */
 334                est_power = power * dfc->res_util;
 335                est_power /= SCALE_ERROR_MITIGATION;
 336        } else {
 337                static_power = get_static_power(dfc, freq);
 338
 339                dyn_power = power - static_power;
 340                dyn_power = dyn_power > 0 ? dyn_power : 0;
 341
 342                /* Scale dynamic power for utilization */
 343                busy_time = status->busy_time ?: 1;
 344                est_power = (dyn_power * status->total_time) / busy_time;
 345        }
 346
 347        /*
 348         * Find the first cooling state that is within the power
 349         * budget for dynamic power.
 350         */
 351        for (i = 0; i < dfc->freq_table_size - 1; i++)
 352                if (est_power >= dfc->power_table[i])
 353                        break;
 354
 355        *state = i;
 356        dfc->capped_state = i;
 357        trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 358        return 0;
 359}
 360
 361static struct thermal_cooling_device_ops devfreq_cooling_ops = {
 362        .get_max_state = devfreq_cooling_get_max_state,
 363        .get_cur_state = devfreq_cooling_get_cur_state,
 364        .set_cur_state = devfreq_cooling_set_cur_state,
 365};
 366
 367/**
 368 * devfreq_cooling_gen_tables() - Generate power and freq tables.
 369 * @dfc: Pointer to devfreq cooling device.
 370 *
 371 * Generate power and frequency tables: the power table hold the
 372 * device's maximum power usage at each cooling state (OPP).  The
 373 * static and dynamic power using the appropriate voltage and
 374 * frequency for the state, is acquired from the struct
 375 * devfreq_cooling_power, and summed to make the maximum power draw.
 376 *
 377 * The frequency table holds the frequencies in descending order.
 378 * That way its indexed by cooling device state.
 379 *
 380 * The tables are malloced, and pointers put in dfc.  They must be
 381 * freed when unregistering the devfreq cooling device.
 382 *
 383 * Return: 0 on success, negative error code on failure.
 384 */
 385static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 386{
 387        struct devfreq *df = dfc->devfreq;
 388        struct device *dev = df->dev.parent;
 389        int ret, num_opps;
 390        unsigned long freq;
 391        u32 *power_table = NULL;
 392        u32 *freq_table;
 393        int i;
 394
 395        num_opps = dev_pm_opp_get_opp_count(dev);
 396
 397        if (dfc->power_ops) {
 398                power_table = kcalloc(num_opps, sizeof(*power_table),
 399                                      GFP_KERNEL);
 400                if (!power_table)
 401                        return -ENOMEM;
 402        }
 403
 404        freq_table = kcalloc(num_opps, sizeof(*freq_table),
 405                             GFP_KERNEL);
 406        if (!freq_table) {
 407                ret = -ENOMEM;
 408                goto free_power_table;
 409        }
 410
 411        for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
 412                unsigned long power, voltage;
 413                struct dev_pm_opp *opp;
 414
 415                opp = dev_pm_opp_find_freq_floor(dev, &freq);
 416                if (IS_ERR(opp)) {
 417                        ret = PTR_ERR(opp);
 418                        goto free_tables;
 419                }
 420
 421                voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 422                dev_pm_opp_put(opp);
 423
 424                if (dfc->power_ops) {
 425                        if (dfc->power_ops->get_real_power)
 426                                power = get_total_power(dfc, freq, voltage);
 427                        else
 428                                power = get_dynamic_power(dfc, freq, voltage);
 429
 430                        dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
 431                                freq / 1000000, voltage, power, power);
 432
 433                        power_table[i] = power;
 434                }
 435
 436                freq_table[i] = freq;
 437        }
 438
 439        if (dfc->power_ops)
 440                dfc->power_table = power_table;
 441
 442        dfc->freq_table = freq_table;
 443        dfc->freq_table_size = num_opps;
 444
 445        return 0;
 446
 447free_tables:
 448        kfree(freq_table);
 449free_power_table:
 450        kfree(power_table);
 451
 452        return ret;
 453}
 454
 455/**
 456 * of_devfreq_cooling_register_power() - Register devfreq cooling device,
 457 *                                      with OF and power information.
 458 * @np: Pointer to OF device_node.
 459 * @df: Pointer to devfreq device.
 460 * @dfc_power:  Pointer to devfreq_cooling_power.
 461 *
 462 * Register a devfreq cooling device.  The available OPPs must be
 463 * registered on the device.
 464 *
 465 * If @dfc_power is provided, the cooling device is registered with the
 466 * power extensions.  For the power extensions to work correctly,
 467 * devfreq should use the simple_ondemand governor, other governors
 468 * are not currently supported.
 469 */
 470struct thermal_cooling_device *
 471of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 472                                  struct devfreq_cooling_power *dfc_power)
 473{
 474        struct thermal_cooling_device *cdev;
 475        struct devfreq_cooling_device *dfc;
 476        char dev_name[THERMAL_NAME_LENGTH];
 477        int err;
 478
 479        dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
 480        if (!dfc)
 481                return ERR_PTR(-ENOMEM);
 482
 483        dfc->devfreq = df;
 484
 485        if (dfc_power) {
 486                dfc->power_ops = dfc_power;
 487
 488                devfreq_cooling_ops.get_requested_power =
 489                        devfreq_cooling_get_requested_power;
 490                devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
 491                devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
 492        }
 493
 494        err = devfreq_cooling_gen_tables(dfc);
 495        if (err)
 496                goto free_dfc;
 497
 498        err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
 499                                     DEV_PM_QOS_MAX_FREQUENCY,
 500                                     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
 501        if (err < 0)
 502                goto free_tables;
 503
 504        err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
 505        if (err < 0)
 506                goto remove_qos_req;
 507        dfc->id = err;
 508
 509        snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
 510
 511        cdev = thermal_of_cooling_device_register(np, dev_name, dfc,
 512                                                  &devfreq_cooling_ops);
 513        if (IS_ERR(cdev)) {
 514                err = PTR_ERR(cdev);
 515                dev_err(df->dev.parent,
 516                        "Failed to register devfreq cooling device (%d)\n",
 517                        err);
 518                goto release_ida;
 519        }
 520
 521        dfc->cdev = cdev;
 522
 523        return cdev;
 524
 525release_ida:
 526        ida_simple_remove(&devfreq_ida, dfc->id);
 527
 528remove_qos_req:
 529        dev_pm_qos_remove_request(&dfc->req_max_freq);
 530
 531free_tables:
 532        kfree(dfc->power_table);
 533        kfree(dfc->freq_table);
 534free_dfc:
 535        kfree(dfc);
 536
 537        return ERR_PTR(err);
 538}
 539EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
 540
 541/**
 542 * of_devfreq_cooling_register() - Register devfreq cooling device,
 543 *                                with OF information.
 544 * @np: Pointer to OF device_node.
 545 * @df: Pointer to devfreq device.
 546 */
 547struct thermal_cooling_device *
 548of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
 549{
 550        return of_devfreq_cooling_register_power(np, df, NULL);
 551}
 552EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
 553
 554/**
 555 * devfreq_cooling_register() - Register devfreq cooling device.
 556 * @df: Pointer to devfreq device.
 557 */
 558struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 559{
 560        return of_devfreq_cooling_register(NULL, df);
 561}
 562EXPORT_SYMBOL_GPL(devfreq_cooling_register);
 563
 564/**
 565 * devfreq_cooling_unregister() - Unregister devfreq cooling device.
 566 * @cdev: Pointer to devfreq cooling device to unregister.
 567 */
 568void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 569{
 570        struct devfreq_cooling_device *dfc;
 571
 572        if (!cdev)
 573                return;
 574
 575        dfc = cdev->devdata;
 576
 577        thermal_cooling_device_unregister(dfc->cdev);
 578        ida_simple_remove(&devfreq_ida, dfc->id);
 579        dev_pm_qos_remove_request(&dfc->req_max_freq);
 580        kfree(dfc->power_table);
 581        kfree(dfc->freq_table);
 582
 583        kfree(dfc);
 584}
 585EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
 586