linux/drivers/thermal/devfreq_cooling.c
<<
>>
Prefs
   1/*
   2 * devfreq_cooling: Thermal cooling device implementation for devices using
   3 *                  devfreq
   4 *
   5 * Copyright (C) 2014-2015 ARM Limited
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
  12 * kind, whether express or implied; without even the implied warranty
  13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * TODO:
  17 *    - If OPPs are added or removed after devfreq cooling has
  18 *      registered, the devfreq cooling won't react to it.
  19 */
  20
#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/math64.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/thermal.h>
  28
  29#include <trace/events/thermal.h>
  30
/*
 * Integer scale factor applied to devfreq_cooling_device.res_util: the
 * ratio is multiplied by this value when it is computed and divided by it
 * again when it is used, to reduce the integer rounding error.
 */
#define SCALE_ERROR_MITIGATION 100

/* Allocator for the per-device id used in the "thermal-devfreq-%d" name. */
static DEFINE_IDA(devfreq_ida);
  34
/**
 * struct devfreq_cooling_device - Devfreq cooling device
 * @id:		unique integer value corresponding to each
 *		devfreq_cooling_device registered.
 * @cdev:	Pointer to associated thermal cooling device.
 * @devfreq:	Pointer to associated devfreq device.
 * @cooling_state:	Current cooling state.
 * @power_table:	Pointer to table with maximum power draw for each
 *			cooling state. State is the index into the table, and
 *			the power is in mW.
 * @freq_table:	Pointer to a table with the frequencies sorted in descending
 *		order.  You can index the table by cooling device state
 * @freq_table_size:	Size of the @freq_table and @power_table
 * @power_ops:	Pointer to devfreq_cooling_power, used to generate the
 *		@power_table.
 * @res_util:	Resource utilization scaling factor for the power.
 *		It is multiplied by 100 to minimize the error. It is used
 *		for estimation of the power budget instead of using
 *		'utilization' (which is 'busy_time' / 'total_time').
 *		The 'res_util' range is from 100 to (power_table[state] * 100)
 *		for the corresponding 'state'.
 * @capped_state:	Cooling state most recently chosen by
 *			devfreq_cooling_power2state().  It is read back by
 *			devfreq_cooling_get_requested_power() to index
 *			@power_table when computing @res_util.
 */
struct devfreq_cooling_device {
	int id;
	struct thermal_cooling_device *cdev;
	struct devfreq *devfreq;
	unsigned long cooling_state;
	u32 *power_table;
	u32 *freq_table;
	size_t freq_table_size;
	struct devfreq_cooling_power *power_ops;
	u32 res_util;
	int capped_state;
};
  69
  70/**
  71 * partition_enable_opps() - disable all opps above a given state
  72 * @dfc:        Pointer to devfreq we are operating on
  73 * @cdev_state: cooling device state we're setting
  74 *
  75 * Go through the OPPs of the device, enabling all OPPs until
  76 * @cdev_state and disabling those frequencies above it.
  77 */
  78static int partition_enable_opps(struct devfreq_cooling_device *dfc,
  79                                 unsigned long cdev_state)
  80{
  81        int i;
  82        struct device *dev = dfc->devfreq->dev.parent;
  83
  84        for (i = 0; i < dfc->freq_table_size; i++) {
  85                struct dev_pm_opp *opp;
  86                int ret = 0;
  87                unsigned int freq = dfc->freq_table[i];
  88                bool want_enable = i >= cdev_state ? true : false;
  89
  90                opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
  91
  92                if (PTR_ERR(opp) == -ERANGE)
  93                        continue;
  94                else if (IS_ERR(opp))
  95                        return PTR_ERR(opp);
  96
  97                dev_pm_opp_put(opp);
  98
  99                if (want_enable)
 100                        ret = dev_pm_opp_enable(dev, freq);
 101                else
 102                        ret = dev_pm_opp_disable(dev, freq);
 103
 104                if (ret)
 105                        return ret;
 106        }
 107
 108        return 0;
 109}
 110
 111static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
 112                                         unsigned long *state)
 113{
 114        struct devfreq_cooling_device *dfc = cdev->devdata;
 115
 116        *state = dfc->freq_table_size - 1;
 117
 118        return 0;
 119}
 120
 121static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
 122                                         unsigned long *state)
 123{
 124        struct devfreq_cooling_device *dfc = cdev->devdata;
 125
 126        *state = dfc->cooling_state;
 127
 128        return 0;
 129}
 130
 131static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 132                                         unsigned long state)
 133{
 134        struct devfreq_cooling_device *dfc = cdev->devdata;
 135        struct devfreq *df = dfc->devfreq;
 136        struct device *dev = df->dev.parent;
 137        int ret;
 138
 139        if (state == dfc->cooling_state)
 140                return 0;
 141
 142        dev_dbg(dev, "Setting cooling state %lu\n", state);
 143
 144        if (state >= dfc->freq_table_size)
 145                return -EINVAL;
 146
 147        ret = partition_enable_opps(dfc, state);
 148        if (ret)
 149                return ret;
 150
 151        dfc->cooling_state = state;
 152
 153        return 0;
 154}
 155
 156/**
 157 * freq_get_state() - get the cooling state corresponding to a frequency
 158 * @dfc:        Pointer to devfreq cooling device
 159 * @freq:       frequency in Hz
 160 *
 161 * Return: the cooling state associated with the @freq, or
 162 * THERMAL_CSTATE_INVALID if it wasn't found.
 163 */
 164static unsigned long
 165freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
 166{
 167        int i;
 168
 169        for (i = 0; i < dfc->freq_table_size; i++) {
 170                if (dfc->freq_table[i] == freq)
 171                        return i;
 172        }
 173
 174        return THERMAL_CSTATE_INVALID;
 175}
 176
 177static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
 178{
 179        struct device *dev = df->dev.parent;
 180        unsigned long voltage;
 181        struct dev_pm_opp *opp;
 182
 183        opp = dev_pm_opp_find_freq_exact(dev, freq, true);
 184        if (PTR_ERR(opp) == -ERANGE)
 185                opp = dev_pm_opp_find_freq_exact(dev, freq, false);
 186
 187        if (IS_ERR(opp)) {
 188                dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
 189                                    freq, PTR_ERR(opp));
 190                return 0;
 191        }
 192
 193        voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 194        dev_pm_opp_put(opp);
 195
 196        if (voltage == 0) {
 197                dev_err_ratelimited(dev,
 198                                    "Failed to get voltage for frequency %lu\n",
 199                                    freq);
 200        }
 201
 202        return voltage;
 203}
 204
 205/**
 206 * get_static_power() - calculate the static power
 207 * @dfc:        Pointer to devfreq cooling device
 208 * @freq:       Frequency in Hz
 209 *
 210 * Calculate the static power in milliwatts using the supplied
 211 * get_static_power().  The current voltage is calculated using the
 212 * OPP library.  If no get_static_power() was supplied, assume the
 213 * static power is negligible.
 214 */
 215static unsigned long
 216get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 217{
 218        struct devfreq *df = dfc->devfreq;
 219        unsigned long voltage;
 220
 221        if (!dfc->power_ops->get_static_power)
 222                return 0;
 223
 224        voltage = get_voltage(df, freq);
 225
 226        if (voltage == 0)
 227                return 0;
 228
 229        return dfc->power_ops->get_static_power(df, voltage);
 230}
 231
 232/**
 233 * get_dynamic_power - calculate the dynamic power
 234 * @dfc:        Pointer to devfreq cooling device
 235 * @freq:       Frequency in Hz
 236 * @voltage:    Voltage in millivolts
 237 *
 238 * Calculate the dynamic power in milliwatts consumed by the device at
 239 * frequency @freq and voltage @voltage.  If the get_dynamic_power()
 240 * was supplied as part of the devfreq_cooling_power struct, then that
 241 * function is used.  Otherwise, a simple power model (Pdyn = Coeff *
 242 * Voltage^2 * Frequency) is used.
 243 */
 244static unsigned long
 245get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 246                  unsigned long voltage)
 247{
 248        u64 power;
 249        u32 freq_mhz;
 250        struct devfreq_cooling_power *dfc_power = dfc->power_ops;
 251
 252        if (dfc_power->get_dynamic_power)
 253                return dfc_power->get_dynamic_power(dfc->devfreq, freq,
 254                                                    voltage);
 255
 256        freq_mhz = freq / 1000000;
 257        power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;
 258        do_div(power, 1000000000);
 259
 260        return power;
 261}
 262
 263
 264static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc,
 265                                            unsigned long freq,
 266                                            unsigned long voltage)
 267{
 268        return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq,
 269                                                               voltage);
 270}
 271
 272
 273static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 274                                               struct thermal_zone_device *tz,
 275                                               u32 *power)
 276{
 277        struct devfreq_cooling_device *dfc = cdev->devdata;
 278        struct devfreq *df = dfc->devfreq;
 279        struct devfreq_dev_status *status = &df->last_status;
 280        unsigned long state;
 281        unsigned long freq = status->current_frequency;
 282        unsigned long voltage;
 283        u32 dyn_power = 0;
 284        u32 static_power = 0;
 285        int res;
 286
 287        state = freq_get_state(dfc, freq);
 288        if (state == THERMAL_CSTATE_INVALID) {
 289                res = -EAGAIN;
 290                goto fail;
 291        }
 292
 293        if (dfc->power_ops->get_real_power) {
 294                voltage = get_voltage(df, freq);
 295                if (voltage == 0) {
 296                        res = -EINVAL;
 297                        goto fail;
 298                }
 299
 300                res = dfc->power_ops->get_real_power(df, power, freq, voltage);
 301                if (!res) {
 302                        state = dfc->capped_state;
 303                        dfc->res_util = dfc->power_table[state];
 304                        dfc->res_util *= SCALE_ERROR_MITIGATION;
 305
 306                        if (*power > 1)
 307                                dfc->res_util /= *power;
 308                } else {
 309                        goto fail;
 310                }
 311        } else {
 312                dyn_power = dfc->power_table[state];
 313
 314                /* Scale dynamic power for utilization */
 315                dyn_power *= status->busy_time;
 316                dyn_power /= status->total_time;
 317                /* Get static power */
 318                static_power = get_static_power(dfc, freq);
 319
 320                *power = dyn_power + static_power;
 321        }
 322
 323        trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
 324                                              static_power, *power);
 325
 326        return 0;
 327fail:
 328        /* It is safe to set max in this case */
 329        dfc->res_util = SCALE_ERROR_MITIGATION;
 330        return res;
 331}
 332
 333static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
 334                                       struct thermal_zone_device *tz,
 335                                       unsigned long state,
 336                                       u32 *power)
 337{
 338        struct devfreq_cooling_device *dfc = cdev->devdata;
 339        unsigned long freq;
 340        u32 static_power;
 341
 342        if (state >= dfc->freq_table_size)
 343                return -EINVAL;
 344
 345        freq = dfc->freq_table[state];
 346        static_power = get_static_power(dfc, freq);
 347
 348        *power = dfc->power_table[state] + static_power;
 349        return 0;
 350}
 351
 352static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 353                                       struct thermal_zone_device *tz,
 354                                       u32 power, unsigned long *state)
 355{
 356        struct devfreq_cooling_device *dfc = cdev->devdata;
 357        struct devfreq *df = dfc->devfreq;
 358        struct devfreq_dev_status *status = &df->last_status;
 359        unsigned long freq = status->current_frequency;
 360        unsigned long busy_time;
 361        s32 dyn_power;
 362        u32 static_power;
 363        s32 est_power;
 364        int i;
 365
 366        if (dfc->power_ops->get_real_power) {
 367                /* Scale for resource utilization */
 368                est_power = power * dfc->res_util;
 369                est_power /= SCALE_ERROR_MITIGATION;
 370        } else {
 371                static_power = get_static_power(dfc, freq);
 372
 373                dyn_power = power - static_power;
 374                dyn_power = dyn_power > 0 ? dyn_power : 0;
 375
 376                /* Scale dynamic power for utilization */
 377                busy_time = status->busy_time ?: 1;
 378                est_power = (dyn_power * status->total_time) / busy_time;
 379        }
 380
 381        /*
 382         * Find the first cooling state that is within the power
 383         * budget for dynamic power.
 384         */
 385        for (i = 0; i < dfc->freq_table_size - 1; i++)
 386                if (est_power >= dfc->power_table[i])
 387                        break;
 388
 389        *state = i;
 390        dfc->capped_state = i;
 391        trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 392        return 0;
 393}
 394
/*
 * Cooling device callbacks for plain state-based throttling.  Only the
 * state accessors are listed in this initializer; the power-related
 * callbacks (get_requested_power, state2power, power2state) are not.
 */
static struct thermal_cooling_device_ops devfreq_cooling_ops = {
	.get_max_state = devfreq_cooling_get_max_state,
	.get_cur_state = devfreq_cooling_get_cur_state,
	.set_cur_state = devfreq_cooling_set_cur_state,
};
 400
 401/**
 402 * devfreq_cooling_gen_tables() - Generate power and freq tables.
 403 * @dfc: Pointer to devfreq cooling device.
 404 *
 405 * Generate power and frequency tables: the power table hold the
 406 * device's maximum power usage at each cooling state (OPP).  The
 407 * static and dynamic power using the appropriate voltage and
 408 * frequency for the state, is acquired from the struct
 409 * devfreq_cooling_power, and summed to make the maximum power draw.
 410 *
 411 * The frequency table holds the frequencies in descending order.
 412 * That way its indexed by cooling device state.
 413 *
 414 * The tables are malloced, and pointers put in dfc.  They must be
 415 * freed when unregistering the devfreq cooling device.
 416 *
 417 * Return: 0 on success, negative error code on failure.
 418 */
 419static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 420{
 421        struct devfreq *df = dfc->devfreq;
 422        struct device *dev = df->dev.parent;
 423        int ret, num_opps;
 424        unsigned long freq;
 425        u32 *power_table = NULL;
 426        u32 *freq_table;
 427        int i;
 428
 429        num_opps = dev_pm_opp_get_opp_count(dev);
 430
 431        if (dfc->power_ops) {
 432                power_table = kcalloc(num_opps, sizeof(*power_table),
 433                                      GFP_KERNEL);
 434                if (!power_table)
 435                        return -ENOMEM;
 436        }
 437
 438        freq_table = kcalloc(num_opps, sizeof(*freq_table),
 439                             GFP_KERNEL);
 440        if (!freq_table) {
 441                ret = -ENOMEM;
 442                goto free_power_table;
 443        }
 444
 445        for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
 446                unsigned long power, voltage;
 447                struct dev_pm_opp *opp;
 448
 449                opp = dev_pm_opp_find_freq_floor(dev, &freq);
 450                if (IS_ERR(opp)) {
 451                        ret = PTR_ERR(opp);
 452                        goto free_tables;
 453                }
 454
 455                voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 456                dev_pm_opp_put(opp);
 457
 458                if (dfc->power_ops) {
 459                        if (dfc->power_ops->get_real_power)
 460                                power = get_total_power(dfc, freq, voltage);
 461                        else
 462                                power = get_dynamic_power(dfc, freq, voltage);
 463
 464                        dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
 465                                freq / 1000000, voltage, power, power);
 466
 467                        power_table[i] = power;
 468                }
 469
 470                freq_table[i] = freq;
 471        }
 472
 473        if (dfc->power_ops)
 474                dfc->power_table = power_table;
 475
 476        dfc->freq_table = freq_table;
 477        dfc->freq_table_size = num_opps;
 478
 479        return 0;
 480
 481free_tables:
 482        kfree(freq_table);
 483free_power_table:
 484        kfree(power_table);
 485
 486        return ret;
 487}
 488
 489/**
 490 * of_devfreq_cooling_register_power() - Register devfreq cooling device,
 491 *                                      with OF and power information.
 492 * @np: Pointer to OF device_node.
 493 * @df: Pointer to devfreq device.
 494 * @dfc_power:  Pointer to devfreq_cooling_power.
 495 *
 496 * Register a devfreq cooling device.  The available OPPs must be
 497 * registered on the device.
 498 *
 499 * If @dfc_power is provided, the cooling device is registered with the
 500 * power extensions.  For the power extensions to work correctly,
 501 * devfreq should use the simple_ondemand governor, other governors
 502 * are not currently supported.
 503 */
 504struct thermal_cooling_device *
 505of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 506                                  struct devfreq_cooling_power *dfc_power)
 507{
 508        struct thermal_cooling_device *cdev;
 509        struct devfreq_cooling_device *dfc;
 510        char dev_name[THERMAL_NAME_LENGTH];
 511        int err;
 512
 513        dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
 514        if (!dfc)
 515                return ERR_PTR(-ENOMEM);
 516
 517        dfc->devfreq = df;
 518
 519        if (dfc_power) {
 520                dfc->power_ops = dfc_power;
 521
 522                devfreq_cooling_ops.get_requested_power =
 523                        devfreq_cooling_get_requested_power;
 524                devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
 525                devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
 526        }
 527
 528        err = devfreq_cooling_gen_tables(dfc);
 529        if (err)
 530                goto free_dfc;
 531
 532        err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
 533        if (err < 0)
 534                goto free_tables;
 535        dfc->id = err;
 536
 537        snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
 538
 539        cdev = thermal_of_cooling_device_register(np, dev_name, dfc,
 540                                                  &devfreq_cooling_ops);
 541        if (IS_ERR(cdev)) {
 542                err = PTR_ERR(cdev);
 543                dev_err(df->dev.parent,
 544                        "Failed to register devfreq cooling device (%d)\n",
 545                        err);
 546                goto release_ida;
 547        }
 548
 549        dfc->cdev = cdev;
 550
 551        return cdev;
 552
 553release_ida:
 554        ida_simple_remove(&devfreq_ida, dfc->id);
 555free_tables:
 556        kfree(dfc->power_table);
 557        kfree(dfc->freq_table);
 558free_dfc:
 559        kfree(dfc);
 560
 561        return ERR_PTR(err);
 562}
 563EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
 564
 565/**
 566 * of_devfreq_cooling_register() - Register devfreq cooling device,
 567 *                                with OF information.
 568 * @np: Pointer to OF device_node.
 569 * @df: Pointer to devfreq device.
 570 */
 571struct thermal_cooling_device *
 572of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
 573{
 574        return of_devfreq_cooling_register_power(np, df, NULL);
 575}
 576EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
 577
 578/**
 579 * devfreq_cooling_register() - Register devfreq cooling device.
 580 * @df: Pointer to devfreq device.
 581 */
 582struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 583{
 584        return of_devfreq_cooling_register(NULL, df);
 585}
 586EXPORT_SYMBOL_GPL(devfreq_cooling_register);
 587
 588/**
 589 * devfreq_cooling_unregister() - Unregister devfreq cooling device.
 590 * @dfc: Pointer to devfreq cooling device to unregister.
 591 */
 592void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 593{
 594        struct devfreq_cooling_device *dfc;
 595
 596        if (!cdev)
 597                return;
 598
 599        dfc = cdev->devdata;
 600
 601        thermal_cooling_device_unregister(dfc->cdev);
 602        ida_simple_remove(&devfreq_ida, dfc->id);
 603        kfree(dfc->power_table);
 604        kfree(dfc->freq_table);
 605
 606        kfree(dfc);
 607}
 608EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
 609