linux/drivers/thermal/devfreq_cooling.c
<<
>>
Prefs
   1/*
   2 * devfreq_cooling: Thermal cooling device implementation for devices using
   3 *                  devfreq
   4 *
   5 * Copyright (C) 2014-2015 ARM Limited
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
  12 * kind, whether express or implied; without even the implied warranty
  13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * TODO:
  17 *    - If OPPs are added or removed after devfreq cooling has
  18 *      registered, the devfreq cooling won't react to it.
  19 */
  20
  21#include <linux/devfreq.h>
  22#include <linux/devfreq_cooling.h>
  23#include <linux/export.h>
  24#include <linux/slab.h>
  25#include <linux/pm_opp.h>
  26#include <linux/thermal.h>
  27
  28#include <trace/events/thermal.h>
  29
/* devfreq_lock serialises id allocation and removal on devfreq_idr. */
static DEFINE_MUTEX(devfreq_lock);
static DEFINE_IDR(devfreq_idr);
  32
  33/**
  34 * struct devfreq_cooling_device - Devfreq cooling device
  35 * @id:         unique integer value corresponding to each
  36 *              devfreq_cooling_device registered.
  37 * @cdev:       Pointer to associated thermal cooling device.
  38 * @devfreq:    Pointer to associated devfreq device.
  39 * @cooling_state:      Current cooling state.
  40 * @power_table:        Pointer to table with maximum power draw for each
  41 *                      cooling state. State is the index into the table, and
  42 *                      the power is in mW.
  43 * @freq_table: Pointer to a table with the frequencies sorted in descending
  44 *              order.  You can index the table by cooling device state
  45 * @freq_table_size:    Size of the @freq_table and @power_table
  46 * @power_ops:  Pointer to devfreq_cooling_power, used to generate the
  47 *              @power_table.
  48 */
  49struct devfreq_cooling_device {
  50        int id;
  51        struct thermal_cooling_device *cdev;
  52        struct devfreq *devfreq;
  53        unsigned long cooling_state;
  54        u32 *power_table;
  55        u32 *freq_table;
  56        size_t freq_table_size;
  57        struct devfreq_cooling_power *power_ops;
  58};
  59
  60/**
  61 * get_idr - function to get a unique id.
  62 * @idr: struct idr * handle used to create a id.
  63 * @id: int * value generated by this function.
  64 *
  65 * This function will populate @id with an unique
  66 * id, using the idr API.
  67 *
  68 * Return: 0 on success, an error code on failure.
  69 */
  70static int get_idr(struct idr *idr, int *id)
  71{
  72        int ret;
  73
  74        mutex_lock(&devfreq_lock);
  75        ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
  76        mutex_unlock(&devfreq_lock);
  77        if (unlikely(ret < 0))
  78                return ret;
  79        *id = ret;
  80
  81        return 0;
  82}
  83
  84/**
  85 * release_idr - function to free the unique id.
  86 * @idr: struct idr * handle used for creating the id.
  87 * @id: int value representing the unique id.
  88 */
  89static void release_idr(struct idr *idr, int id)
  90{
  91        mutex_lock(&devfreq_lock);
  92        idr_remove(idr, id);
  93        mutex_unlock(&devfreq_lock);
  94}
  95
  96/**
  97 * partition_enable_opps() - disable all opps above a given state
  98 * @dfc:        Pointer to devfreq we are operating on
  99 * @cdev_state: cooling device state we're setting
 100 *
 101 * Go through the OPPs of the device, enabling all OPPs until
 102 * @cdev_state and disabling those frequencies above it.
 103 */
 104static int partition_enable_opps(struct devfreq_cooling_device *dfc,
 105                                 unsigned long cdev_state)
 106{
 107        int i;
 108        struct device *dev = dfc->devfreq->dev.parent;
 109
 110        for (i = 0; i < dfc->freq_table_size; i++) {
 111                struct dev_pm_opp *opp;
 112                int ret = 0;
 113                unsigned int freq = dfc->freq_table[i];
 114                bool want_enable = i >= cdev_state ? true : false;
 115
 116                rcu_read_lock();
 117                opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
 118                rcu_read_unlock();
 119
 120                if (PTR_ERR(opp) == -ERANGE)
 121                        continue;
 122                else if (IS_ERR(opp))
 123                        return PTR_ERR(opp);
 124
 125                if (want_enable)
 126                        ret = dev_pm_opp_enable(dev, freq);
 127                else
 128                        ret = dev_pm_opp_disable(dev, freq);
 129
 130                if (ret)
 131                        return ret;
 132        }
 133
 134        return 0;
 135}
 136
 137static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
 138                                         unsigned long *state)
 139{
 140        struct devfreq_cooling_device *dfc = cdev->devdata;
 141
 142        *state = dfc->freq_table_size - 1;
 143
 144        return 0;
 145}
 146
 147static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
 148                                         unsigned long *state)
 149{
 150        struct devfreq_cooling_device *dfc = cdev->devdata;
 151
 152        *state = dfc->cooling_state;
 153
 154        return 0;
 155}
 156
 157static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 158                                         unsigned long state)
 159{
 160        struct devfreq_cooling_device *dfc = cdev->devdata;
 161        struct devfreq *df = dfc->devfreq;
 162        struct device *dev = df->dev.parent;
 163        int ret;
 164
 165        if (state == dfc->cooling_state)
 166                return 0;
 167
 168        dev_dbg(dev, "Setting cooling state %lu\n", state);
 169
 170        if (state >= dfc->freq_table_size)
 171                return -EINVAL;
 172
 173        ret = partition_enable_opps(dfc, state);
 174        if (ret)
 175                return ret;
 176
 177        dfc->cooling_state = state;
 178
 179        return 0;
 180}
 181
 182/**
 183 * freq_get_state() - get the cooling state corresponding to a frequency
 184 * @dfc:        Pointer to devfreq cooling device
 185 * @freq:       frequency in Hz
 186 *
 187 * Return: the cooling state associated with the @freq, or
 188 * THERMAL_CSTATE_INVALID if it wasn't found.
 189 */
 190static unsigned long
 191freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
 192{
 193        int i;
 194
 195        for (i = 0; i < dfc->freq_table_size; i++) {
 196                if (dfc->freq_table[i] == freq)
 197                        return i;
 198        }
 199
 200        return THERMAL_CSTATE_INVALID;
 201}
 202
 203/**
 204 * get_static_power() - calculate the static power
 205 * @dfc:        Pointer to devfreq cooling device
 206 * @freq:       Frequency in Hz
 207 *
 208 * Calculate the static power in milliwatts using the supplied
 209 * get_static_power().  The current voltage is calculated using the
 210 * OPP library.  If no get_static_power() was supplied, assume the
 211 * static power is negligible.
 212 */
 213static unsigned long
 214get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 215{
 216        struct devfreq *df = dfc->devfreq;
 217        struct device *dev = df->dev.parent;
 218        unsigned long voltage;
 219        struct dev_pm_opp *opp;
 220
 221        if (!dfc->power_ops->get_static_power)
 222                return 0;
 223
 224        rcu_read_lock();
 225
 226        opp = dev_pm_opp_find_freq_exact(dev, freq, true);
 227        if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE))
 228                opp = dev_pm_opp_find_freq_exact(dev, freq, false);
 229
 230        voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 231
 232        rcu_read_unlock();
 233
 234        if (voltage == 0) {
 235                dev_warn_ratelimited(dev,
 236                                     "Failed to get voltage for frequency %lu: %ld\n",
 237                                     freq, IS_ERR(opp) ? PTR_ERR(opp) : 0);
 238                return 0;
 239        }
 240
 241        return dfc->power_ops->get_static_power(voltage);
 242}
 243
 244/**
 245 * get_dynamic_power - calculate the dynamic power
 246 * @dfc:        Pointer to devfreq cooling device
 247 * @freq:       Frequency in Hz
 248 * @voltage:    Voltage in millivolts
 249 *
 250 * Calculate the dynamic power in milliwatts consumed by the device at
 251 * frequency @freq and voltage @voltage.  If the get_dynamic_power()
 252 * was supplied as part of the devfreq_cooling_power struct, then that
 253 * function is used.  Otherwise, a simple power model (Pdyn = Coeff *
 254 * Voltage^2 * Frequency) is used.
 255 */
 256static unsigned long
 257get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 258                  unsigned long voltage)
 259{
 260        u64 power;
 261        u32 freq_mhz;
 262        struct devfreq_cooling_power *dfc_power = dfc->power_ops;
 263
 264        if (dfc_power->get_dynamic_power)
 265                return dfc_power->get_dynamic_power(freq, voltage);
 266
 267        freq_mhz = freq / 1000000;
 268        power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;
 269        do_div(power, 1000000000);
 270
 271        return power;
 272}
 273
 274static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 275                                               struct thermal_zone_device *tz,
 276                                               u32 *power)
 277{
 278        struct devfreq_cooling_device *dfc = cdev->devdata;
 279        struct devfreq *df = dfc->devfreq;
 280        struct devfreq_dev_status *status = &df->last_status;
 281        unsigned long state;
 282        unsigned long freq = status->current_frequency;
 283        u32 dyn_power, static_power;
 284
 285        /* Get dynamic power for state */
 286        state = freq_get_state(dfc, freq);
 287        if (state == THERMAL_CSTATE_INVALID)
 288                return -EAGAIN;
 289
 290        dyn_power = dfc->power_table[state];
 291
 292        /* Scale dynamic power for utilization */
 293        dyn_power = (dyn_power * status->busy_time) / status->total_time;
 294
 295        /* Get static power */
 296        static_power = get_static_power(dfc, freq);
 297
 298        trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
 299                                              static_power);
 300
 301        *power = dyn_power + static_power;
 302
 303        return 0;
 304}
 305
 306static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
 307                                       struct thermal_zone_device *tz,
 308                                       unsigned long state,
 309                                       u32 *power)
 310{
 311        struct devfreq_cooling_device *dfc = cdev->devdata;
 312        unsigned long freq;
 313        u32 static_power;
 314
 315        if (state >= dfc->freq_table_size)
 316                return -EINVAL;
 317
 318        freq = dfc->freq_table[state];
 319        static_power = get_static_power(dfc, freq);
 320
 321        *power = dfc->power_table[state] + static_power;
 322        return 0;
 323}
 324
 325static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 326                                       struct thermal_zone_device *tz,
 327                                       u32 power, unsigned long *state)
 328{
 329        struct devfreq_cooling_device *dfc = cdev->devdata;
 330        struct devfreq *df = dfc->devfreq;
 331        struct devfreq_dev_status *status = &df->last_status;
 332        unsigned long freq = status->current_frequency;
 333        unsigned long busy_time;
 334        s32 dyn_power;
 335        u32 static_power;
 336        int i;
 337
 338        static_power = get_static_power(dfc, freq);
 339
 340        dyn_power = power - static_power;
 341        dyn_power = dyn_power > 0 ? dyn_power : 0;
 342
 343        /* Scale dynamic power for utilization */
 344        busy_time = status->busy_time ?: 1;
 345        dyn_power = (dyn_power * status->total_time) / busy_time;
 346
 347        /*
 348         * Find the first cooling state that is within the power
 349         * budget for dynamic power.
 350         */
 351        for (i = 0; i < dfc->freq_table_size - 1; i++)
 352                if (dyn_power >= dfc->power_table[i])
 353                        break;
 354
 355        *state = i;
 356        trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 357        return 0;
 358}
 359
/*
 * Cooling device callbacks.  This initializer supplies only the state
 * query/control callbacks; it sets none of the power-related ones.
 */
static struct thermal_cooling_device_ops devfreq_cooling_ops = {
	.get_max_state = devfreq_cooling_get_max_state,
	.get_cur_state = devfreq_cooling_get_cur_state,
	.set_cur_state = devfreq_cooling_set_cur_state,
};
 365
 366/**
 367 * devfreq_cooling_gen_tables() - Generate power and freq tables.
 368 * @dfc: Pointer to devfreq cooling device.
 369 *
 370 * Generate power and frequency tables: the power table hold the
 371 * device's maximum power usage at each cooling state (OPP).  The
 372 * static and dynamic power using the appropriate voltage and
 373 * frequency for the state, is acquired from the struct
 374 * devfreq_cooling_power, and summed to make the maximum power draw.
 375 *
 376 * The frequency table holds the frequencies in descending order.
 377 * That way its indexed by cooling device state.
 378 *
 379 * The tables are malloced, and pointers put in dfc.  They must be
 380 * freed when unregistering the devfreq cooling device.
 381 *
 382 * Return: 0 on success, negative error code on failure.
 383 */
 384static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 385{
 386        struct devfreq *df = dfc->devfreq;
 387        struct device *dev = df->dev.parent;
 388        int ret, num_opps;
 389        unsigned long freq;
 390        u32 *power_table = NULL;
 391        u32 *freq_table;
 392        int i;
 393
 394        num_opps = dev_pm_opp_get_opp_count(dev);
 395
 396        if (dfc->power_ops) {
 397                power_table = kcalloc(num_opps, sizeof(*power_table),
 398                                      GFP_KERNEL);
 399                if (!power_table)
 400                        return -ENOMEM;
 401        }
 402
 403        freq_table = kcalloc(num_opps, sizeof(*freq_table),
 404                             GFP_KERNEL);
 405        if (!freq_table) {
 406                ret = -ENOMEM;
 407                goto free_power_table;
 408        }
 409
 410        for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
 411                unsigned long power_dyn, voltage;
 412                struct dev_pm_opp *opp;
 413
 414                rcu_read_lock();
 415
 416                opp = dev_pm_opp_find_freq_floor(dev, &freq);
 417                if (IS_ERR(opp)) {
 418                        rcu_read_unlock();
 419                        ret = PTR_ERR(opp);
 420                        goto free_tables;
 421                }
 422
 423                voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
 424
 425                rcu_read_unlock();
 426
 427                if (dfc->power_ops) {
 428                        power_dyn = get_dynamic_power(dfc, freq, voltage);
 429
 430                        dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
 431                                freq / 1000000, voltage, power_dyn, power_dyn);
 432
 433                        power_table[i] = power_dyn;
 434                }
 435
 436                freq_table[i] = freq;
 437        }
 438
 439        if (dfc->power_ops)
 440                dfc->power_table = power_table;
 441
 442        dfc->freq_table = freq_table;
 443        dfc->freq_table_size = num_opps;
 444
 445        return 0;
 446
 447free_tables:
 448        kfree(freq_table);
 449free_power_table:
 450        kfree(power_table);
 451
 452        return ret;
 453}
 454
 455/**
 456 * of_devfreq_cooling_register_power() - Register devfreq cooling device,
 457 *                                      with OF and power information.
 458 * @np: Pointer to OF device_node.
 459 * @df: Pointer to devfreq device.
 460 * @dfc_power:  Pointer to devfreq_cooling_power.
 461 *
 462 * Register a devfreq cooling device.  The available OPPs must be
 463 * registered on the device.
 464 *
 465 * If @dfc_power is provided, the cooling device is registered with the
 466 * power extensions.  For the power extensions to work correctly,
 467 * devfreq should use the simple_ondemand governor, other governors
 468 * are not currently supported.
 469 */
 470struct thermal_cooling_device *
 471of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 472                                  struct devfreq_cooling_power *dfc_power)
 473{
 474        struct thermal_cooling_device *cdev;
 475        struct devfreq_cooling_device *dfc;
 476        char dev_name[THERMAL_NAME_LENGTH];
 477        int err;
 478
 479        dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
 480        if (!dfc)
 481                return ERR_PTR(-ENOMEM);
 482
 483        dfc->devfreq = df;
 484
 485        if (dfc_power) {
 486                dfc->power_ops = dfc_power;
 487
 488                devfreq_cooling_ops.get_requested_power =
 489                        devfreq_cooling_get_requested_power;
 490                devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
 491                devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
 492        }
 493
 494        err = devfreq_cooling_gen_tables(dfc);
 495        if (err)
 496                goto free_dfc;
 497
 498        err = get_idr(&devfreq_idr, &dfc->id);
 499        if (err)
 500                goto free_tables;
 501
 502        snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
 503
 504        cdev = thermal_of_cooling_device_register(np, dev_name, dfc,
 505                                                  &devfreq_cooling_ops);
 506        if (IS_ERR(cdev)) {
 507                err = PTR_ERR(cdev);
 508                dev_err(df->dev.parent,
 509                        "Failed to register devfreq cooling device (%d)\n",
 510                        err);
 511                goto release_idr;
 512        }
 513
 514        dfc->cdev = cdev;
 515
 516        return cdev;
 517
 518release_idr:
 519        release_idr(&devfreq_idr, dfc->id);
 520free_tables:
 521        kfree(dfc->power_table);
 522        kfree(dfc->freq_table);
 523free_dfc:
 524        kfree(dfc);
 525
 526        return ERR_PTR(err);
 527}
 528EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
 529
 530/**
 531 * of_devfreq_cooling_register() - Register devfreq cooling device,
 532 *                                with OF information.
 533 * @np: Pointer to OF device_node.
 534 * @df: Pointer to devfreq device.
 535 */
 536struct thermal_cooling_device *
 537of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
 538{
 539        return of_devfreq_cooling_register_power(np, df, NULL);
 540}
 541EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
 542
 543/**
 544 * devfreq_cooling_register() - Register devfreq cooling device.
 545 * @df: Pointer to devfreq device.
 546 */
 547struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 548{
 549        return of_devfreq_cooling_register(NULL, df);
 550}
 551EXPORT_SYMBOL_GPL(devfreq_cooling_register);
 552
 553/**
 554 * devfreq_cooling_unregister() - Unregister devfreq cooling device.
 555 * @dfc: Pointer to devfreq cooling device to unregister.
 556 */
 557void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 558{
 559        struct devfreq_cooling_device *dfc;
 560
 561        if (!cdev)
 562                return;
 563
 564        dfc = cdev->devdata;
 565
 566        thermal_cooling_device_unregister(dfc->cdev);
 567        release_idr(&devfreq_idr, dfc->id);
 568        kfree(dfc->power_table);
 569        kfree(dfc->freq_table);
 570
 571        kfree(dfc);
 572}
 573EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
 574