dpdk/lib/power/power_acpi_cpufreq.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2014 Intel Corporation
   3 */
   4
   5#include <stdio.h>
   6#include <fcntl.h>
   7#include <stdlib.h>
   8
   9#include <rte_memcpy.h>
  10#include <rte_string_fns.h>
  11
  12#include "power_acpi_cpufreq.h"
  13#include "power_common.h"
  14
  15#define STR_SIZE     1024
  16#define POWER_CONVERT_TO_DECIMAL 10
  17
  18#define POWER_GOVERNOR_USERSPACE "userspace"
  19#define POWER_SYSFILE_AVAIL_FREQ \
  20                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
  21#define POWER_SYSFILE_SETSPEED   \
  22                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
  23#define POWER_ACPI_DRIVER "acpi-cpufreq"
  24
  25/*
  26 * MSR related
  27 */
  28#define PLATFORM_INFO     0x0CE
  29#define TURBO_RATIO_LIMIT 0x1AD
  30#define IA32_PERF_CTL     0x199
  31#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)
  32
  33enum power_state {
  34        POWER_IDLE = 0,
  35        POWER_ONGOING,
  36        POWER_USED,
  37        POWER_UNKNOWN
  38};
  39
  40/**
  41 * Power info per lcore.
  42 */
  43struct acpi_power_info {
  44        unsigned int lcore_id;                   /**< Logical core id */
  45        uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
  46        uint32_t nb_freqs;                   /**< number of available freqs */
  47        FILE *f;                             /**< FD of scaling_setspeed */
  48        char governor_ori[32];               /**< Original governor name */
  49        uint32_t curr_idx;                   /**< Freq index in freqs array */
  50        uint32_t state;                      /**< Power in use state */
  51        uint16_t turbo_available;            /**< Turbo Boost available */
  52        uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
  53} __rte_cache_aligned;
  54
  55static struct acpi_power_info lcore_power_info[RTE_MAX_LCORE];
  56
  57/**
  58 * It is to set specific freq for specific logical core, according to the index
  59 * of supported frequencies.
  60 */
  61static int
  62set_freq_internal(struct acpi_power_info *pi, uint32_t idx)
  63{
  64        if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
  65                RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
  66                                "should be less than %u\n", idx, pi->nb_freqs);
  67                return -1;
  68        }
  69
  70        /* Check if it is the same as current */
  71        if (idx == pi->curr_idx)
  72                return 0;
  73
  74        POWER_DEBUG_TRACE("Frequency[%u] %u to be set for lcore %u\n",
  75                        idx, pi->freqs[idx], pi->lcore_id);
  76        if (fseek(pi->f, 0, SEEK_SET) < 0) {
  77                RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
  78                                "for setting frequency for lcore %u\n", pi->lcore_id);
  79                return -1;
  80        }
  81        if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
  82                RTE_LOG(ERR, POWER, "Fail to write new frequency for "
  83                                "lcore %u\n", pi->lcore_id);
  84                return -1;
  85        }
  86        fflush(pi->f);
  87        pi->curr_idx = idx;
  88
  89        return 1;
  90}
  91
  92/**
  93 * It is to check the current scaling governor by reading sys file, and then
  94 * set it into 'userspace' if it is not by writing the sys file. The original
  95 * governor will be saved for rolling back.
  96 */
  97static int
  98power_set_governor_userspace(struct acpi_power_info *pi)
  99{
 100        return power_set_governor(pi->lcore_id, POWER_GOVERNOR_USERSPACE,
 101                        pi->governor_ori, sizeof(pi->governor_ori));
 102}
 103
 104/**
 105 * It is to check the governor and then set the original governor back if
 106 * needed by writing the sys file.
 107 */
 108static int
 109power_set_governor_original(struct acpi_power_info *pi)
 110{
 111        return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0);
 112}
 113
 114/**
 115 * It is to get the available frequencies of the specific lcore by reading the
 116 * sys file.
 117 */
 118static int
 119power_get_available_freqs(struct acpi_power_info *pi)
 120{
 121        FILE *f;
 122        int ret = -1, i, count;
 123        char *p;
 124        char buf[BUFSIZ];
 125        char *freqs[RTE_MAX_LCORE_FREQS];
 126
 127        open_core_sysfs_file(&f, "r", POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id);
 128        if (f == NULL) {
 129                RTE_LOG(ERR, POWER, "failed to open %s\n",
 130                                POWER_SYSFILE_AVAIL_FREQ);
 131                goto out;
 132        }
 133
 134        ret = read_core_sysfs_s(f, buf, sizeof(buf));
 135        if ((ret) < 0) {
 136                RTE_LOG(ERR, POWER, "Failed to read %s\n",
 137                                POWER_SYSFILE_AVAIL_FREQ);
 138                goto out;
 139        }
 140
 141        /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
 142        count = rte_strsplit(buf, sizeof(buf), freqs,
 143                        RTE_MAX_LCORE_FREQS, ' ');
 144        if (count <= 0) {
 145                RTE_LOG(ERR, POWER, "No available frequency in "
 146                                ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
 147                goto out;
 148        }
 149        if (count >= RTE_MAX_LCORE_FREQS) {
 150                RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
 151                                count);
 152                goto out;
 153        }
 154
 155        /* Store the available frequencies into power context */
 156        for (i = 0, pi->nb_freqs = 0; i < count; i++) {
 157                POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
 158                                i, freqs[i]);
 159                pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
 160                                POWER_CONVERT_TO_DECIMAL);
 161        }
 162
 163        if ((pi->freqs[0]-1000) == pi->freqs[1]) {
 164                pi->turbo_available = 1;
 165                pi->turbo_enable = 1;
 166                POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
 167                                pi->lcore_id);
 168        } else {
 169                pi->turbo_available = 0;
 170                pi->turbo_enable = 0;
 171                POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
 172                                pi->lcore_id);
 173        }
 174
 175        ret = 0;
 176        POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
 177                        count, pi->lcore_id);
 178out:
 179        if (f != NULL)
 180                fclose(f);
 181
 182        return ret;
 183}
 184
 185/**
 186 * It is to fopen the sys file for the future setting the lcore frequency.
 187 */
 188static int
 189power_init_for_setting_freq(struct acpi_power_info *pi)
 190{
 191        FILE *f;
 192        char buf[BUFSIZ];
 193        uint32_t i, freq;
 194        int ret;
 195
 196        open_core_sysfs_file(&f, "rw+", POWER_SYSFILE_SETSPEED, pi->lcore_id);
 197        if (f == NULL) {
 198                RTE_LOG(ERR, POWER, "Failed to open %s\n",
 199                                POWER_SYSFILE_SETSPEED);
 200                goto err;
 201        }
 202
 203        ret = read_core_sysfs_s(f, buf, sizeof(buf));
 204        if ((ret) < 0) {
 205                RTE_LOG(ERR, POWER, "Failed to read %s\n",
 206                                POWER_SYSFILE_SETSPEED);
 207                goto err;
 208        }
 209
 210        freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
 211        for (i = 0; i < pi->nb_freqs; i++) {
 212                if (freq == pi->freqs[i]) {
 213                        pi->curr_idx = i;
 214                        pi->f = f;
 215                        return 0;
 216                }
 217        }
 218
 219err:
 220        if (f != NULL)
 221                fclose(f);
 222
 223        return -1;
 224}
 225
 226int
 227power_acpi_cpufreq_check_supported(void)
 228{
 229        return cpufreq_check_scaling_driver(POWER_ACPI_DRIVER);
 230}
 231
 232int
 233power_acpi_cpufreq_init(unsigned int lcore_id)
 234{
 235        struct acpi_power_info *pi;
 236        uint32_t exp_state;
 237
 238        if (lcore_id >= RTE_MAX_LCORE) {
 239                RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
 240                                lcore_id, RTE_MAX_LCORE - 1U);
 241                return -1;
 242        }
 243
 244        pi = &lcore_power_info[lcore_id];
 245        exp_state = POWER_IDLE;
 246        /* The power in use state works as a guard variable between
 247         * the CPU frequency control initialization and exit process.
 248         * The ACQUIRE memory ordering here pairs with the RELEASE
 249         * ordering below as lock to make sure the frequency operations
 250         * in the critical section are done under the correct state.
 251         */
 252        if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
 253                                        POWER_ONGOING, 0,
 254                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
 255                RTE_LOG(INFO, POWER, "Power management of lcore %u is "
 256                                "in use\n", lcore_id);
 257                return -1;
 258        }
 259
 260        pi->lcore_id = lcore_id;
 261        /* Check and set the governor */
 262        if (power_set_governor_userspace(pi) < 0) {
 263                RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
 264                                "userspace\n", lcore_id);
 265                goto fail;
 266        }
 267
 268        /* Get the available frequencies */
 269        if (power_get_available_freqs(pi) < 0) {
 270                RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
 271                                "lcore %u\n", lcore_id);
 272                goto fail;
 273        }
 274
 275        /* Init for setting lcore frequency */
 276        if (power_init_for_setting_freq(pi) < 0) {
 277                RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
 278                                "lcore %u\n", lcore_id);
 279                goto fail;
 280        }
 281
 282        /* Set freq to max by default */
 283        if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
 284                RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
 285                                "to max\n", lcore_id);
 286                goto fail;
 287        }
 288
 289        RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
 290                        "power management\n", lcore_id);
 291        exp_state = POWER_ONGOING;
 292        __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
 293                                    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 294
 295        return 0;
 296
 297fail:
 298        exp_state = POWER_ONGOING;
 299        __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
 300                                    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 301
 302        return -1;
 303}
 304
 305int
 306power_acpi_cpufreq_exit(unsigned int lcore_id)
 307{
 308        struct acpi_power_info *pi;
 309        uint32_t exp_state;
 310
 311        if (lcore_id >= RTE_MAX_LCORE) {
 312                RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
 313                                lcore_id, RTE_MAX_LCORE - 1U);
 314                return -1;
 315        }
 316        pi = &lcore_power_info[lcore_id];
 317        exp_state = POWER_USED;
 318        /* The power in use state works as a guard variable between
 319         * the CPU frequency control initialization and exit process.
 320         * The ACQUIRE memory ordering here pairs with the RELEASE
 321         * ordering below as lock to make sure the frequency operations
 322         * in the critical section are done under the correct state.
 323         */
 324        if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
 325                                        POWER_ONGOING, 0,
 326                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
 327                RTE_LOG(INFO, POWER, "Power management of lcore %u is "
 328                                "not used\n", lcore_id);
 329                return -1;
 330        }
 331
 332        /* Close FD of setting freq */
 333        fclose(pi->f);
 334        pi->f = NULL;
 335
 336        /* Set the governor back to the original */
 337        if (power_set_governor_original(pi) < 0) {
 338                RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
 339                                "to the original\n", lcore_id);
 340                goto fail;
 341        }
 342
 343        RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
 344                        "'userspace' mode and been set back to the "
 345                        "original\n", lcore_id);
 346        exp_state = POWER_ONGOING;
 347        __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
 348                                    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 349
 350        return 0;
 351
 352fail:
 353        exp_state = POWER_ONGOING;
 354        __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
 355                                    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 356
 357        return -1;
 358}
 359
 360uint32_t
 361power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 362{
 363        struct acpi_power_info *pi;
 364
 365        if (lcore_id >= RTE_MAX_LCORE) {
 366                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 367                return 0;
 368        }
 369
 370        if (freqs == NULL) {
 371                RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
 372                return 0;
 373        }
 374
 375        pi = &lcore_power_info[lcore_id];
 376        if (num < pi->nb_freqs) {
 377                RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
 378                return 0;
 379        }
 380        rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
 381
 382        return pi->nb_freqs;
 383}
 384
 385uint32_t
 386power_acpi_cpufreq_get_freq(unsigned int lcore_id)
 387{
 388        if (lcore_id >= RTE_MAX_LCORE) {
 389                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 390                return RTE_POWER_INVALID_FREQ_INDEX;
 391        }
 392
 393        return lcore_power_info[lcore_id].curr_idx;
 394}
 395
 396int
 397power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
 398{
 399        if (lcore_id >= RTE_MAX_LCORE) {
 400                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 401                return -1;
 402        }
 403
 404        return set_freq_internal(&(lcore_power_info[lcore_id]), index);
 405}
 406
 407int
 408power_acpi_cpufreq_freq_down(unsigned int lcore_id)
 409{
 410        struct acpi_power_info *pi;
 411
 412        if (lcore_id >= RTE_MAX_LCORE) {
 413                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 414                return -1;
 415        }
 416
 417        pi = &lcore_power_info[lcore_id];
 418        if (pi->curr_idx + 1 == pi->nb_freqs)
 419                return 0;
 420
 421        /* Frequencies in the array are from high to low. */
 422        return set_freq_internal(pi, pi->curr_idx + 1);
 423}
 424
 425int
 426power_acpi_cpufreq_freq_up(unsigned int lcore_id)
 427{
 428        struct acpi_power_info *pi;
 429
 430        if (lcore_id >= RTE_MAX_LCORE) {
 431                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 432                return -1;
 433        }
 434
 435        pi = &lcore_power_info[lcore_id];
 436        if (pi->curr_idx == 0 ||
 437            (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
 438                return 0;
 439
 440        /* Frequencies in the array are from high to low. */
 441        return set_freq_internal(pi, pi->curr_idx - 1);
 442}
 443
 444int
 445power_acpi_cpufreq_freq_max(unsigned int lcore_id)
 446{
 447        if (lcore_id >= RTE_MAX_LCORE) {
 448                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 449                return -1;
 450        }
 451
 452        /* Frequencies in the array are from high to low. */
 453        if (lcore_power_info[lcore_id].turbo_available) {
 454                if (lcore_power_info[lcore_id].turbo_enable)
 455                        /* Set to Turbo */
 456                        return set_freq_internal(
 457                                        &lcore_power_info[lcore_id], 0);
 458                else
 459                        /* Set to max non-turbo */
 460                        return set_freq_internal(
 461                                        &lcore_power_info[lcore_id], 1);
 462        } else
 463                return set_freq_internal(&lcore_power_info[lcore_id], 0);
 464}
 465
 466int
 467power_acpi_cpufreq_freq_min(unsigned int lcore_id)
 468{
 469        struct acpi_power_info *pi;
 470
 471        if (lcore_id >= RTE_MAX_LCORE) {
 472                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 473                return -1;
 474        }
 475
 476        pi = &lcore_power_info[lcore_id];
 477
 478        /* Frequencies in the array are from high to low. */
 479        return set_freq_internal(pi, pi->nb_freqs - 1);
 480}
 481
 482
 483int
 484power_acpi_turbo_status(unsigned int lcore_id)
 485{
 486        struct acpi_power_info *pi;
 487
 488        if (lcore_id >= RTE_MAX_LCORE) {
 489                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 490                return -1;
 491        }
 492
 493        pi = &lcore_power_info[lcore_id];
 494
 495        return pi->turbo_enable;
 496}
 497
 498
 499int
 500power_acpi_enable_turbo(unsigned int lcore_id)
 501{
 502        struct acpi_power_info *pi;
 503
 504        if (lcore_id >= RTE_MAX_LCORE) {
 505                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 506                return -1;
 507        }
 508
 509        pi = &lcore_power_info[lcore_id];
 510
 511        if (pi->turbo_available)
 512                pi->turbo_enable = 1;
 513        else {
 514                pi->turbo_enable = 0;
 515                RTE_LOG(ERR, POWER,
 516                        "Failed to enable turbo on lcore %u\n",
 517                        lcore_id);
 518                        return -1;
 519        }
 520
 521        /* Max may have changed, so call to max function */
 522        if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
 523                RTE_LOG(ERR, POWER,
 524                        "Failed to set frequency of lcore %u to max\n",
 525                        lcore_id);
 526                        return -1;
 527        }
 528
 529        return 0;
 530}
 531
 532int
 533power_acpi_disable_turbo(unsigned int lcore_id)
 534{
 535        struct acpi_power_info *pi;
 536
 537        if (lcore_id >= RTE_MAX_LCORE) {
 538                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 539                return -1;
 540        }
 541
 542        pi = &lcore_power_info[lcore_id];
 543
 544         pi->turbo_enable = 0;
 545
 546        if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
 547                /* Try to set freq to max by default coming out of turbo */
 548                if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
 549                        RTE_LOG(ERR, POWER,
 550                                "Failed to set frequency of lcore %u to max\n",
 551                                lcore_id);
 552                        return -1;
 553                }
 554        }
 555
 556        return 0;
 557}
 558
 559int power_acpi_get_capabilities(unsigned int lcore_id,
 560                struct rte_power_core_capabilities *caps)
 561{
 562        struct acpi_power_info *pi;
 563
 564        if (lcore_id >= RTE_MAX_LCORE) {
 565                RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
 566                return -1;
 567        }
 568        if (caps == NULL) {
 569                RTE_LOG(ERR, POWER, "Invalid argument\n");
 570                return -1;
 571        }
 572
 573        pi = &lcore_power_info[lcore_id];
 574        caps->capabilities = 0;
 575        caps->turbo = !!(pi->turbo_available);
 576
 577        return 0;
 578}
 579