linux/drivers/hwmon/coretemp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * coretemp.c - Linux kernel module for hardware monitoring
   4 *
   5 * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
   6 *
   7 * Inspired from many hwmon drivers
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/module.h>
  13#include <linux/init.h>
  14#include <linux/slab.h>
  15#include <linux/jiffies.h>
  16#include <linux/hwmon.h>
  17#include <linux/sysfs.h>
  18#include <linux/hwmon-sysfs.h>
  19#include <linux/err.h>
  20#include <linux/mutex.h>
  21#include <linux/list.h>
  22#include <linux/platform_device.h>
  23#include <linux/cpu.h>
  24#include <linux/smp.h>
  25#include <linux/moduleparam.h>
  26#include <linux/pci.h>
  27#include <asm/msr.h>
  28#include <asm/processor.h>
  29#include <asm/cpu_device_id.h>
  30
  31#define DRVNAME "coretemp"
  32
  33/*
  34 * force_tjmax only matters when TjMax can't be read from the CPU itself.
  35 * When set, it replaces the driver's suboptimal heuristic.
  36 */
  37static int force_tjmax;
  38module_param_named(tjmax, force_tjmax, int, 0444);
  39MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
  40
  41#define PKG_SYSFS_ATTR_NO       1       /* Sysfs attribute for package temp */
  42#define BASE_SYSFS_ATTR_NO      2       /* Sysfs Base attr no for coretemp */
  43#define NUM_REAL_CORES          128     /* Number of Real cores per cpu */
  44#define CORETEMP_NAME_LENGTH    19      /* String Length of attrs */
  45#define MAX_CORE_ATTRS          4       /* Maximum no of basic attrs */
  46#define TOTAL_ATTRS             (MAX_CORE_ATTRS + 1)
  47#define MAX_CORE_DATA           (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
  48
  49#define TO_CORE_ID(cpu)         (cpu_data(cpu).cpu_core_id)
  50#define TO_ATTR_NO(cpu)         (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
  51
  52#ifdef CONFIG_SMP
  53#define for_each_sibling(i, cpu) \
  54        for_each_cpu(i, topology_sibling_cpumask(cpu))
  55#else
  56#define for_each_sibling(i, cpu)        for (i = 0; false; )
  57#endif
  58
  59/*
  60 * Per-Core Temperature Data
  61 * @last_updated: The time when the current temperature value was updated
  62 *              earlier (in jiffies).
  63 * @cpu_core_id: The CPU Core from which temperature values should be read
  64 *              This value is passed as "id" field to rdmsr/wrmsr functions.
  65 * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
  66 *              from where the temperature values should be read.
  67 * @attr_size:  Total number of pre-core attrs displayed in the sysfs.
  68 * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
  69 *              Otherwise, temp_data holds coretemp data.
  70 * @valid: If this is 1, the current temperature is valid.
  71 */
  72struct temp_data {
  73        int temp;
  74        int ttarget;
  75        int tjmax;
  76        unsigned long last_updated;
  77        unsigned int cpu;
  78        u32 cpu_core_id;
  79        u32 status_reg;
  80        int attr_size;
  81        bool is_pkg_data;
  82        bool valid;
  83        struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
  84        char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
  85        struct attribute *attrs[TOTAL_ATTRS + 1];
  86        struct attribute_group attr_group;
  87        struct mutex update_lock;
  88};
  89
  90/* Platform Data per Physical CPU */
  91struct platform_data {
  92        struct device           *hwmon_dev;
  93        u16                     pkg_id;
  94        struct cpumask          cpumask;
  95        struct temp_data        *core_data[MAX_CORE_DATA];
  96        struct device_attribute name_attr;
  97};
  98
  99/* Keep track of how many zone pointers we allocated in init() */
 100static int max_zones __read_mostly;
 101/* Array of zone pointers. Serialized by cpu hotplug lock */
 102static struct platform_device **zone_devices;
 103
 104static ssize_t show_label(struct device *dev,
 105                                struct device_attribute *devattr, char *buf)
 106{
 107        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 108        struct platform_data *pdata = dev_get_drvdata(dev);
 109        struct temp_data *tdata = pdata->core_data[attr->index];
 110
 111        if (tdata->is_pkg_data)
 112                return sprintf(buf, "Package id %u\n", pdata->pkg_id);
 113
 114        return sprintf(buf, "Core %u\n", tdata->cpu_core_id);
 115}
 116
 117static ssize_t show_crit_alarm(struct device *dev,
 118                                struct device_attribute *devattr, char *buf)
 119{
 120        u32 eax, edx;
 121        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 122        struct platform_data *pdata = dev_get_drvdata(dev);
 123        struct temp_data *tdata = pdata->core_data[attr->index];
 124
 125        mutex_lock(&tdata->update_lock);
 126        rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
 127        mutex_unlock(&tdata->update_lock);
 128
 129        return sprintf(buf, "%d\n", (eax >> 5) & 1);
 130}
 131
 132static ssize_t show_tjmax(struct device *dev,
 133                        struct device_attribute *devattr, char *buf)
 134{
 135        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 136        struct platform_data *pdata = dev_get_drvdata(dev);
 137
 138        return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tjmax);
 139}
 140
 141static ssize_t show_ttarget(struct device *dev,
 142                                struct device_attribute *devattr, char *buf)
 143{
 144        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 145        struct platform_data *pdata = dev_get_drvdata(dev);
 146
 147        return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget);
 148}
 149
 150static ssize_t show_temp(struct device *dev,
 151                        struct device_attribute *devattr, char *buf)
 152{
 153        u32 eax, edx;
 154        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 155        struct platform_data *pdata = dev_get_drvdata(dev);
 156        struct temp_data *tdata = pdata->core_data[attr->index];
 157
 158        mutex_lock(&tdata->update_lock);
 159
 160        /* Check whether the time interval has elapsed */
 161        if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
 162                rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
 163                /*
 164                 * Ignore the valid bit. In all observed cases the register
 165                 * value is either low or zero if the valid bit is 0.
 166                 * Return it instead of reporting an error which doesn't
 167                 * really help at all.
 168                 */
 169                tdata->temp = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
 170                tdata->valid = 1;
 171                tdata->last_updated = jiffies;
 172        }
 173
 174        mutex_unlock(&tdata->update_lock);
 175        return sprintf(buf, "%d\n", tdata->temp);
 176}
 177
 178struct tjmax_pci {
 179        unsigned int device;
 180        int tjmax;
 181};
 182
 183static const struct tjmax_pci tjmax_pci_table[] = {
 184        { 0x0708, 110000 },     /* CE41x0 (Sodaville ) */
 185        { 0x0c72, 102000 },     /* Atom S1240 (Centerton) */
 186        { 0x0c73, 95000 },      /* Atom S1220 (Centerton) */
 187        { 0x0c75, 95000 },      /* Atom S1260 (Centerton) */
 188};
 189
 190struct tjmax {
 191        char const *id;
 192        int tjmax;
 193};
 194
 195static const struct tjmax tjmax_table[] = {
 196        { "CPU  230", 100000 },         /* Model 0x1c, stepping 2       */
 197        { "CPU  330", 125000 },         /* Model 0x1c, stepping 2       */
 198};
 199
 200struct tjmax_model {
 201        u8 model;
 202        u8 mask;
 203        int tjmax;
 204};
 205
 206#define ANY 0xff
 207
 208static const struct tjmax_model tjmax_model_table[] = {
 209        { 0x1c, 10, 100000 },   /* D4xx, K4xx, N4xx, D5xx, K5xx, N5xx */
 210        { 0x1c, ANY, 90000 },   /* Z5xx, N2xx, possibly others
 211                                 * Note: Also matches 230 and 330,
 212                                 * which are covered by tjmax_table
 213                                 */
 214        { 0x26, ANY, 90000 },   /* Atom Tunnel Creek (Exx), Lincroft (Z6xx)
 215                                 * Note: TjMax for E6xxT is 110C, but CPU type
 216                                 * is undetectable by software
 217                                 */
 218        { 0x27, ANY, 90000 },   /* Atom Medfield (Z2460) */
 219        { 0x35, ANY, 90000 },   /* Atom Clover Trail/Cloverview (Z27x0) */
 220        { 0x36, ANY, 100000 },  /* Atom Cedar Trail/Cedarview (N2xxx, D2xxx)
 221                                 * Also matches S12x0 (stepping 9), covered by
 222                                 * PCI table
 223                                 */
 224};
 225
 226static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 227{
 228        /* The 100C is default for both mobile and non mobile CPUs */
 229
 230        int tjmax = 100000;
 231        int tjmax_ee = 85000;
 232        int usemsr_ee = 1;
 233        int err;
 234        u32 eax, edx;
 235        int i;
 236        u16 devfn = PCI_DEVFN(0, 0);
 237        struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn);
 238
 239        /*
 240         * Explicit tjmax table entries override heuristics.
 241         * First try PCI host bridge IDs, followed by model ID strings
 242         * and model/stepping information.
 243         */
 244        if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) {
 245                for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) {
 246                        if (host_bridge->device == tjmax_pci_table[i].device)
 247                                return tjmax_pci_table[i].tjmax;
 248                }
 249        }
 250
 251        for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) {
 252                if (strstr(c->x86_model_id, tjmax_table[i].id))
 253                        return tjmax_table[i].tjmax;
 254        }
 255
 256        for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
 257                const struct tjmax_model *tm = &tjmax_model_table[i];
 258                if (c->x86_model == tm->model &&
 259                    (tm->mask == ANY || c->x86_stepping == tm->mask))
 260                        return tm->tjmax;
 261        }
 262
 263        /* Early chips have no MSR for TjMax */
 264
 265        if (c->x86_model == 0xf && c->x86_stepping < 4)
 266                usemsr_ee = 0;
 267
 268        if (c->x86_model > 0xe && usemsr_ee) {
 269                u8 platform_id;
 270
 271                /*
 272                 * Now we can detect the mobile CPU using Intel provided table
 273                 * http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
 274                 * For Core2 cores, check MSR 0x17, bit 28 1 = Mobile CPU
 275                 */
 276                err = rdmsr_safe_on_cpu(id, 0x17, &eax, &edx);
 277                if (err) {
 278                        dev_warn(dev,
 279                                 "Unable to access MSR 0x17, assuming desktop"
 280                                 " CPU\n");
 281                        usemsr_ee = 0;
 282                } else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
 283                        /*
 284                         * Trust bit 28 up to Penryn, I could not find any
 285                         * documentation on that; if you happen to know
 286                         * someone at Intel please ask
 287                         */
 288                        usemsr_ee = 0;
 289                } else {
 290                        /* Platform ID bits 52:50 (EDX starts at bit 32) */
 291                        platform_id = (edx >> 18) & 0x7;
 292
 293                        /*
 294                         * Mobile Penryn CPU seems to be platform ID 7 or 5
 295                         * (guesswork)
 296                         */
 297                        if (c->x86_model == 0x17 &&
 298                            (platform_id == 5 || platform_id == 7)) {
 299                                /*
 300                                 * If MSR EE bit is set, set it to 90 degrees C,
 301                                 * otherwise 105 degrees C
 302                                 */
 303                                tjmax_ee = 90000;
 304                                tjmax = 105000;
 305                        }
 306                }
 307        }
 308
 309        if (usemsr_ee) {
 310                err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
 311                if (err) {
 312                        dev_warn(dev,
 313                                 "Unable to access MSR 0xEE, for Tjmax, left"
 314                                 " at default\n");
 315                } else if (eax & 0x40000000) {
 316                        tjmax = tjmax_ee;
 317                }
 318        } else if (tjmax == 100000) {
 319                /*
 320                 * If we don't use msr EE it means we are desktop CPU
 321                 * (with exeception of Atom)
 322                 */
 323                dev_warn(dev, "Using relative temperature scale!\n");
 324        }
 325
 326        return tjmax;
 327}
 328
 329static bool cpu_has_tjmax(struct cpuinfo_x86 *c)
 330{
 331        u8 model = c->x86_model;
 332
 333        return model > 0xe &&
 334               model != 0x1c &&
 335               model != 0x26 &&
 336               model != 0x27 &&
 337               model != 0x35 &&
 338               model != 0x36;
 339}
 340
 341static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 342{
 343        int err;
 344        u32 eax, edx;
 345        u32 val;
 346
 347        /*
 348         * A new feature of current Intel(R) processors, the
 349         * IA32_TEMPERATURE_TARGET contains the TjMax value
 350         */
 351        err = rdmsr_safe_on_cpu(id, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
 352        if (err) {
 353                if (cpu_has_tjmax(c))
 354                        dev_warn(dev, "Unable to read TjMax from CPU %u\n", id);
 355        } else {
 356                val = (eax >> 16) & 0xff;
 357                /*
 358                 * If the TjMax is not plausible, an assumption
 359                 * will be used
 360                 */
 361                if (val) {
 362                        dev_dbg(dev, "TjMax is %d degrees C\n", val);
 363                        return val * 1000;
 364                }
 365        }
 366
 367        if (force_tjmax) {
 368                dev_notice(dev, "TjMax forced to %d degrees C by user\n",
 369                           force_tjmax);
 370                return force_tjmax * 1000;
 371        }
 372
 373        /*
 374         * An assumption is made for early CPUs and unreadable MSR.
 375         * NOTE: the calculated value may not be correct.
 376         */
 377        return adjust_tjmax(c, id, dev);
 378}
 379
 380static int create_core_attrs(struct temp_data *tdata, struct device *dev,
 381                             int attr_no)
 382{
 383        int i;
 384        static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
 385                        struct device_attribute *devattr, char *buf) = {
 386                        show_label, show_crit_alarm, show_temp, show_tjmax,
 387                        show_ttarget };
 388        static const char *const suffixes[TOTAL_ATTRS] = {
 389                "label", "crit_alarm", "input", "crit", "max"
 390        };
 391
 392        for (i = 0; i < tdata->attr_size; i++) {
 393                snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
 394                         "temp%d_%s", attr_no, suffixes[i]);
 395                sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
 396                tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
 397                tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
 398                tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
 399                tdata->sd_attrs[i].index = attr_no;
 400                tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr;
 401        }
 402        tdata->attr_group.attrs = tdata->attrs;
 403        return sysfs_create_group(&dev->kobj, &tdata->attr_group);
 404}
 405
 406
 407static int chk_ucode_version(unsigned int cpu)
 408{
 409        struct cpuinfo_x86 *c = &cpu_data(cpu);
 410
 411        /*
 412         * Check if we have problem with errata AE18 of Core processors:
 413         * Readings might stop update when processor visited too deep sleep,
 414         * fixed for stepping D0 (6EC).
 415         */
 416        if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
 417                pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
 418                return -ENODEV;
 419        }
 420        return 0;
 421}
 422
 423static struct platform_device *coretemp_get_pdev(unsigned int cpu)
 424{
 425        int id = topology_logical_die_id(cpu);
 426
 427        if (id >= 0 && id < max_zones)
 428                return zone_devices[id];
 429        return NULL;
 430}
 431
 432static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
 433{
 434        struct temp_data *tdata;
 435
 436        tdata = kzalloc(sizeof(struct temp_data), GFP_KERNEL);
 437        if (!tdata)
 438                return NULL;
 439
 440        tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS :
 441                                                        MSR_IA32_THERM_STATUS;
 442        tdata->is_pkg_data = pkg_flag;
 443        tdata->cpu = cpu;
 444        tdata->cpu_core_id = TO_CORE_ID(cpu);
 445        tdata->attr_size = MAX_CORE_ATTRS;
 446        mutex_init(&tdata->update_lock);
 447        return tdata;
 448}
 449
 450static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 451                            int pkg_flag)
 452{
 453        struct temp_data *tdata;
 454        struct platform_data *pdata = platform_get_drvdata(pdev);
 455        struct cpuinfo_x86 *c = &cpu_data(cpu);
 456        u32 eax, edx;
 457        int err, attr_no;
 458
 459        /*
 460         * Find attr number for sysfs:
 461         * We map the attr number to core id of the CPU
 462         * The attr number is always core id + 2
 463         * The Pkgtemp will always show up as temp1_*, if available
 464         */
 465        attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
 466
 467        if (attr_no > MAX_CORE_DATA - 1)
 468                return -ERANGE;
 469
 470        tdata = init_temp_data(cpu, pkg_flag);
 471        if (!tdata)
 472                return -ENOMEM;
 473
 474        /* Test if we can access the status register */
 475        err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
 476        if (err)
 477                goto exit_free;
 478
 479        /* We can access status register. Get Critical Temperature */
 480        tdata->tjmax = get_tjmax(c, cpu, &pdev->dev);
 481
 482        /*
 483         * Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET.
 484         * The target temperature is available on older CPUs but not in this
 485         * register. Atoms don't have the register at all.
 486         */
 487        if (c->x86_model > 0xe && c->x86_model != 0x1c) {
 488                err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET,
 489                                        &eax, &edx);
 490                if (!err) {
 491                        tdata->ttarget
 492                          = tdata->tjmax - ((eax >> 8) & 0xff) * 1000;
 493                        tdata->attr_size++;
 494                }
 495        }
 496
 497        pdata->core_data[attr_no] = tdata;
 498
 499        /* Create sysfs interfaces */
 500        err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
 501        if (err)
 502                goto exit_free;
 503
 504        return 0;
 505exit_free:
 506        pdata->core_data[attr_no] = NULL;
 507        kfree(tdata);
 508        return err;
 509}
 510
 511static void
 512coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
 513{
 514        if (create_core_data(pdev, cpu, pkg_flag))
 515                dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
 516}
 517
 518static void coretemp_remove_core(struct platform_data *pdata, int indx)
 519{
 520        struct temp_data *tdata = pdata->core_data[indx];
 521
 522        /* Remove the sysfs attributes */
 523        sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
 524
 525        kfree(pdata->core_data[indx]);
 526        pdata->core_data[indx] = NULL;
 527}
 528
 529static int coretemp_probe(struct platform_device *pdev)
 530{
 531        struct device *dev = &pdev->dev;
 532        struct platform_data *pdata;
 533
 534        /* Initialize the per-zone data structures */
 535        pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
 536        if (!pdata)
 537                return -ENOMEM;
 538
 539        pdata->pkg_id = pdev->id;
 540        platform_set_drvdata(pdev, pdata);
 541
 542        pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
 543                                                                  pdata, NULL);
 544        return PTR_ERR_OR_ZERO(pdata->hwmon_dev);
 545}
 546
 547static int coretemp_remove(struct platform_device *pdev)
 548{
 549        struct platform_data *pdata = platform_get_drvdata(pdev);
 550        int i;
 551
 552        for (i = MAX_CORE_DATA - 1; i >= 0; --i)
 553                if (pdata->core_data[i])
 554                        coretemp_remove_core(pdata, i);
 555
 556        return 0;
 557}
 558
 559static struct platform_driver coretemp_driver = {
 560        .driver = {
 561                .name = DRVNAME,
 562        },
 563        .probe = coretemp_probe,
 564        .remove = coretemp_remove,
 565};
 566
 567static struct platform_device *coretemp_device_add(unsigned int cpu)
 568{
 569        int err, zoneid = topology_logical_die_id(cpu);
 570        struct platform_device *pdev;
 571
 572        if (zoneid < 0)
 573                return ERR_PTR(-ENOMEM);
 574
 575        pdev = platform_device_alloc(DRVNAME, zoneid);
 576        if (!pdev)
 577                return ERR_PTR(-ENOMEM);
 578
 579        err = platform_device_add(pdev);
 580        if (err) {
 581                platform_device_put(pdev);
 582                return ERR_PTR(err);
 583        }
 584
 585        zone_devices[zoneid] = pdev;
 586        return pdev;
 587}
 588
 589static int coretemp_cpu_online(unsigned int cpu)
 590{
 591        struct platform_device *pdev = coretemp_get_pdev(cpu);
 592        struct cpuinfo_x86 *c = &cpu_data(cpu);
 593        struct platform_data *pdata;
 594
 595        /*
 596         * Don't execute this on resume as the offline callback did
 597         * not get executed on suspend.
 598         */
 599        if (cpuhp_tasks_frozen)
 600                return 0;
 601
 602        /*
 603         * CPUID.06H.EAX[0] indicates whether the CPU has thermal
 604         * sensors. We check this bit only, all the early CPUs
 605         * without thermal sensors will be filtered out.
 606         */
 607        if (!cpu_has(c, X86_FEATURE_DTHERM))
 608                return -ENODEV;
 609
 610        if (!pdev) {
 611                /* Check the microcode version of the CPU */
 612                if (chk_ucode_version(cpu))
 613                        return -EINVAL;
 614
 615                /*
 616                 * Alright, we have DTS support.
 617                 * We are bringing the _first_ core in this pkg
 618                 * online. So, initialize per-pkg data structures and
 619                 * then bring this core online.
 620                 */
 621                pdev = coretemp_device_add(cpu);
 622                if (IS_ERR(pdev))
 623                        return PTR_ERR(pdev);
 624
 625                /*
 626                 * Check whether pkgtemp support is available.
 627                 * If so, add interfaces for pkgtemp.
 628                 */
 629                if (cpu_has(c, X86_FEATURE_PTS))
 630                        coretemp_add_core(pdev, cpu, 1);
 631        }
 632
 633        pdata = platform_get_drvdata(pdev);
 634        /*
 635         * Check whether a thread sibling is already online. If not add the
 636         * interface for this CPU core.
 637         */
 638        if (!cpumask_intersects(&pdata->cpumask, topology_sibling_cpumask(cpu)))
 639                coretemp_add_core(pdev, cpu, 0);
 640
 641        cpumask_set_cpu(cpu, &pdata->cpumask);
 642        return 0;
 643}
 644
 645static int coretemp_cpu_offline(unsigned int cpu)
 646{
 647        struct platform_device *pdev = coretemp_get_pdev(cpu);
 648        struct platform_data *pd;
 649        struct temp_data *tdata;
 650        int indx, target;
 651
 652        /*
 653         * Don't execute this on suspend as the device remove locks
 654         * up the machine.
 655         */
 656        if (cpuhp_tasks_frozen)
 657                return 0;
 658
 659        /* If the physical CPU device does not exist, just return */
 660        if (!pdev)
 661                return 0;
 662
 663        /* The core id is too big, just return */
 664        indx = TO_ATTR_NO(cpu);
 665        if (indx > MAX_CORE_DATA - 1)
 666                return 0;
 667
 668        pd = platform_get_drvdata(pdev);
 669        tdata = pd->core_data[indx];
 670
 671        cpumask_clear_cpu(cpu, &pd->cpumask);
 672
 673        /*
 674         * If this is the last thread sibling, remove the CPU core
 675         * interface, If there is still a sibling online, transfer the
 676         * target cpu of that core interface to it.
 677         */
 678        target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
 679        if (target >= nr_cpu_ids) {
 680                coretemp_remove_core(pd, indx);
 681        } else if (tdata && tdata->cpu == cpu) {
 682                mutex_lock(&tdata->update_lock);
 683                tdata->cpu = target;
 684                mutex_unlock(&tdata->update_lock);
 685        }
 686
 687        /*
 688         * If all cores in this pkg are offline, remove the device. This
 689         * will invoke the platform driver remove function, which cleans up
 690         * the rest.
 691         */
 692        if (cpumask_empty(&pd->cpumask)) {
 693                zone_devices[topology_logical_die_id(cpu)] = NULL;
 694                platform_device_unregister(pdev);
 695                return 0;
 696        }
 697
 698        /*
 699         * Check whether this core is the target for the package
 700         * interface. We need to assign it to some other cpu.
 701         */
 702        tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
 703        if (tdata && tdata->cpu == cpu) {
 704                target = cpumask_first(&pd->cpumask);
 705                mutex_lock(&tdata->update_lock);
 706                tdata->cpu = target;
 707                mutex_unlock(&tdata->update_lock);
 708        }
 709        return 0;
 710}
 711static const struct x86_cpu_id __initconst coretemp_ids[] = {
 712        { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
 713        {}
 714};
 715MODULE_DEVICE_TABLE(x86cpu, coretemp_ids);
 716
 717static enum cpuhp_state coretemp_hp_online;
 718
 719static int __init coretemp_init(void)
 720{
 721        int err;
 722
 723        /*
 724         * CPUID.06H.EAX[0] indicates whether the CPU has thermal
 725         * sensors. We check this bit only, all the early CPUs
 726         * without thermal sensors will be filtered out.
 727         */
 728        if (!x86_match_cpu(coretemp_ids))
 729                return -ENODEV;
 730
 731        max_zones = topology_max_packages() * topology_max_die_per_package();
 732        zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
 733                              GFP_KERNEL);
 734        if (!zone_devices)
 735                return -ENOMEM;
 736
 737        err = platform_driver_register(&coretemp_driver);
 738        if (err)
 739                goto outzone;
 740
 741        err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/coretemp:online",
 742                                coretemp_cpu_online, coretemp_cpu_offline);
 743        if (err < 0)
 744                goto outdrv;
 745        coretemp_hp_online = err;
 746        return 0;
 747
 748outdrv:
 749        platform_driver_unregister(&coretemp_driver);
 750outzone:
 751        kfree(zone_devices);
 752        return err;
 753}
 754module_init(coretemp_init)
 755
 756static void __exit coretemp_exit(void)
 757{
 758        cpuhp_remove_state(coretemp_hp_online);
 759        platform_driver_unregister(&coretemp_driver);
 760        kfree(zone_devices);
 761}
 762module_exit(coretemp_exit)
 763
 764MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>");
 765MODULE_DESCRIPTION("Intel Core temperature monitor");
 766MODULE_LICENSE("GPL");
 767