linux/drivers/hwmon/drivetemp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Hwmon client for disk and solid state drives with temperature sensors
   4 * Copyright (C) 2019 Zodiac Inflight Innovations
   5 *
   6 * With input from:
   7 *    Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors.
   8 *    (C) 2018 Linus Walleij
   9 *
  10 *    hwmon: Driver for SCSI/ATA temperature sensors
  11 *    by Constantin Baranov <const@mimas.ru>, submitted September 2009
  12 *
  13 * This driver supports reporting the temperature of SATA drives. It can be
  14 * easily extended to report the temperature of SCSI drives.
  15 *
  16 * The primary means to read drive temperatures and temperature limits
  17 * for ATA drives is the SCT Command Transport feature set as specified in
  18 * ATA8-ACS.
  19 * It can be used to read the current drive temperature, temperature limits,
  20 * and historic minimum and maximum temperatures. The SCT Command Transport
  21 * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set
  22 * (ATA8-ACS)".
  23 *
  24 * If the SCT Command Transport feature set is not available, drive temperatures
  25 * may be readable through SMART attributes. Since SMART attributes are not well
  26 * defined, this method is only used as fallback mechanism.
  27 *
  28 * There are three SMART attributes which may report drive temperatures.
  29 * Those are defined as follows (from
  30 * http://www.cropel.com/library/smart-attribute-list.aspx).
  31 *
  32 * 190  Temperature     Temperature, monitored by a sensor somewhere inside
  33 *                      the drive. Raw value typically holds the actual
  34 *                      temperature (hexadecimal) in its rightmost two digits.
  35 *
  36 * 194  Temperature     Temperature, monitored by a sensor somewhere inside
  37 *                      the drive. Raw value typically holds the actual
  38 *                      temperature (hexadecimal) in its rightmost two digits.
  39 *
  40 * 231  Temperature     Temperature, monitored by a sensor somewhere inside
  41 *                      the drive. Raw value typically holds the actual
  42 *                      temperature (hexadecimal) in its rightmost two digits.
  43 *
  44 * Wikipedia defines attributes a bit differently.
  45 *
  46 * 190  Temperature     Value is equal to (100-temp. °C), allowing manufacturer
  47 *      Difference or   to set a minimum threshold which corresponds to a
  48 *      Airflow         maximum temperature. This also follows the convention of
  49 *      Temperature     100 being a best-case value and lower values being
  50 *                      undesirable. However, some older drives may instead
  51 *                      report raw Temperature (identical to 0xC2) or
  52 *                      Temperature minus 50 here.
  53 * 194  Temperature or  Indicates the device temperature, if the appropriate
  54 *      Temperature     sensor is fitted. Lowest byte of the raw value contains
  55 *      Celsius         the exact temperature value (Celsius degrees).
  56 * 231  Life Left       Indicates the approximate SSD life left, in terms of
  57 *      (SSDs) or       program/erase cycles or available reserved blocks.
  58 *      Temperature     A normalized value of 100 represents a new drive, with
  59 *                      a threshold value at 10 indicating a need for
  60 *                      replacement. A value of 0 may mean that the drive is
  61 *                      operating in read-only mode to allow data recovery.
  62 *                      Previously (pre-2010) occasionally used for Drive
  63 *                      Temperature (more typically reported at 0xC2).
  64 *
  65 * Common denominator is that the first raw byte reports the temperature
  66 * in degrees C on almost all drives. Some drives may report a fractional
  67 * temperature in the second raw byte.
  68 *
  69 * Known exceptions (from libatasmart):
  70 * - SAMSUNG SV0412H and SAMSUNG SV1204H report the temperature in 10th
  71 *   degrees C in the first two raw bytes.
  72 * - A few Maxtor drives report an unknown or bad value in attribute 194.
  73 * - Certain Apple SSD drives report an unknown value in attribute 190.
  74 *   Only certain firmware versions are affected.
  75 *
  76 * Those exceptions affect older ATA drives and are currently ignored.
  77 * Also, the second raw byte (possibly reporting the fractional temperature)
  78 * is currently ignored.
  79 *
  80 * Many drives also report temperature limits in additional SMART data raw
  81 * bytes. The format of those is not well defined and varies widely.
  82 * The driver does not currently attempt to report those limits.
  83 *
  84 * According to data in smartmontools, attribute 231 is rarely used to report
  85 * drive temperatures. At the same time, several drives report SSD life left
  86 * in attribute 231, but do not support temperature sensors. For this reason,
  87 * attribute 231 is currently ignored.
  88 *
  89 * Following above definitions, temperatures are reported as follows.
  90 * - If SCT Command Transport is supported, it is used to read the
  91 *   temperature and, if available, temperature limits.
  92 * - Otherwise, if SMART attribute 194 is supported, it is used to read
  93 *   the temperature.
  94 * - Otherwise, if SMART attribute 190 is supported, it is used to read
  95 *   the temperature.
  96 */
  97
  98#include <linux/ata.h>
  99#include <linux/bits.h>
 100#include <linux/device.h>
 101#include <linux/hwmon.h>
 102#include <linux/kernel.h>
 103#include <linux/list.h>
 104#include <linux/module.h>
 105#include <linux/mutex.h>
 106#include <scsi/scsi_cmnd.h>
 107#include <scsi/scsi_device.h>
 108#include <scsi/scsi_driver.h>
 109#include <scsi/scsi_proto.h>
 110
/*
 * Per-device driver state. One instance is allocated by drivetemp_add() for
 * each device it accepts and is linked into drivetemp_devlist until
 * drivetemp_remove() frees it.
 *
 * The have_temp_* flags and temp_* limits are filled in once by
 * drivetemp_identify_sata(); the limits hold temp_from_sct() results, i.e.
 * the raw signed byte multiplied by 1000.
 */
struct drivetemp_data {
        struct list_head list;          /* list of instantiated devices */
        struct mutex lock;              /* protect data buffer accesses */
        struct scsi_device *sdev;       /* SCSI device */
        struct device *dev;             /* instantiating device */
        struct device *hwdev;           /* hardware monitoring device */
        u8 smartdata[ATA_SECT_SIZE];    /* local buffer */
        /* read method: drivetemp_get_scttemp() or drivetemp_get_smarttemp() */
        int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val);
        bool have_temp_lowest;          /* lowest temp in SCT status */
        bool have_temp_highest;         /* highest temp in SCT status */
        bool have_temp_min;             /* have min temp */
        bool have_temp_max;             /* have max temp */
        bool have_temp_lcrit;           /* have lower critical limit */
        bool have_temp_crit;            /* have critical limit */
        int temp_min;                   /* min temp */
        int temp_max;                   /* max temp */
        int temp_lcrit;                 /* lower critical limit */
        int temp_crit;                  /* critical limit */
};
 130
/*
 * All devices that currently have a hwmon instance registered by this
 * driver. Entries are added in drivetemp_add() and looked up by their
 * instantiating device in drivetemp_remove().
 */
static LIST_HEAD(drivetemp_devlist);
 132
/* Number of 12-byte entries scanned in the SMART attribute value table */
#define ATA_MAX_SMART_ATTRS     30
/* SMART attribute IDs inspected for a temperature reading */
#define SMART_TEMP_PROP_190     190
#define SMART_TEMP_PROP_194     194

/*
 * Values passed as the "select" argument of drivetemp_ata_command()
 * (SCT_*_ADDR) and as its "feature" argument (SMART_*_LOG).
 */
#define SCT_STATUS_REQ_ADDR     0xe0
#define  SCT_STATUS_VERSION_LOW         0       /* log byte offsets */
#define  SCT_STATUS_VERSION_HIGH        1
#define  SCT_STATUS_TEMP                200
#define  SCT_STATUS_TEMP_LOWEST         201
#define  SCT_STATUS_TEMP_HIGHEST        202
#define SCT_READ_LOG_ADDR       0xe1
#define  SMART_READ_LOG                 0xd5
#define  SMART_WRITE_LOG                0xd6

/* Raw SCT temperature byte that means "no value available" */
#define INVALID_TEMP            0x80

/* temp is a raw byte from the SCT status log or temperature history table */
#define temp_is_valid(temp)     ((temp) != INVALID_TEMP)
/* Interpret the raw byte as a signed 8-bit value and scale it by 1000 */
#define temp_from_sct(temp)     (((s8)(temp)) * 1000)
 151
 152static inline bool ata_id_smart_supported(u16 *id)
 153{
 154        return id[ATA_ID_COMMAND_SET_1] & BIT(0);
 155}
 156
 157static inline bool ata_id_smart_enabled(u16 *id)
 158{
 159        return id[ATA_ID_CFS_ENABLE_1] & BIT(0);
 160}
 161
 162static int drivetemp_scsi_command(struct drivetemp_data *st,
 163                                 u8 ata_command, u8 feature,
 164                                 u8 lba_low, u8 lba_mid, u8 lba_high)
 165{
 166        u8 scsi_cmd[MAX_COMMAND_SIZE];
 167        int data_dir;
 168
 169        memset(scsi_cmd, 0, sizeof(scsi_cmd));
 170        scsi_cmd[0] = ATA_16;
 171        if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) {
 172                scsi_cmd[1] = (5 << 1); /* PIO Data-out */
 173                /*
 174                 * No off.line or cc, write to dev, block count in sector count
 175                 * field.
 176                 */
 177                scsi_cmd[2] = 0x06;
 178                data_dir = DMA_TO_DEVICE;
 179        } else {
 180                scsi_cmd[1] = (4 << 1); /* PIO Data-in */
 181                /*
 182                 * No off.line or cc, read from dev, block count in sector count
 183                 * field.
 184                 */
 185                scsi_cmd[2] = 0x0e;
 186                data_dir = DMA_FROM_DEVICE;
 187        }
 188        scsi_cmd[4] = feature;
 189        scsi_cmd[6] = 1;        /* 1 sector */
 190        scsi_cmd[8] = lba_low;
 191        scsi_cmd[10] = lba_mid;
 192        scsi_cmd[12] = lba_high;
 193        scsi_cmd[14] = ata_command;
 194
 195        return scsi_execute_req(st->sdev, scsi_cmd, data_dir,
 196                                st->smartdata, ATA_SECT_SIZE, NULL, HZ, 5,
 197                                NULL);
 198}
 199
 200static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature,
 201                                 u8 select)
 202{
 203        return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select,
 204                                     ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS);
 205}
 206
 207static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr,
 208                                  long *temp)
 209{
 210        u8 *buf = st->smartdata;
 211        bool have_temp = false;
 212        u8 temp_raw;
 213        u8 csum;
 214        int err;
 215        int i;
 216
 217        err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0);
 218        if (err)
 219                return err;
 220
 221        /* Checksum the read value table */
 222        csum = 0;
 223        for (i = 0; i < ATA_SECT_SIZE; i++)
 224                csum += buf[i];
 225        if (csum) {
 226                dev_dbg(&st->sdev->sdev_gendev,
 227                        "checksum error reading SMART values\n");
 228                return -EIO;
 229        }
 230
 231        for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) {
 232                u8 *attr = buf + i * 12;
 233                int id = attr[2];
 234
 235                if (!id)
 236                        continue;
 237
 238                if (id == SMART_TEMP_PROP_190) {
 239                        temp_raw = attr[7];
 240                        have_temp = true;
 241                }
 242                if (id == SMART_TEMP_PROP_194) {
 243                        temp_raw = attr[7];
 244                        have_temp = true;
 245                        break;
 246                }
 247        }
 248
 249        if (have_temp) {
 250                *temp = temp_raw * 1000;
 251                return 0;
 252        }
 253
 254        return -ENXIO;
 255}
 256
 257static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val)
 258{
 259        u8 *buf = st->smartdata;
 260        int err;
 261
 262        err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
 263        if (err)
 264                return err;
 265        switch (attr) {
 266        case hwmon_temp_input:
 267                if (!temp_is_valid(buf[SCT_STATUS_TEMP]))
 268                        return -ENODATA;
 269                *val = temp_from_sct(buf[SCT_STATUS_TEMP]);
 270                break;
 271        case hwmon_temp_lowest:
 272                if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]))
 273                        return -ENODATA;
 274                *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]);
 275                break;
 276        case hwmon_temp_highest:
 277                if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]))
 278                        return -ENODATA;
 279                *val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]);
 280                break;
 281        default:
 282                err = -EINVAL;
 283                break;
 284        }
 285        return err;
 286}
 287
static const char * const sct_avoid_models[] = {
/*
 * These drives will have WRITE FPDMA QUEUED command timeouts and sometimes just
 * freeze until power-cycled under heavy write loads when their temperature is
 * getting polled in SCT mode. The SMART mode seems to be fine, though.
 *
 * While only the 3 TB model (DT01ACA3) was actually caught exhibiting the
 * problem let's play safe here to avoid data corruption and ban the whole
 * DT01ACAx family.
 *
 * The models from this array are prefix-matched (see drivetemp_sct_avoid()).
 */
        "TOSHIBA DT01ACA",
};
 302
 303static bool drivetemp_sct_avoid(struct drivetemp_data *st)
 304{
 305        struct scsi_device *sdev = st->sdev;
 306        unsigned int ctr;
 307
 308        if (!sdev->model)
 309                return false;
 310
 311        /*
 312         * The "model" field contains just the raw SCSI INQUIRY response
 313         * "product identification" field, which has a width of 16 bytes.
 314         * This field is space-filled, but is NOT NULL-terminated.
 315         */
 316        for (ctr = 0; ctr < ARRAY_SIZE(sct_avoid_models); ctr++)
 317                if (!strncmp(sdev->model, sct_avoid_models[ctr],
 318                             strlen(sct_avoid_models[ctr])))
 319                        return true;
 320
 321        return false;
 322}
 323
/*
 * Probe a device and select the method used to read its temperature.
 *
 * The ATA IDENTIFY DEVICE data is taken from sdev->vpd_pg89. If the drive
 * supports SCT, is not on the avoid list, and returns a valid temperature in
 * a version 2 or 3 SCT status log, drivetemp_get_scttemp() is selected and,
 * where available, the temperature limits are cached from the temperature
 * history table. Otherwise, if SMART is supported and enabled,
 * drivetemp_get_smarttemp() is selected and called once as a test.
 *
 * Returns 0 if SCT was selected, -ENODEV if the device is not a SATA drive
 * or offers neither method, or the result of the test SMART read. Note that
 * st->get_temp is set before that test read, so it stays set even if the
 * read fails.
 */
static int drivetemp_identify_sata(struct drivetemp_data *st)
{
        struct scsi_device *sdev = st->sdev;
        u8 *buf = st->smartdata;
        struct scsi_vpd *vpd;
        bool is_ata, is_sata;
        bool have_sct_data_table;
        bool have_sct_temp;
        bool have_smart;
        bool have_sct;
        u16 *ata_id;
        u16 version;
        long temp;
        int err;

        /* SCSI-ATA Translation present? */
        rcu_read_lock();
        vpd = rcu_dereference(sdev->vpd_pg89);

        /*
         * Verify that ATA IDENTIFY DEVICE data is included in ATA Information
         * VPD and that the drive implements the SATA protocol.
         */
        if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA ||
            vpd->data[36] != 0x34) {
                rcu_read_unlock();
                return -ENODEV;
        }
        /*
         * Everything needed from the IDENTIFY data is copied into locals
         * here, because vpd is not used after rcu_read_unlock().
         */
        ata_id = (u16 *)&vpd->data[60];
        is_ata = ata_id_is_ata(ata_id);
        is_sata = ata_id_is_sata(ata_id);
        have_sct = ata_id_sct_supported(ata_id);
        have_sct_data_table = ata_id_sct_data_tables(ata_id);
        have_smart = ata_id_smart_supported(ata_id) &&
                                ata_id_smart_enabled(ata_id);

        rcu_read_unlock();

        /* bail out if this is not a SATA device */
        if (!is_ata || !is_sata)
                return -ENODEV;

        if (have_sct && drivetemp_sct_avoid(st)) {
                dev_notice(&sdev->sdev_gendev,
                           "will avoid using SCT for temperature monitoring\n");
                have_sct = false;
        }

        if (!have_sct)
                goto skip_sct;

        /* Fetch the SCT status log into buf */
        err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
        if (err)
                goto skip_sct;

        /* Only status log format versions 2 and 3 are accepted */
        version = (buf[SCT_STATUS_VERSION_HIGH] << 8) |
                  buf[SCT_STATUS_VERSION_LOW];
        if (version != 2 && version != 3)
                goto skip_sct;

        have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]);
        if (!have_sct_temp)
                goto skip_sct;

        /* Must be read now: buf is overwritten by the request built below */
        st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]);
        st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]);

        if (!have_sct_data_table)
                goto skip_sct_data;

        /* Request and read temperature history table */
        memset(buf, '\0', sizeof(st->smartdata));
        buf[0] = 5;     /* data table command */
        buf[2] = 1;     /* read table */
        buf[4] = 2;     /* temperature history table */

        err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR);
        if (err)
                goto skip_sct_data;

        err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR);
        if (err)
                goto skip_sct_data;

        /*
         * Temperature limits per AT Attachment 8 -
         * ATA/ATAPI Command Set (ATA8-ACS)
         */
        st->have_temp_max = temp_is_valid(buf[6]);
        st->have_temp_crit = temp_is_valid(buf[7]);
        st->have_temp_min = temp_is_valid(buf[8]);
        st->have_temp_lcrit = temp_is_valid(buf[9]);

        st->temp_max = temp_from_sct(buf[6]);
        st->temp_crit = temp_from_sct(buf[7]);
        st->temp_min = temp_from_sct(buf[8]);
        st->temp_lcrit = temp_from_sct(buf[9]);

skip_sct_data:
        /* Every goto to this label happens after have_sct_temp was set */
        if (have_sct_temp) {
                st->get_temp = drivetemp_get_scttemp;
                return 0;
        }
skip_sct:
        if (!have_smart)
                return -ENODEV;
        st->get_temp = drivetemp_get_smarttemp;
        /* Test read; its result decides whether the device is accepted */
        return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp);
}
 433
 434static int drivetemp_identify(struct drivetemp_data *st)
 435{
 436        struct scsi_device *sdev = st->sdev;
 437
 438        /* Bail out immediately if there is no inquiry data */
 439        if (!sdev->inquiry || sdev->inquiry_len < 16)
 440                return -ENODEV;
 441
 442        /* Disk device? */
 443        if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC)
 444                return -ENODEV;
 445
 446        return drivetemp_identify_sata(st);
 447}
 448
 449static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type,
 450                         u32 attr, int channel, long *val)
 451{
 452        struct drivetemp_data *st = dev_get_drvdata(dev);
 453        int err = 0;
 454
 455        if (type != hwmon_temp)
 456                return -EINVAL;
 457
 458        switch (attr) {
 459        case hwmon_temp_input:
 460        case hwmon_temp_lowest:
 461        case hwmon_temp_highest:
 462                mutex_lock(&st->lock);
 463                err = st->get_temp(st, attr, val);
 464                mutex_unlock(&st->lock);
 465                break;
 466        case hwmon_temp_lcrit:
 467                *val = st->temp_lcrit;
 468                break;
 469        case hwmon_temp_min:
 470                *val = st->temp_min;
 471                break;
 472        case hwmon_temp_max:
 473                *val = st->temp_max;
 474                break;
 475        case hwmon_temp_crit:
 476                *val = st->temp_crit;
 477                break;
 478        default:
 479                err = -EINVAL;
 480                break;
 481        }
 482        return err;
 483}
 484
 485static umode_t drivetemp_is_visible(const void *data,
 486                                   enum hwmon_sensor_types type,
 487                                   u32 attr, int channel)
 488{
 489        const struct drivetemp_data *st = data;
 490
 491        switch (type) {
 492        case hwmon_temp:
 493                switch (attr) {
 494                case hwmon_temp_input:
 495                        return 0444;
 496                case hwmon_temp_lowest:
 497                        if (st->have_temp_lowest)
 498                                return 0444;
 499                        break;
 500                case hwmon_temp_highest:
 501                        if (st->have_temp_highest)
 502                                return 0444;
 503                        break;
 504                case hwmon_temp_min:
 505                        if (st->have_temp_min)
 506                                return 0444;
 507                        break;
 508                case hwmon_temp_max:
 509                        if (st->have_temp_max)
 510                                return 0444;
 511                        break;
 512                case hwmon_temp_lcrit:
 513                        if (st->have_temp_lcrit)
 514                                return 0444;
 515                        break;
 516                case hwmon_temp_crit:
 517                        if (st->have_temp_crit)
 518                                return 0444;
 519                        break;
 520                default:
 521                        break;
 522                }
 523                break;
 524        default:
 525                break;
 526        }
 527        return 0;
 528}
 529
/*
 * Channel description handed to the hwmon core: one chip channel and one
 * temperature channel listing every attribute the driver can provide.
 * Which of the temperature attributes are actually exposed for a given
 * drive is decided by drivetemp_is_visible().
 */
static const struct hwmon_channel_info *drivetemp_info[] = {
        HWMON_CHANNEL_INFO(chip,
                           HWMON_C_REGISTER_TZ),
        HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT |
                           HWMON_T_LOWEST | HWMON_T_HIGHEST |
                           HWMON_T_MIN | HWMON_T_MAX |
                           HWMON_T_LCRIT | HWMON_T_CRIT),
        NULL
};
 539
/* hwmon callbacks; no .write is provided, all attributes are read-only */
static const struct hwmon_ops drivetemp_ops = {
        .is_visible = drivetemp_is_visible,
        .read = drivetemp_read,
};
 544
/* Chip description passed to hwmon_device_register_with_info() */
static const struct hwmon_chip_info drivetemp_chip_info = {
        .ops = &drivetemp_ops,
        .info = drivetemp_info,
};
 549
 550/*
 551 * The device argument points to sdev->sdev_dev. Its parent is
 552 * sdev->sdev_gendev, which we can use to get the scsi_device pointer.
 553 */
 554static int drivetemp_add(struct device *dev, struct class_interface *intf)
 555{
 556        struct scsi_device *sdev = to_scsi_device(dev->parent);
 557        struct drivetemp_data *st;
 558        int err;
 559
 560        st = kzalloc(sizeof(*st), GFP_KERNEL);
 561        if (!st)
 562                return -ENOMEM;
 563
 564        st->sdev = sdev;
 565        st->dev = dev;
 566        mutex_init(&st->lock);
 567
 568        if (drivetemp_identify(st)) {
 569                err = -ENODEV;
 570                goto abort;
 571        }
 572
 573        st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp",
 574                                                    st, &drivetemp_chip_info,
 575                                                    NULL);
 576        if (IS_ERR(st->hwdev)) {
 577                err = PTR_ERR(st->hwdev);
 578                goto abort;
 579        }
 580
 581        list_add(&st->list, &drivetemp_devlist);
 582        return 0;
 583
 584abort:
 585        kfree(st);
 586        return err;
 587}
 588
 589static void drivetemp_remove(struct device *dev, struct class_interface *intf)
 590{
 591        struct drivetemp_data *st, *tmp;
 592
 593        list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) {
 594                if (st->dev == dev) {
 595                        list_del(&st->list);
 596                        hwmon_device_unregister(st->hwdev);
 597                        kfree(st);
 598                        break;
 599                }
 600        }
 601}
 602
/*
 * Class interface registered with the SCSI layer in drivetemp_init(); its
 * callbacks create and destroy the per-device state.
 */
static struct class_interface drivetemp_interface = {
        .add_dev = drivetemp_add,
        .remove_dev = drivetemp_remove,
};
 607
 608static int __init drivetemp_init(void)
 609{
 610        return scsi_register_interface(&drivetemp_interface);
 611}
 612
/* Module exit point: unregister the interface registered by drivetemp_init() */
static void __exit drivetemp_exit(void)
{
        scsi_unregister_interface(&drivetemp_interface);
}
 617
 618module_init(drivetemp_init);
 619module_exit(drivetemp_exit);
 620
 621MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>");
 622MODULE_DESCRIPTION("Hard drive temperature monitor");
 623MODULE_LICENSE("GPL");
 624