linux/drivers/sbus/char/bbc_envctrl.c
<<
>>
Prefs
   1/* bbc_envctrl.c: UltraSPARC-III environment control driver.
   2 *
   3 * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net)
   4 */
   5
   6#include <linux/kthread.h>
   7#include <linux/delay.h>
   8#include <linux/kmod.h>
   9#include <linux/reboot.h>
  10#include <linux/of.h>
  11#include <linux/of_device.h>
  12#include <asm/oplib.h>
  13
  14#include "bbc_i2c.h"
  15#include "max1617.h"
  16
  17#undef ENVCTRL_TRACE
  18
  19/* WARNING: Making changes to this driver is very dangerous.
  20 *          If you misprogram the sensor chips they can
  21 *          cut the power on you instantly.
  22 */
  23
  24/* Two temperature sensors exist in the SunBLADE-1000 enclosure.
  25 * Both are implemented using max1617 i2c devices.  Each max1617
  26 * monitors 2 temperatures, one for one of the cpu dies and the other
  27 * for the ambient temperature.
  28 *
  29 * The max1617 is capable of being programmed with power-off
  30 * temperature values, one low limit and one high limit.  These
  31 * can be controlled independently for the cpu or ambient temperature.
  32 * If a limit is violated, the power is simply shut off.  The frequency
  33 * with which the max1617 does temperature sampling can be controlled
  34 * as well.
  35 *
  36 * Three fans exist inside the machine, all three are controlled with
  37 * an i2c digital to analog converter.  There is a fan directed at the
  38 * two processor slots, another for the rest of the enclosure, and the
  39 * third is for the power supply.  The first two fans may be speed
  40 * controlled by changing the voltage fed to them.  The third fan may
  41 * only be completely off or on.  The third fan is meant to only be
  42 * disabled/enabled when entering/exiting the lowest power-saving
  43 * mode of the machine.
  44 *
  45 * An environmental control kernel thread periodically monitors all
  46 * temperature sensors.  Based upon the samples it will adjust the
  47 * fan speeds to try and keep the system within a certain temperature
  48 * range (the goal being to make the fans as quiet as possible without
  49 * allowing the system to get too hot).
  50 *
  51 * If the temperature begins to rise/fall outside of the acceptable
  52 * operating range, a periodic warning will be sent to the kernel log.
  53 * The fans will be put on full blast to attempt to deal with this
  54 * situation.  After exceeding the acceptable operating range by a
  55 * certain threshold, the kernel thread will shut down the system.
  56 * Here, the thread is attempting to shut the machine down cleanly
  57 * before the hardware based power-off event is triggered.
  58 */
  59
  60/* These settings are in Celsius.  We use these defaults only
  61 * if we cannot interrogate the cpu-fru SEEPROM.
  62 */
  63struct temp_limits {
  64        s8 high_pwroff, high_shutdown, high_warn;
  65        s8 low_warn, low_shutdown, low_pwroff;
  66};
  67
  68static struct temp_limits cpu_temp_limits[2] = {
  69        { 100, 85, 80, 5, -5, -10 },
  70        { 100, 85, 80, 5, -5, -10 },
  71};
  72
  73static struct temp_limits amb_temp_limits[2] = {
  74        { 65, 55, 40, 5, -5, -10 },
  75        { 65, 55, 40, 5, -5, -10 },
  76};
  77
  78static LIST_HEAD(all_temps);
  79static LIST_HEAD(all_fans);
  80
  81#define CPU_FAN_REG     0xf0
  82#define SYS_FAN_REG     0xf2
  83#define PSUPPLY_FAN_REG 0xf4
  84
  85#define FAN_SPEED_MIN   0x0c
  86#define FAN_SPEED_MAX   0x3f
  87
  88#define PSUPPLY_FAN_ON  0x1f
  89#define PSUPPLY_FAN_OFF 0x00
  90
  91static void set_fan_speeds(struct bbc_fan_control *fp)
  92{
  93        /* Put temperatures into range so we don't mis-program
  94         * the hardware.
  95         */
  96        if (fp->cpu_fan_speed < FAN_SPEED_MIN)
  97                fp->cpu_fan_speed = FAN_SPEED_MIN;
  98        if (fp->cpu_fan_speed > FAN_SPEED_MAX)
  99                fp->cpu_fan_speed = FAN_SPEED_MAX;
 100        if (fp->system_fan_speed < FAN_SPEED_MIN)
 101                fp->system_fan_speed = FAN_SPEED_MIN;
 102        if (fp->system_fan_speed > FAN_SPEED_MAX)
 103                fp->system_fan_speed = FAN_SPEED_MAX;
 104#ifdef ENVCTRL_TRACE
 105        printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
 106               fp->index,
 107               fp->cpu_fan_speed, fp->system_fan_speed);
 108#endif
 109
 110        bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
 111        bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
 112        bbc_i2c_writeb(fp->client,
 113                       (fp->psupply_fan_on ?
 114                        PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
 115                       PSUPPLY_FAN_REG);
 116}
 117
 118static void get_current_temps(struct bbc_cpu_temperature *tp)
 119{
 120        tp->prev_amb_temp = tp->curr_amb_temp;
 121        bbc_i2c_readb(tp->client,
 122                      (unsigned char *) &tp->curr_amb_temp,
 123                      MAX1617_AMB_TEMP);
 124        tp->prev_cpu_temp = tp->curr_cpu_temp;
 125        bbc_i2c_readb(tp->client,
 126                      (unsigned char *) &tp->curr_cpu_temp,
 127                      MAX1617_CPU_TEMP);
 128#ifdef ENVCTRL_TRACE
 129        printk("temp%d: cpu(%d C) amb(%d C)\n",
 130               tp->index,
 131               (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
 132#endif
 133}
 134
 135
 136static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
 137{
 138        static int shutting_down = 0;
 139        char *type = "???";
 140        s8 val = -1;
 141
 142        if (shutting_down != 0)
 143                return;
 144
 145        if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
 146            tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
 147                type = "ambient";
 148                val = tp->curr_amb_temp;
 149        } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
 150                   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
 151                type = "CPU";
 152                val = tp->curr_cpu_temp;
 153        }
 154
 155        printk(KERN_CRIT "temp%d: Outside of safe %s "
 156               "operating temperature, %d C.\n",
 157               tp->index, type, val);
 158
 159        printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
 160
 161        shutting_down = 1;
 162        if (orderly_poweroff(true) < 0)
 163                printk(KERN_CRIT "envctrl: shutdown execution failed\n");
 164}
 165
 166#define WARN_INTERVAL   (30 * HZ)
 167
 168static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
 169{
 170        int ret = 0;
 171
 172        if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
 173                if (tp->curr_amb_temp >=
 174                    amb_temp_limits[tp->index].high_warn) {
 175                        printk(KERN_WARNING "temp%d: "
 176                               "Above safe ambient operating temperature, %d C.\n",
 177                               tp->index, (int) tp->curr_amb_temp);
 178                        ret = 1;
 179                } else if (tp->curr_amb_temp <
 180                           amb_temp_limits[tp->index].low_warn) {
 181                        printk(KERN_WARNING "temp%d: "
 182                               "Below safe ambient operating temperature, %d C.\n",
 183                               tp->index, (int) tp->curr_amb_temp);
 184                        ret = 1;
 185                }
 186                if (ret)
 187                        *last_warn = jiffies;
 188        } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
 189                   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
 190                ret = 1;
 191
 192        /* Now check the shutdown limits. */
 193        if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
 194            tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
 195                do_envctrl_shutdown(tp);
 196                ret = 1;
 197        }
 198
 199        if (ret) {
 200                tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
 201        } else if ((tick & (8 - 1)) == 0) {
 202                s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
 203                s8 amb_goal_lo;
 204
 205                amb_goal_lo = amb_goal_hi - 3;
 206
 207                /* We do not try to avoid 'too cold' events.  Basically we
 208                 * only try to deal with over-heating and fan noise reduction.
 209                 */
 210                if (tp->avg_amb_temp < amb_goal_hi) {
 211                        if (tp->avg_amb_temp >= amb_goal_lo)
 212                                tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
 213                        else
 214                                tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
 215                } else {
 216                        tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
 217                }
 218        } else {
 219                tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
 220        }
 221}
 222
 223static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
 224{
 225        int ret = 0;
 226
 227        if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
 228                if (tp->curr_cpu_temp >=
 229                    cpu_temp_limits[tp->index].high_warn) {
 230                        printk(KERN_WARNING "temp%d: "
 231                               "Above safe CPU operating temperature, %d C.\n",
 232                               tp->index, (int) tp->curr_cpu_temp);
 233                        ret = 1;
 234                } else if (tp->curr_cpu_temp <
 235                           cpu_temp_limits[tp->index].low_warn) {
 236                        printk(KERN_WARNING "temp%d: "
 237                               "Below safe CPU operating temperature, %d C.\n",
 238                               tp->index, (int) tp->curr_cpu_temp);
 239                        ret = 1;
 240                }
 241                if (ret)
 242                        *last_warn = jiffies;
 243        } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
 244                   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
 245                ret = 1;
 246
 247        /* Now check the shutdown limits. */
 248        if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
 249            tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
 250                do_envctrl_shutdown(tp);
 251                ret = 1;
 252        }
 253
 254        if (ret) {
 255                tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
 256        } else if ((tick & (8 - 1)) == 0) {
 257                s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
 258                s8 cpu_goal_lo;
 259
 260                cpu_goal_lo = cpu_goal_hi - 3;
 261
 262                /* We do not try to avoid 'too cold' events.  Basically we
 263                 * only try to deal with over-heating and fan noise reduction.
 264                 */
 265                if (tp->avg_cpu_temp < cpu_goal_hi) {
 266                        if (tp->avg_cpu_temp >= cpu_goal_lo)
 267                                tp->fan_todo[FAN_CPU] = FAN_SAME;
 268                        else
 269                                tp->fan_todo[FAN_CPU] = FAN_SLOWER;
 270                } else {
 271                        tp->fan_todo[FAN_CPU] = FAN_FASTER;
 272                }
 273        } else {
 274                tp->fan_todo[FAN_CPU] = FAN_SAME;
 275        }
 276}
 277
 278static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
 279{
 280        tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
 281        tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
 282
 283        analyze_ambient_temp(tp, last_warn, tp->sample_tick);
 284        analyze_cpu_temp(tp, last_warn, tp->sample_tick);
 285
 286        tp->sample_tick++;
 287}
 288
 289static enum fan_action prioritize_fan_action(int which_fan)
 290{
 291        struct bbc_cpu_temperature *tp;
 292        enum fan_action decision = FAN_STATE_MAX;
 293
 294        /* Basically, prioritize what the temperature sensors
 295         * recommend we do, and perform that action on all the
 296         * fans.
 297         */
 298        list_for_each_entry(tp, &all_temps, glob_list) {
 299                if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
 300                        decision = FAN_FULLBLAST;
 301                        break;
 302                }
 303                if (tp->fan_todo[which_fan] == FAN_SAME &&
 304                    decision != FAN_FASTER)
 305                        decision = FAN_SAME;
 306                else if (tp->fan_todo[which_fan] == FAN_FASTER)
 307                        decision = FAN_FASTER;
 308                else if (decision != FAN_FASTER &&
 309                         decision != FAN_SAME &&
 310                         tp->fan_todo[which_fan] == FAN_SLOWER)
 311                        decision = FAN_SLOWER;
 312        }
 313        if (decision == FAN_STATE_MAX)
 314                decision = FAN_SAME;
 315
 316        return decision;
 317}
 318
 319static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
 320{
 321        enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
 322        int ret;
 323
 324        if (decision == FAN_SAME)
 325                return 0;
 326
 327        ret = 1;
 328        if (decision == FAN_FULLBLAST) {
 329                if (fp->system_fan_speed >= FAN_SPEED_MAX)
 330                        ret = 0;
 331                else
 332                        fp->system_fan_speed = FAN_SPEED_MAX;
 333        } else {
 334                if (decision == FAN_FASTER) {
 335                        if (fp->system_fan_speed >= FAN_SPEED_MAX)
 336                                ret = 0;
 337                        else
 338                                fp->system_fan_speed += 2;
 339                } else {
 340                        int orig_speed = fp->system_fan_speed;
 341
 342                        if (orig_speed <= FAN_SPEED_MIN ||
 343                            orig_speed <= (fp->cpu_fan_speed - 3))
 344                                ret = 0;
 345                        else
 346                                fp->system_fan_speed -= 1;
 347                }
 348        }
 349
 350        return ret;
 351}
 352
 353static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
 354{
 355        enum fan_action decision = prioritize_fan_action(FAN_CPU);
 356        int ret;
 357
 358        if (decision == FAN_SAME)
 359                return 0;
 360
 361        ret = 1;
 362        if (decision == FAN_FULLBLAST) {
 363                if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
 364                        ret = 0;
 365                else
 366                        fp->cpu_fan_speed = FAN_SPEED_MAX;
 367        } else {
 368                if (decision == FAN_FASTER) {
 369                        if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
 370                                ret = 0;
 371                        else {
 372                                fp->cpu_fan_speed += 2;
 373                                if (fp->system_fan_speed <
 374                                    (fp->cpu_fan_speed - 3))
 375                                        fp->system_fan_speed =
 376                                                fp->cpu_fan_speed - 3;
 377                        }
 378                } else {
 379                        if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
 380                                ret = 0;
 381                        else
 382                                fp->cpu_fan_speed -= 1;
 383                }
 384        }
 385
 386        return ret;
 387}
 388
 389static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
 390{
 391        int new;
 392
 393        new  = maybe_new_ambient_fan_speed(fp);
 394        new |= maybe_new_cpu_fan_speed(fp);
 395
 396        if (new)
 397                set_fan_speeds(fp);
 398}
 399
 400static void fans_full_blast(void)
 401{
 402        struct bbc_fan_control *fp;
 403
 404        /* Since we will not be monitoring things anymore, put
 405         * the fans on full blast.
 406         */
 407        list_for_each_entry(fp, &all_fans, glob_list) {
 408                fp->cpu_fan_speed = FAN_SPEED_MAX;
 409                fp->system_fan_speed = FAN_SPEED_MAX;
 410                fp->psupply_fan_on = 1;
 411                set_fan_speeds(fp);
 412        }
 413}
 414
 415#define POLL_INTERVAL   (5 * 1000)
 416static unsigned long last_warning_jiffies;
 417static struct task_struct *kenvctrld_task;
 418
 419static int kenvctrld(void *__unused)
 420{
 421        printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
 422        last_warning_jiffies = jiffies - WARN_INTERVAL;
 423        for (;;) {
 424                struct bbc_cpu_temperature *tp;
 425                struct bbc_fan_control *fp;
 426
 427                msleep_interruptible(POLL_INTERVAL);
 428                if (kthread_should_stop())
 429                        break;
 430
 431                list_for_each_entry(tp, &all_temps, glob_list) {
 432                        get_current_temps(tp);
 433                        analyze_temps(tp, &last_warning_jiffies);
 434                }
 435                list_for_each_entry(fp, &all_fans, glob_list)
 436                        maybe_new_fan_speeds(fp);
 437        }
 438        printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
 439
 440        fans_full_blast();
 441
 442        return 0;
 443}
 444
 445static void attach_one_temp(struct bbc_i2c_bus *bp, struct of_device *op,
 446                            int temp_idx)
 447{
 448        struct bbc_cpu_temperature *tp;
 449
 450        tp = kzalloc(sizeof(*tp), GFP_KERNEL);
 451        if (!tp)
 452                return;
 453
 454        tp->client = bbc_i2c_attach(bp, op);
 455        if (!tp->client) {
 456                kfree(tp);
 457                return;
 458        }
 459
 460
 461        tp->index = temp_idx;
 462
 463        list_add(&tp->glob_list, &all_temps);
 464        list_add(&tp->bp_list, &bp->temps);
 465
 466        /* Tell it to convert once every 5 seconds, clear all cfg
 467         * bits.
 468         */
 469        bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
 470        bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
 471
 472        /* Program the hard temperature limits into the chip. */
 473        bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
 474                       MAX1617_WR_AMB_HIGHLIM);
 475        bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
 476                       MAX1617_WR_AMB_LOWLIM);
 477        bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
 478                       MAX1617_WR_CPU_HIGHLIM);
 479        bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
 480                       MAX1617_WR_CPU_LOWLIM);
 481
 482        get_current_temps(tp);
 483        tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
 484        tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
 485
 486        tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
 487        tp->fan_todo[FAN_CPU] = FAN_SAME;
 488}
 489
 490static void attach_one_fan(struct bbc_i2c_bus *bp, struct of_device *op,
 491                           int fan_idx)
 492{
 493        struct bbc_fan_control *fp;
 494
 495        fp = kzalloc(sizeof(*fp), GFP_KERNEL);
 496        if (!fp)
 497                return;
 498
 499        fp->client = bbc_i2c_attach(bp, op);
 500        if (!fp->client) {
 501                kfree(fp);
 502                return;
 503        }
 504
 505        fp->index = fan_idx;
 506
 507        list_add(&fp->glob_list, &all_fans);
 508        list_add(&fp->bp_list, &bp->fans);
 509
 510        /* The i2c device controlling the fans is write-only.
 511         * So the only way to keep track of the current power
 512         * level fed to the fans is via software.  Choose half
 513         * power for cpu/system and 'on' fo the powersupply fan
 514         * and set it now.
 515         */
 516        fp->psupply_fan_on = 1;
 517        fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
 518        fp->cpu_fan_speed += FAN_SPEED_MIN;
 519        fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
 520        fp->system_fan_speed += FAN_SPEED_MIN;
 521
 522        set_fan_speeds(fp);
 523}
 524
 525int bbc_envctrl_init(struct bbc_i2c_bus *bp)
 526{
 527        struct of_device *op;
 528        int temp_index = 0;
 529        int fan_index = 0;
 530        int devidx = 0;
 531
 532        while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) {
 533                if (!strcmp(op->node->name, "temperature"))
 534                        attach_one_temp(bp, op, temp_index++);
 535                if (!strcmp(op->node->name, "fan-control"))
 536                        attach_one_fan(bp, op, fan_index++);
 537        }
 538        if (temp_index != 0 && fan_index != 0) {
 539                kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
 540                if (IS_ERR(kenvctrld_task)) {
 541                        int err = PTR_ERR(kenvctrld_task);
 542
 543                        kenvctrld_task = NULL;
 544                        return err;
 545                }
 546        }
 547
 548        return 0;
 549}
 550
 551static void destroy_one_temp(struct bbc_cpu_temperature *tp)
 552{
 553        bbc_i2c_detach(tp->client);
 554        kfree(tp);
 555}
 556
 557static void destroy_one_fan(struct bbc_fan_control *fp)
 558{
 559        bbc_i2c_detach(fp->client);
 560        kfree(fp);
 561}
 562
 563void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp)
 564{
 565        struct bbc_cpu_temperature *tp, *tpos;
 566        struct bbc_fan_control *fp, *fpos;
 567
 568        if (kenvctrld_task)
 569                kthread_stop(kenvctrld_task);
 570
 571        list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) {
 572                list_del(&tp->bp_list);
 573                list_del(&tp->glob_list);
 574                destroy_one_temp(tp);
 575        }
 576
 577        list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) {
 578                list_del(&fp->bp_list);
 579                list_del(&fp->glob_list);
 580                destroy_one_fan(fp);
 581        }
 582}
 583