linux/arch/powerpc/kernel/rtasd.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public License
   6 * as published by the Free Software Foundation; either version
   7 * 2 of the License, or (at your option) any later version.
   8 *
   9 * Communication to userspace based on kernel/printk.c
  10 */
  11
  12#include <linux/types.h>
  13#include <linux/errno.h>
  14#include <linux/sched.h>
  15#include <linux/kernel.h>
  16#include <linux/poll.h>
  17#include <linux/proc_fs.h>
  18#include <linux/init.h>
  19#include <linux/vmalloc.h>
  20#include <linux/spinlock.h>
  21#include <linux/cpu.h>
  22#include <linux/workqueue.h>
  23#include <linux/slab.h>
  24
  25#include <asm/uaccess.h>
  26#include <asm/io.h>
  27#include <asm/rtas.h>
  28#include <asm/prom.h>
  29#include <asm/nvram.h>
  30#include <asm/atomic.h>
  31#include <asm/machdep.h>
  32
  33
  34static DEFINE_SPINLOCK(rtasd_log_lock);
  35
  36static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
  37
  38static char *rtas_log_buf;
  39static unsigned long rtas_log_start;
  40static unsigned long rtas_log_size;
  41
  42static int surveillance_timeout = -1;
  43
  44static unsigned int rtas_error_log_max;
  45static unsigned int rtas_error_log_buffer_max;
  46
  47/* RTAS service tokens */
  48static unsigned int event_scan;
  49static unsigned int rtas_event_scan_rate;
  50
  51static int full_rtas_msgs = 0;
  52
  53/* Stop logging to nvram after first fatal error */
  54static int logging_enabled; /* Until we initialize everything,
  55                             * make sure we don't try logging
  56                             * anything */
  57static int error_log_cnt;
  58
  59/*
  60 * Since we use 32 bit RTAS, the physical address of this must be below
  61 * 4G or else bad things happen. Allocate this in the kernel data and
  62 * make it big enough.
  63 */
  64static unsigned char logdata[RTAS_ERROR_LOG_MAX];
  65
  66static char *rtas_type[] = {
  67        "Unknown", "Retry", "TCE Error", "Internal Device Failure",
  68        "Timeout", "Data Parity", "Address Parity", "Cache Parity",
  69        "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
  70};
  71
  72static char *rtas_event_type(int type)
  73{
  74        if ((type > 0) && (type < 11))
  75                return rtas_type[type];
  76
  77        switch (type) {
  78                case RTAS_TYPE_EPOW:
  79                        return "EPOW";
  80                case RTAS_TYPE_PLATFORM:
  81                        return "Platform Error";
  82                case RTAS_TYPE_IO:
  83                        return "I/O Event";
  84                case RTAS_TYPE_INFO:
  85                        return "Platform Information Event";
  86                case RTAS_TYPE_DEALLOC:
  87                        return "Resource Deallocation Event";
  88                case RTAS_TYPE_DUMP:
  89                        return "Dump Notification Event";
  90        }
  91
  92        return rtas_type[0];
  93}
  94
  95/* To see this info, grep RTAS /var/log/messages and each entry
  96 * will be collected together with obvious begin/end.
  97 * There will be a unique identifier on the begin and end lines.
  98 * This will persist across reboots.
  99 *
 100 * format of error logs returned from RTAS:
 101 * bytes        (size)  : contents
 102 * --------------------------------------------------------
 103 * 0-7          (8)     : rtas_error_log
 104 * 8-47         (40)    : extended info
 105 * 48-51        (4)     : vendor id
 106 * 52-1023 (vendor specific) : location code and debug data
 107 */
 108static void printk_log_rtas(char *buf, int len)
 109{
 110
 111        int i,j,n = 0;
 112        int perline = 16;
 113        char buffer[64];
 114        char * str = "RTAS event";
 115
 116        if (full_rtas_msgs) {
 117                printk(RTAS_DEBUG "%d -------- %s begin --------\n",
 118                       error_log_cnt, str);
 119
 120                /*
 121                 * Print perline bytes on each line, each line will start
 122                 * with RTAS and a changing number, so syslogd will
 123                 * print lines that are otherwise the same.  Separate every
 124                 * 4 bytes with a space.
 125                 */
 126                for (i = 0; i < len; i++) {
 127                        j = i % perline;
 128                        if (j == 0) {
 129                                memset(buffer, 0, sizeof(buffer));
 130                                n = sprintf(buffer, "RTAS %d:", i/perline);
 131                        }
 132
 133                        if ((i % 4) == 0)
 134                                n += sprintf(buffer+n, " ");
 135
 136                        n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
 137
 138                        if (j == (perline-1))
 139                                printk(KERN_DEBUG "%s\n", buffer);
 140                }
 141                if ((i % perline) != 0)
 142                        printk(KERN_DEBUG "%s\n", buffer);
 143
 144                printk(RTAS_DEBUG "%d -------- %s end ----------\n",
 145                       error_log_cnt, str);
 146        } else {
 147                struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
 148
 149                printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
 150                       error_log_cnt, rtas_event_type(errlog->type),
 151                       errlog->severity);
 152        }
 153}
 154
 155static int log_rtas_len(char * buf)
 156{
 157        int len;
 158        struct rtas_error_log *err;
 159
 160        /* rtas fixed header */
 161        len = 8;
 162        err = (struct rtas_error_log *)buf;
 163        if (err->extended && err->extended_log_length) {
 164
 165                /* extended header */
 166                len += err->extended_log_length;
 167        }
 168
 169        if (rtas_error_log_max == 0)
 170                rtas_error_log_max = rtas_get_error_log_max();
 171
 172        if (len > rtas_error_log_max)
 173                len = rtas_error_log_max;
 174
 175        return len;
 176}
 177
 178/*
 179 * First write to nvram, if fatal error, that is the only
 180 * place we log the info.  The error will be picked up
 181 * on the next reboot by rtasd.  If not fatal, run the
 182 * method for the type of error.  Currently, only RTAS
 183 * errors have methods implemented, but in the future
 184 * there might be a need to store data in nvram before a
 185 * call to panic().
 186 *
 187 * XXX We write to nvram periodically, to indicate error has
 188 * been written and sync'd, but there is a possibility
 189 * that if we don't shutdown correctly, a duplicate error
 190 * record will be created on next reboot.
 191 */
 192void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 193{
 194        unsigned long offset;
 195        unsigned long s;
 196        int len = 0;
 197
 198        pr_debug("rtasd: logging event\n");
 199        if (buf == NULL)
 200                return;
 201
 202        spin_lock_irqsave(&rtasd_log_lock, s);
 203
 204        /* get length and increase count */
 205        switch (err_type & ERR_TYPE_MASK) {
 206        case ERR_TYPE_RTAS_LOG:
 207                len = log_rtas_len(buf);
 208                if (!(err_type & ERR_FLAG_BOOT))
 209                        error_log_cnt++;
 210                break;
 211        case ERR_TYPE_KERNEL_PANIC:
 212        default:
 213                WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 214                spin_unlock_irqrestore(&rtasd_log_lock, s);
 215                return;
 216        }
 217
 218#ifdef CONFIG_PPC64
 219        /* Write error to NVRAM */
 220        if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
 221                nvram_write_error_log(buf, len, err_type, error_log_cnt);
 222#endif /* CONFIG_PPC64 */
 223
 224        /*
 225         * rtas errors can occur during boot, and we do want to capture
 226         * those somewhere, even if nvram isn't ready (why not?), and even
 227         * if rtasd isn't ready. Put them into the boot log, at least.
 228         */
 229        if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
 230                printk_log_rtas(buf, len);
 231
 232        /* Check to see if we need to or have stopped logging */
 233        if (fatal || !logging_enabled) {
 234                logging_enabled = 0;
 235                WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 236                spin_unlock_irqrestore(&rtasd_log_lock, s);
 237                return;
 238        }
 239
 240        /* call type specific method for error */
 241        switch (err_type & ERR_TYPE_MASK) {
 242        case ERR_TYPE_RTAS_LOG:
 243                offset = rtas_error_log_buffer_max *
 244                        ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
 245
 246                /* First copy over sequence number */
 247                memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
 248
 249                /* Second copy over error log data */
 250                offset += sizeof(int);
 251                memcpy(&rtas_log_buf[offset], buf, len);
 252
 253                if (rtas_log_size < LOG_NUMBER)
 254                        rtas_log_size += 1;
 255                else
 256                        rtas_log_start += 1;
 257
 258                WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 259                spin_unlock_irqrestore(&rtasd_log_lock, s);
 260                wake_up_interruptible(&rtas_log_wait);
 261                break;
 262        case ERR_TYPE_KERNEL_PANIC:
 263        default:
 264                WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 265                spin_unlock_irqrestore(&rtasd_log_lock, s);
 266                return;
 267        }
 268
 269}
 270
 271static int rtas_log_open(struct inode * inode, struct file * file)
 272{
 273        return 0;
 274}
 275
 276static int rtas_log_release(struct inode * inode, struct file * file)
 277{
 278        return 0;
 279}
 280
 281/* This will check if all events are logged, if they are then, we
 282 * know that we can safely clear the events in NVRAM.
 283 * Next we'll sit and wait for something else to log.
 284 */
 285static ssize_t rtas_log_read(struct file * file, char __user * buf,
 286                         size_t count, loff_t *ppos)
 287{
 288        int error;
 289        char *tmp;
 290        unsigned long s;
 291        unsigned long offset;
 292
 293        if (!buf || count < rtas_error_log_buffer_max)
 294                return -EINVAL;
 295
 296        count = rtas_error_log_buffer_max;
 297
 298        if (!access_ok(VERIFY_WRITE, buf, count))
 299                return -EFAULT;
 300
 301        tmp = kmalloc(count, GFP_KERNEL);
 302        if (!tmp)
 303                return -ENOMEM;
 304
 305        spin_lock_irqsave(&rtasd_log_lock, s);
 306
 307        /* if it's 0, then we know we got the last one (the one in NVRAM) */
 308        while (rtas_log_size == 0) {
 309                if (file->f_flags & O_NONBLOCK) {
 310                        spin_unlock_irqrestore(&rtasd_log_lock, s);
 311                        error = -EAGAIN;
 312                        goto out;
 313                }
 314
 315                if (!logging_enabled) {
 316                        spin_unlock_irqrestore(&rtasd_log_lock, s);
 317                        error = -ENODATA;
 318                        goto out;
 319                }
 320#ifdef CONFIG_PPC64
 321                nvram_clear_error_log();
 322#endif /* CONFIG_PPC64 */
 323
 324                spin_unlock_irqrestore(&rtasd_log_lock, s);
 325                error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
 326                if (error)
 327                        goto out;
 328                spin_lock_irqsave(&rtasd_log_lock, s);
 329        }
 330
 331        offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
 332        memcpy(tmp, &rtas_log_buf[offset], count);
 333
 334        rtas_log_start += 1;
 335        rtas_log_size -= 1;
 336        spin_unlock_irqrestore(&rtasd_log_lock, s);
 337
 338        error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
 339out:
 340        kfree(tmp);
 341        return error;
 342}
 343
 344static unsigned int rtas_log_poll(struct file *file, poll_table * wait)
 345{
 346        poll_wait(file, &rtas_log_wait, wait);
 347        if (rtas_log_size)
 348                return POLLIN | POLLRDNORM;
 349        return 0;
 350}
 351
 352static const struct file_operations proc_rtas_log_operations = {
 353        .read =         rtas_log_read,
 354        .poll =         rtas_log_poll,
 355        .open =         rtas_log_open,
 356        .release =      rtas_log_release,
 357        .llseek =       noop_llseek,
 358};
 359
 360static int enable_surveillance(int timeout)
 361{
 362        int error;
 363
 364        error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
 365
 366        if (error == 0)
 367                return 0;
 368
 369        if (error == -EINVAL) {
 370                printk(KERN_DEBUG "rtasd: surveillance not supported\n");
 371                return 0;
 372        }
 373
 374        printk(KERN_ERR "rtasd: could not update surveillance\n");
 375        return -1;
 376}
 377
 378static void do_event_scan(void)
 379{
 380        int error;
 381        do {
 382                memset(logdata, 0, rtas_error_log_max);
 383                error = rtas_call(event_scan, 4, 1, NULL,
 384                                  RTAS_EVENT_SCAN_ALL_EVENTS, 0,
 385                                  __pa(logdata), rtas_error_log_max);
 386                if (error == -1) {
 387                        printk(KERN_ERR "event-scan failed\n");
 388                        break;
 389                }
 390
 391                if (error == 0)
 392                        pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
 393
 394        } while(error == 0);
 395}
 396
 397static void rtas_event_scan(struct work_struct *w);
 398DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
 399
 400/*
 401 * Delay should be at least one second since some machines have problems if
 402 * we call event-scan too quickly.
 403 */
 404static unsigned long event_scan_delay = 1*HZ;
 405static int first_pass = 1;
 406
 407static void rtas_event_scan(struct work_struct *w)
 408{
 409        unsigned int cpu;
 410
 411        do_event_scan();
 412
 413        get_online_cpus();
 414
 415        cpu = cpumask_next(smp_processor_id(), cpu_online_mask);
 416        if (cpu >= nr_cpu_ids) {
 417                cpu = cpumask_first(cpu_online_mask);
 418
 419                if (first_pass) {
 420                        first_pass = 0;
 421                        event_scan_delay = 30*HZ/rtas_event_scan_rate;
 422
 423                        if (surveillance_timeout != -1) {
 424                                pr_debug("rtasd: enabling surveillance\n");
 425                                enable_surveillance(surveillance_timeout);
 426                                pr_debug("rtasd: surveillance enabled\n");
 427                        }
 428                }
 429        }
 430
 431        schedule_delayed_work_on(cpu, &event_scan_work,
 432                __round_jiffies_relative(event_scan_delay, cpu));
 433
 434        put_online_cpus();
 435}
 436
 437#ifdef CONFIG_PPC64
 438static void retreive_nvram_error_log(void)
 439{
 440        unsigned int err_type ;
 441        int rc ;
 442
 443        /* See if we have any error stored in NVRAM */
 444        memset(logdata, 0, rtas_error_log_max);
 445        rc = nvram_read_error_log(logdata, rtas_error_log_max,
 446                                  &err_type, &error_log_cnt);
 447        /* We can use rtas_log_buf now */
 448        logging_enabled = 1;
 449        if (!rc) {
 450                if (err_type != ERR_FLAG_ALREADY_LOGGED) {
 451                        pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
 452                }
 453        }
 454}
 455#else /* CONFIG_PPC64 */
 456static void retreive_nvram_error_log(void)
 457{
 458}
 459#endif /* CONFIG_PPC64 */
 460
 461static void start_event_scan(void)
 462{
 463        printk(KERN_DEBUG "RTAS daemon started\n");
 464        pr_debug("rtasd: will sleep for %d milliseconds\n",
 465                 (30000 / rtas_event_scan_rate));
 466
 467        /* Retreive errors from nvram if any */
 468        retreive_nvram_error_log();
 469
 470        schedule_delayed_work_on(cpumask_first(cpu_online_mask),
 471                                 &event_scan_work, event_scan_delay);
 472}
 473
 474static int __init rtas_init(void)
 475{
 476        struct proc_dir_entry *entry;
 477
 478        if (!machine_is(pseries) && !machine_is(chrp))
 479                return 0;
 480
 481        /* No RTAS */
 482        event_scan = rtas_token("event-scan");
 483        if (event_scan == RTAS_UNKNOWN_SERVICE) {
 484                printk(KERN_INFO "rtasd: No event-scan on system\n");
 485                return -ENODEV;
 486        }
 487
 488        rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
 489        if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
 490                printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
 491                return -ENODEV;
 492        }
 493
 494        if (!rtas_event_scan_rate) {
 495                /* Broken firmware: take a rate of zero to mean don't scan */
 496                printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
 497                return 0;
 498        }
 499
 500        /* Make room for the sequence number */
 501        rtas_error_log_max = rtas_get_error_log_max();
 502        rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
 503
 504        rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER);
 505        if (!rtas_log_buf) {
 506                printk(KERN_ERR "rtasd: no memory\n");
 507                return -ENOMEM;
 508        }
 509
 510        entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
 511                            &proc_rtas_log_operations);
 512        if (!entry)
 513                printk(KERN_ERR "Failed to create error_log proc entry\n");
 514
 515        start_event_scan();
 516
 517        return 0;
 518}
 519__initcall(rtas_init);
 520
 521static int __init surveillance_setup(char *str)
 522{
 523        int i;
 524
 525        /* We only do surveillance on pseries */
 526        if (!machine_is(pseries))
 527                return 0;
 528
 529        if (get_option(&str,&i)) {
 530                if (i >= 0 && i <= 255)
 531                        surveillance_timeout = i;
 532        }
 533
 534        return 1;
 535}
 536__setup("surveillance=", surveillance_setup);
 537
 538static int __init rtasmsgs_setup(char *str)
 539{
 540        if (strcmp(str, "on") == 0)
 541                full_rtas_msgs = 1;
 542        else if (strcmp(str, "off") == 0)
 543                full_rtas_msgs = 0;
 544
 545        return 1;
 546}
 547__setup("rtasmsgs=", rtasmsgs_setup);
 548