linux/arch/powerpc/platforms/pseries/phyp_dump.c
<<
>>
Prefs
   1/*
   2 * Hypervisor-assisted dump
   3 *
   4 * Linas Vepstas, Manish Ahuja 2008
   5 * Copyright 2008 IBM Corp.
   6 *
   7 *      This program is free software; you can redistribute it and/or
   8 *      modify it under the terms of the GNU General Public License
   9 *      as published by the Free Software Foundation; either version
  10 *      2 of the License, or (at your option) any later version.
  11 *
  12 */
  13
  14#include <linux/gfp.h>
  15#include <linux/init.h>
  16#include <linux/kobject.h>
  17#include <linux/mm.h>
  18#include <linux/of.h>
  19#include <linux/pfn.h>
  20#include <linux/swap.h>
  21#include <linux/sysfs.h>
  22
  23#include <asm/page.h>
  24#include <asm/phyp_dump.h>
  25#include <asm/machdep.h>
  26#include <asm/prom.h>
  27#include <asm/rtas.h>
  28
/*
 * Variables, used to communicate data between early boot and late boot.
 * phyp_dump_vars is the backing storage; phyp_dump_info is initialised to
 * point at it and is the handle every function in this file goes through.
 */
static struct phyp_dump phyp_dump_vars;
struct phyp_dump *phyp_dump_info = &phyp_dump_vars;

/* RTAS token for "ibm,configure-kernel-dump"; set in phyp_dump_setup() */
static int ibm_configure_kernel_dump;
  34/* ------------------------------------------------- */
  35/* RTAS interfaces to declare the dump regions */
  36
/*
 * Descriptor for one region of a dump.  Three of these are embedded in
 * struct phyp_dump_header, which is handed to the
 * ibm,configure-kernel-dump RTAS call.
 * NOTE(review): field order and widths are presumably fixed by the
 * firmware interface -- do not reorder without checking the spec.
 */
struct dump_section {
	u32 dump_flags;		/* set to DUMP_REQUEST_FLAG by init_dump_header() */
	u16 source_type;	/* one of the DUMP_SOURCE_* values */
	u16 error_flags;	/* never written in this file, only printed */
	u64 source_address;	/* address of the region to dump */
	u64 source_length;	/* size of the region to dump */
	u64 length_copied;	/* never written in this file; presumably set by firmware */
	u64 destination_address; /* where the region's data is to be saved */
};
  46
/*
 * Header describing a complete dump request: a fixed preamble followed
 * by one dump_section per region.  Filled in by init_dump_header() and
 * passed to firmware by register_dump_area() / invalidate_last_dump().
 * NOTE(review): layout is presumably dictated by firmware -- do not
 * reorder fields without checking the spec.
 */
struct phyp_dump_header {
	u32 version;		/* DUMP_HEADER_VERSION */
	u16 num_of_sections;	/* NUM_DUMP_SECTIONS */
	u16 status;		/* tested against DUMP_ERROR_FLAG in phyp_dump_setup() */

	u32 first_offset_section; /* offsetof(struct phyp_dump_header, cpu_data) */

	/* Dump-disk fields: all set to zero by init_dump_header() */
	u32 dump_disk_section;
	u64 block_num_dd;
	u64 num_of_blocks_dd;
	u32 offset_dd;
	u32 maxtime_to_auto;	/* 0 means disabled */
	/* No dump disk path string used */

	struct dump_section cpu_data;		/* DUMP_SOURCE_CPU region */
	struct dump_section hpte_data;		/* DUMP_SOURCE_HPTE region */
	struct dump_section kernel_data;	/* DUMP_SOURCE_RMO region */
};

/* The dump header *must be* in low memory, so .bss it */
static struct phyp_dump_header phdr;
  67
/* Values written into struct phyp_dump_header by init_dump_header() */
#define NUM_DUMP_SECTIONS       3	/* cpu_data, hpte_data, kernel_data */
#define DUMP_HEADER_VERSION     0x1
#define DUMP_REQUEST_FLAG       0x1	/* dump_flags value used for every section */

/* dump_section.source_type values */
#define DUMP_SOURCE_CPU         0x0001
#define DUMP_SOURCE_HPTE        0x0002
#define DUMP_SOURCE_RMO         0x0011

/*
 * DUMP_ERROR_FLAG is tested against phyp_dump_header.status in
 * phyp_dump_setup().  DUMP_TRIGGERED and DUMP_PERFORMED are not
 * referenced in the visible code; presumably further status bits.
 */
#define DUMP_ERROR_FLAG         0x2000
#define DUMP_TRIGGERED          0x4000
#define DUMP_PERFORMED          0x8000
  77
  78
  79/**
  80 * init_dump_header() - initialize the header declaring a dump
  81 * Returns: length of dump save area.
  82 *
  83 * When the hypervisor saves crashed state, it needs to put
  84 * it somewhere. The dump header tells the hypervisor where
  85 * the data can be saved.
  86 */
  87static unsigned long init_dump_header(struct phyp_dump_header *ph)
  88{
  89        unsigned long addr_offset = 0;
  90
  91        /* Set up the dump header */
  92        ph->version = DUMP_HEADER_VERSION;
  93        ph->num_of_sections = NUM_DUMP_SECTIONS;
  94        ph->status = 0;
  95
  96        ph->first_offset_section =
  97                (u32)offsetof(struct phyp_dump_header, cpu_data);
  98        ph->dump_disk_section = 0;
  99        ph->block_num_dd = 0;
 100        ph->num_of_blocks_dd = 0;
 101        ph->offset_dd = 0;
 102
 103        ph->maxtime_to_auto = 0; /* disabled */
 104
 105        /* The first two sections are mandatory */
 106        ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
 107        ph->cpu_data.source_type = DUMP_SOURCE_CPU;
 108        ph->cpu_data.source_address = 0;
 109        ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
 110        ph->cpu_data.destination_address = addr_offset;
 111        addr_offset += phyp_dump_info->cpu_state_size;
 112
 113        ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
 114        ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
 115        ph->hpte_data.source_address = 0;
 116        ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
 117        ph->hpte_data.destination_address = addr_offset;
 118        addr_offset += phyp_dump_info->hpte_region_size;
 119
 120        /* This section describes the low kernel region */
 121        ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
 122        ph->kernel_data.source_type = DUMP_SOURCE_RMO;
 123        ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
 124        ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
 125        ph->kernel_data.destination_address = addr_offset;
 126        addr_offset += ph->kernel_data.source_length;
 127
 128        return addr_offset;
 129}
 130
 131static void print_dump_header(const struct phyp_dump_header *ph)
 132{
 133#ifdef DEBUG
 134        if (ph == NULL)
 135                return;
 136
 137        printk(KERN_INFO "dump header:\n");
 138        /* setup some ph->sections required */
 139        printk(KERN_INFO "version = %d\n", ph->version);
 140        printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
 141        printk(KERN_INFO "Status = 0x%x\n", ph->status);
 142
 143        /* No ph->disk, so all should be set to 0 */
 144        printk(KERN_INFO "Offset to first section 0x%x\n",
 145                ph->first_offset_section);
 146        printk(KERN_INFO "dump disk sections should be zero\n");
 147        printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
 148        printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
 149        printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
 150        printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
 151        printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);
 152
 153        /*set cpu state and hpte states as well scratch pad area */
 154        printk(KERN_INFO " CPU AREA\n");
 155        printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);
 156        printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);
 157        printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);
 158        printk(KERN_INFO "cpu source_address =%llx\n",
 159                ph->cpu_data.source_address);
 160        printk(KERN_INFO "cpu source_length =%llx\n",
 161                ph->cpu_data.source_length);
 162        printk(KERN_INFO "cpu length_copied =%llx\n",
 163                ph->cpu_data.length_copied);
 164
 165        printk(KERN_INFO " HPTE AREA\n");
 166        printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);
 167        printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);
 168        printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);
 169        printk(KERN_INFO "HPTE source_address =%llx\n",
 170                ph->hpte_data.source_address);
 171        printk(KERN_INFO "HPTE source_length =%llx\n",
 172                ph->hpte_data.source_length);
 173        printk(KERN_INFO "HPTE length_copied =%llx\n",
 174                ph->hpte_data.length_copied);
 175
 176        printk(KERN_INFO " SRSD AREA\n");
 177        printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);
 178        printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);
 179        printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);
 180        printk(KERN_INFO "SRSD source_address =%llx\n",
 181                ph->kernel_data.source_address);
 182        printk(KERN_INFO "SRSD source_length =%llx\n",
 183                ph->kernel_data.source_length);
 184        printk(KERN_INFO "SRSD length_copied =%llx\n",
 185                ph->kernel_data.length_copied);
 186#endif
 187}
 188
 189static ssize_t show_phyp_dump_active(struct kobject *kobj,
 190                        struct kobj_attribute *attr, char *buf)
 191{
 192
 193        /* create filesystem entry so kdump is phyp-dump aware */
 194        return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
 195}
 196
 197static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
 198                                        show_phyp_dump_active,
 199                                        NULL);
 200
 201static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
 202{
 203        int rc;
 204
 205        /* Add addr value if not initialized before */
 206        if (ph->cpu_data.destination_address == 0) {
 207                ph->cpu_data.destination_address += addr;
 208                ph->hpte_data.destination_address += addr;
 209                ph->kernel_data.destination_address += addr;
 210        }
 211
 212        /* ToDo Invalidate kdump and free memory range. */
 213
 214        do {
 215                rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
 216                                1, ph, sizeof(struct phyp_dump_header));
 217        } while (rtas_busy_delay(rc));
 218
 219        if (rc) {
 220                printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
 221                                                "register\n", rc);
 222                print_dump_header(ph);
 223                return;
 224        }
 225
 226        rc = sysfs_create_file(kernel_kobj, &pdl.attr);
 227        if (rc)
 228                printk(KERN_ERR "phyp-dump: unable to create sysfs"
 229                                " file (%d)\n", rc);
 230}
 231
 232static
 233void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
 234{
 235        int rc;
 236
 237        /* Add addr value if not initialized before */
 238        if (ph->cpu_data.destination_address == 0) {
 239                ph->cpu_data.destination_address += addr;
 240                ph->hpte_data.destination_address += addr;
 241                ph->kernel_data.destination_address += addr;
 242        }
 243
 244        do {
 245                rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
 246                                2, ph, sizeof(struct phyp_dump_header));
 247        } while (rtas_busy_delay(rc));
 248
 249        if (rc) {
 250                printk(KERN_ERR "phyp-dump: unexpected error (%d) "
 251                                                "on invalidate\n", rc);
 252                print_dump_header(ph);
 253        }
 254}
 255
 256/* ------------------------------------------------- */
 257/**
 258 * release_memory_range -- release memory previously memblock_reserved
 259 * @start_pfn: starting physical frame number
 260 * @nr_pages: number of pages to free.
 261 *
 262 * This routine will release memory that had been previously
 263 * memblock_reserved in early boot. The released memory becomes
 264 * available for genreal use.
 265 */
 266static void release_memory_range(unsigned long start_pfn,
 267                        unsigned long nr_pages)
 268{
 269        struct page *rpage;
 270        unsigned long end_pfn;
 271        long i;
 272
 273        end_pfn = start_pfn + nr_pages;
 274
 275        for (i = start_pfn; i <= end_pfn; i++) {
 276                rpage = pfn_to_page(i);
 277                if (PageReserved(rpage)) {
 278                        ClearPageReserved(rpage);
 279                        init_page_count(rpage);
 280                        __free_page(rpage);
 281                        totalram_pages++;
 282                }
 283        }
 284}
 285
 286/**
 287 * track_freed_range -- Counts the range being freed.
 288 * Once the counter goes to zero, it re-registers dump for
 289 * future use.
 290 */
 291static void
 292track_freed_range(unsigned long addr, unsigned long length)
 293{
 294        static unsigned long scratch_area_size, reserved_area_size;
 295
 296        if (addr < phyp_dump_info->init_reserve_start)
 297                return;
 298
 299        if ((addr >= phyp_dump_info->init_reserve_start) &&
 300            (addr <= phyp_dump_info->init_reserve_start +
 301             phyp_dump_info->init_reserve_size))
 302                reserved_area_size += length;
 303
 304        if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
 305            (addr <= phyp_dump_info->reserved_scratch_addr +
 306             phyp_dump_info->reserved_scratch_size))
 307                scratch_area_size += length;
 308
 309        if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
 310            (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
 311
 312                invalidate_last_dump(&phdr,
 313                                phyp_dump_info->reserved_scratch_addr);
 314                register_dump_area(&phdr,
 315                                phyp_dump_info->reserved_scratch_addr);
 316        }
 317}
 318
 319/* ------------------------------------------------- */
 320/**
 321 * sysfs_release_region -- sysfs interface to release memory range.
 322 *
 323 * Usage:
 324 *   "echo <start addr> <length> > /sys/kernel/release_region"
 325 *
 326 * Example:
 327 *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
 328 *
 329 * will release 256MB starting at 1GB.
 330 */
 331static ssize_t store_release_region(struct kobject *kobj,
 332                                struct kobj_attribute *attr,
 333                                const char *buf, size_t count)
 334{
 335        unsigned long start_addr, length, end_addr;
 336        unsigned long start_pfn, nr_pages;
 337        ssize_t ret;
 338
 339        ret = sscanf(buf, "%lx %lx", &start_addr, &length);
 340        if (ret != 2)
 341                return -EINVAL;
 342
 343        track_freed_range(start_addr, length);
 344
 345        /* Range-check - don't free any reserved memory that
 346         * wasn't reserved for phyp-dump */
 347        if (start_addr < phyp_dump_info->init_reserve_start)
 348                start_addr = phyp_dump_info->init_reserve_start;
 349
 350        end_addr = phyp_dump_info->init_reserve_start +
 351                        phyp_dump_info->init_reserve_size;
 352        if (start_addr+length > end_addr)
 353                length = end_addr - start_addr;
 354
 355        /* Release the region of memory assed in by user */
 356        start_pfn = PFN_DOWN(start_addr);
 357        nr_pages = PFN_DOWN(length);
 358        release_memory_range(start_pfn, nr_pages);
 359
 360        return count;
 361}
 362
 363static ssize_t show_release_region(struct kobject *kobj,
 364                        struct kobj_attribute *attr, char *buf)
 365{
 366        u64 second_addr_range;
 367
 368        /* total reserved size - start of scratch area */
 369        second_addr_range = phyp_dump_info->init_reserve_size -
 370                                phyp_dump_info->reserved_scratch_size;
 371        return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
 372                            " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
 373                phdr.cpu_data.destination_address,
 374                phdr.cpu_data.length_copied,
 375                phdr.hpte_data.destination_address,
 376                phdr.hpte_data.length_copied,
 377                phdr.kernel_data.destination_address,
 378                phdr.kernel_data.length_copied,
 379                phyp_dump_info->init_reserve_start,
 380                second_addr_range);
 381}
 382
 383static struct kobj_attribute rr = __ATTR(release_region, 0600,
 384                                        show_release_region,
 385                                        store_release_region);
 386
 387static int __init phyp_dump_setup(void)
 388{
 389        struct device_node *rtas;
 390        const struct phyp_dump_header *dump_header = NULL;
 391        unsigned long dump_area_start;
 392        unsigned long dump_area_length;
 393        int header_len = 0;
 394        int rc;
 395
 396        /* If no memory was reserved in early boot, there is nothing to do */
 397        if (phyp_dump_info->init_reserve_size == 0)
 398                return 0;
 399
 400        /* Return if phyp dump not supported */
 401        if (!phyp_dump_info->phyp_dump_configured)
 402                return -ENOSYS;
 403
 404        /* Is there dump data waiting for us? If there isn't,
 405         * then register a new dump area, and release all of
 406         * the rest of the reserved ram.
 407         *
 408         * The /rtas/ibm,kernel-dump rtas node is present only
 409         * if there is dump data waiting for us.
 410         */
 411        rtas = of_find_node_by_path("/rtas");
 412        if (rtas) {
 413                dump_header = of_get_property(rtas, "ibm,kernel-dump",
 414                                                &header_len);
 415                of_node_put(rtas);
 416        }
 417
 418        ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
 419
 420        print_dump_header(dump_header);
 421        dump_area_length = init_dump_header(&phdr);
 422        /* align down */
 423        dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
 424
 425        if (dump_header == NULL) {
 426                register_dump_area(&phdr, dump_area_start);
 427                return 0;
 428        }
 429
 430        /* re-register the dump area, if old dump was invalid */
 431        if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
 432                invalidate_last_dump(&phdr, dump_area_start);
 433                register_dump_area(&phdr, dump_area_start);
 434                return 0;
 435        }
 436
 437        if (dump_header) {
 438                phyp_dump_info->reserved_scratch_addr =
 439                                dump_header->cpu_data.destination_address;
 440                phyp_dump_info->reserved_scratch_size =
 441                                dump_header->cpu_data.source_length +
 442                                dump_header->hpte_data.source_length +
 443                                dump_header->kernel_data.source_length;
 444        }
 445
 446        /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
 447        rc = sysfs_create_file(kernel_kobj, &rr.attr);
 448        if (rc)
 449                printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
 450                                                                        rc);
 451
 452        /* ToDo: re-register the dump area, for next time. */
 453        return 0;
 454}
 455machine_subsys_initcall(pseries, phyp_dump_setup);
 456
 457int __init early_init_dt_scan_phyp_dump(unsigned long node,
 458                const char *uname, int depth, void *data)
 459{
 460        const unsigned int *sizes;
 461
 462        phyp_dump_info->phyp_dump_configured = 0;
 463        phyp_dump_info->phyp_dump_is_active = 0;
 464
 465        if (depth != 1 || strcmp(uname, "rtas") != 0)
 466                return 0;
 467
 468        if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
 469                phyp_dump_info->phyp_dump_configured++;
 470
 471        if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
 472                phyp_dump_info->phyp_dump_is_active++;
 473
 474        sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
 475                                    NULL);
 476        if (!sizes)
 477                return 0;
 478
 479        if (sizes[0] == 1)
 480                phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
 481
 482        if (sizes[3] == 2)
 483                phyp_dump_info->hpte_region_size =
 484                                                *((unsigned long *)&sizes[4]);
 485        return 1;
 486}
 487
 488/* Look for phyp_dump= cmdline option */
 489static int __init early_phyp_dump_enabled(char *p)
 490{
 491        phyp_dump_info->phyp_dump_at_boot = 1;
 492
 493        if (!p)
 494                return 0;
 495
 496        if (strncmp(p, "1", 1) == 0)
 497                phyp_dump_info->phyp_dump_at_boot = 1;
 498        else if (strncmp(p, "0", 1) == 0)
 499                phyp_dump_info->phyp_dump_at_boot = 0;
 500
 501        return 0;
 502}
 503early_param("phyp_dump", early_phyp_dump_enabled);
 504
 505/* Look for phyp_dump_reserve_size= cmdline option */
 506static int __init early_phyp_dump_reserve_size(char *p)
 507{
 508        if (p)
 509                phyp_dump_info->reserve_bootvar = memparse(p, &p);
 510
 511        return 0;
 512}
 513early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);
 514