linux/arch/powerpc/kernel/fadump.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
   4 * dump with assistance from firmware. This approach does not use kexec,
   5 * instead firmware assists in booting the kdump kernel while preserving
   6 * memory contents. Most of the code implementation has been adapted
   7 * from the phyp assisted dump implementation written by Linas Vepstas
   8 * and Manish Ahuja
   9 *
  10 * Copyright 2011 IBM Corporation
  11 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  12 */
  13
  14#undef DEBUG
  15#define pr_fmt(fmt) "fadump: " fmt
  16
  17#include <linux/string.h>
  18#include <linux/memblock.h>
  19#include <linux/delay.h>
  20#include <linux/seq_file.h>
  21#include <linux/crash_dump.h>
  22#include <linux/kobject.h>
  23#include <linux/sysfs.h>
  24#include <linux/slab.h>
  25#include <linux/cma.h>
  26#include <linux/hugetlb.h>
  27
  28#include <asm/debugfs.h>
  29#include <asm/page.h>
  30#include <asm/prom.h>
  31#include <asm/rtas.h>
  32#include <asm/fadump.h>
  33#include <asm/setup.h>
  34
  35static struct fw_dump fw_dump;
  36static struct fadump_mem_struct fdm;
  37static const struct fadump_mem_struct *fdm_active;
  38#ifdef CONFIG_CMA
  39static struct cma *fadump_cma;
  40#endif
  41
  42static DEFINE_MUTEX(fadump_mutex);
  43struct fad_crash_memory_ranges *crash_memory_ranges;
  44int crash_memory_ranges_size;
  45int crash_mem_ranges;
  46int max_crash_mem_ranges;
  47
  48#ifdef CONFIG_CMA
  49/*
  50 * fadump_cma_init() - Initialize CMA area from the fadump reserved memory
  51 *
  52 * This function initializes a CMA area from the fadump reserved memory.
  53 * The total size of the fadump reserved memory covers the boot memory
  54 * size + cpu data size + hpte size and metadata.
  55 * Only the area equivalent to the boot memory size is initialized for
  56 * CMA use; the remaining portion of the fadump reserved memory is not
  57 * given to CMA and those pages stay reserved. The boot memory size is
  58 * aligned per the CMA requirement to satisfy the cma_init_reserved_mem()
  59 * call. Even if that call fails, the memory reservation is still in
  60 * place and fadump can continue to operate.
  61 */
  62int __init fadump_cma_init(void)
  63{
  64        unsigned long long base, size;
  65        int rc;
  66
  67        if (!fw_dump.fadump_enabled)
  68                return 0;
  69
  70        /*
  71         * Do not use CMA if the user has provided the fadump=nocma kernel parameter.
  72         * Return 1 to continue with the old fadump behaviour.
  73         */
  74        if (fw_dump.nocma)
  75                return 1;
  76
  77        base = fw_dump.reserve_dump_area_start;
  78        size = fw_dump.boot_memory_size;
  79
  80        if (!size)
  81                return 0;
  82
  83        rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
  84        if (rc) {
  85                pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
  86                /*
  87                 * Though the CMA init has failed we still have memory
  88                 * reservation with us. The reserved memory will be
  89                 * blocked from production system usage.  Hence return 1,
  90                 * so that we can continue with fadump.
  91                 */
  92                return 1;
  93        }
  94
  95        /*
  96         * The cma area for fadump has now been initialized successfully.
  97         */
  98        pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
  99                "bytes of memory reserved for firmware-assisted dump\n",
 100                cma_get_size(fadump_cma),
 101                (unsigned long)cma_get_base(fadump_cma) >> 20,
 102                fw_dump.reserve_dump_area_size);
 103        return 1;
 104}
 105#else
 106static int __init fadump_cma_init(void) { return 1; }
 107#endif /* CONFIG_CMA */
 108
 109/* Scan the Firmware Assisted dump configuration details. */
 110int __init early_init_dt_scan_fw_dump(unsigned long node,
 111                        const char *uname, int depth, void *data)
 112{
 113        const __be32 *sections;
 114        int i, num_sections;
 115        int size;
 116        const __be32 *token;
 117
 118        if (depth != 1 || strcmp(uname, "rtas") != 0)
 119                return 0;
 120
 121        /*
 122         * Check if Firmware Assisted dump is supported. If yes, check
 123         * if a dump has been initiated on the last reboot.
 124         */
 125        token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
 126        if (!token)
 127                return 1;
 128
 129        fw_dump.fadump_supported = 1;
 130        fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
 131
 132        /*
 133         * The 'ibm,kernel-dump' rtas node is present only if there is
 134         * dump data waiting for us.
 135         */
 136        fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
 137        if (fdm_active)
 138                fw_dump.dump_active = 1;
 139
 140        /* Get the sizes required to store dump data for the firmware provided
 141         * dump sections.
 142         * For each supported dump section type there is a 32-bit cell that
 143         * defines the ID of the section, followed by two 32-bit cells that
 144         * give the size of the section in bytes.
 145         */
 146        sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
 147                                        &size);
 148
 149        if (!sections)
 150                return 1;
 151
 152        num_sections = size / (3 * sizeof(u32));
 153
 154        for (i = 0; i < num_sections; i++, sections += 3) {
 155                u32 type = (u32)of_read_number(sections, 1);
 156
 157                switch (type) {
 158                case FADUMP_CPU_STATE_DATA:
 159                        fw_dump.cpu_state_data_size =
 160                                        of_read_ulong(&sections[1], 2);
 161                        break;
 162                case FADUMP_HPTE_REGION:
 163                        fw_dump.hpte_region_size =
 164                                        of_read_ulong(&sections[1], 2);
 165                        break;
 166                }
 167        }
 168
 169        return 1;
 170}
 171
 172/*
 173 * If fadump is registered, check if the memory provided
 174 * falls within the boot memory area or the reserved memory area.
 175 */
 176int is_fadump_memory_area(u64 addr, ulong size)
 177{
 178        u64 d_start = fw_dump.reserve_dump_area_start;
 179        u64 d_end = d_start + fw_dump.reserve_dump_area_size;
 180
 181        if (!fw_dump.dump_registered)
 182                return 0;
 183
 184        if (((addr + size) > d_start) && (addr <= d_end))
 185                return 1;
 186
 187        return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
 188}
 189
 190int should_fadump_crash(void)
 191{
 192        if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
 193                return 0;
 194        return 1;
 195}
 196
 197int is_fadump_active(void)
 198{
 199        return fw_dump.dump_active;
 200}
 201
 202/*
 203 * Returns 1 if there are no holes in the boot memory area,
 204 * 0 otherwise.
 205 */
 206static int is_boot_memory_area_contiguous(void)
 207{
 208        struct memblock_region *reg;
 209        unsigned long tstart, tend;
 210        unsigned long start_pfn = PHYS_PFN(RMA_START);
 211        unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
 212        unsigned int ret = 0;
 213
 214        for_each_memblock(memory, reg) {
 215                tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
 216                tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
 217                if (tstart < tend) {
 218                        /* Memory hole from start_pfn to tstart */
 219                        if (tstart > start_pfn)
 220                                break;
 221
 222                        if (tend == end_pfn) {
 223                                ret = 1;
 224                                break;
 225                        }
 226
 227                        start_pfn = tend + 1;
 228                }
 229        }
 230
 231        return ret;
 232}
 233
 234/*
 235 * Returns true if there are no holes in the reserved memory area,
 236 * false otherwise.
 237 */
 238static bool is_reserved_memory_area_contiguous(void)
 239{
 240        struct memblock_region *reg;
 241        unsigned long start, end;
 242        unsigned long d_start = fw_dump.reserve_dump_area_start;
 243        unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
 244
 245        for_each_memblock(memory, reg) {
 246                start = max(d_start, (unsigned long)reg->base);
 247                end = min(d_end, (unsigned long)(reg->base + reg->size));
 248                if (d_start < end) {
 249                        /* Memory hole from d_start to start */
 250                        if (start > d_start)
 251                                break;
 252
 253                        if (end == d_end)
 254                                return true;
 255
 256                        d_start = end + 1;
 257                }
 258        }
 259
 260        return false;
 261}
 262
 263/* Print firmware assisted dump configurations for debugging purpose. */
 264static void fadump_show_config(void)
 265{
 266        pr_debug("Support for firmware-assisted dump (fadump): %s\n",
 267                        (fw_dump.fadump_supported ? "present" : "no support"));
 268
 269        if (!fw_dump.fadump_supported)
 270                return;
 271
 272        pr_debug("Fadump enabled    : %s\n",
 273                                (fw_dump.fadump_enabled ? "yes" : "no"));
 274        pr_debug("Dump Active       : %s\n",
 275                                (fw_dump.dump_active ? "yes" : "no"));
 276        pr_debug("Dump section sizes:\n");
 277        pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
 278        pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
 279        pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
 280}
 281
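/*
 * Initialize the fadump memory structure that is passed to firmware on
 * registration: a version 1 header with three dump sections (CPU state
 * data, HPTE region and RMA region) whose destination addresses are laid
 * out back to back starting at 'addr'. The unused disk dump fields are
 * left zeroed. Returns the address just past the last section.
 */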
 282static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
 283                                unsigned long addr)
 284{
 285        if (!fdm)
 286                return 0;
 287
 288        memset(fdm, 0, sizeof(struct fadump_mem_struct));
 289        addr = addr & PAGE_MASK;
 290
 291        fdm->header.dump_format_version = cpu_to_be32(0x00000001);
 292        fdm->header.dump_num_sections = cpu_to_be16(3);
 293        fdm->header.dump_status_flag = 0;
 294        fdm->header.offset_first_dump_section =
 295                cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
 296
 297        /*
 298         * Fields for disk dump option.
 299         * We are not using disk dump option, hence set these fields to 0.
 300         */
 301        fdm->header.dd_block_size = 0;
 302        fdm->header.dd_block_offset = 0;
 303        fdm->header.dd_num_blocks = 0;
 304        fdm->header.dd_offset_disk_path = 0;
 305
 306        /* set 0 to disable an automatic dump-reboot. */
 307        fdm->header.max_time_auto = 0;
 308
 309        /* Kernel dump sections */
 310        /* cpu state data section. */
 311        fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
 312        fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
 313        fdm->cpu_state_data.source_address = 0;
 314        fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
 315        fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
 316        addr += fw_dump.cpu_state_data_size;
 317
 318        /* hpte region section */
 319        fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
 320        fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
 321        fdm->hpte_region.source_address = 0;
 322        fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
 323        fdm->hpte_region.destination_address = cpu_to_be64(addr);
 324        addr += fw_dump.hpte_region_size;
 325
 326        /* RMA region section */
 327        fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
 328        fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
 329        fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
 330        fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
 331        fdm->rmr_region.destination_address = cpu_to_be64(addr);
 332        addr += fw_dump.boot_memory_size;
 333
 334        return addr;
 335}
 336
 337/**
 338 * fadump_calculate_reserve_size(): reserve a variable boot area (5% of System RAM)
 339 *
 340 * Function to find the largest memory size we need to reserve during early
 341 * boot process. This will be the size of the memory that is required for a
 342 * kernel to boot successfully.
 343 *
 344 * This function has been taken from phyp-assisted dump feature implementation.
 345 *
 346 * Returns the larger of 256MB or 5% of system RAM, rounded down to multiples of 256MB.
 347 *
 348 * TODO: Come up with a better approach to find out a more accurate memory
 349 * size that is required for a kernel to boot successfully.
 350 *
 351 */
 352static inline unsigned long fadump_calculate_reserve_size(void)
 353{
 354        int ret;
 355        unsigned long long base, size;
 356
 357        if (fw_dump.reserve_bootvar)
 358                pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
 359
 360        /*
 361         * Check if the size is specified through crashkernel= cmdline
 362         * option. If yes, then use that but ignore base as fadump reserves
 363         * memory at a predefined offset.
 364         */
 365        ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 366                                &size, &base);
 367        if (ret == 0 && size > 0) {
 368                unsigned long max_size;
 369
 370                if (fw_dump.reserve_bootvar)
 371                        pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
 372
 373                fw_dump.reserve_bootvar = (unsigned long)size;
 374
 375                /*
 376                 * Adjust if the boot memory size specified is above
 377                 * the upper limit.
 378                 */
 379                max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
 380                if (fw_dump.reserve_bootvar > max_size) {
 381                        fw_dump.reserve_bootvar = max_size;
 382                        pr_info("Adjusted boot memory size to %luMB\n",
 383                                (fw_dump.reserve_bootvar >> 20));
 384                }
 385
 386                return fw_dump.reserve_bootvar;
 387        } else if (fw_dump.reserve_bootvar) {
 388                /*
 389                 * 'fadump_reserve_mem=' is being used to reserve memory
 390                 * for firmware-assisted dump.
 391                 */
 392                return fw_dump.reserve_bootvar;
 393        }
 394
 395        /* divide by 20 to get 5% of value */
 396        size = memblock_phys_mem_size() / 20;
 397
 398        /* round it down to a multiple of 256MB */
 399        size = size & ~0x0FFFFFFFUL;
 400
 401        /* Truncate to memory_limit. We don't want to over-reserve memory. */
 402        if (memory_limit && size > memory_limit)
 403                size = memory_limit;
 404
 405        return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
 406}
 407
 408/*
 409 * Calculate the total memory size required to be reserved for
 410 * firmware-assisted dump registration.
 411 */
 412static unsigned long get_fadump_area_size(void)
 413{
 414        unsigned long size = 0;
 415
 416        size += fw_dump.cpu_state_data_size;
 417        size += fw_dump.hpte_region_size;
 418        size += fw_dump.boot_memory_size;
 419        size += sizeof(struct fadump_crash_info_header);
 420        size += sizeof(struct elfhdr); /* ELF core header.*/
 421        size += sizeof(struct elf_phdr); /* place holder for cpu notes */
 422        /* Program headers for crash memory regions. */
 423        size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
 424
 425        size = PAGE_ALIGN(size);
 426        return size;
 427}
 428
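/*
 * Reserve the portions of [base, base + size) that are backed by actual
 * memory, walking memblock regions so that memory holes are skipped.
 */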
 429static void __init fadump_reserve_crash_area(unsigned long base,
 430                                             unsigned long size)
 431{
 432        struct memblock_region *reg;
 433        unsigned long mstart, mend, msize;
 434
 435        for_each_memblock(memory, reg) {
 436                mstart = max_t(unsigned long, base, reg->base);
 437                mend = reg->base + reg->size;
 438                mend = min(base + size, mend);
 439
 440                if (mstart < mend) {
 441                        msize = mend - mstart;
 442                        memblock_reserve(mstart, msize);
 443                        pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
 444                                (msize >> 20), mstart);
 445                }
 446        }
 447}
 448
 449int __init fadump_reserve_mem(void)
 450{
 451        unsigned long base, size, memory_boundary;
 452
 453        if (!fw_dump.fadump_enabled)
 454                return 0;
 455
 456        if (!fw_dump.fadump_supported) {
 457                printk(KERN_INFO "Firmware-assisted dump is not supported on"
 458                                " this hardware\n");
 459                fw_dump.fadump_enabled = 0;
 460                return 0;
 461        }
 462        /*
 463         * Initialize boot memory size.
 464         * If a dump is active then we have already calculated the size
 465         * during the first kernel's boot.
 466         */
 467        if (fdm_active)
 468                fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
 469        else {
 470                fw_dump.boot_memory_size = fadump_calculate_reserve_size();
 471#ifdef CONFIG_CMA
 472                if (!fw_dump.nocma)
 473                        fw_dump.boot_memory_size =
 474                                ALIGN(fw_dump.boot_memory_size,
 475                                                        FADUMP_CMA_ALIGNMENT);
 476#endif
 477        }
 478
 479        /*
 480         * Calculate the memory boundary.
 481         * If memory_limit is less than actual memory boundary then reserve
 482         * the memory for fadump beyond the memory_limit and adjust the
 483         * memory_limit accordingly, so that the running kernel can run with
 484         * the specified memory_limit.
 485         */
 486        if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
 487                size = get_fadump_area_size();
 488                if ((memory_limit + size) < memblock_end_of_DRAM())
 489                        memory_limit += size;
 490                else
 491                        memory_limit = memblock_end_of_DRAM();
 492                printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
 493                                " dump, now %#016llx\n", memory_limit);
 494        }
 495        if (memory_limit)
 496                memory_boundary = memory_limit;
 497        else
 498                memory_boundary = memblock_end_of_DRAM();
 499
 500        if (fw_dump.dump_active) {
 501                pr_info("Firmware-assisted dump is active.\n");
 502
 503#ifdef CONFIG_HUGETLB_PAGE
 504                /*
 505                 * FADump capture kernel doesn't care much about hugepages.
 506                 * In fact, handling hugepages in capture kernel is asking for
 507                 * trouble. So, disable HugeTLB support when fadump is active.
 508                 */
 509                hugetlb_disabled = true;
 510#endif
 511                /*
 512                 * If the last boot has crashed then reserve all the memory
 513                 * above boot_memory_size so that we don't touch it until the
 514                 * dump is written to disk by the userspace tool. This memory
 515                 * will be released for general use once the dump is saved.
 516                 */
 517                base = fw_dump.boot_memory_size;
 518                size = memory_boundary - base;
 519                fadump_reserve_crash_area(base, size);
 520
 521                fw_dump.fadumphdr_addr =
 522                                be64_to_cpu(fdm_active->rmr_region.destination_address) +
 523                                be64_to_cpu(fdm_active->rmr_region.source_len);
 524                pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
 525                fw_dump.reserve_dump_area_start = base;
 526                fw_dump.reserve_dump_area_size = size;
 527        } else {
 528                size = get_fadump_area_size();
 529
 530                /*
 531                 * Reserve memory at an offset closer to bottom of the RAM to
 532                 * minimize the impact of memory hot-remove operation. We can't
 533                 * use memblock_find_in_range() here since it doesn't allocate
 534                 * from bottom to top.
 535                 */
 536                for (base = fw_dump.boot_memory_size;
 537                     base <= (memory_boundary - size);
 538                     base += size) {
 539                        if (memblock_is_region_memory(base, size) &&
 540                            !memblock_is_region_reserved(base, size))
 541                                break;
 542                }
 543                if ((base > (memory_boundary - size)) ||
 544                    memblock_reserve(base, size)) {
 545                        pr_err("Failed to reserve memory\n");
 546                        return 0;
 547                }
 548
 549                pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
 550                        "assisted dump (System RAM: %ldMB)\n",
 551                        (unsigned long)(size >> 20),
 552                        (unsigned long)(base >> 20),
 553                        (unsigned long)(memblock_phys_mem_size() >> 20));
 554
 555                fw_dump.reserve_dump_area_start = base;
 556                fw_dump.reserve_dump_area_size = size;
 557                return fadump_cma_init();
 558        }
 559        return 1;
 560}
 561
 562unsigned long __init arch_reserved_kernel_pages(void)
 563{
 564        return memblock_reserved_size() / PAGE_SIZE;
 565}
 566
 567/* Look for fadump= cmdline option. */
 568static int __init early_fadump_param(char *p)
 569{
 570        if (!p)
 571                return 1;
 572
 573        if (strncmp(p, "on", 2) == 0)
 574                fw_dump.fadump_enabled = 1;
 575        else if (strncmp(p, "off", 3) == 0)
 576                fw_dump.fadump_enabled = 0;
 577        else if (strncmp(p, "nocma", 5) == 0) {
 578                fw_dump.fadump_enabled = 1;
 579                fw_dump.nocma = 1;
 580        }
 581
 582        return 0;
 583}
 584early_param("fadump", early_fadump_param);
 585
 586/*
 587 * Look for fadump_reserve_mem= cmdline option
 588 * TODO: Remove references to the 'fadump_reserve_mem=' parameter
 589 *       once the 'crashkernel=' parameter is fully adopted.
 590 */
 591static int __init early_fadump_reserve_mem(char *p)
 592{
 593        if (p)
 594                fw_dump.reserve_bootvar = memparse(p, &p);
 595        return 0;
 596}
 597early_param("fadump_reserve_mem", early_fadump_reserve_mem);
 598
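/*
 * Register the dump sections with firmware through the
 * ibm,configure-kernel-dump RTAS call, retrying for as long as firmware
 * reports a busy status, and map the RTAS return code to an errno.
 */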
 599static int register_fw_dump(struct fadump_mem_struct *fdm)
 600{
 601        int rc, err;
 602        unsigned int wait_time;
 603
 604        pr_debug("Registering for firmware-assisted kernel dump...\n");
 605
 606        /* TODO: Add upper time limit for the delay */
 607        do {
 608                rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
 609                        FADUMP_REGISTER, fdm,
 610                        sizeof(struct fadump_mem_struct));
 611
 612                wait_time = rtas_busy_delay_time(rc);
 613                if (wait_time)
 614                        mdelay(wait_time);
 615
 616        } while (wait_time);
 617
 618        err = -EIO;
 619        switch (rc) {
 620        default:
 621                pr_err("Failed to register. Unknown Error(%d).\n", rc);
 622                break;
 623        case -1:
 624                printk(KERN_ERR "Failed to register firmware-assisted kernel"
 625                        " dump. Hardware Error(%d).\n", rc);
 626                break;
 627        case -3:
 628                if (!is_boot_memory_area_contiguous())
 629                        pr_err("Can't have holes in boot memory area while registering fadump\n");
 630                else if (!is_reserved_memory_area_contiguous())
 631                        pr_err("Can't have holes in reserved memory area while"
 632                               " registering fadump\n");
 633
 634                printk(KERN_ERR "Failed to register firmware-assisted kernel"
 635                        " dump. Parameter Error(%d).\n", rc);
 636                err = -EINVAL;
 637                break;
 638        case -9:
 639                printk(KERN_ERR "firmware-assisted kernel dump is already"
 640                        " registered.\n");
 641                fw_dump.dump_registered = 1;
 642                err = -EEXIST;
 643                break;
 644        case 0:
 645                printk(KERN_INFO "firmware-assisted kernel dump registration"
 646                        " is successful\n");
 647                fw_dump.dump_registered = 1;
 648                err = 0;
 649                break;
 650        }
 651        return err;
 652}
 653
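/*
 * Crash-time entry point when fadump is registered: the first CPU to get
 * here records the crashing cpu, vmcoreinfo and its registers in the crash
 * info header and then triggers the firmware-assisted dump through the
 * ibm,os-term RTAS call. Any other CPU that races in simply spins for as
 * long as fadump stays registered.
 */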
 654void crash_fadump(struct pt_regs *regs, const char *str)
 655{
 656        struct fadump_crash_info_header *fdh = NULL;
 657        int old_cpu, this_cpu;
 658
 659        if (!should_fadump_crash())
 660                return;
 661
 662        /*
 663         * old_cpu == -1 means this is the first CPU which has come here,
 664         * go ahead and trigger fadump.
 665         *
 666         * old_cpu != -1 means some other CPU is already on its way
 667         * to trigger fadump, just keep looping here.
 668         */
 669        this_cpu = smp_processor_id();
 670        old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
 671
 672        if (old_cpu != -1) {
 673                /*
 674                 * We can't loop here indefinitely. Wait as long as fadump
 675                 * is in force. If we race with fadump un-registration this
 676                 * loop will break and then we go down to normal panic path
 677                 * and reboot. If fadump is in force the first crashing
 678                 * cpu will definitely trigger fadump.
 679                 */
 680                while (fw_dump.dump_registered)
 681                        cpu_relax();
 682                return;
 683        }
 684
 685        fdh = __va(fw_dump.fadumphdr_addr);
 686        fdh->crashing_cpu = crashing_cpu;
 687        crash_save_vmcoreinfo();
 688
 689        if (regs)
 690                fdh->regs = *regs;
 691        else
 692                ppc_save_regs(&fdh->regs);
 693
 694        fdh->online_mask = *cpu_online_mask;
 695
 696        /* Call ibm,os-term rtas call to trigger firmware assisted dump */
 697        rtas_os_term((char *)str);
 698}
 699
 700#define GPR_MASK        0xffffff0000000000
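/*
 * Decode a "GPRnn" register identifier from the CPU state data into a GPR
 * index. Returns 0-31 on success, -1 if the id is not a GPR or the number
 * is out of range.
 */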
 701static inline int fadump_gpr_index(u64 id)
 702{
 703        int i = -1;
 704        char str[3];
 705
 706        if ((id & GPR_MASK) == REG_ID("GPR")) {
 707                /* get the digits at the end */
 708                id &= ~GPR_MASK;
 709                id >>= 24;
 710                str[2] = '\0';
 711                str[1] = id & 0xff;
 712                str[0] = (id >> 8) & 0xff;
 713                sscanf(str, "%d", &i);
 714                if (i > 31)
 715                        i = -1;
 716        }
 717        return i;
 718}
 719
 720static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
 721                                                                u64 reg_val)
 722{
 723        int i;
 724
 725        i = fadump_gpr_index(reg_id);
 726        if (i >= 0)
 727                regs->gpr[i] = (unsigned long)reg_val;
 728        else if (reg_id == REG_ID("NIA"))
 729                regs->nip = (unsigned long)reg_val;
 730        else if (reg_id == REG_ID("MSR"))
 731                regs->msr = (unsigned long)reg_val;
 732        else if (reg_id == REG_ID("CTR"))
 733                regs->ctr = (unsigned long)reg_val;
 734        else if (reg_id == REG_ID("LR"))
 735                regs->link = (unsigned long)reg_val;
 736        else if (reg_id == REG_ID("XER"))
 737                regs->xer = (unsigned long)reg_val;
 738        else if (reg_id == REG_ID("CR"))
 739                regs->ccr = (unsigned long)reg_val;
 740        else if (reg_id == REG_ID("DAR"))
 741                regs->dar = (unsigned long)reg_val;
 742        else if (reg_id == REG_ID("DSISR"))
 743                regs->dsisr = (unsigned long)reg_val;
 744}
 745
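/*
 * Convert one CPU's register entries into pt_regs, reading until the
 * "CPUEND" marker. Returns a pointer to the entry following "CPUEND".
 */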
 746static struct fadump_reg_entry*
 747fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
 748{
 749        memset(regs, 0, sizeof(struct pt_regs));
 750
 751        while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
 752                fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
 753                                        be64_to_cpu(reg_entry->reg_value));
 754                reg_entry++;
 755        }
 756        reg_entry++;
 757        return reg_entry;
 758}
 759
 760static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
 761{
 762        struct elf_prstatus prstatus;
 763
 764        memset(&prstatus, 0, sizeof(prstatus));
 765        /*
 766         * FIXME: How do I get PID? Do I really need it?
 767         * prstatus.pr_pid = ????
 768         */
 769        elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
 770        buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
 771                              &prstatus, sizeof(prstatus));
 772        return buf;
 773}
 774
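/*
 * Point the placeholder PT_NOTE program header in the elfcore header at
 * the cpu notes buffer built by the capture kernel.
 */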
 775static void fadump_update_elfcore_header(char *bufp)
 776{
 777        struct elfhdr *elf;
 778        struct elf_phdr *phdr;
 779
 780        elf = (struct elfhdr *)bufp;
 781        bufp += sizeof(struct elfhdr);
 782
 783        /* First note is a place holder for cpu notes info. */
 784        phdr = (struct elf_phdr *)bufp;
 785
 786        if (phdr->p_type == PT_NOTE) {
 787                phdr->p_paddr = fw_dump.cpu_notes_buf;
 788                phdr->p_offset  = phdr->p_paddr;
 789                phdr->p_filesz  = fw_dump.cpu_notes_buf_size;
 790                phdr->p_memsz = fw_dump.cpu_notes_buf_size;
 791        }
 792        return;
 793}
 794
 795static void *fadump_cpu_notes_buf_alloc(unsigned long size)
 796{
 797        void *vaddr;
 798        struct page *page;
 799        unsigned long order, count, i;
 800
 801        order = get_order(size);
 802        vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
 803        if (!vaddr)
 804                return NULL;
 805
 806        count = 1 << order;
 807        page = virt_to_page(vaddr);
 808        for (i = 0; i < count; i++)
 809                SetPageReserved(page + i);
 810        return vaddr;
 811}
 812
 813static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
 814{
 815        struct page *page;
 816        unsigned long order, count, i;
 817
 818        order = get_order(size);
 819        count = 1 << order;
 820        page = virt_to_page(vaddr);
 821        for (i = 0; i < count; i++)
 822                ClearPageReserved(page + i);
 823        __free_pages(page, order);
 824}
 825
 826/*
 827 * Read CPU state dump data and convert it into ELF notes.
 828 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
 829 * used to access the data to allow for additional fields to be added without
 830 * affecting compatibility. Each list of registers for a CPU starts with
 831 * "CPUSTRT" and ends with "CPUEND". Each register entry is 16 bytes: an
 832 * 8-byte ASCII identifier and an 8-byte register value. The register
 833 * entries with identifiers "CPUSTRT" and "CPUEND" carry a 4-byte cpu id
 834 * as part of the register value. For more details refer to the PAPR document.
 835 *
 836 * Only for the crashing cpu do we ignore the CPU dump data and instead get
 837 * the exact state from the fadump crash info structure populated by the
 838 * first kernel at the time of crash.
 839 */
 840static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
 841{
 842        struct fadump_reg_save_area_header *reg_header;
 843        struct fadump_reg_entry *reg_entry;
 844        struct fadump_crash_info_header *fdh = NULL;
 845        void *vaddr;
 846        unsigned long addr;
 847        u32 num_cpus, *note_buf;
 848        struct pt_regs regs;
 849        int i, rc = 0, cpu = 0;
 850
 851        if (!fdm->cpu_state_data.bytes_dumped)
 852                return -EINVAL;
 853
 854        addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
 855        vaddr = __va(addr);
 856
 857        reg_header = vaddr;
 858        if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
 859                printk(KERN_ERR "Unable to read register save area.\n");
 860                return -ENOENT;
 861        }
 862        pr_debug("--------CPU State Data------------\n");
 863        pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
 864        pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
 865
 866        vaddr += be32_to_cpu(reg_header->num_cpu_offset);
 867        num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
 868        pr_debug("NumCpus     : %u\n", num_cpus);
 869        vaddr += sizeof(u32);
 870        reg_entry = (struct fadump_reg_entry *)vaddr;
 871
 872        /* Allocate buffer to hold cpu crash notes. */
 873        fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
 874        fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
 875        note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
 876        if (!note_buf) {
 877                printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
 878                        "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
 879                return -ENOMEM;
 880        }
 881        fw_dump.cpu_notes_buf = __pa(note_buf);
 882
 883        pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
 884                        (num_cpus * sizeof(note_buf_t)), note_buf);
 885
 886        if (fw_dump.fadumphdr_addr)
 887                fdh = __va(fw_dump.fadumphdr_addr);
 888
 889        for (i = 0; i < num_cpus; i++) {
 890                if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
 891                        printk(KERN_ERR "Unable to read CPU state data\n");
 892                        rc = -ENOENT;
 893                        goto error_out;
 894                }
 895                /* Lower 4 bytes of reg_value contains logical cpu id */
 896                cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
 897                if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
 898                        SKIP_TO_NEXT_CPU(reg_entry);
 899                        continue;
 900                }
 901                pr_debug("Reading register data for cpu %d...\n", cpu);
 902                if (fdh && fdh->crashing_cpu == cpu) {
 903                        regs = fdh->regs;
 904                        note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
 905                        SKIP_TO_NEXT_CPU(reg_entry);
 906                } else {
 907                        reg_entry++;
 908                        reg_entry = fadump_read_registers(reg_entry, &regs);
 909                        note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
 910                }
 911        }
 912        final_note(note_buf);
 913
 914        if (fdh) {
 915                pr_debug("Updating elfcore header (%llx) with cpu notes\n",
 916                                                        fdh->elfcorehdr_addr);
 917                fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
 918        }
 919        return 0;
 920
 921error_out:
 922        fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
 923                                        fw_dump.cpu_notes_buf_size);
 924        fw_dump.cpu_notes_buf = 0;
 925        fw_dump.cpu_notes_buf_size = 0;
 926        return rc;
 927
 928}
 929
 930/*
 931 * Validate and process the dump data stored by firmware before exporting
 932 * it through '/proc/vmcore'.
 933 */
 934static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
 935{
 936        struct fadump_crash_info_header *fdh;
 937        int rc = 0;
 938
 939        if (!fdm_active || !fw_dump.fadumphdr_addr)
 940                return -EINVAL;
 941
 942        /* Check if the dump data is valid. */
 943        if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
 944                        (fdm_active->cpu_state_data.error_flags != 0) ||
 945                        (fdm_active->rmr_region.error_flags != 0)) {
 946                printk(KERN_ERR "Dump taken by platform is not valid\n");
 947                return -EINVAL;
 948        }
 949        if ((fdm_active->rmr_region.bytes_dumped !=
 950                        fdm_active->rmr_region.source_len) ||
 951                        !fdm_active->cpu_state_data.bytes_dumped) {
 952                printk(KERN_ERR "Dump taken by platform is incomplete\n");
 953                return -EINVAL;
 954        }
 955
 956        /* Validate the fadump crash info header */
 957        fdh = __va(fw_dump.fadumphdr_addr);
 958        if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
 959                printk(KERN_ERR "Crash info header is not valid.\n");
 960                return -EINVAL;
 961        }
 962
 963        rc = fadump_build_cpu_notes(fdm_active);
 964        if (rc)
 965                return rc;
 966
 967        /*
 968         * We are done validating dump info and elfcore header is now ready
 969         * to be exported. set elfcorehdr_addr so that vmcore module will
 970         * export the elfcore header through '/proc/vmcore'.
 971         */
 972        elfcorehdr_addr = fdh->elfcorehdr_addr;
 973
 974        return 0;
 975}
 976
 977static void free_crash_memory_ranges(void)
 978{
 979        kfree(crash_memory_ranges);
 980        crash_memory_ranges = NULL;
 981        crash_memory_ranges_size = 0;
 982        max_crash_mem_ranges = 0;
 983}
 984
 985/*
 986 * Allocate or reallocate crash memory ranges array in incremental units
 987 * of PAGE_SIZE.
 988 */
 989static int allocate_crash_memory_ranges(void)
 990{
 991        struct fad_crash_memory_ranges *new_array;
 992        u64 new_size;
 993
 994        new_size = crash_memory_ranges_size + PAGE_SIZE;
 995        pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
 996                 new_size);
 997
 998        new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
 999        if (new_array == NULL) {
1000                pr_err("Insufficient memory for setting up crash memory ranges\n");
1001                free_crash_memory_ranges();
1002                return -ENOMEM;
1003        }
1004
1005        crash_memory_ranges = new_array;
1006        crash_memory_ranges_size = new_size;
1007        max_crash_mem_ranges = (new_size /
1008                                sizeof(struct fad_crash_memory_ranges));
1009        return 0;
1010}
1011
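/*
 * Add the range [base, end) to the crash memory ranges list, merging it
 * with the previous entry when the two are adjacent and growing the array
 * on demand.
 */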
1012static inline int fadump_add_crash_memory(unsigned long long base,
1013                                          unsigned long long end)
1014{
1015        u64  start, size;
1016        bool is_adjacent = false;
1017
1018        if (base == end)
1019                return 0;
1020
1021        /*
1022         * Fold adjacent memory ranges to bring down the memory ranges/
1023         * PT_LOAD segments count.
1024         */
1025        if (crash_mem_ranges) {
1026                start = crash_memory_ranges[crash_mem_ranges - 1].base;
1027                size = crash_memory_ranges[crash_mem_ranges - 1].size;
1028
1029                if ((start + size) == base)
1030                        is_adjacent = true;
1031        }
1032        if (!is_adjacent) {
1033                /* resize the array on reaching the limit */
1034                if (crash_mem_ranges == max_crash_mem_ranges) {
1035                        int ret;
1036
1037                        ret = allocate_crash_memory_ranges();
1038                        if (ret)
1039                                return ret;
1040                }
1041
1042                start = base;
1043                crash_memory_ranges[crash_mem_ranges].base = start;
1044                crash_mem_ranges++;
1045        }
1046
1047        crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
1048        pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
1049                (crash_mem_ranges - 1), start, end - 1, (end - start));
1050        return 0;
1051}
1052
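/*
 * Add the range [start, end) as crash memory while carving out any overlap
 * with the reserved dump area.
 */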
1053static int fadump_exclude_reserved_area(unsigned long long start,
1054                                        unsigned long long end)
1055{
1056        unsigned long long ra_start, ra_end;
1057        int ret = 0;
1058
1059        ra_start = fw_dump.reserve_dump_area_start;
1060        ra_end = ra_start + fw_dump.reserve_dump_area_size;
1061
1062        if ((ra_start < end) && (ra_end > start)) {
1063                if ((start < ra_start) && (end > ra_end)) {
1064                        ret = fadump_add_crash_memory(start, ra_start);
1065                        if (ret)
1066                                return ret;
1067
1068                        ret = fadump_add_crash_memory(ra_end, end);
1069                } else if (start < ra_start) {
1070                        ret = fadump_add_crash_memory(start, ra_start);
1071                } else if (ra_end < end) {
1072                        ret = fadump_add_crash_memory(ra_end, end);
1073                }
1074        } else
1075                ret = fadump_add_crash_memory(start, end);
1076
1077        return ret;
1078}
1079
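/*
 * Fill in the ELF core header fields (ET_CORE, ELF_ARCH, ...). The program
 * headers are added later by fadump_create_elfcore_headers().
 */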
1080static int fadump_init_elfcore_header(char *bufp)
1081{
1082        struct elfhdr *elf;
1083
1084        elf = (struct elfhdr *) bufp;
1085        bufp += sizeof(struct elfhdr);
1086        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1087        elf->e_ident[EI_CLASS] = ELF_CLASS;
1088        elf->e_ident[EI_DATA] = ELF_DATA;
1089        elf->e_ident[EI_VERSION] = EV_CURRENT;
1090        elf->e_ident[EI_OSABI] = ELF_OSABI;
1091        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1092        elf->e_type = ET_CORE;
1093        elf->e_machine = ELF_ARCH;
1094        elf->e_version = EV_CURRENT;
1095        elf->e_entry = 0;
1096        elf->e_phoff = sizeof(struct elfhdr);
1097        elf->e_shoff = 0;
1098#if defined(_CALL_ELF)
1099        elf->e_flags = _CALL_ELF;
1100#else
1101        elf->e_flags = 0;
1102#endif
1103        elf->e_ehsize = sizeof(struct elfhdr);
1104        elf->e_phentsize = sizeof(struct elf_phdr);
1105        elf->e_phnum = 0;
1106        elf->e_shentsize = 0;
1107        elf->e_shnum = 0;
1108        elf->e_shstrndx = 0;
1109
1110        return 0;
1111}
1112
1113/*
1114 * Traverse the memblock structure and set up crash memory ranges. These
1115 * ranges will be used to create PT_LOAD program headers in the elfcore header.
1116 */
1117static int fadump_setup_crash_memory_ranges(void)
1118{
1119        struct memblock_region *reg;
1120        unsigned long long start, end;
1121        int ret;
1122
1123        pr_debug("Setup crash memory ranges.\n");
1124        crash_mem_ranges = 0;
1125
1126        /*
1127         * Add the first memory chunk (RMA_START through boot_memory_size) as
1128         * a separate memory chunk. The reason is that, at the time of crash,
1129         * firmware will move the content of this memory chunk to the location
1130         * specified during fadump registration. We need to create a separate
1131         * program header for this chunk with the correct offset.
1132         */
1133        ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
1134        if (ret)
1135                return ret;
1136
1137        for_each_memblock(memory, reg) {
1138                start = (unsigned long long)reg->base;
1139                end = start + (unsigned long long)reg->size;
1140
1141                /*
1142                 * skip the first memory chunk that is already added (RMA_START
1143                 * through boot_memory_size). This logic needs a relook if and
1144                 * when RMA_START changes to a non-zero value.
1145                 */
1146                BUILD_BUG_ON(RMA_START != 0);
1147                if (start < fw_dump.boot_memory_size) {
1148                        if (end > fw_dump.boot_memory_size)
1149                                start = fw_dump.boot_memory_size;
1150                        else
1151                                continue;
1152                }
1153
1154                /* add this range excluding the reserved dump area. */
1155                ret = fadump_exclude_reserved_area(start, end);
1156                if (ret)
1157                        return ret;
1158        }
1159
1160        return 0;
1161}
1162
1163/*
1164 * If the given physical address falls within the boot memory region then
1165 * return the relocated address that points to the dump region reserved
1166 * for saving initial boot memory contents.
1167 */
1168static inline unsigned long fadump_relocate(unsigned long paddr)
1169{
1170        if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
1171                return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
1172        else
1173                return paddr;
1174}
1175
1176static int fadump_create_elfcore_headers(char *bufp)
1177{
1178        struct elfhdr *elf;
1179        struct elf_phdr *phdr;
1180        int i;
1181
1182        fadump_init_elfcore_header(bufp);
1183        elf = (struct elfhdr *)bufp;
1184        bufp += sizeof(struct elfhdr);
1185
1186        /*
1187         * Set up an ELF PT_NOTE as a placeholder for cpu notes info. The notes
1188         * will be populated during the second kernel's boot after a crash. Hence
1189         * this PT_NOTE will always be the first elf note.
1190         *
1191         * NOTE: Any new ELF note addition should be placed after this note.
1192         */
1193        phdr = (struct elf_phdr *)bufp;
1194        bufp += sizeof(struct elf_phdr);
1195        phdr->p_type = PT_NOTE;
1196        phdr->p_flags = 0;
1197        phdr->p_vaddr = 0;
1198        phdr->p_align = 0;
1199
1200        phdr->p_offset = 0;
1201        phdr->p_paddr = 0;
1202        phdr->p_filesz = 0;
1203        phdr->p_memsz = 0;
1204
1205        (elf->e_phnum)++;
1206
1207        /* setup ELF PT_NOTE for vmcoreinfo */
1208        phdr = (struct elf_phdr *)bufp;
1209        bufp += sizeof(struct elf_phdr);
1210        phdr->p_type    = PT_NOTE;
1211        phdr->p_flags   = 0;
1212        phdr->p_vaddr   = 0;
1213        phdr->p_align   = 0;
1214
1215        phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
1216        phdr->p_offset  = phdr->p_paddr;
1217        phdr->p_memsz   = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
1218
1219        /* Increment number of program headers. */
1220        (elf->e_phnum)++;
1221
1222        /* setup PT_LOAD sections. */
1223
1224        for (i = 0; i < crash_mem_ranges; i++) {
1225                unsigned long long mbase, msize;
1226                mbase = crash_memory_ranges[i].base;
1227                msize = crash_memory_ranges[i].size;
1228
1229                if (!msize)
1230                        continue;
1231
1232                phdr = (struct elf_phdr *)bufp;
1233                bufp += sizeof(struct elf_phdr);
1234                phdr->p_type    = PT_LOAD;
1235                phdr->p_flags   = PF_R|PF_W|PF_X;
1236                phdr->p_offset  = mbase;
1237
1238                if (mbase == RMA_START) {
1239                        /*
1240                         * The entire RMA region will be moved by firmware
1241                         * to the specified destination_address. Hence set
1242                         * the correct offset.
1243                         */
1244                        phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
1245                }
1246
1247                phdr->p_paddr = mbase;
1248                phdr->p_vaddr = (unsigned long)__va(mbase);
1249                phdr->p_filesz = msize;
1250                phdr->p_memsz = msize;
1251                phdr->p_align = 0;
1252
1253                /* Increment number of program headers. */
1254                (elf->e_phnum)++;
1255        }
1256        return 0;
1257}
1258
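/*
 * Initialize the fadump crash info header at 'addr' and return the address
 * immediately after it, which is where the ELF core header is placed.
 */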
1259static unsigned long init_fadump_header(unsigned long addr)
1260{
1261        struct fadump_crash_info_header *fdh;
1262
1263        if (!addr)
1264                return 0;
1265
1266        fw_dump.fadumphdr_addr = addr;
1267        fdh = __va(addr);
1268        addr += sizeof(struct fadump_crash_info_header);
1269
1270        memset(fdh, 0, sizeof(struct fadump_crash_info_header));
1271        fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
1272        fdh->elfcorehdr_addr = addr;
1273        /* We will set the crashing cpu id in crash_fadump() during crash. */
1274        fdh->crashing_cpu = CPU_UNKNOWN;
1275
1276        return addr;
1277}
1278
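/*
 * Set up the crash memory ranges, the crash info header and the ELF core
 * headers in the reserved dump area, and then register the dump with
 * firmware.
 */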
1279static int register_fadump(void)
1280{
1281        unsigned long addr;
1282        void *vaddr;
1283        int ret;
1284
1285        /*
1286         * If no memory is reserved then we cannot register for firmware-
1287         * assisted dump.
1288         */
1289        if (!fw_dump.reserve_dump_area_size)
1290                return -ENODEV;
1291
1292        ret = fadump_setup_crash_memory_ranges();
1293        if (ret)
1294                return ret;
1295
1296        addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
1297        /* Initialize fadump crash info header. */
1298        addr = init_fadump_header(addr);
1299        vaddr = __va(addr);
1300
1301        pr_debug("Creating ELF core headers at %#016lx\n", addr);
1302        fadump_create_elfcore_headers(vaddr);
1303
1304        /* register the future kernel dump with firmware. */
1305        return register_fw_dump(&fdm);
1306}
1307
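/*
 * Un-register the dump through the ibm,configure-kernel-dump RTAS call,
 * retrying for as long as firmware reports a busy status.
 */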
1308static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
1309{
1310        int rc = 0;
1311        unsigned int wait_time;
1312
1313        pr_debug("Un-register firmware-assisted dump\n");
1314
1315        /* TODO: Add upper time limit for the delay */
1316        do {
1317                rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
1318                        FADUMP_UNREGISTER, fdm,
1319                        sizeof(struct fadump_mem_struct));
1320
1321                wait_time = rtas_busy_delay_time(rc);
1322                if (wait_time)
1323                        mdelay(wait_time);
1324        } while (wait_time);
1325
1326        if (rc) {
1327                printk(KERN_ERR "Failed to un-register firmware-assisted dump."
1328                        " unexpected error(%d).\n", rc);
1329                return rc;
1330        }
1331        fw_dump.dump_registered = 0;
1332        return 0;
1333}
1334
1335static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
1336{
1337        int rc = 0;
1338        unsigned int wait_time;
1339
1340        pr_debug("Invalidating firmware-assisted dump registration\n");
1341
1342        /* TODO: Add upper time limit for the delay */
1343        do {
1344                rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
1345                        FADUMP_INVALIDATE, fdm,
1346                        sizeof(struct fadump_mem_struct));
1347
1348                wait_time = rtas_busy_delay_time(rc);
1349                if (wait_time)
1350                        mdelay(wait_time);
1351        } while (wait_time);
1352
1353        if (rc) {
1354                pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
1355                return rc;
1356        }
1357        fw_dump.dump_active = 0;
1358        fdm_active = NULL;
1359        return 0;
1360}
1361
1362void fadump_cleanup(void)
1363{
1364        /* Invalidate the registration only if dump is active. */
1365        if (fw_dump.dump_active) {
1366                /* pass the same memory dump structure provided by platform */
1367                fadump_invalidate_dump(fdm_active);
1368        } else if (fw_dump.dump_registered) {
1369                /* Un-register Firmware-assisted dump if it was registered. */
1370                fadump_unregister_dump(&fdm);
1371                free_crash_memory_ranges();
1372        }
1373}
1374
1375static void fadump_free_reserved_memory(unsigned long start_pfn,
1376                                        unsigned long end_pfn)
1377{
1378        unsigned long pfn;
1379        unsigned long time_limit = jiffies + HZ;
1380
1381        pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
1382                PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
1383
1384        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1385                free_reserved_page(pfn_to_page(pfn));
1386
1387                if (time_after(jiffies, time_limit)) {
1388                        cond_resched();
1389                        time_limit = jiffies + HZ;
1390                }
1391        }
1392}
1393
1394/*
1395 * Skip memory holes and free memory that was actually reserved.
1396 */
1397static void fadump_release_reserved_area(unsigned long start, unsigned long end)
1398{
1399        struct memblock_region *reg;
1400        unsigned long tstart, tend;
1401        unsigned long start_pfn = PHYS_PFN(start);
1402        unsigned long end_pfn = PHYS_PFN(end);
1403
1404        for_each_memblock(memory, reg) {
1405                tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
1406                tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
1407                if (tstart < tend) {
1408                        fadump_free_reserved_memory(tstart, tend);
1409
1410                        if (tend == end_pfn)
1411                                break;
1412
1413                        start_pfn = tend + 1;
1414                }
1415        }
1416}
1417
1418/*
1419 * Release the memory that was reserved in early boot to preserve the memory
1420 * contents. The released memory will be available for general use.
1421 */
1422static void fadump_release_memory(unsigned long begin, unsigned long end)
1423{
1424        unsigned long ra_start, ra_end;
1425
1426        ra_start = fw_dump.reserve_dump_area_start;
1427        ra_end = ra_start + fw_dump.reserve_dump_area_size;
1428
1429        /*
1430         * Exclude the dump reserve area; it will be reused for the next
1431         * fadump registration.
1432         */
1433        if (begin < ra_end && end > ra_start) {
1434                if (begin < ra_start)
1435                        fadump_release_reserved_area(begin, ra_start);
1436                if (end > ra_end)
1437                        fadump_release_reserved_area(ra_end, end);
1438        } else
1439                fadump_release_reserved_area(begin, end);
1440}
1441
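/*
 * Invalidate the active dump registration, release the reserved memory for
 * general use (keeping just enough for re-registration), free the cpu notes
 * buffer and re-initialize the fadump memory structure.
 */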
1442static void fadump_invalidate_release_mem(void)
1443{
1444        unsigned long reserved_area_start, reserved_area_end;
1445        unsigned long destination_address;
1446
1447        mutex_lock(&fadump_mutex);
1448        if (!fw_dump.dump_active) {
1449                mutex_unlock(&fadump_mutex);
1450                return;
1451        }
1452
1453        destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
1454        fadump_cleanup();
1455        mutex_unlock(&fadump_mutex);
1456
1457        /*
1458         * Save the current reserved memory bounds; we will require them
1459         * later for releasing the memory for general use.
1460         */
1461        reserved_area_start = fw_dump.reserve_dump_area_start;
1462        reserved_area_end = reserved_area_start +
1463                        fw_dump.reserve_dump_area_size;
1464        /*
1465         * Set up reserve_dump_area_start and its size so that this
1466         * reserved memory can be reused for re-registration.
1467         */
1468        fw_dump.reserve_dump_area_start = destination_address;
1469        fw_dump.reserve_dump_area_size = get_fadump_area_size();
1470
1471        fadump_release_memory(reserved_area_start, reserved_area_end);
1472        if (fw_dump.cpu_notes_buf) {
1473                fadump_cpu_notes_buf_free(
1474                                (unsigned long)__va(fw_dump.cpu_notes_buf),
1475                                fw_dump.cpu_notes_buf_size);
1476                fw_dump.cpu_notes_buf = 0;
1477                fw_dump.cpu_notes_buf_size = 0;
1478        }
1479        /* Initialize the kernel dump memory structure for FAD registration. */
1480        init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1481}
1482
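/*
 * Write-only sysfs knob /sys/kernel/fadump_release_mem (kernel_kobj):
 * once the captured dump has been saved, writing 1 tears down
 * /proc/vmcore and releases the reserved dump memory for general use.
 * Only permitted while a dump is active; any value other than 1 is
 * rejected with -EINVAL.
 */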
1483static ssize_t fadump_release_memory_store(struct kobject *kobj,
1484                                        struct kobj_attribute *attr,
1485                                        const char *buf, size_t count)
1486{
1487        int input = -1;
1488
1489        if (!fw_dump.dump_active)
1490                return -EPERM;
1491
1492        if (kstrtoint(buf, 0, &input))
1493                return -EINVAL;
1494
1495        if (input == 1) {
1496                /*
1497                 * Take away '/proc/vmcore'. We are releasing the dump
1498                 * memory, so it will no longer be valid.
1499                 */
1500#ifdef CONFIG_PROC_VMCORE
1501                vmcore_cleanup();
1502#endif
1503                fadump_invalidate_release_mem();
1504
1505        } else
1506                return -EINVAL;
1507        return count;
1508}
1509
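/* /sys/kernel/fadump_enabled: read-only, reports fw_dump.fadump_enabled. */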
1510static ssize_t fadump_enabled_show(struct kobject *kobj,
1511                                        struct kobj_attribute *attr,
1512                                        char *buf)
1513{
1514        return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1515}
1516
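/*
 * /sys/kernel/fadump_registered: reads report the registration state;
 * writing 0 unregisters the dump and writing 1 (re)registers it. Writes
 * fail with -EPERM when fadump is disabled or a previous dump is still
 * active (fdm_active is set).
 */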
1517static ssize_t fadump_register_show(struct kobject *kobj,
1518                                        struct kobj_attribute *attr,
1519                                        char *buf)
1520{
1521        return sprintf(buf, "%d\n", fw_dump.dump_registered);
1522}
1523
1524static ssize_t fadump_register_store(struct kobject *kobj,
1525                                        struct kobj_attribute *attr,
1526                                        const char *buf, size_t count)
1527{
1528        int ret = 0;
1529        int input = -1;
1530
1531        if (!fw_dump.fadump_enabled || fdm_active)
1532                return -EPERM;
1533
1534        if (kstrtoint(buf, 0, &input))
1535                return -EINVAL;
1536
1537        mutex_lock(&fadump_mutex);
1538
1539        switch (input) {
1540        case 0:
1541                if (fw_dump.dump_registered == 0)
1542                        goto unlock_out;
1544                /* Un-register Firmware-assisted dump */
1545                fadump_unregister_dump(&fdm);
1546                break;
1547        case 1:
1548                if (fw_dump.dump_registered == 1) {
1549                        /* Un-register Firmware-assisted dump */
1550                        fadump_unregister_dump(&fdm);
1551                }
1552                /* Register Firmware-assisted dump */
1553                ret = register_fadump();
1554                break;
1555        default:
1556                ret = -EINVAL;
1557                break;
1558        }
1559
1560unlock_out:
1561        mutex_unlock(&fadump_mutex);
1562        return ret < 0 ? ret : count;
1563}
1564
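/*
 * seq_file backend for the fadump_region debugfs entry. Lists the CPU
 * state data, HPTE and real-mode (DUMP) regions with their destination
 * address ranges, sizes and the number of bytes firmware dumped. When a
 * dump is active and the reservation starts below the CPU state region,
 * the leading reserved gap is shown as an additional unnamed region.
 */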
1565static int fadump_region_show(struct seq_file *m, void *private)
1566{
1567        const struct fadump_mem_struct *fdm_ptr;
1568
1569        if (!fw_dump.fadump_enabled)
1570                return 0;
1571
1572        mutex_lock(&fadump_mutex);
1573        if (fdm_active)
1574                fdm_ptr = fdm_active;
1575        else {
1576                mutex_unlock(&fadump_mutex);
1577                fdm_ptr = &fdm;
1578        }
1579
1580        seq_printf(m,
1581                        "CPU : [%#016llx-%#016llx] %#llx bytes, "
1582                        "Dumped: %#llx\n",
1583                        be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
1584                        be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
1585                        be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
1586                        be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
1587                        be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
1588        seq_printf(m,
1589                        "HPTE: [%#016llx-%#016llx] %#llx bytes, "
1590                        "Dumped: %#llx\n",
1591                        be64_to_cpu(fdm_ptr->hpte_region.destination_address),
1592                        be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
1593                        be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
1594                        be64_to_cpu(fdm_ptr->hpte_region.source_len),
1595                        be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
1596        seq_printf(m,
1597                        "DUMP: [%#016llx-%#016llx] %#llx bytes, "
1598                        "Dumped: %#llx\n",
1599                        be64_to_cpu(fdm_ptr->rmr_region.destination_address),
1600                        be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
1601                        be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
1602                        be64_to_cpu(fdm_ptr->rmr_region.source_len),
1603                        be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
1604
1605        if (!fdm_active ||
1606                (fw_dump.reserve_dump_area_start ==
1607                be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
1608                goto out;
1609
1610        /* Dump is active. Show reserved memory region. */
1611        seq_printf(m,
1612                        "    : [%#016llx-%#016llx] %#llx bytes, "
1613                        "Dumped: %#llx\n",
1614                        (unsigned long long)fw_dump.reserve_dump_area_start,
1615                        be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
1616                        be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1617                        fw_dump.reserve_dump_area_start,
1618                        be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1619                        fw_dump.reserve_dump_area_start);
1620out:
1621        if (fdm_active)
1622                mutex_unlock(&fadump_mutex);
1623        return 0;
1624}
1625
1626static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1627                                                0200, NULL,
1628                                                fadump_release_memory_store);
1629static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1630                                                0444, fadump_enabled_show,
1631                                                NULL);
1632static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1633                                                0644, fadump_register_show,
1634                                                fadump_register_store);
1635
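/*
 * DEFINE_SHOW_ATTRIBUTE() generates fadump_region_fops (single_open
 * around fadump_region_show()) for the debugfs file created below.
 */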
1636DEFINE_SHOW_ATTRIBUTE(fadump_region);
1637
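/*
 * Create the user-visible interface: fadump_enabled and fadump_registered
 * under /sys/kernel, fadump_region under the powerpc debugfs directory
 * (typically /sys/kernel/debug/powerpc), and, when a dump is active, the
 * fadump_release_mem knob used to free the captured dump memory.
 */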
1638static void fadump_init_files(void)
1639{
1640        struct dentry *debugfs_file;
1641        int rc = 0;
1642
1643        rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1644        if (rc)
1645                pr_err("unable to create sysfs file fadump_enabled (%d)\n", rc);
1647
1648        rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1649        if (rc)
1650                pr_err("unable to create sysfs file fadump_registered (%d)\n", rc);
1652
1653        debugfs_file = debugfs_create_file("fadump_region", 0444,
1654                                        powerpc_debugfs_root, NULL,
1655                                        &fadump_region_fops);
1656        if (!debugfs_file)
1657                pr_err("unable to create debugfs file fadump_region\n");
1659
1660        if (fw_dump.dump_active) {
1661                rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1662                if (rc)
1663                        pr_err("unable to create sysfs file fadump_release_mem (%d)\n", rc);
1665        }
1667}
1668
1669/*
1670 * Prepare for firmware-assisted dump.
1671 */
1672int __init setup_fadump(void)
1673{
1674        if (!fw_dump.fadump_enabled)
1675                return 0;
1676
1677        if (!fw_dump.fadump_supported) {
1678                printk(KERN_ERR "Firmware-assisted dump is not supported on this hardware\n");
1680                return 0;
1681        }
1682
1683        fadump_show_config();
1684        /*
1685         * If dump data is available, check whether it is valid and
1686         * prepare to save it to disk.
1687         */
1688        if (fw_dump.dump_active) {
1689                /*
1690                 * If dump processing fails, invalidate the registration
1691                 * and release memory before proceeding with re-registration.
1692                 */
1693                if (process_fadump(fdm_active) < 0)
1694                        fadump_invalidate_release_mem();
1695        }
1696        /* Initialize the kernel dump memory structure for FAD registration. */
1697        else if (fw_dump.reserve_dump_area_size)
1698                init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1699        fadump_init_files();
1700
1701        return 1;
1702}
1703subsys_initcall(setup_fadump);
1704