linux/arch/powerpc/kexec/file_load_64.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * ppc64 code to implement the kexec_file_load syscall
 *
 * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
 * Copyright (C) 2004  IBM Corp.
 * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
 * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
 * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
 * Copyright (C) 2020  IBM Corporation
 *
 * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
 * Heavily modified for the kernel by
 * Hari Bathini, IBM Corporation.
 */

#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/of_device.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/setup.h>
#include <asm/drmem.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>

struct umem_info {
        u64 *buf;               /* data buffer for usable-memory property */
        u32 size;               /* size allocated for the data buffer */
        u32 max_entries;        /* maximum no. of entries */
        u32 idx;                /* index of current entry */

        /* usable memory ranges to look up */
        unsigned int nr_ranges;
        const struct crash_mem_range *ranges;
};

const struct kexec_file_ops * const kexec_file_loaders[] = {
        &kexec_elf64_ops,
        NULL
};
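
/*
 * Note: kexec_image_probe_default() walks this NULL-terminated table in
 * order, invoking each loader's probe() until one accepts the image. Only
 * the ELF64 loader is wired up here, so kexec_file_load() on ppc64 accepts
 * ELF (vmlinux-style) images.
 */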

/**
 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
 *                             regions like opal/rtas, tce-table, initrd,
 *                             kernel, htab which should be avoided while
 *                             setting up kexec load segments.
 * @mem_ranges:                Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
{
        int ret;

        ret = add_tce_mem_ranges(mem_ranges);
        if (ret)
                goto out;

        ret = add_initrd_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_htab_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_kernel_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_rtas_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_opal_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_reserved_mem_ranges(mem_ranges);
        if (ret)
                goto out;

        /* exclude memory ranges should be sorted for easy lookup */
        sort_memory_ranges(*mem_ranges, true);
out:
        if (ret)
                pr_err("Failed to setup exclude memory ranges\n");
        return ret;
}
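
/*
 * Note: the boolean passed to sort_memory_ranges() above is taken here to
 * request merging of adjacent/overlapping ranges after sorting (an
 * assumption based on its usage in this file; see kexec_ranges.c for the
 * authoritative semantics), keeping the exclude list compact for the range
 * lookups in arch_kexec_locate_mem_hole().
 */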

/**
 * get_usable_memory_ranges - Get usable memory ranges. This list includes
 *                            regions like crashkernel, opal/rtas & tce-table,
 *                            that kdump kernel could use.
 * @mem_ranges:               Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
{
        int ret;

        /*
         * Early boot failure observed on guests when low memory (first memory
         * block?) is not added to usable memory. So, add [0, crashk_res.end]
         * instead of [crashk_res.start, crashk_res.end] to work around it.
         * Also, crashed kernel's memory must be added to the reserve map to
         * prevent the kdump kernel from using it.
         */
        ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
        if (ret)
                goto out;

        ret = add_rtas_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_opal_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_tce_mem_ranges(mem_ranges);
out:
        if (ret)
                pr_err("Failed to setup usable memory ranges\n");
        return ret;
}

/**
 * get_crash_memory_ranges - Get crash memory ranges. This list includes
 *                           first/crashing kernel's memory regions that
 *                           would be exported via an elfcore.
 * @mem_ranges:              Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
{
        phys_addr_t base, end;
        struct crash_mem *tmem;
        u64 i;
        int ret = 0;

        for_each_mem_range(i, &base, &end) {
                u64 size = end - base;

                /* Skip backup memory region, which needs a separate entry */
                if (base == BACKUP_SRC_START) {
                        if (size > BACKUP_SRC_SIZE) {
                                base = BACKUP_SRC_END + 1;
                                size -= BACKUP_SRC_SIZE;
                        } else
                                continue;
                }

                ret = add_mem_range(mem_ranges, base, size);
                if (ret)
                        goto out;

                /* Try merging adjacent ranges before reallocation attempt */
                if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
                        sort_memory_ranges(*mem_ranges, true);
        }

        /* Reallocate memory ranges if there is no space to split ranges */
        tmem = *mem_ranges;
        if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
                tmem = realloc_mem_ranges(mem_ranges);
                if (!tmem) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        /* Exclude crashkernel region */
        ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
        if (ret)
                goto out;

        /*
         * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
         *        regions are exported to save their context at the time of
         *        crash, they should actually be backed up just like the
         *        first 64K bytes of memory.
         */
        ret = add_rtas_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_opal_mem_range(mem_ranges);
        if (ret)
                goto out;

        /* create a separate program header for the backup region */
        ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
        if (ret)
                goto out;

        sort_memory_ranges(*mem_ranges, false);
out:
        if (ret)
                pr_err("Failed to setup crash memory ranges\n");
        return ret;
}

/**
 * get_reserved_memory_ranges - Get reserved memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
        int ret;

        ret = add_rtas_mem_range(mem_ranges);
        if (ret)
                goto out;

        ret = add_tce_mem_ranges(mem_ranges);
        if (ret)
                goto out;

        ret = add_reserved_mem_ranges(mem_ranges);
out:
        if (ret)
                pr_err("Failed to setup reserved memory ranges\n");
        return ret;
}

/**
 * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
 *                              in the memory regions between buf_min & buf_max
 *                              for the buffer. If found, sets kbuf->mem.
 * @kbuf:                       Buffer contents and memory parameters.
 * @buf_min:                    Minimum address for the buffer.
 * @buf_max:                    Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
                                      u64 buf_min, u64 buf_max)
{
        int ret = -EADDRNOTAVAIL;
        phys_addr_t start, end;
        u64 i;

        for_each_mem_range_rev(i, &start, &end) {
                /*
                 * memblock uses [start, end) convention while it is
                 * [start, end] here. Fix the off-by-one to have the
                 * same convention.
                 */
                end -= 1;

                if (start > buf_max)
                        continue;

                /* Memory hole not found */
                if (end < buf_min)
                        break;

                /* Adjust memory region based on the given range */
                if (start < buf_min)
                        start = buf_min;
                if (end > buf_max)
                        end = buf_max;

                start = ALIGN(start, kbuf->buf_align);
                if (start < end && (end - start + 1) >= kbuf->memsz) {
                        /* Suitable memory range found. Set kbuf->mem */
                        kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
                                               kbuf->buf_align);
                        ret = 0;
                        break;
                }
        }

        return ret;
}
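
/*
 * Placement note for the top-down search above: the buffer lands at the
 * highest buf_align-aligned address that still leaves kbuf->memsz bytes
 * within [start, end]. E.g. with end = 0x3fffffff, memsz = 0x200000 and
 * buf_align = 0x100000: end - memsz + 1 = 0x3fe00000, which is already
 * aligned, so the buffer occupies [0x3fe00000, 0x3fffffff].
 */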

/**
 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
 *                                  suitable buffer with top down approach.
 * @kbuf:                           Buffer contents and memory parameters.
 * @buf_min:                        Minimum address for the buffer.
 * @buf_max:                        Maximum address for the buffer.
 * @emem:                           Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
                                          u64 buf_min, u64 buf_max,
                                          const struct crash_mem *emem)
{
        int i, ret = 0, err = -EADDRNOTAVAIL;
        u64 start, end, tmin, tmax;

        tmax = buf_max;
        for (i = (emem->nr_ranges - 1); i >= 0; i--) {
                start = emem->ranges[i].start;
                end = emem->ranges[i].end;

                if (start > tmax)
                        continue;

                if (end < tmax) {
                        tmin = (end < buf_min ? buf_min : end + 1);
                        ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
                        if (!ret)
                                return 0;
                }

                tmax = start - 1;

                if (tmax < buf_min) {
                        ret = err;
                        break;
                }
                ret = 0;
        }

        if (!ret) {
                tmin = buf_min;
                ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
        }
        return ret;
}
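
/*
 * Windowing illustration (hypothetical addresses; the bottom-up variant
 * below mirrors this): with sorted exclude ranges [0x1000, 0x1fff] and
 * [0x8000, 0x8fff] inside [buf_min, buf_max] = [0x0, 0xffff], the top-down
 * walk tries the sub-windows [0x9000, 0xffff], then [0x2000, 0x7fff], and
 * finally [0x0, 0x0fff], stopping at the first window where the aligned
 * buffer fits. The exclude ranges themselves are never offered.
 */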

/**
 * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
 *                               in the memory regions between buf_min & buf_max
 *                               for the buffer. If found, sets kbuf->mem.
 * @kbuf:                        Buffer contents and memory parameters.
 * @buf_min:                     Minimum address for the buffer.
 * @buf_max:                     Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
                                       u64 buf_min, u64 buf_max)
{
        int ret = -EADDRNOTAVAIL;
        phys_addr_t start, end;
        u64 i;

        for_each_mem_range(i, &start, &end) {
                /*
                 * memblock uses [start, end) convention while it is
                 * [start, end] here. Fix the off-by-one to have the
                 * same convention.
                 */
                end -= 1;

                if (end < buf_min)
                        continue;

                /* Memory hole not found */
                if (start > buf_max)
                        break;

                /* Adjust memory region based on the given range */
                if (start < buf_min)
                        start = buf_min;
                if (end > buf_max)
                        end = buf_max;

                start = ALIGN(start, kbuf->buf_align);
                if (start < end && (end - start + 1) >= kbuf->memsz) {
                        /* Suitable memory range found. Set kbuf->mem */
                        kbuf->mem = start;
                        ret = 0;
                        break;
                }
        }

        return ret;
}

/**
 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
 *                                   suitable buffer with bottom up approach.
 * @kbuf:                            Buffer contents and memory parameters.
 * @buf_min:                         Minimum address for the buffer.
 * @buf_max:                         Maximum address for the buffer.
 * @emem:                            Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
                                           u64 buf_min, u64 buf_max,
                                           const struct crash_mem *emem)
{
        int i, ret = 0, err = -EADDRNOTAVAIL;
        u64 start, end, tmin, tmax;

        tmin = buf_min;
        for (i = 0; i < emem->nr_ranges; i++) {
                start = emem->ranges[i].start;
                end = emem->ranges[i].end;

                if (end < tmin)
                        continue;

                if (start > tmin) {
                        tmax = (start > buf_max ? buf_max : start - 1);
                        ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
                        if (!ret)
                                return 0;
                }

                tmin = end + 1;

                if (tmin > buf_max) {
                        ret = err;
                        break;
                }
                ret = 0;
        }

        if (!ret) {
                tmax = buf_max;
                ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
        }
        return ret;
}

/**
 * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
 * @um_info:                  Usable memory buffer and ranges info.
 * @cnt:                      No. of entries to accommodate.
 *
 * On reallocation failure, the old buffer is left in place (krealloc() does
 * not free it); the caller's cleanup path frees it eventually (see
 * update_usable_mem_fdt()).
 *
 * Returns buffer on success, NULL on error.
 */
static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
{
        u32 new_size;
        u64 *tbuf;

        if ((um_info->idx + cnt) <= um_info->max_entries)
                return um_info->buf;

        new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
        tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
        if (tbuf) {
                um_info->buf = tbuf;
                um_info->size = new_size;
                um_info->max_entries = (um_info->size / sizeof(u64));
        }

        return tbuf;
}
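
/*
 * Bookkeeping note: um_info->idx counts u64 entries (not bytes) while
 * um_info->size is the allocation size in bytes; each successful krealloc()
 * above therefore adds MEM_RANGE_CHUNK_SZ / sizeof(u64) entries of headroom.
 */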

/**
 * add_usable_mem - Add the usable memory ranges within the given memory range
 *                  to the buffer
 * @um_info:        Usable memory buffer and ranges info.
 * @base:           Base address of memory range to look for.
 * @end:            End address of memory range to look for.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
{
        u64 loc_base, loc_end;
        bool add;
        int i;

        for (i = 0; i < um_info->nr_ranges; i++) {
                add = false;
                loc_base = um_info->ranges[i].start;
                loc_end = um_info->ranges[i].end;
                if (loc_base >= base && loc_end <= end)
                        add = true;
                else if (base < loc_end && end > loc_base) {
                        if (loc_base < base)
                                loc_base = base;
                        if (loc_end > end)
                                loc_end = end;
                        add = true;
                }

                if (add) {
                        if (!check_realloc_usable_mem(um_info, 2))
                                return -ENOMEM;

                        um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
                        um_info->buf[um_info->idx++] =
                                        cpu_to_be64(loc_end - loc_base + 1);
                }
        }

        return 0;
}
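
/*
 * Clipping example (hypothetical addresses): with usable range
 * [0x1000, 0x4fff] and a memory node spanning [0x3000, 0x8fff], the overlap
 * branch above emits the clipped tuple (base = 0x3000, size = 0x2000),
 * stored big-endian as the device tree expects.
 */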

/**
 * kdump_setup_usable_lmb - This is a callback function that gets called by
 *                          walk_drmem_lmbs for every LMB to set its
 *                          usable memory ranges.
 * @lmb:                    LMB info.
 * @usm:                    linux,drconf-usable-memory property value.
 * @data:                   Pointer to usable memory buffer and ranges info.
 *
 * Returns 0 on success, negative errno on error.
 */
static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
                                  void *data)
{
        struct umem_info *um_info;
        int tmp_idx, ret;
        u64 base, end;

        /*
         * kdump load isn't supported on kernels already booted with
         * linux,drconf-usable-memory property.
         */
        if (*usm) {
                pr_err("linux,drconf-usable-memory property already exists!\n");
                return -EINVAL;
        }

        um_info = data;
        tmp_idx = um_info->idx;
        if (!check_realloc_usable_mem(um_info, 1))
                return -ENOMEM;

        um_info->idx++;
        base = lmb->base_addr;
        end = base + drmem_lmb_size() - 1;
        ret = add_usable_mem(um_info, base, end);
        if (!ret) {
                /*
                 * Update the no. of ranges added. Two entries (base & size)
                 * for every range added.
                 */
                um_info->buf[tmp_idx] =
                                cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
        }

        return ret;
}
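
/*
 * Property layout produced above: each LMB contributes one u64 range count
 * followed by that many (base, size) pairs, all big-endian. An LMB with two
 * usable ranges is thus encoded as:
 *
 *      <2, base0, size0, base1, size1>
 *
 * The count slot is reserved at tmp_idx before add_usable_mem() runs and is
 * back-filled once the number of emitted pairs is known.
 */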

#define NODE_PATH_LEN           256
/**
 * add_usable_mem_property - Add usable memory property for the given
 *                           memory node.
 * @fdt:                     Flattened device tree for the kdump kernel.
 * @dn:                      Memory node.
 * @um_info:                 Usable memory buffer and ranges info.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_usable_mem_property(void *fdt, struct device_node *dn,
                                   struct umem_info *um_info)
{
        int n_mem_addr_cells, n_mem_size_cells, node;
        char path[NODE_PATH_LEN];
        int i, len, ranges, ret;
        const __be32 *prop;
        u64 base, end;

        of_node_get(dn);

        if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
                pr_err("Buffer (%d) too small for memory node: %pOF\n",
                       NODE_PATH_LEN, dn);
                ret = -EOVERFLOW;
                goto out;
        }
        pr_debug("Memory node path: %s\n", path);

        /* Now that we know the path, find its offset in kdump kernel's fdt */
        node = fdt_path_offset(fdt, path);
        if (node < 0) {
                pr_err("Malformed device tree: error reading %s\n", path);
                ret = -EINVAL;
                goto out;
        }

        /* Get the address & size cells */
        n_mem_addr_cells = of_n_addr_cells(dn);
        n_mem_size_cells = of_n_size_cells(dn);
        pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells,
                 n_mem_size_cells);

        um_info->idx  = 0;
        if (!check_realloc_usable_mem(um_info, 2)) {
                ret = -ENOMEM;
                goto out;
        }

        prop = of_get_property(dn, "reg", &len);
        if (!prop || len <= 0) {
                ret = 0;
                goto out;
        }

        /*
         * "reg" property represents sequence of (addr,size) tuples
         * each representing a memory range.
         */
        ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

        for (i = 0; i < ranges; i++) {
                base = of_read_number(prop, n_mem_addr_cells);
                prop += n_mem_addr_cells;
                end = base + of_read_number(prop, n_mem_size_cells) - 1;
                prop += n_mem_size_cells;

                ret = add_usable_mem(um_info, base, end);
                if (ret)
                        goto out;
        }

        /*
         * No kdump kernel usable memory found in this memory node.
         * Write (0,0) tuple in linux,usable-memory property for
         * this region to be ignored.
         */
        if (um_info->idx == 0) {
                um_info->buf[0] = 0;
                um_info->buf[1] = 0;
                um_info->idx = 2;
        }

        ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
                          (um_info->idx * sizeof(u64)));

out:
        of_node_put(dn);
        return ret;
}

/**
 * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
 *                         and linux,drconf-usable-memory DT properties as
 *                         appropriate to restrict its memory usage.
 * @fdt:                   Flattened device tree for the kdump kernel.
 * @usable_mem:            Usable memory ranges for kdump kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
{
        struct umem_info um_info;
        struct device_node *dn;
        int node, ret = 0;

        if (!usable_mem) {
                pr_err("Usable memory ranges for kdump kernel not found\n");
                return -ENOENT;
        }

        node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
        if (node == -FDT_ERR_NOTFOUND)
                pr_debug("No dynamic reconfiguration memory found\n");
        else if (node < 0) {
                pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
                return -EINVAL;
        }

        um_info.buf  = NULL;
        um_info.size = 0;
        um_info.max_entries = 0;
        um_info.idx  = 0;
        /* Memory ranges to look up */
        um_info.ranges = &(usable_mem->ranges[0]);
        um_info.nr_ranges = usable_mem->nr_ranges;

        dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (dn) {
                ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
                of_node_put(dn);

                if (ret) {
                        pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
                        goto out;
                }

                ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
                                  um_info.buf, (um_info.idx * sizeof(u64)));
                if (ret) {
                        pr_err("Failed to update fdt with linux,drconf-usable-memory property\n");
                        goto out;
                }
        }

        /*
         * Walk through each memory node and set linux,usable-memory property
         * for the corresponding node in kdump kernel's fdt.
         */
        for_each_node_by_type(dn, "memory") {
                ret = add_usable_mem_property(fdt, dn, &um_info);
                if (ret) {
                        pr_err("Failed to set linux,usable-memory property for %s node\n",
                               dn->full_name);
                        /* Drop the iterator's reference before bailing out */
                        of_node_put(dn);
                        goto out;
                }
        }

out:
        kfree(um_info.buf);
        return ret;
}

/**
 * load_backup_segment - Locate a memory hole to place the backup region.
 * @image:               Kexec image.
 * @kbuf:                Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
{
        void *buf;
        int ret;

        /*
         * Setup a source buffer for backup segment.
         *
         * A source buffer has no meaning for backup region as data will
         * be copied from backup source, after crash, in the purgatory.
         * But as load segment code doesn't recognize such segments,
         * setup a dummy source buffer to keep it happy for now.
         */
        buf = vzalloc(BACKUP_SRC_SIZE);
        if (!buf)
                return -ENOMEM;

        kbuf->buffer = buf;
        kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
        kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
        kbuf->top_down = false;

        ret = kexec_add_buffer(kbuf);
        if (ret) {
                vfree(buf);
                return ret;
        }

        image->arch.backup_buf = buf;
        image->arch.backup_start = kbuf->mem;
        return 0;
}
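
/*
 * Note: BACKUP_SRC_START/END/SIZE (asm/crashdump-ppc64.h) describe the first
 * 64K of RAM. The kdump kernel reuses low memory for its own exception
 * vectors, so purgatory copies the crashing kernel's first 64K into the
 * backup region at crash time, letting the vmcore still present the
 * original contents.
 */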

/**
 * update_backup_region_phdr - Update backup region's offset for the core to
 *                             export the region appropriately.
 * @image:                     Kexec image.
 * @ehdr:                      ELF core header.
 *
 * Assumes an exclusive program header is set up for the backup region
 * in the ELF headers.
 *
 * Returns nothing.
 */
static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
{
        Elf64_Phdr *phdr;
        unsigned int i;

        phdr = (Elf64_Phdr *)(ehdr + 1);
        for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
                if (phdr->p_paddr == BACKUP_SRC_START) {
                        phdr->p_offset = image->arch.backup_start;
                        pr_debug("Backup region offset updated to 0x%lx\n",
                                 image->arch.backup_start);
                        return;
                }
        }
}

/**
 * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
 *                           segment needed to load kdump kernel.
 * @image:                   Kexec image.
 * @kbuf:                    Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
{
        struct crash_mem *cmem = NULL;
        unsigned long headers_sz;
        void *headers = NULL;
        int ret;

        ret = get_crash_memory_ranges(&cmem);
        if (ret)
                goto out;

        /* Setup elfcorehdr segment */
        ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
        if (ret) {
                pr_err("Failed to prepare elf headers for the core\n");
                goto out;
        }

        /* Fix the offset for backup region in the ELF header */
        update_backup_region_phdr(image, headers);

        kbuf->buffer = headers;
        kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
        kbuf->bufsz = kbuf->memsz = headers_sz;
        kbuf->top_down = false;

        ret = kexec_add_buffer(kbuf);
        if (ret) {
                vfree(headers);
                goto out;
        }

        image->elf_load_addr = kbuf->mem;
        image->elf_headers_sz = headers_sz;
        image->elf_headers = headers;
out:
        kfree(cmem);
        return ret;
}

/**
 * load_crashdump_segments_ppc64 - Initialize the additional segments needed
 *                                 to load kdump kernel.
 * @image:                         Kexec image.
 * @kbuf:                          Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
int load_crashdump_segments_ppc64(struct kimage *image,
                                  struct kexec_buf *kbuf)
{
        int ret;

        /* Load backup segment - first 64K bytes of the crashing kernel */
        ret = load_backup_segment(image, kbuf);
        if (ret) {
                pr_err("Failed to load backup segment\n");
                return ret;
        }
        pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem);

        /* Load elfcorehdr segment - to export crashing kernel's vmcore */
        ret = load_elfcorehdr_segment(image, kbuf);
        if (ret) {
                pr_err("Failed to load elfcorehdr segment\n");
                return ret;
        }
        pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
                 image->elf_load_addr, kbuf->bufsz, kbuf->memsz);

        return 0;
}

/**
 * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
 *                         variables and call setup_purgatory() to initialize
 *                         common global variables.
 * @image:                 kexec image.
 * @slave_code:            Slave code for the purgatory.
 * @fdt:                   Flattened device tree for the next kernel.
 * @kernel_load_addr:      Address where the kernel is loaded.
 * @fdt_load_addr:         Address where the flattened device tree is loaded.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
                          const void *fdt, unsigned long kernel_load_addr,
                          unsigned long fdt_load_addr)
{
        struct device_node *dn = NULL;
        int ret;

        ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
                              fdt_load_addr);
        if (ret)
                goto out;

        if (image->type == KEXEC_TYPE_CRASH) {
                u32 my_run_at_load = 1;

                /*
                 * Tell relocatable kernel to run at load address
                 * via the word meant for that at 0x5c.
                 */
                ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
                                                     &my_run_at_load,
                                                     sizeof(my_run_at_load),
                                                     false);
                if (ret)
                        goto out;
        }

        /* Tell purgatory where to look for backup region */
        ret = kexec_purgatory_get_set_symbol(image, "backup_start",
                                             &image->arch.backup_start,
                                             sizeof(image->arch.backup_start),
                                             false);
        if (ret)
                goto out;

        /* Setup OPAL base & entry values */
        dn = of_find_node_by_path("/ibm,opal");
        if (dn) {
                u64 val;

                of_property_read_u64(dn, "opal-base-address", &val);
                ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
                                                     sizeof(val), false);
                if (ret)
                        goto out;

                of_property_read_u64(dn, "opal-entry-address", &val);
                ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
                                                     sizeof(val), false);
        }
out:
        if (ret)
                pr_err("Failed to setup purgatory symbols\n");
        of_node_put(dn);
        return ret;
}

/**
 * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
 *                              setup FDT for kexec/kdump kernel.
 * @image:                      kexec image being loaded.
 *
 * Returns the estimated extra size needed for kexec/kdump kernel FDT.
 */
unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
{
        u64 usm_entries;

        if (image->type != KEXEC_TYPE_CRASH)
                return 0;

        /*
         * Guard against a zero LMB size to avoid dividing by zero below.
         * Defensive assumption: without ibm,dynamic-memory there are no
         * drconf usable-memory entries to account for, so no extra space
         * is requested.
         */
        if (!drmem_lmb_size())
                return 0;

        /*
         * For kdump kernel, account for linux,usable-memory and
         * linux,drconf-usable-memory properties. Get an approximate count
         * of usable memory entries and use it for FDT size estimation.
         */
        usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
                       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
        return (unsigned int)(usm_entries * sizeof(u64));
}
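
/*
 * Sizing illustration (hypothetical numbers): with 1 TiB of DRAM, a 256 MiB
 * LMB size and a 4 GiB crashkernel region, usm_entries = 4096 + 2 * 16 =
 * 4128, so roughly 4128 * 8 = 33024 extra bytes of FDT space are requested.
 */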

/**
 * add_node_props - Reads node properties from device node structure and adds
 *                  them to fdt.
 * @fdt:            Flattened device tree of the kernel.
 * @node_offset:    offset of the node to add a property at.
 * @dn:             device node pointer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
{
        int ret = 0;
        struct property *pp;

        if (!dn)
                return -EINVAL;

        for_each_property_of_node(dn, pp) {
                ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
                if (ret < 0) {
                        pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
                        return ret;
                }
        }
        return ret;
}

/**
 * update_cpus_node - Update cpus node of flattened device tree using of_root
 *                    device node.
 * @fdt:              Flattened device tree of the kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_cpus_node(void *fdt)
{
        struct device_node *cpus_node, *dn;
        int cpus_offset, cpus_subnode_offset, ret = 0;

        cpus_offset = fdt_path_offset(fdt, "/cpus");
        if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
                pr_err("Malformed device tree: error reading /cpus node: %s\n",
                       fdt_strerror(cpus_offset));
                return cpus_offset;
        }

        if (cpus_offset > 0) {
                ret = fdt_del_node(fdt, cpus_offset);
                if (ret < 0) {
                        pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
                        return -EINVAL;
                }
        }

        /* Add cpus node to fdt */
        cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
        if (cpus_offset < 0) {
                pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
                return -EINVAL;
        }

        /* Add cpus node properties */
        cpus_node = of_find_node_by_path("/cpus");
        ret = add_node_props(fdt, cpus_offset, cpus_node);
        of_node_put(cpus_node);
        if (ret < 0)
                return ret;

        /* Loop through all subnodes of cpus and add them to fdt */
        for_each_node_by_type(dn, "cpu") {
                cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
                if (cpus_subnode_offset < 0) {
                        pr_err("Unable to add %s subnode: %s\n", dn->full_name,
                               fdt_strerror(cpus_subnode_offset));
                        ret = cpus_subnode_offset;
                        goto out;
                }

                ret = add_node_props(fdt, cpus_subnode_offset, dn);
                if (ret < 0)
                        goto out;
        }
out:
        of_node_put(dn);
        return ret;
}

/**
 * setup_new_fdt_ppc64 - Update the flattened device tree of the kernel
 *                       being loaded.
 * @image:               kexec image being loaded.
 * @fdt:                 Flattened device tree for the next kernel.
 * @initrd_load_addr:    Address where the next initrd will be loaded.
 * @initrd_len:          Size of the next initrd, or 0 if there will be none.
 * @cmdline:             Command line for the next kernel, or NULL if there will
 *                       be none.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
                        unsigned long initrd_load_addr,
                        unsigned long initrd_len, const char *cmdline)
{
        struct crash_mem *umem = NULL, *rmem = NULL;
        int i, nr_ranges, ret;

        /*
         * Restrict memory usage for kdump kernel by setting up
         * usable memory ranges and memory reserve map.
         */
        if (image->type == KEXEC_TYPE_CRASH) {
                ret = get_usable_memory_ranges(&umem);
                if (ret)
                        goto out;

                ret = update_usable_mem_fdt(fdt, umem);
                if (ret) {
                        pr_err("Error setting up usable-memory property for kdump kernel\n");
                        goto out;
                }

                /*
                 * Ensure we don't touch crashed kernel's memory except the
                 * first 64K of RAM, which will be backed up.
                 */
                ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
                                      crashk_res.start - BACKUP_SRC_SIZE);
                if (ret) {
                        pr_err("Error reserving crash memory: %s\n",
                               fdt_strerror(ret));
                        goto out;
                }

                /* Ensure backup region is not used by kdump/capture kernel */
                ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
                                      BACKUP_SRC_SIZE);
                if (ret) {
                        pr_err("Error reserving memory for backup: %s\n",
                               fdt_strerror(ret));
                        goto out;
                }
        }

        /* Update cpus nodes information to account for hotplug CPUs. */
        ret = update_cpus_node(fdt);
        if (ret < 0)
                goto out;

        /* Update memory reserve map */
        ret = get_reserved_memory_ranges(&rmem);
        if (ret)
                goto out;

        nr_ranges = rmem ? rmem->nr_ranges : 0;
        for (i = 0; i < nr_ranges; i++) {
                u64 base, size;

                base = rmem->ranges[i].start;
                size = rmem->ranges[i].end - base + 1;
                ret = fdt_add_mem_rsv(fdt, base, size);
                if (ret) {
                        pr_err("Error updating memory reserve map: %s\n",
                               fdt_strerror(ret));
                        goto out;
                }
        }

out:
        kfree(rmem);
        kfree(umem);
        return ret;
}

/**
 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
 *                              tce-table, reserved-ranges & such (exclude
 *                              memory ranges) as they can't be used for kexec
 *                              segment buffer. Sets kbuf->mem when a suitable
 *                              memory hole is found.
 * @kbuf:                       Buffer contents and memory parameters.
 *
 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
        struct crash_mem **emem;
        u64 buf_min, buf_max;
        int ret;

        /* Look up the exclude ranges list while locating the memory hole */
        emem = &(kbuf->image->arch.exclude_ranges);
        if (!(*emem) || ((*emem)->nr_ranges == 0)) {
                pr_warn("No exclude range list. Using the default locate mem hole method\n");
                return kexec_locate_mem_hole(kbuf);
        }

        buf_min = kbuf->buf_min;
        buf_max = kbuf->buf_max;
        /* Segments for kdump kernel should be within crashkernel region */
        if (kbuf->image->type == KEXEC_TYPE_CRASH) {
                buf_min = (buf_min < crashk_res.start ?
                           crashk_res.start : buf_min);
                buf_max = (buf_max > crashk_res.end ?
                           crashk_res.end : buf_max);
        }

        if (buf_min > buf_max) {
                pr_err("Invalid buffer min and/or max values\n");
                return -EINVAL;
        }

        if (kbuf->top_down)
                ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
                                                     *emem);
        else
                ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
                                                      *emem);

        /* Add the buffer allocated to the exclude list for the next lookup */
        if (!ret) {
                add_mem_range(emem, kbuf->mem, kbuf->memsz);
                sort_memory_ranges(*emem, true);
        } else {
                pr_err("Failed to locate memory buffer of size %lu\n",
                       kbuf->memsz);
        }
        return ret;
}

/**
 * arch_kexec_kernel_image_probe - Does additional handling needed to set up
 *                                 kexec segments.
 * @image:                         kexec image being loaded.
 * @buf:                           Buffer pointing to elf data.
 * @buf_len:                       Length of the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
                                  unsigned long buf_len)
{
        int ret;

        /* Get exclude memory ranges needed for setting up kexec segments */
        ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
        if (ret) {
                pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
                return ret;
        }

        return kexec_image_probe_default(image, buf, buf_len);
}

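/*
 * Note: each free in the cleanup below matches its allocator: exclude_ranges
 * is kmalloc-based (kfree), backup_buf comes from vzalloc() in
 * load_backup_segment() (vfree), elf_headers is vmalloc-based (vfree), and
 * the fdt is assumed to be kvmalloc'd by the loader (kvfree).
 */
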
/**
 * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
 *                                      while loading the image.
 * @image:                              kexec image being loaded.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kimage_file_post_load_cleanup(struct kimage *image)
{
        kfree(image->arch.exclude_ranges);
        image->arch.exclude_ranges = NULL;

        vfree(image->arch.backup_buf);
        image->arch.backup_buf = NULL;

        vfree(image->elf_headers);
        image->elf_headers = NULL;
        image->elf_headers_sz = 0;

        kvfree(image->arch.fdt);
        image->arch.fdt = NULL;

        return kexec_image_post_load_cleanup_default(image);
}