linux/arch/s390/kernel/setup.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  S390 version
   4 *    Copyright IBM Corp. 1999, 2012
   5 *    Author(s): Hartmut Penner (hp@de.ibm.com),
   6 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
   7 *
   8 *  Derived from "arch/i386/kernel/setup.c"
   9 *    Copyright (C) 1995, Linus Torvalds
  10 */
  11
  12/*
  13 * This file handles the architecture-dependent parts of initialization
  14 */
  15
  16#define KMSG_COMPONENT "setup"
  17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18
  19#include <linux/errno.h>
  20#include <linux/export.h>
  21#include <linux/sched.h>
  22#include <linux/sched/task.h>
  23#include <linux/cpu.h>
  24#include <linux/kernel.h>
  25#include <linux/memblock.h>
  26#include <linux/mm.h>
  27#include <linux/stddef.h>
  28#include <linux/unistd.h>
  29#include <linux/ptrace.h>
  30#include <linux/random.h>
  31#include <linux/user.h>
  32#include <linux/tty.h>
  33#include <linux/ioport.h>
  34#include <linux/delay.h>
  35#include <linux/init.h>
  36#include <linux/initrd.h>
  37#include <linux/root_dev.h>
  38#include <linux/console.h>
  39#include <linux/kernel_stat.h>
  40#include <linux/dma-contiguous.h>
  41#include <linux/device.h>
  42#include <linux/notifier.h>
  43#include <linux/pfn.h>
  44#include <linux/ctype.h>
  45#include <linux/reboot.h>
  46#include <linux/topology.h>
  47#include <linux/kexec.h>
  48#include <linux/crash_dump.h>
  49#include <linux/memory.h>
  50#include <linux/compat.h>
  51#include <linux/start_kernel.h>
  52
  53#include <asm/boot_data.h>
  54#include <asm/ipl.h>
  55#include <asm/facility.h>
  56#include <asm/smp.h>
  57#include <asm/mmu_context.h>
  58#include <asm/cpcmd.h>
  59#include <asm/lowcore.h>
  60#include <asm/nmi.h>
  61#include <asm/irq.h>
  62#include <asm/page.h>
  63#include <asm/ptrace.h>
  64#include <asm/sections.h>
  65#include <asm/ebcdic.h>
  66#include <asm/diag.h>
  67#include <asm/os_info.h>
  68#include <asm/sclp.h>
  69#include <asm/stacktrace.h>
  70#include <asm/sysinfo.h>
  71#include <asm/numa.h>
  72#include <asm/alternative.h>
  73#include <asm/nospec-branch.h>
  74#include <asm/mem_detect.h>
  75#include <asm/uv.h>
  76#include "entry.h"
  77
  78/*
  79 * Machine setup..
  80 */
  81unsigned int console_mode = 0;
  82EXPORT_SYMBOL(console_mode);
  83
  84unsigned int console_devno = -1;
  85EXPORT_SYMBOL(console_devno);
  86
  87unsigned int console_irq = -1;
  88EXPORT_SYMBOL(console_irq);
  89
  90unsigned long elf_hwcap __read_mostly = 0;
  91char elf_platform[ELF_PLATFORM_SIZE];
  92
  93unsigned long int_hwcap = 0;
  94
/*
 * Variables declared through __bootdata()/__bootdata_preserved().
 * NOTE(review): presumably these are filled in by the early boot stage
 * (see <asm/boot_data.h>) before the code in this file runs - confirm.
 */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
#endif

/*
 * memory_end_set is checked by reserve_memory_end(); memory_end,
 * vmalloc_size and max_physmem_end are inputs of setup_memory_end();
 * mem_detect is read by get_mem_info_source().
 */
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
unsigned long __bootdata(vmalloc_size);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect);

/* __sdma/__edma delimit the range that reserve_kernel() reserves. */
struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
unsigned long __bootdata_preserved(__swsusp_reset_dma);
unsigned long __bootdata_preserved(__stext_dma);
unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
 114
/*
 * Kernel virtual address space layout. VMALLOC_START, VMALLOC_END,
 * vmemmap, MODULES_VADDR and MODULES_END are all computed in
 * setup_memory_end().
 */
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/*
 * An array with a pointer to the lowcore of every CPU.
 * Entry 0 is set in setup_lowcore_dat_off().
 */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
 130
 131/*
 132 * This is set up by the setup-routine at boot-time
 133 * for S390 need to find out, what we have to setup
 134 * using address 0x10400 ...
 135 */
 136
 137#include <asm/setup.h>
 138
 139/*
 140 * condev= and conmode= setup parameter.
 141 */
 142
 143static int __init condev_setup(char *str)
 144{
 145        int vdev;
 146
 147        vdev = simple_strtoul(str, &str, 0);
 148        if (vdev >= 0 && vdev < 65536) {
 149                console_devno = vdev;
 150                console_irq = -1;
 151        }
 152        return 1;
 153}
 154
 155__setup("condev=", condev_setup);
 156
 157static void __init set_preferred_console(void)
 158{
 159        if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
 160                add_preferred_console("ttyS", 0, NULL);
 161        else if (CONSOLE_IS_3270)
 162                add_preferred_console("tty3270", 0, NULL);
 163        else if (CONSOLE_IS_VT220)
 164                add_preferred_console("ttyS", 1, NULL);
 165        else if (CONSOLE_IS_HVC)
 166                add_preferred_console("hvc", 0, NULL);
 167}
 168
 169static int __init conmode_setup(char *str)
 170{
 171#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 172        if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
 173                SET_CONSOLE_SCLP;
 174#endif
 175#if defined(CONFIG_TN3215_CONSOLE)
 176        if (!strcmp(str, "3215"))
 177                SET_CONSOLE_3215;
 178#endif
 179#if defined(CONFIG_TN3270_CONSOLE)
 180        if (!strcmp(str, "3270"))
 181                SET_CONSOLE_3270;
 182#endif
 183        set_preferred_console();
 184        return 1;
 185}
 186
 187__setup("conmode=", conmode_setup);
 188
 189static void __init conmode_default(void)
 190{
 191        char query_buffer[1024];
 192        char *ptr;
 193
 194        if (MACHINE_IS_VM) {
 195                cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
 196                console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
 197                ptr = strstr(query_buffer, "SUBCHANNEL =");
 198                console_irq = simple_strtoul(ptr + 13, NULL, 16);
 199                cpcmd("QUERY TERM", query_buffer, 1024, NULL);
 200                ptr = strstr(query_buffer, "CONMODE");
 201                /*
 202                 * Set the conmode to 3215 so that the device recognition 
 203                 * will set the cu_type of the console to 3215. If the
 204                 * conmode is 3270 and we don't set it back then both
 205                 * 3215 and the 3270 driver will try to access the console
 206                 * device (3215 as console and 3270 as normal tty).
 207                 */
 208                cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
 209                if (ptr == NULL) {
 210#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 211                        SET_CONSOLE_SCLP;
 212#endif
 213                        return;
 214                }
 215                if (str_has_prefix(ptr + 8, "3270")) {
 216#if defined(CONFIG_TN3270_CONSOLE)
 217                        SET_CONSOLE_3270;
 218#elif defined(CONFIG_TN3215_CONSOLE)
 219                        SET_CONSOLE_3215;
 220#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 221                        SET_CONSOLE_SCLP;
 222#endif
 223                } else if (str_has_prefix(ptr + 8, "3215")) {
 224#if defined(CONFIG_TN3215_CONSOLE)
 225                        SET_CONSOLE_3215;
 226#elif defined(CONFIG_TN3270_CONSOLE)
 227                        SET_CONSOLE_3270;
 228#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 229                        SET_CONSOLE_SCLP;
 230#endif
 231                }
 232        } else if (MACHINE_IS_KVM) {
 233                if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
 234                        SET_CONSOLE_VT220;
 235                else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
 236                        SET_CONSOLE_SCLP;
 237                else
 238                        SET_CONSOLE_HVC;
 239        } else {
 240#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 241                SET_CONSOLE_SCLP;
 242#endif
 243        }
 244        if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
 245                conswitchp = &dummy_con;
 246}
 247
 248#ifdef CONFIG_CRASH_DUMP
 249static void __init setup_zfcpdump(void)
 250{
 251        if (ipl_info.type != IPL_TYPE_FCP_DUMP)
 252                return;
 253        if (OLDMEM_BASE)
 254                return;
 255        strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
 256        console_loglevel = 2;
 257}
 258#else
 259static inline void setup_zfcpdump(void) {}
 260#endif /* CONFIG_CRASH_DUMP */
 261
 262 /*
 263 * Reboot, halt and power_off stubs. They just call _machine_restart,
 264 * _machine_halt or _machine_power_off. 
 265 */
 266
 267void machine_restart(char *command)
 268{
 269        if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
 270                /*
 271                 * Only unblank the console if we are called in enabled
 272                 * context or a bust_spinlocks cleared the way for us.
 273                 */
 274                console_unblank();
 275        _machine_restart(command);
 276}
 277
 278void machine_halt(void)
 279{
 280        if (!in_interrupt() || oops_in_progress)
 281                /*
 282                 * Only unblank the console if we are called in enabled
 283                 * context or a bust_spinlocks cleared the way for us.
 284                 */
 285                console_unblank();
 286        _machine_halt();
 287}
 288
 289void machine_power_off(void)
 290{
 291        if (!in_interrupt() || oops_in_progress)
 292                /*
 293                 * Only unblank the console if we are called in enabled
 294                 * context or a bust_spinlocks cleared the way for us.
 295                 */
 296                console_unblank();
 297        _machine_power_off();
 298}
 299
/*
 * Power off hook; by default it points at machine_power_off().
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

/*
 * Restart stack, placed explicitly in the .data section. It is allocated
 * and advanced by STACK_INIT_OFFSET in setup_lowcore_dat_off().
 */
void *restart_stack __section(.data);
 307
 308unsigned long stack_alloc(void)
 309{
 310#ifdef CONFIG_VMAP_STACK
 311        return (unsigned long)
 312                __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
 313                                     VMALLOC_START, VMALLOC_END,
 314                                     THREADINFO_GFP,
 315                                     PAGE_KERNEL, 0, NUMA_NO_NODE,
 316                                     __builtin_return_address(0));
 317#else
 318        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 319#endif
 320}
 321
 322void stack_free(unsigned long stack)
 323{
 324#ifdef CONFIG_VMAP_STACK
 325        vfree((void *) stack);
 326#else
 327        free_pages(stack, THREAD_SIZE_ORDER);
 328#endif
 329}
 330
 331int __init arch_early_irq_init(void)
 332{
 333        unsigned long stack;
 334
 335        stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 336        if (!stack)
 337                panic("Couldn't allocate async stack");
 338        S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
 339        return 0;
 340}
 341
 342static int __init async_stack_realloc(void)
 343{
 344        unsigned long old, new;
 345
 346        old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
 347        new = stack_alloc();
 348        if (!new)
 349                panic("Couldn't allocate async stack");
 350        S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
 351        free_pages(old, THREAD_SIZE_ORDER);
 352        return 0;
 353}
 354early_initcall(async_stack_realloc);
 355
 356void __init arch_call_rest_init(void)
 357{
 358        unsigned long stack;
 359
 360        stack = stack_alloc();
 361        if (!stack)
 362                panic("Couldn't allocate kernel stack");
 363        current->stack = (void *) stack;
 364#ifdef CONFIG_VMAP_STACK
 365        current->stack_vm_area = (void *) stack;
 366#endif
 367        set_task_stack_end_magic(current);
 368        stack += STACK_INIT_OFFSET;
 369        S390_lowcore.kernel_stack = stack;
 370        CALL_ON_STACK_NORETURN(rest_init, stack);
 371}
 372
/*
 * Allocate and populate the lowcore of the boot CPU.
 *
 * The new lowcore gets its new-PSW masks and handler addresses, inherits
 * machine flags, facility lists and timer values from the currently
 * active lowcore (S390_lowcore), and receives the global restart stack.
 * At the end the prefix is set to the new lowcore and it is recorded as
 * lowcore_ptr[0]. Panics if either allocation fails.
 */
static void __init setup_lowcore_dat_off(void)
{
        struct lowcore *lc;

        /*
         * Setup lowcore for boot cpu
         */
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
        /* size and alignment are both sizeof(*lc) */
        lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
        if (!lc)
                panic("%s: Failed to allocate %zu bytes align=%zx\n",
                      __func__, sizeof(*lc), sizeof(*lc));

        /* new PSWs: none of the masks has PSW_MASK_DAT yet, see setup_lowcore_dat_on() */
        lc->restart_psw.mask = PSW_KERNEL_BITS;
        lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
        lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = clock_comparator_max;
        /* nodat stack: top of init_thread_union minus frame overhead and pt_regs */
        lc->nodat_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
        lc->current_task = (unsigned long)&init_task;
        lc->lpp = LPP_MAGIC;
        /* carry over state already present in the active lowcore */
        lc->machine_flags = S390_lowcore.machine_flags;
        lc->preempt_count = S390_lowcore.preempt_count;
        lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
        memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
               sizeof(lc->stfle_fac_list));
        memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
               sizeof(lc->alt_stfle_fac_list));
        nmi_alloc_boot_cpu(lc);
        vdso_alloc_boot_cpu(lc);
        lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
        lc->async_enter_timer = S390_lowcore.async_enter_timer;
        lc->exit_timer = S390_lowcore.exit_timer;
        lc->user_timer = S390_lowcore.user_timer;
        lc->system_timer = S390_lowcore.system_timer;
        lc->steal_timer = S390_lowcore.steal_timer;
        lc->last_update_timer = S390_lowcore.last_update_timer;
        lc->last_update_clock = S390_lowcore.last_update_clock;

        /*
         * Allocate the global restart stack which is the same for
         * all CPUs in case *one* of them does a PSW restart.
         */
        restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
        if (!restart_stack)
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, THREAD_SIZE, THREAD_SIZE);
        restart_stack += STACK_INIT_OFFSET;

        /*
         * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
         * restart data to the absolute zero lowcore. This is necessary if
         * PSW restart is done on an offline CPU that has lowcore zero.
         */
        lc->restart_stack = (unsigned long) restart_stack;
        lc->restart_fn = (unsigned long) do_restart;
        lc->restart_data = 0;
        lc->restart_source = -1UL;

        /* Setup absolute zero lowcore */
        mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
        mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
        mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
        mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
        mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

        /* spinlock setup for CPU 0 */
        lc->spinlock_lockval = arch_spin_lockval(0);
        lc->spinlock_index = 0;
        arch_spin_lock_setup(0);
        lc->br_r1_trampoline = 0x07f1;  /* br %r1 */

        /* NOTE(review): presumably activates lc as this CPU's lowcore - confirm set_prefix() */
        set_prefix((u32)(unsigned long) lc);
        lowcore_ptr[0] = lc;
}
 457
/*
 * Add PSW_MASK_DAT to the external, svc, program and I/O new PSW masks
 * of the active lowcore. Bit 28 of control register 0 is cleared for the
 * duration of the update and set again afterwards.
 * NOTE(review): bit 28 is presumably low-address protection, which would
 * otherwise prevent these stores - confirm.
 */
static void __init setup_lowcore_dat_on(void)
{
        __ctl_clear_bit(0, 28);
        S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
        __ctl_set_bit(0, 28);
}
 467
/*
 * Resources describing the kernel image sections. Their start/end
 * addresses are filled in by setup_resources().
 */
static struct resource code_resource = {
        .name  = "Kernel code",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
        .name = "Kernel data",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
        .name = "Kernel bss",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

/* Table walked by setup_resources() for every memory region. */
static struct resource __initdata *standard_resources[] = {
        &code_resource,
        &data_resource,
        &bss_resource,
};
 488
/*
 * Register every memblock memory region as a "System RAM" resource below
 * iomem_resource and nest the kernel code/data/bss resources inside the
 * region(s) that contain them. With CONFIG_CRASH_DUMP the crash kernel
 * area is re-added afterwards as a separate resource. Panics if a
 * resource structure cannot be allocated.
 */
static void __init setup_resources(void)
{
        struct resource *res, *std_res, *sub_res;
        struct memblock_region *reg;
        int j;

        /* section boundaries come from the linker symbols; ends are inclusive */
        code_resource.start = (unsigned long) _text;
        code_resource.end = (unsigned long) _etext - 1;
        data_resource.start = (unsigned long) _etext;
        data_resource.end = (unsigned long) _edata - 1;
        bss_resource.start = (unsigned long) __bss_start;
        bss_resource.end = (unsigned long) __bss_stop - 1;

        for_each_memblock(memory, reg) {
                res = memblock_alloc(sizeof(*res), 8);
                if (!res)
                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                              __func__, sizeof(*res), 8);
                res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

                res->name = "System RAM";
                res->start = reg->base;
                res->end = reg->base + reg->size - 1;
                request_resource(&iomem_resource, res);

                for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
                        std_res = standard_resources[j];
                        /* only handle resources that start inside this region */
                        if (std_res->start < res->start ||
                            std_res->start > res->end)
                                continue;
                        if (std_res->end > res->end) {
                                /*
                                 * The resource extends past this region:
                                 * register a copy clipped to the region
                                 * and move the start of the original
                                 * behind the region, so the remainder can
                                 * match a later region.
                                 */
                                sub_res = memblock_alloc(sizeof(*sub_res), 8);
                                if (!sub_res)
                                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                                              __func__, sizeof(*sub_res), 8);
                                *sub_res = *std_res;
                                sub_res->end = res->end;
                                std_res->start = res->end + 1;
                                request_resource(res, sub_res);
                        } else {
                                request_resource(res, std_res);
                        }
                }
        }
#ifdef CONFIG_CRASH_DUMP
        /*
         * Re-add removed crash kernel memory as reserved memory. This makes
         * sure it will be mapped with the identity mapping and struct pages
         * will be created, so it can be resized later on.
         * However add it later since the crash kernel resource should not be
         * part of the System RAM resource.
         */
        if (crashk_res.end) {
                memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
                memblock_reserve(crashk_res.start, resource_size(&crashk_res));
                insert_resource(&iomem_resource, &crashk_res);
        }
#endif
}
 548
/*
 * Compute the kernel virtual address space layout and the final
 * memory_end.
 *
 * Sets MODULES_END/MODULES_VADDR, VMALLOC_END/VMALLOC_START and vmemmap
 * from the top of the address space downwards, clamps memory_end to the
 * vmemmap start (and to the KASAN shadow start with CONFIG_KASAN),
 * updates max_pfn/max_low_pfn and removes all memblock memory from
 * memory_end upward.
 */
static void __init setup_memory_end(void)
{
        unsigned long vmax, tmp;

        /* Choose kernel address space layout: 3 or 4 levels. */
        if (IS_ENABLED(CONFIG_KASAN)) {
                vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
                           ? _REGION1_SIZE
                           : _REGION2_SIZE;
        } else {
                /* space needed for the 1:1 mapping plus one struct page per page */
                tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
                tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
                if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
                        vmax = _REGION2_SIZE; /* 3-level kernel page table */
                else
                        vmax = _REGION1_SIZE; /* 4-level kernel page table */
        }

        /* module area is at the end of the kernel address space. */
        MODULES_END = vmax;
        MODULES_VADDR = MODULES_END - MODULES_LEN;
        VMALLOC_END = MODULES_VADDR;
        VMALLOC_START = VMALLOC_END - vmalloc_size;

        /* Split remaining virtual space between 1:1 mapping & vmemmap array */
        tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
        /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
        tmp = SECTION_ALIGN_UP(tmp);
        tmp = VMALLOC_START - tmp * sizeof(struct page);
        tmp &= ~((vmax >> 11) - 1);     /* align to page table level */
        tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
        vmemmap = (struct page *) tmp;

        /* Take care that memory_end is set and <= vmemmap */
        memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN
        /* fit in kasan shadow memory region between 1:1 and vmemmap */
        memory_end = min(memory_end, KASAN_SHADOW_START);
        vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif
        max_pfn = max_low_pfn = PFN_DOWN(memory_end);
        /* drop everything from memory_end upward from memblock */
        memblock_remove(memory_end, ULONG_MAX);

        pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
}
 594
 595#ifdef CONFIG_CRASH_DUMP
 596
 597/*
 598 * When kdump is enabled, we have to ensure that no memory from
 599 * the area [0 - crashkernel memory size] and
 600 * [crashk_res.start - crashk_res.end] is set offline.
 601 */
 602static int kdump_mem_notifier(struct notifier_block *nb,
 603                              unsigned long action, void *data)
 604{
 605        struct memory_notify *arg = data;
 606
 607        if (action != MEM_GOING_OFFLINE)
 608                return NOTIFY_OK;
 609        if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
 610                return NOTIFY_BAD;
 611        if (arg->start_pfn > PFN_DOWN(crashk_res.end))
 612                return NOTIFY_OK;
 613        if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
 614                return NOTIFY_OK;
 615        return NOTIFY_BAD;
 616}
 617
 618static struct notifier_block kdump_mem_nb = {
 619        .notifier_call = kdump_mem_notifier,
 620};
 621
 622#endif
 623
 624/*
 625 * Make sure that the area behind memory_end is protected
 626 */
 627static void reserve_memory_end(void)
 628{
 629        if (memory_end_set)
 630                memblock_reserve(memory_end, ULONG_MAX);
 631}
 632
 633/*
 634 * Make sure that oldmem, where the dump is stored, is protected
 635 */
 636static void reserve_oldmem(void)
 637{
 638#ifdef CONFIG_CRASH_DUMP
 639        if (OLDMEM_BASE)
 640                /* Forget all memory above the running kdump system */
 641                memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
 642#endif
 643}
 644
 645/*
 646 * Make sure that oldmem, where the dump is stored, is protected
 647 */
 648static void remove_oldmem(void)
 649{
 650#ifdef CONFIG_CRASH_DUMP
 651        if (OLDMEM_BASE)
 652                /* Forget all memory above the running kdump system */
 653                memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
 654#endif
 655}
 656
 657/*
 658 * Reserve memory for kdump kernel to be loaded with kexec
 659 */
/*
 * Parse the crashkernel= option from boot_command_line and set aside the
 * requested memory: the area is removed from memblock, recorded in
 * crashk_res and announced via os_info_crashkernel_add(). Does nothing
 * without CONFIG_CRASH_DUMP, on a parse error or when the size is 0.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
        unsigned long long crash_base, crash_size;
        phys_addr_t low, high;
        int rc;

        rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
                               &crash_base);

        /* both values are rounded up to KEXEC_CRASH_MEM_ALIGN */
        crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
        crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
        if (rc || crash_size == 0)
                return;

        if (memblock.memory.regions[0].size < crash_size) {
                pr_info("crashkernel reservation failed: %s\n",
                        "first memory chunk must be at least crashkernel size");
                return;
        }

        /* without an explicit base, try OLDMEM_BASE first */
        low = crash_base ?: OLDMEM_BASE;
        high = low + crash_size;
        if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
                /* The crashkernel fits into OLDMEM, reuse OLDMEM */
                crash_base = low;
        } else {
                /* Find suitable area in free memory */
                /* lowest acceptable base: the larger of crash_size and sclp.hsa_size */
                low = max_t(unsigned long, crash_size, sclp.hsa_size);
                high = crash_base ? crash_base + crash_size : ULONG_MAX;

                if (crash_base && crash_base < low) {
                        pr_info("crashkernel reservation failed: %s\n",
                                "crash_base too low");
                        return;
                }
                low = crash_base ?: low;
                crash_base = memblock_find_in_range(low, high, crash_size,
                                                    KEXEC_CRASH_MEM_ALIGN);
        }

        if (!crash_base) {
                pr_info("crashkernel reservation failed: %s\n",
                        "no suitable area found");
                return;
        }

        /* give up silently if the offline-protection notifier cannot be registered */
        if (register_memory_notifier(&kdump_mem_nb))
                return;

        if (!OLDMEM_BASE && MACHINE_IS_VM)
                diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
        /* setup_resources() re-adds this range later as reserved memory */
        memblock_remove(crash_base, crash_size);
        pr_info("Reserving %lluMB of memory at %lluMB "
                "for crashkernel (System RAM: %luMB)\n",
                crash_size >> 20, crash_base >> 20,
                (unsigned long)memblock.memory.total_size >> 20);
        os_info_crashkernel_add(crash_base, crash_size);
#endif
}
 722
 723/*
 724 * Reserve the initrd from being used by memblock
 725 */
 726static void __init reserve_initrd(void)
 727{
 728#ifdef CONFIG_BLK_DEV_INITRD
 729        if (!INITRD_START || !INITRD_SIZE)
 730                return;
 731        initrd_start = INITRD_START;
 732        initrd_end = initrd_start + INITRD_SIZE;
 733        memblock_reserve(INITRD_START, INITRD_SIZE);
 734#endif
 735}
 736
 737/*
 738 * Reserve the memory area used to pass the certificate lists
 739 */
 740static void __init reserve_certificate_list(void)
 741{
 742        if (ipl_cert_list_addr)
 743                memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
 744}
 745
 746static void __init reserve_mem_detect_info(void)
 747{
 748        unsigned long start, size;
 749
 750        get_mem_detect_reserved(&start, &size);
 751        if (size)
 752                memblock_reserve(start, size);
 753}
 754
 755static void __init free_mem_detect_info(void)
 756{
 757        unsigned long start, size;
 758
 759        get_mem_detect_reserved(&start, &size);
 760        if (size)
 761                memblock_free(start, size);
 762}
 763
 764static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
 765{
 766        memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
 767                     start, start + size - 1);
 768        memblock_add_range(&memblock.memory, start, size, 0, 0);
 769        memblock_add_range(&memblock.physmem, start, size, 0, 0);
 770}
 771
 772static const char * __init get_mem_info_source(void)
 773{
 774        switch (mem_detect.info_source) {
 775        case MEM_DETECT_SCLP_STOR_INFO:
 776                return "sclp storage info";
 777        case MEM_DETECT_DIAG260:
 778                return "diag260";
 779        case MEM_DETECT_SCLP_READ_INFO:
 780                return "sclp read info";
 781        case MEM_DETECT_BIN_SEARCH:
 782                return "binary search";
 783        }
 784        return "none";
 785}
 786
 787static void __init memblock_add_mem_detect_info(void)
 788{
 789        unsigned long start, end;
 790        int i;
 791
 792        memblock_dbg("physmem info source: %s (%hhd)\n",
 793                     get_mem_info_source(), mem_detect.info_source);
 794        /* keep memblock lists close to the kernel */
 795        memblock_set_bottom_up(true);
 796        for_each_mem_detect_block(i, &start, &end)
 797                memblock_physmem_add(start, end - start);
 798        memblock_set_bottom_up(false);
 799        memblock_dump_all();
 800}
 801
 802/*
 803 * Check for initrd being in usable memory
 804 */
 805static void __init check_initrd(void)
 806{
 807#ifdef CONFIG_BLK_DEV_INITRD
 808        if (INITRD_START && INITRD_SIZE &&
 809            !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
 810                pr_err("The initial RAM disk does not fit into the memory\n");
 811                memblock_free(INITRD_START, INITRD_SIZE);
 812                initrd_start = initrd_end = 0;
 813        }
 814#endif
 815}
 816
 817/*
 818 * Reserve memory used for lowcore/command line/kernel image.
 819 */
 820static void __init reserve_kernel(void)
 821{
 822        unsigned long start_pfn = PFN_UP(__pa(_end));
 823
 824        memblock_reserve(0, HEAD_END);
 825        memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
 826                         - (unsigned long)_stext);
 827        memblock_reserve(__sdma, __edma - __sdma);
 828}
 829
 830static void __init setup_memory(void)
 831{
 832        struct memblock_region *reg;
 833
 834        /*
 835         * Init storage key for present memory
 836         */
 837        for_each_memblock(memory, reg) {
 838                storage_key_init_range(reg->base, reg->base + reg->size);
 839        }
 840        psw_set_key(PAGE_DEFAULT_KEY);
 841
 842        /* Only cosmetics */
 843        memblock_enforce_memory_limit(memblock_end_of_DRAM());
 844}
 845
 846/*
 847 * Setup hardware capabilities.
 848 */
 849static int __init setup_hwcaps(void)
 850{
 851        static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 852        struct cpuid cpu_id;
 853        int i;
 854
 855        /*
 856         * The store facility list bits numbers as found in the principles
 857         * of operation are numbered with bit 1UL<<31 as number 0 to
 858         * bit 1UL<<0 as number 31.
 859         *   Bit 0: instructions named N3, "backported" to esa-mode
 860         *   Bit 2: z/Architecture mode is active
 861         *   Bit 7: the store-facility-list-extended facility is installed
 862         *   Bit 17: the message-security assist is installed
 863         *   Bit 19: the long-displacement facility is installed
 864         *   Bit 21: the extended-immediate facility is installed
 865         *   Bit 22: extended-translation facility 3 is installed
 866         *   Bit 30: extended-translation facility 3 enhancement facility
 867         * These get translated to:
 868         *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
 869         *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
 870         *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
 871         *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
 872         */
 873        for (i = 0; i < 6; i++)
 874                if (test_facility(stfl_bits[i]))
 875                        elf_hwcap |= 1UL << i;
 876
 877        if (test_facility(22) && test_facility(30))
 878                elf_hwcap |= HWCAP_S390_ETF3EH;
 879
 880        /*
 881         * Check for additional facilities with store-facility-list-extended.
 882         * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
 883         * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
 884         * as stored by stfl, bits 32-xxx contain additional facilities.
 885         * How many facility words are stored depends on the number of
 886         * doublewords passed to the instruction. The additional facilities
 887         * are:
 888         *   Bit 42: decimal floating point facility is installed
 889         *   Bit 44: perform floating point operation facility is installed
 890         * translated to:
 891         *   HWCAP_S390_DFP bit 6 (42 && 44).
 892         */
 893        if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
 894                elf_hwcap |= HWCAP_S390_DFP;
 895
 896        /*
 897         * Huge page support HWCAP_S390_HPAGE is bit 7.
 898         */
 899        if (MACHINE_HAS_EDAT1)
 900                elf_hwcap |= HWCAP_S390_HPAGE;
 901
 902        /*
 903         * 64-bit register support for 31-bit processes
 904         * HWCAP_S390_HIGH_GPRS is bit 9.
 905         */
 906        elf_hwcap |= HWCAP_S390_HIGH_GPRS;
 907
 908        /*
 909         * Transactional execution support HWCAP_S390_TE is bit 10.
 910         */
 911        if (MACHINE_HAS_TE)
 912                elf_hwcap |= HWCAP_S390_TE;
 913
 914        /*
 915         * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
 916         * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
 917         * instead of facility bit 129.
 918         */
 919        if (MACHINE_HAS_VX) {
 920                elf_hwcap |= HWCAP_S390_VXRS;
 921                if (test_facility(134))
 922                        elf_hwcap |= HWCAP_S390_VXRS_EXT;
 923                if (test_facility(135))
 924                        elf_hwcap |= HWCAP_S390_VXRS_BCD;
 925                if (test_facility(148))
 926                        elf_hwcap |= HWCAP_S390_VXRS_EXT2;
 927                if (test_facility(152))
 928                        elf_hwcap |= HWCAP_S390_VXRS_PDE;
 929        }
 930        if (test_facility(150))
 931                elf_hwcap |= HWCAP_S390_SORT;
 932        if (test_facility(151))
 933                elf_hwcap |= HWCAP_S390_DFLT;
 934
 935        /*
 936         * Guarded storage support HWCAP_S390_GS is bit 12.
 937         */
 938        if (MACHINE_HAS_GS)
 939                elf_hwcap |= HWCAP_S390_GS;
 940
 941        get_cpu_id(&cpu_id);
 942        add_device_randomness(&cpu_id, sizeof(cpu_id));
 943        switch (cpu_id.machine) {
 944        case 0x2064:
 945        case 0x2066:
 946        default:        /* Use "z900" as default for 64 bit kernels. */
 947                strcpy(elf_platform, "z900");
 948                break;
 949        case 0x2084:
 950        case 0x2086:
 951                strcpy(elf_platform, "z990");
 952                break;
 953        case 0x2094:
 954        case 0x2096:
 955                strcpy(elf_platform, "z9-109");
 956                break;
 957        case 0x2097:
 958        case 0x2098:
 959                strcpy(elf_platform, "z10");
 960                break;
 961        case 0x2817:
 962        case 0x2818:
 963                strcpy(elf_platform, "z196");
 964                break;
 965        case 0x2827:
 966        case 0x2828:
 967                strcpy(elf_platform, "zEC12");
 968                break;
 969        case 0x2964:
 970        case 0x2965:
 971                strcpy(elf_platform, "z13");
 972                break;
 973        case 0x3906:
 974        case 0x3907:
 975                strcpy(elf_platform, "z14");
 976                break;
 977        case 0x8561:
 978        case 0x8562:
 979                strcpy(elf_platform, "z15");
 980                break;
 981        }
 982
 983        /*
 984         * Virtualization support HWCAP_INT_SIE is bit 0.
 985         */
 986        if (sclp.has_sief2)
 987                int_hwcap |= HWCAP_INT_SIE;
 988
 989        return 0;
 990}
 991arch_initcall(setup_hwcaps);
 992
 993/*
 994 * Add system information as device randomness
 995 */
 996static void __init setup_randomness(void)
 997{
 998        struct sysinfo_3_2_2 *vmms;
 999
1000        vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
1001                                                            PAGE_SIZE);
1002        if (!vmms)
1003                panic("Failed to allocate memory for sysinfo structure\n");
1004
1005        if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
1006                add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
1007        memblock_free((unsigned long) vmms, PAGE_SIZE);
1008}
1009
1010/*
1011 * Find the correct size for the task_struct. This depends on
1012 * the size of the struct fpu at the end of the thread_struct
1013 * which is embedded in the task_struct.
1014 */
1015static void __init setup_task_size(void)
1016{
1017        int task_size = sizeof(struct task_struct);
1018
1019        if (!MACHINE_HAS_VX) {
1020                task_size -= sizeof(__vector128) * __NUM_VXRS;
1021                task_size += sizeof(freg_t) * __NUM_FPRS;
1022        }
1023        arch_task_struct_size = task_size;
1024}
1025
1026/*
1027 * Issue diagnose 318 to set the control program name and
1028 * version codes.
1029 */
1030static void __init setup_control_program_code(void)
1031{
1032        union diag318_info diag318_info = {
1033                .cpnc = CPNC_LINUX,
1034                .cpvc_linux = 0,
1035                .cpvc_distro = {0},
1036        };
1037
1038        if (!sclp.has_diag318)
1039                return;
1040
1041        diag_stat_inc(DIAG_STAT_X318);
1042        asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
1043}
1044
1045/*
1046 * Print the component list from the IPL report
1047 */
1048static void __init log_component_list(void)
1049{
1050        struct ipl_rb_component_entry *ptr, *end;
1051        char *str;
1052
1053        if (!early_ipl_comp_list_addr)
1054                return;
1055        if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
1056                pr_info("Linux is running with Secure-IPL enabled\n");
1057        else
1058                pr_info("Linux is running with Secure-IPL disabled\n");
1059        ptr = (void *) early_ipl_comp_list_addr;
1060        end = (void *) ptr + early_ipl_comp_list_size;
1061        pr_info("The IPL report contains the following components:\n");
1062        while (ptr < end) {
1063                if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
1064                        if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
1065                                str = "signed, verified";
1066                        else
1067                                str = "signed, verification failed";
1068                } else {
1069                        str = "not signed";
1070                }
1071                pr_info("%016llx - %016llx (%s)\n",
1072                        ptr->addr, ptr->addr + ptr->len, str);
1073                ptr++;
1074        }
1075}
1076
1077/*
1078 * Setup function called from init/main.c just after the banner
1079 * was printed.
1080 */
1081
1082void __init setup_arch(char **cmdline_p)
1083{
1084        /*
1085         * print what head.S has found out about the machine
1086         */
1087        if (MACHINE_IS_VM)
1088                pr_info("Linux is running as a z/VM "
1089                        "guest operating system in 64-bit mode\n");
1090        else if (MACHINE_IS_KVM)
1091                pr_info("Linux is running under KVM in 64-bit mode\n");
1092        else if (MACHINE_IS_LPAR)
1093                pr_info("Linux is running natively in 64-bit mode\n");
1094        else
1095                pr_info("Linux is running as a guest in 64-bit mode\n");
1096
1097        log_component_list();
1098
1099        /* Have one command line that is parsed and saved in /proc/cmdline */
1100        /* boot_command_line has been already set up in early.c */
1101        *cmdline_p = boot_command_line;
1102
1103        ROOT_DEV = Root_RAM0;
1104
1105        init_mm.start_code = (unsigned long) _text;
1106        init_mm.end_code = (unsigned long) _etext;
1107        init_mm.end_data = (unsigned long) _edata;
1108        init_mm.brk = (unsigned long) _end;
1109
1110        if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
1111                nospec_auto_detect();
1112
1113        parse_early_param();
1114#ifdef CONFIG_CRASH_DUMP
1115        /* Deactivate elfcorehdr= kernel parameter */
1116        elfcorehdr_addr = ELFCORE_ADDR_MAX;
1117#endif
1118
1119        os_info_init();
1120        setup_ipl();
1121        setup_task_size();
1122        setup_control_program_code();
1123
1124        /* Do some memory reservations *before* memory is added to memblock */
1125        reserve_memory_end();
1126        reserve_oldmem();
1127        reserve_kernel();
1128        reserve_initrd();
1129        reserve_certificate_list();
1130        reserve_mem_detect_info();
1131        memblock_allow_resize();
1132
1133        /* Get information about *all* installed memory */
1134        memblock_add_mem_detect_info();
1135
1136        free_mem_detect_info();
1137        remove_oldmem();
1138
1139        /*
1140         * Make sure all chunks are MAX_ORDER aligned so we don't need the
1141         * extra checks that HOLES_IN_ZONE would require.
1142         *
1143         * Is this still required?
1144         */
1145        memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
1146
1147        setup_memory_end();
1148        setup_memory();
1149        dma_contiguous_reserve(memory_end);
1150        vmcp_cma_reserve();
1151
1152        check_initrd();
1153        reserve_crashkernel();
1154#ifdef CONFIG_CRASH_DUMP
1155        /*
1156         * Be aware that smp_save_dump_cpus() triggers a system reset.
1157         * Therefore CPU and device initialization should be done afterwards.
1158         */
1159        smp_save_dump_cpus();
1160#endif
1161
1162        setup_resources();
1163        setup_lowcore_dat_off();
1164        smp_fill_possible_mask();
1165        cpu_detect_mhz_feature();
1166        cpu_init();
1167        numa_setup();
1168        smp_detect_cpus();
1169        topology_init_early();
1170
1171        /*
1172         * Create kernel page tables and switch to virtual addressing.
1173         */
1174        paging_init();
1175
1176        /*
1177         * After paging_init created the kernel page table, the new PSWs
1178         * in lowcore can now run with DAT enabled.
1179         */
1180        setup_lowcore_dat_on();
1181
1182        /* Setup default console */
1183        conmode_default();
1184        set_preferred_console();
1185
1186        apply_alternative_instructions();
1187        if (IS_ENABLED(CONFIG_EXPOLINE))
1188                nospec_init_branches();
1189
1190        /* Setup zfcpdump support */
1191        setup_zfcpdump();
1192
1193        /* Add system specific data to the random pool */
1194        setup_randomness();
1195}
1196