linux/arch/s390/kernel/setup.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  S390 version
   4 *    Copyright IBM Corp. 1999, 2012
   5 *    Author(s): Hartmut Penner (hp@de.ibm.com),
   6 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
   7 *
   8 *  Derived from "arch/i386/kernel/setup.c"
   9 *    Copyright (C) 1995, Linus Torvalds
  10 */
  11
  12/*
  13 * This file handles the architecture-dependent parts of initialization
  14 */
  15
  16#define KMSG_COMPONENT "setup"
  17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18
  19#include <linux/errno.h>
  20#include <linux/export.h>
  21#include <linux/sched.h>
  22#include <linux/sched/task.h>
  23#include <linux/cpu.h>
  24#include <linux/kernel.h>
  25#include <linux/memblock.h>
  26#include <linux/mm.h>
  27#include <linux/stddef.h>
  28#include <linux/unistd.h>
  29#include <linux/ptrace.h>
  30#include <linux/random.h>
  31#include <linux/user.h>
  32#include <linux/tty.h>
  33#include <linux/ioport.h>
  34#include <linux/delay.h>
  35#include <linux/init.h>
  36#include <linux/initrd.h>
  37#include <linux/root_dev.h>
  38#include <linux/console.h>
  39#include <linux/kernel_stat.h>
  40#include <linux/dma-contiguous.h>
  41#include <linux/device.h>
  42#include <linux/notifier.h>
  43#include <linux/pfn.h>
  44#include <linux/ctype.h>
  45#include <linux/reboot.h>
  46#include <linux/topology.h>
  47#include <linux/kexec.h>
  48#include <linux/crash_dump.h>
  49#include <linux/memory.h>
  50#include <linux/compat.h>
  51#include <linux/start_kernel.h>
  52
  53#include <asm/boot_data.h>
  54#include <asm/ipl.h>
  55#include <asm/facility.h>
  56#include <asm/smp.h>
  57#include <asm/mmu_context.h>
  58#include <asm/cpcmd.h>
  59#include <asm/lowcore.h>
  60#include <asm/nmi.h>
  61#include <asm/irq.h>
  62#include <asm/page.h>
  63#include <asm/ptrace.h>
  64#include <asm/sections.h>
  65#include <asm/ebcdic.h>
  66#include <asm/diag.h>
  67#include <asm/os_info.h>
  68#include <asm/sclp.h>
  69#include <asm/stacktrace.h>
  70#include <asm/sysinfo.h>
  71#include <asm/numa.h>
  72#include <asm/alternative.h>
  73#include <asm/nospec-branch.h>
  74#include <asm/mem_detect.h>
  75#include <asm/uv.h>
  76#include <asm/asm-offsets.h>
  77#include "entry.h"
  78
  79/*
  80 * Machine setup..
  81 */
  82unsigned int console_mode = 0;
  83EXPORT_SYMBOL(console_mode);
  84
  85unsigned int console_devno = -1;
  86EXPORT_SYMBOL(console_devno);
  87
  88unsigned int console_irq = -1;
  89EXPORT_SYMBOL(console_irq);
  90
  91unsigned long elf_hwcap __read_mostly = 0;
  92char elf_platform[ELF_PLATFORM_SIZE];
  93
  94unsigned long int_hwcap = 0;
  95
  /*
   * Variables declared through __bootdata() / __bootdata_preserved().
   * NOTE(review): both macros are defined outside this file (presumably in
   * <asm/boot_data.h>); what they do to the declaration cannot be told from
   * here - confirm before relying on where these values come from.
   *
   * Within this file, memory_end, memory_end_set, vmalloc_size,
   * max_physmem_end and mem_detect are read by the memory setup helpers
   * further down, and __sdma/__edma are used by reserve_kernel().
   */
  96int __bootdata(noexec_disabled);
  97int __bootdata(memory_end_set);
  98unsigned long __bootdata(memory_end);
  99unsigned long __bootdata(vmalloc_size);
 100unsigned long __bootdata(max_physmem_end);
 101struct mem_detect_info __bootdata(mem_detect);
 102
 103struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
 104struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
 105unsigned long __bootdata_preserved(__swsusp_reset_dma);
 106unsigned long __bootdata_preserved(__stext_dma);
 107unsigned long __bootdata_preserved(__etext_dma);
 108unsigned long __bootdata_preserved(__sdma);
 109unsigned long __bootdata_preserved(__edma);
 110unsigned long __bootdata_preserved(__kaslr_offset);
 111unsigned int __bootdata_preserved(zlib_dfltcc_support);
 112EXPORT_SYMBOL(zlib_dfltcc_support);
 113
 /*
  * Kernel virtual address space layout. VMALLOC_START, VMALLOC_END, vmemmap,
  * MODULES_VADDR and MODULES_END are all assigned by setup_memory_end().
  */
 114unsigned long VMALLOC_START;
 115EXPORT_SYMBOL(VMALLOC_START);
 116
 117unsigned long VMALLOC_END;
 118EXPORT_SYMBOL(VMALLOC_END);
 119
 /* Base of the struct page array; chosen by setup_memory_end(). */
 120struct page *vmemmap;
 121EXPORT_SYMBOL(vmemmap);
 122
 /* Module area: [MODULES_VADDR, MODULES_END), MODULES_LEN bytes long. */
 123unsigned long MODULES_VADDR;
 124unsigned long MODULES_END;
 125
 /* Entry 0 is filled in for the boot CPU by setup_lowcore_dat_off(). */
 126/* An array with a pointer to the lowcore of every CPU. */
 127struct lowcore *lowcore_ptr[NR_CPUS];
 128EXPORT_SYMBOL(lowcore_ptr);
 129
 130/*
 131 * This is set up by the setup-routine at boot-time
 132 * for S390 need to find out, what we have to setup
 133 * using address 0x10400 ...
 134 */
 135
 136#include <asm/setup.h>
 137
 138/*
 139 * condev= and conmode= setup parameter.
 140 */
 141
 142static int __init condev_setup(char *str)
 143{
 144        int vdev;
 145
 146        vdev = simple_strtoul(str, &str, 0);
 147        if (vdev >= 0 && vdev < 65536) {
 148                console_devno = vdev;
 149                console_irq = -1;
 150        }
 151        return 1;
 152}
 153
 154__setup("condev=", condev_setup);
 155
 156static void __init set_preferred_console(void)
 157{
 158        if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
 159                add_preferred_console("ttyS", 0, NULL);
 160        else if (CONSOLE_IS_3270)
 161                add_preferred_console("tty3270", 0, NULL);
 162        else if (CONSOLE_IS_VT220)
 163                add_preferred_console("ttyS", 1, NULL);
 164        else if (CONSOLE_IS_HVC)
 165                add_preferred_console("hvc", 0, NULL);
 166}
 167
 168static int __init conmode_setup(char *str)
 169{
 170#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 171        if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
 172                SET_CONSOLE_SCLP;
 173#endif
 174#if defined(CONFIG_TN3215_CONSOLE)
 175        if (!strcmp(str, "3215"))
 176                SET_CONSOLE_3215;
 177#endif
 178#if defined(CONFIG_TN3270_CONSOLE)
 179        if (!strcmp(str, "3270"))
 180                SET_CONSOLE_3270;
 181#endif
 182        set_preferred_console();
 183        return 1;
 184}
 185
 186__setup("conmode=", conmode_setup);
 187
 188static void __init conmode_default(void)
 189{
 190        char query_buffer[1024];
 191        char *ptr;
 192
 193        if (MACHINE_IS_VM) {
 194                cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
 195                console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
 196                ptr = strstr(query_buffer, "SUBCHANNEL =");
 197                console_irq = simple_strtoul(ptr + 13, NULL, 16);
 198                cpcmd("QUERY TERM", query_buffer, 1024, NULL);
 199                ptr = strstr(query_buffer, "CONMODE");
 200                /*
 201                 * Set the conmode to 3215 so that the device recognition 
 202                 * will set the cu_type of the console to 3215. If the
 203                 * conmode is 3270 and we don't set it back then both
 204                 * 3215 and the 3270 driver will try to access the console
 205                 * device (3215 as console and 3270 as normal tty).
 206                 */
 207                cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
 208                if (ptr == NULL) {
 209#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 210                        SET_CONSOLE_SCLP;
 211#endif
 212                        return;
 213                }
 214                if (str_has_prefix(ptr + 8, "3270")) {
 215#if defined(CONFIG_TN3270_CONSOLE)
 216                        SET_CONSOLE_3270;
 217#elif defined(CONFIG_TN3215_CONSOLE)
 218                        SET_CONSOLE_3215;
 219#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 220                        SET_CONSOLE_SCLP;
 221#endif
 222                } else if (str_has_prefix(ptr + 8, "3215")) {
 223#if defined(CONFIG_TN3215_CONSOLE)
 224                        SET_CONSOLE_3215;
 225#elif defined(CONFIG_TN3270_CONSOLE)
 226                        SET_CONSOLE_3270;
 227#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 228                        SET_CONSOLE_SCLP;
 229#endif
 230                }
 231        } else if (MACHINE_IS_KVM) {
 232                if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
 233                        SET_CONSOLE_VT220;
 234                else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
 235                        SET_CONSOLE_SCLP;
 236                else
 237                        SET_CONSOLE_HVC;
 238        } else {
 239#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 240                SET_CONSOLE_SCLP;
 241#endif
 242        }
 243}
 244
 245#ifdef CONFIG_CRASH_DUMP
 246static void __init setup_zfcpdump(void)
 247{
 248        if (ipl_info.type != IPL_TYPE_FCP_DUMP)
 249                return;
 250        if (OLDMEM_BASE)
 251                return;
 252        strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
 253        console_loglevel = 2;
 254}
 255#else
 256static inline void setup_zfcpdump(void) {}
 257#endif /* CONFIG_CRASH_DUMP */
 258
 259 /*
 260 * Reboot, halt and power_off stubs. They just call _machine_restart,
 261 * _machine_halt or _machine_power_off. 
 262 */
 263
 264void machine_restart(char *command)
 265{
 266        if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
 267                /*
 268                 * Only unblank the console if we are called in enabled
 269                 * context or a bust_spinlocks cleared the way for us.
 270                 */
 271                console_unblank();
 272        _machine_restart(command);
 273}
 274
 275void machine_halt(void)
 276{
 277        if (!in_interrupt() || oops_in_progress)
 278                /*
 279                 * Only unblank the console if we are called in enabled
 280                 * context or a bust_spinlocks cleared the way for us.
 281                 */
 282                console_unblank();
 283        _machine_halt();
 284}
 285
 286void machine_power_off(void)
 287{
 288        if (!in_interrupt() || oops_in_progress)
 289                /*
 290                 * Only unblank the console if we are called in enabled
 291                 * context or a bust_spinlocks cleared the way for us.
 292                 */
 293                console_unblank();
 294        _machine_power_off();
 295}
 296
 297/*
 298 * Dummy power off function.
 299 */
 300void (*pm_power_off)(void) = machine_power_off;
 301EXPORT_SYMBOL_GPL(pm_power_off);
 302
 303void *restart_stack __section(.data);
 304
 305unsigned long stack_alloc(void)
 306{
 307#ifdef CONFIG_VMAP_STACK
 308        return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
 309                        THREADINFO_GFP, NUMA_NO_NODE,
 310                        __builtin_return_address(0));
 311#else
 312        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 313#endif
 314}
 315
 316void stack_free(unsigned long stack)
 317{
 318#ifdef CONFIG_VMAP_STACK
 319        vfree((void *) stack);
 320#else
 321        free_pages(stack, THREAD_SIZE_ORDER);
 322#endif
 323}
 324
 325int __init arch_early_irq_init(void)
 326{
 327        unsigned long stack;
 328
 329        stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 330        if (!stack)
 331                panic("Couldn't allocate async stack");
 332        S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
 333        return 0;
 334}
 335
 336static int __init async_stack_realloc(void)
 337{
 338        unsigned long old, new;
 339
 340        old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
 341        new = stack_alloc();
 342        if (!new)
 343                panic("Couldn't allocate async stack");
 344        S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
 345        free_pages(old, THREAD_SIZE_ORDER);
 346        return 0;
 347}
 348early_initcall(async_stack_realloc);
 349
 350void __init arch_call_rest_init(void)
 351{
 352        unsigned long stack;
 353
 354        stack = stack_alloc();
 355        if (!stack)
 356                panic("Couldn't allocate kernel stack");
 357        current->stack = (void *) stack;
 358#ifdef CONFIG_VMAP_STACK
 359        current->stack_vm_area = (void *) stack;
 360#endif
 361        set_task_stack_end_magic(current);
 362        stack += STACK_INIT_OFFSET;
 363        S390_lowcore.kernel_stack = stack;
 364        CALL_ON_STACK_NORETURN(rest_init, stack);
 365}
 366
 367static void __init setup_lowcore_dat_off(void)
 368{
 369        struct lowcore *lc;
 370
 371        /*
 372         * Setup lowcore for boot cpu
 373         */
 374        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
 375        lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
 376        if (!lc)
 377                panic("%s: Failed to allocate %zu bytes align=%zx\n",
 378                      __func__, sizeof(*lc), sizeof(*lc));
 379
 380        lc->restart_psw.mask = PSW_KERNEL_BITS;
 381        lc->restart_psw.addr = (unsigned long) restart_int_handler;
 382        lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
 383        lc->external_new_psw.addr = (unsigned long) ext_int_handler;
 384        lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
 385        lc->svc_new_psw.addr = (unsigned long) system_call;
 386        lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
 387        lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 388        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
 389        lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
 390        lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
 391        lc->io_new_psw.addr = (unsigned long) io_int_handler;
 392        lc->clock_comparator = clock_comparator_max;
 393        lc->nodat_stack = ((unsigned long) &init_thread_union)
 394                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 395        lc->current_task = (unsigned long)&init_task;
 396        lc->lpp = LPP_MAGIC;
 397        lc->machine_flags = S390_lowcore.machine_flags;
 398        lc->preempt_count = S390_lowcore.preempt_count;
 399        lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 400        memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 401               sizeof(lc->stfle_fac_list));
 402        memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
 403               sizeof(lc->alt_stfle_fac_list));
 404        nmi_alloc_boot_cpu(lc);
 405        vdso_alloc_boot_cpu(lc);
 406        lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
 407        lc->async_enter_timer = S390_lowcore.async_enter_timer;
 408        lc->exit_timer = S390_lowcore.exit_timer;
 409        lc->user_timer = S390_lowcore.user_timer;
 410        lc->system_timer = S390_lowcore.system_timer;
 411        lc->steal_timer = S390_lowcore.steal_timer;
 412        lc->last_update_timer = S390_lowcore.last_update_timer;
 413        lc->last_update_clock = S390_lowcore.last_update_clock;
 414
 415        /*
 416         * Allocate the global restart stack which is the same for
 417         * all CPUs in cast *one* of them does a PSW restart.
 418         */
 419        restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
 420        if (!restart_stack)
 421                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 422                      __func__, THREAD_SIZE, THREAD_SIZE);
 423        restart_stack += STACK_INIT_OFFSET;
 424
 425        /*
 426         * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
 427         * restart data to the absolute zero lowcore. This is necessary if
 428         * PSW restart is done on an offline CPU that has lowcore zero.
 429         */
 430        lc->restart_stack = (unsigned long) restart_stack;
 431        lc->restart_fn = (unsigned long) do_restart;
 432        lc->restart_data = 0;
 433        lc->restart_source = -1UL;
 434
 435        /* Setup absolute zero lowcore */
 436        mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
 437        mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
 438        mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
 439        mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
 440        mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
 441
 442        lc->spinlock_lockval = arch_spin_lockval(0);
 443        lc->spinlock_index = 0;
 444        arch_spin_lock_setup(0);
 445        lc->br_r1_trampoline = 0x07f1;  /* br %r1 */
 446        lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
 447        lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 448
 449        set_prefix((u32)(unsigned long) lc);
 450        lowcore_ptr[0] = lc;
 451}
 452
 453static void __init setup_lowcore_dat_on(void)
 454{
 455        __ctl_clear_bit(0, 28);
 456        S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
 457        S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
 458        S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
 459        S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
 460        __ctl_set_bit(0, 28);
 461}
 462
 463static struct resource code_resource = {
 464        .name  = "Kernel code",
 465        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 466};
 467
 468static struct resource data_resource = {
 469        .name = "Kernel data",
 470        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 471};
 472
 473static struct resource bss_resource = {
 474        .name = "Kernel bss",
 475        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 476};
 477
 478static struct resource __initdata *standard_resources[] = {
 479        &code_resource,
 480        &data_resource,
 481        &bss_resource,
 482};
 483
 484static void __init setup_resources(void)
 485{
 486        struct resource *res, *std_res, *sub_res;
 487        struct memblock_region *reg;
 488        int j;
 489
 490        code_resource.start = (unsigned long) _text;
 491        code_resource.end = (unsigned long) _etext - 1;
 492        data_resource.start = (unsigned long) _etext;
 493        data_resource.end = (unsigned long) _edata - 1;
 494        bss_resource.start = (unsigned long) __bss_start;
 495        bss_resource.end = (unsigned long) __bss_stop - 1;
 496
 497        for_each_memblock(memory, reg) {
 498                res = memblock_alloc(sizeof(*res), 8);
 499                if (!res)
 500                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
 501                              __func__, sizeof(*res), 8);
 502                res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 503
 504                res->name = "System RAM";
 505                res->start = reg->base;
 506                res->end = reg->base + reg->size - 1;
 507                request_resource(&iomem_resource, res);
 508
 509                for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
 510                        std_res = standard_resources[j];
 511                        if (std_res->start < res->start ||
 512                            std_res->start > res->end)
 513                                continue;
 514                        if (std_res->end > res->end) {
 515                                sub_res = memblock_alloc(sizeof(*sub_res), 8);
 516                                if (!sub_res)
 517                                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
 518                                              __func__, sizeof(*sub_res), 8);
 519                                *sub_res = *std_res;
 520                                sub_res->end = res->end;
 521                                std_res->start = res->end + 1;
 522                                request_resource(res, sub_res);
 523                        } else {
 524                                request_resource(res, std_res);
 525                        }
 526                }
 527        }
 528#ifdef CONFIG_CRASH_DUMP
 529        /*
 530         * Re-add removed crash kernel memory as reserved memory. This makes
 531         * sure it will be mapped with the identity mapping and struct pages
 532         * will be created, so it can be resized later on.
 533         * However add it later since the crash kernel resource should not be
 534         * part of the System RAM resource.
 535         */
 536        if (crashk_res.end) {
 537                memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
 538                memblock_reserve(crashk_res.start, resource_size(&crashk_res));
 539                insert_resource(&iomem_resource, &crashk_res);
 540        }
 541#endif
 542}
 543
 544static void __init setup_memory_end(void)
 545{
 546        unsigned long vmax, tmp;
 547
 548        /* Choose kernel address space layout: 3 or 4 levels. */
 549        if (IS_ENABLED(CONFIG_KASAN)) {
 550                vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
 551                           ? _REGION1_SIZE
 552                           : _REGION2_SIZE;
 553        } else {
 554                tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
 555                tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
 556                if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
 557                        vmax = _REGION2_SIZE; /* 3-level kernel page table */
 558                else
 559                        vmax = _REGION1_SIZE; /* 4-level kernel page table */
 560        }
 561
 562        if (is_prot_virt_host())
 563                adjust_to_uv_max(&vmax);
 564
 565        /* module area is at the end of the kernel address space. */
 566        MODULES_END = vmax;
 567        MODULES_VADDR = MODULES_END - MODULES_LEN;
 568        VMALLOC_END = MODULES_VADDR;
 569        VMALLOC_START = VMALLOC_END - vmalloc_size;
 570
 571        /* Split remaining virtual space between 1:1 mapping & vmemmap array */
 572        tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
 573        /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
 574        tmp = SECTION_ALIGN_UP(tmp);
 575        tmp = VMALLOC_START - tmp * sizeof(struct page);
 576        tmp &= ~((vmax >> 11) - 1);     /* align to page table level */
 577        tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
 578        vmemmap = (struct page *) tmp;
 579
 580        /* Take care that memory_end is set and <= vmemmap */
 581        memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
 582#ifdef CONFIG_KASAN
 583        /* fit in kasan shadow memory region between 1:1 and vmemmap */
 584        memory_end = min(memory_end, KASAN_SHADOW_START);
 585        vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
 586#endif
 587        max_pfn = max_low_pfn = PFN_DOWN(memory_end);
 588        memblock_remove(memory_end, ULONG_MAX);
 589
 590        pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 591}
 592
 593#ifdef CONFIG_CRASH_DUMP
 594
 595/*
 596 * When kdump is enabled, we have to ensure that no memory from the area
 597 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 598 * the crashkernel memory region when kdump is triggered. The crashkernel
 599 * memory region can never get offlined (pages are unmovable).
 600 */
 601static int kdump_mem_notifier(struct notifier_block *nb,
 602                              unsigned long action, void *data)
 603{
 604        struct memory_notify *arg = data;
 605
 606        if (action != MEM_GOING_OFFLINE)
 607                return NOTIFY_OK;
 608        if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
 609                return NOTIFY_BAD;
 610        return NOTIFY_OK;
 611}
 612
 613static struct notifier_block kdump_mem_nb = {
 614        .notifier_call = kdump_mem_notifier,
 615};
 616
 617#endif
 618
 619/*
 620 * Make sure that the area behind memory_end is protected
 621 */
 622static void reserve_memory_end(void)
 623{
 624        if (memory_end_set)
 625                memblock_reserve(memory_end, ULONG_MAX);
 626}
 627
 628/*
 629 * Make sure that oldmem, where the dump is stored, is protected
 630 */
 631static void reserve_oldmem(void)
 632{
 633#ifdef CONFIG_CRASH_DUMP
 634        if (OLDMEM_BASE)
 635                /* Forget all memory above the running kdump system */
 636                memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
 637#endif
 638}
 639
 640/*
 641 * Make sure that oldmem, where the dump is stored, is protected
 642 */
 643static void remove_oldmem(void)
 644{
 645#ifdef CONFIG_CRASH_DUMP
 646        if (OLDMEM_BASE)
 647                /* Forget all memory above the running kdump system */
 648                memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
 649#endif
 650}
 651
 652/*
 653 * Reserve memory for kdump kernel to be loaded with kexec
 654 */
 655static void __init reserve_crashkernel(void)
 656{
 657#ifdef CONFIG_CRASH_DUMP
 658        unsigned long long crash_base, crash_size;
 659        phys_addr_t low, high;
 660        int rc;
 661
 662        rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
 663                               &crash_base);
 664
 665        crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
 666        crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
 667        if (rc || crash_size == 0)
 668                return;
 669
 670        if (memblock.memory.regions[0].size < crash_size) {
 671                pr_info("crashkernel reservation failed: %s\n",
 672                        "first memory chunk must be at least crashkernel size");
 673                return;
 674        }
 675
 676        low = crash_base ?: OLDMEM_BASE;
 677        high = low + crash_size;
 678        if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
 679                /* The crashkernel fits into OLDMEM, reuse OLDMEM */
 680                crash_base = low;
 681        } else {
 682                /* Find suitable area in free memory */
 683                low = max_t(unsigned long, crash_size, sclp.hsa_size);
 684                high = crash_base ? crash_base + crash_size : ULONG_MAX;
 685
 686                if (crash_base && crash_base < low) {
 687                        pr_info("crashkernel reservation failed: %s\n",
 688                                "crash_base too low");
 689                        return;
 690                }
 691                low = crash_base ?: low;
 692                crash_base = memblock_find_in_range(low, high, crash_size,
 693                                                    KEXEC_CRASH_MEM_ALIGN);
 694        }
 695
 696        if (!crash_base) {
 697                pr_info("crashkernel reservation failed: %s\n",
 698                        "no suitable area found");
 699                return;
 700        }
 701
 702        if (register_memory_notifier(&kdump_mem_nb))
 703                return;
 704
 705        if (!OLDMEM_BASE && MACHINE_IS_VM)
 706                diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
 707        crashk_res.start = crash_base;
 708        crashk_res.end = crash_base + crash_size - 1;
 709        memblock_remove(crash_base, crash_size);
 710        pr_info("Reserving %lluMB of memory at %lluMB "
 711                "for crashkernel (System RAM: %luMB)\n",
 712                crash_size >> 20, crash_base >> 20,
 713                (unsigned long)memblock.memory.total_size >> 20);
 714        os_info_crashkernel_add(crash_base, crash_size);
 715#endif
 716}
 717
 718/*
 719 * Reserve the initrd from being used by memblock
 720 */
 721static void __init reserve_initrd(void)
 722{
 723#ifdef CONFIG_BLK_DEV_INITRD
 724        if (!INITRD_START || !INITRD_SIZE)
 725                return;
 726        initrd_start = INITRD_START;
 727        initrd_end = initrd_start + INITRD_SIZE;
 728        memblock_reserve(INITRD_START, INITRD_SIZE);
 729#endif
 730}
 731
 732/*
 733 * Reserve the memory area used to pass the certificate lists
 734 */
 735static void __init reserve_certificate_list(void)
 736{
 737        if (ipl_cert_list_addr)
 738                memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
 739}
 740
 741static void __init reserve_mem_detect_info(void)
 742{
 743        unsigned long start, size;
 744
 745        get_mem_detect_reserved(&start, &size);
 746        if (size)
 747                memblock_reserve(start, size);
 748}
 749
 750static void __init free_mem_detect_info(void)
 751{
 752        unsigned long start, size;
 753
 754        get_mem_detect_reserved(&start, &size);
 755        if (size)
 756                memblock_free(start, size);
 757}
 758
 759static const char * __init get_mem_info_source(void)
 760{
 761        switch (mem_detect.info_source) {
 762        case MEM_DETECT_SCLP_STOR_INFO:
 763                return "sclp storage info";
 764        case MEM_DETECT_DIAG260:
 765                return "diag260";
 766        case MEM_DETECT_SCLP_READ_INFO:
 767                return "sclp read info";
 768        case MEM_DETECT_BIN_SEARCH:
 769                return "binary search";
 770        }
 771        return "none";
 772}
 773
 774static void __init memblock_add_mem_detect_info(void)
 775{
 776        unsigned long start, end;
 777        int i;
 778
 779        memblock_dbg("physmem info source: %s (%hhd)\n",
 780                     get_mem_info_source(), mem_detect.info_source);
 781        /* keep memblock lists close to the kernel */
 782        memblock_set_bottom_up(true);
 783        for_each_mem_detect_block(i, &start, &end) {
 784                memblock_add(start, end - start);
 785                memblock_physmem_add(start, end - start);
 786        }
 787        memblock_set_bottom_up(false);
 788        memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 789        memblock_dump_all();
 790}
 791
 792/*
 793 * Check for initrd being in usable memory
 794 */
 795static void __init check_initrd(void)
 796{
 797#ifdef CONFIG_BLK_DEV_INITRD
 798        if (INITRD_START && INITRD_SIZE &&
 799            !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
 800                pr_err("The initial RAM disk does not fit into the memory\n");
 801                memblock_free(INITRD_START, INITRD_SIZE);
 802                initrd_start = initrd_end = 0;
 803        }
 804#endif
 805}
 806
 807/*
 808 * Reserve memory used for lowcore/command line/kernel image.
 809 */
 810static void __init reserve_kernel(void)
 811{
 812        unsigned long start_pfn = PFN_UP(__pa(_end));
 813
 814        memblock_reserve(0, HEAD_END);
 815        memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
 816                         - (unsigned long)_stext);
 817        memblock_reserve(__sdma, __edma - __sdma);
 818}
 819
 820static void __init setup_memory(void)
 821{
 822        struct memblock_region *reg;
 823
 824        /*
 825         * Init storage key for present memory
 826         */
 827        for_each_memblock(memory, reg) {
 828                storage_key_init_range(reg->base, reg->base + reg->size);
 829        }
 830        psw_set_key(PAGE_DEFAULT_KEY);
 831
 832        /* Only cosmetics */
 833        memblock_enforce_memory_limit(memblock_end_of_DRAM());
 834}
 835
 836/*
 837 * Setup hardware capabilities.
 838 */
 839static int __init setup_hwcaps(void)
 840{
 841        static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 842        struct cpuid cpu_id;
 843        int i;
 844
 845        /*
 846         * The store facility list bits numbers as found in the principles
 847         * of operation are numbered with bit 1UL<<31 as number 0 to
 848         * bit 1UL<<0 as number 31.
 849         *   Bit 0: instructions named N3, "backported" to esa-mode
 850         *   Bit 2: z/Architecture mode is active
 851         *   Bit 7: the store-facility-list-extended facility is installed
 852         *   Bit 17: the message-security assist is installed
 853         *   Bit 19: the long-displacement facility is installed
 854         *   Bit 21: the extended-immediate facility is installed
 855         *   Bit 22: extended-translation facility 3 is installed
 856         *   Bit 30: extended-translation facility 3 enhancement facility
 857         * These get translated to:
 858         *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
 859         *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
 860         *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
 861         *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
 862         */
 863        for (i = 0; i < 6; i++)
 864                if (test_facility(stfl_bits[i]))
 865                        elf_hwcap |= 1UL << i;
 866
 867        if (test_facility(22) && test_facility(30))
 868                elf_hwcap |= HWCAP_S390_ETF3EH;
 869
 870        /*
 871         * Check for additional facilities with store-facility-list-extended.
 872         * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
 873         * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
 874         * as stored by stfl, bits 32-xxx contain additional facilities.
 875         * How many facility words are stored depends on the number of
 876         * doublewords passed to the instruction. The additional facilities
 877         * are:
 878         *   Bit 42: decimal floating point facility is installed
 879         *   Bit 44: perform floating point operation facility is installed
 880         * translated to:
 881         *   HWCAP_S390_DFP bit 6 (42 && 44).
 882         */
 883        if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
 884                elf_hwcap |= HWCAP_S390_DFP;
 885
 886        /*
 887         * Huge page support HWCAP_S390_HPAGE is bit 7.
 888         */
 889        if (MACHINE_HAS_EDAT1)
 890                elf_hwcap |= HWCAP_S390_HPAGE;
 891
 892        /*
 893         * 64-bit register support for 31-bit processes
 894         * HWCAP_S390_HIGH_GPRS is bit 9.
 895         */
 896        elf_hwcap |= HWCAP_S390_HIGH_GPRS;
 897
 898        /*
 899         * Transactional execution support HWCAP_S390_TE is bit 10.
 900         */
 901        if (MACHINE_HAS_TE)
 902                elf_hwcap |= HWCAP_S390_TE;
 903
 904        /*
 905         * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
 906         * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
 907         * instead of facility bit 129.
 908         */
 909        if (MACHINE_HAS_VX) {
 910                elf_hwcap |= HWCAP_S390_VXRS;
 911                if (test_facility(134))
 912                        elf_hwcap |= HWCAP_S390_VXRS_EXT;
 913                if (test_facility(135))
 914                        elf_hwcap |= HWCAP_S390_VXRS_BCD;
 915                if (test_facility(148))
 916                        elf_hwcap |= HWCAP_S390_VXRS_EXT2;
 917                if (test_facility(152))
 918                        elf_hwcap |= HWCAP_S390_VXRS_PDE;
 919        }
 920        if (test_facility(150))
 921                elf_hwcap |= HWCAP_S390_SORT;
 922        if (test_facility(151))
 923                elf_hwcap |= HWCAP_S390_DFLT;
 924
 925        /*
 926         * Guarded storage support HWCAP_S390_GS is bit 12.
 927         */
 928        if (MACHINE_HAS_GS)
 929                elf_hwcap |= HWCAP_S390_GS;
 930
 931        get_cpu_id(&cpu_id);
 932        add_device_randomness(&cpu_id, sizeof(cpu_id));
 933        switch (cpu_id.machine) {
 934        case 0x2064:
 935        case 0x2066:
 936        default:        /* Use "z900" as default for 64 bit kernels. */
 937                strcpy(elf_platform, "z900");
 938                break;
 939        case 0x2084:
 940        case 0x2086:
 941                strcpy(elf_platform, "z990");
 942                break;
 943        case 0x2094:
 944        case 0x2096:
 945                strcpy(elf_platform, "z9-109");
 946                break;
 947        case 0x2097:
 948        case 0x2098:
 949                strcpy(elf_platform, "z10");
 950                break;
 951        case 0x2817:
 952        case 0x2818:
 953                strcpy(elf_platform, "z196");
 954                break;
 955        case 0x2827:
 956        case 0x2828:
 957                strcpy(elf_platform, "zEC12");
 958                break;
 959        case 0x2964:
 960        case 0x2965:
 961                strcpy(elf_platform, "z13");
 962                break;
 963        case 0x3906:
 964        case 0x3907:
 965                strcpy(elf_platform, "z14");
 966                break;
 967        case 0x8561:
 968        case 0x8562:
 969                strcpy(elf_platform, "z15");
 970                break;
 971        }
 972
 973        /*
 974         * Virtualization support HWCAP_INT_SIE is bit 0.
 975         */
 976        if (sclp.has_sief2)
 977                int_hwcap |= HWCAP_INT_SIE;
 978
 979        return 0;
 980}
 981arch_initcall(setup_hwcaps);
 982
 983/*
 984 * Add system information as device randomness
 985 */
 986static void __init setup_randomness(void)
 987{
 988        struct sysinfo_3_2_2 *vmms;
 989
 990        vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
 991                                                            PAGE_SIZE);
 992        if (!vmms)
 993                panic("Failed to allocate memory for sysinfo structure\n");
 994
 995        if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
 996                add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
 997        memblock_free((unsigned long) vmms, PAGE_SIZE);
 998}
 999
1000/*
1001 * Find the correct size for the task_struct. This depends on
1002 * the size of the struct fpu at the end of the thread_struct
1003 * which is embedded in the task_struct.
1004 */
1005static void __init setup_task_size(void)
1006{
1007        int task_size = sizeof(struct task_struct);
1008
1009        if (!MACHINE_HAS_VX) {
1010                task_size -= sizeof(__vector128) * __NUM_VXRS;
1011                task_size += sizeof(freg_t) * __NUM_FPRS;
1012        }
1013        arch_task_struct_size = task_size;
1014}
1015
1016/*
1017 * Issue diagnose 318 to set the control program name and
1018 * version codes.
1019 */
1020static void __init setup_control_program_code(void)
1021{
1022        union diag318_info diag318_info = {
1023                .cpnc = CPNC_LINUX,
1024                .cpvc_linux = 0,
1025                .cpvc_distro = {0},
1026        };
1027
1028        if (!sclp.has_diag318)
1029                return;
1030
1031        diag_stat_inc(DIAG_STAT_X318);
1032        asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
1033}
1034
1035/*
1036 * Print the component list from the IPL report
1037 */
1038static void __init log_component_list(void)
1039{
1040        struct ipl_rb_component_entry *ptr, *end;
1041        char *str;
1042
1043        if (!early_ipl_comp_list_addr)
1044                return;
1045        if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
1046                pr_info("Linux is running with Secure-IPL enabled\n");
1047        else
1048                pr_info("Linux is running with Secure-IPL disabled\n");
1049        ptr = (void *) early_ipl_comp_list_addr;
1050        end = (void *) ptr + early_ipl_comp_list_size;
1051        pr_info("The IPL report contains the following components:\n");
1052        while (ptr < end) {
1053                if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
1054                        if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
1055                                str = "signed, verified";
1056                        else
1057                                str = "signed, verification failed";
1058                } else {
1059                        str = "not signed";
1060                }
1061                pr_info("%016llx - %016llx (%s)\n",
1062                        ptr->addr, ptr->addr + ptr->len, str);
1063                ptr++;
1064        }
1065}
1066
1067/*
1068 * Setup function called from init/main.c just after the banner
1069 * was printed.
1070 */
1071
1072void __init setup_arch(char **cmdline_p)
1073{
1074        /*
1075         * print what head.S has found out about the machine
1076         */
1077        if (MACHINE_IS_VM)
1078                pr_info("Linux is running as a z/VM "
1079                        "guest operating system in 64-bit mode\n");
1080        else if (MACHINE_IS_KVM)
1081                pr_info("Linux is running under KVM in 64-bit mode\n");
1082        else if (MACHINE_IS_LPAR)
1083                pr_info("Linux is running natively in 64-bit mode\n");
1084        else
1085                pr_info("Linux is running as a guest in 64-bit mode\n");
1086
1087        log_component_list();
1088
1089        /* Have one command line that is parsed and saved in /proc/cmdline */
1090        /* boot_command_line has been already set up in early.c */
1091        *cmdline_p = boot_command_line;
1092
1093        ROOT_DEV = Root_RAM0;
1094
1095        init_mm.start_code = (unsigned long) _text;
1096        init_mm.end_code = (unsigned long) _etext;
1097        init_mm.end_data = (unsigned long) _edata;
1098        init_mm.brk = (unsigned long) _end;
1099
1100        if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
1101                nospec_auto_detect();
1102
1103        jump_label_init();
1104        parse_early_param();
1105#ifdef CONFIG_CRASH_DUMP
1106        /* Deactivate elfcorehdr= kernel parameter */
1107        elfcorehdr_addr = ELFCORE_ADDR_MAX;
1108#endif
1109
1110        os_info_init();
1111        setup_ipl();
1112        setup_task_size();
1113        setup_control_program_code();
1114
1115        /* Do some memory reservations *before* memory is added to memblock */
1116        reserve_memory_end();
1117        reserve_oldmem();
1118        reserve_kernel();
1119        reserve_initrd();
1120        reserve_certificate_list();
1121        reserve_mem_detect_info();
1122        memblock_allow_resize();
1123
1124        /* Get information about *all* installed memory */
1125        memblock_add_mem_detect_info();
1126
1127        free_mem_detect_info();
1128        remove_oldmem();
1129
1130        /*
1131         * Make sure all chunks are MAX_ORDER aligned so we don't need the
1132         * extra checks that HOLES_IN_ZONE would require.
1133         *
1134         * Is this still required?
1135         */
1136        memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
1137
1138        if (is_prot_virt_host())
1139                setup_uv();
1140        setup_memory_end();
1141        setup_memory();
1142        dma_contiguous_reserve(memory_end);
1143        vmcp_cma_reserve();
1144
1145        check_initrd();
1146        reserve_crashkernel();
1147#ifdef CONFIG_CRASH_DUMP
1148        /*
1149         * Be aware that smp_save_dump_cpus() triggers a system reset.
1150         * Therefore CPU and device initialization should be done afterwards.
1151         */
1152        smp_save_dump_cpus();
1153#endif
1154
1155        setup_resources();
1156        setup_lowcore_dat_off();
1157        smp_fill_possible_mask();
1158        cpu_detect_mhz_feature();
1159        cpu_init();
1160        numa_setup();
1161        smp_detect_cpus();
1162        topology_init_early();
1163
1164        /*
1165         * Create kernel page tables and switch to virtual addressing.
1166         */
1167        paging_init();
1168
1169        /*
1170         * After paging_init created the kernel page table, the new PSWs
1171         * in lowcore can now run with DAT enabled.
1172         */
1173        setup_lowcore_dat_on();
1174
1175        /* Setup default console */
1176        conmode_default();
1177        set_preferred_console();
1178
1179        apply_alternative_instructions();
1180        if (IS_ENABLED(CONFIG_EXPOLINE))
1181                nospec_init_branches();
1182
1183        /* Setup zfcpdump support */
1184        setup_zfcpdump();
1185
1186        /* Add system specific data to the random pool */
1187        setup_randomness();
1188}
1189