linux/arch/s390/kernel/setup.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>

#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];

unsigned long int_hwcap = 0;

int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
unsigned long __bootdata(vmalloc_size);
struct mem_detect_info __bootdata(mem_detect);

struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
unsigned long __bootdata_preserved(__stext_dma);
unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through which is safe
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

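/*
 * Hypothetical usage example (values are illustrative only): booting with
 * "condev=0x0009 conmode=3215" on the kernel command line selects console
 * device number 0x0009 and forces the 3215 console mode.
 */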
static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

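/*
 * Register the preferred console with the console layer, based on the
 * console type that was selected at boot.
 */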
static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttyS", 1, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

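/*
 * Choose a default console mode when none was specified: under z/VM query
 * the hypervisor for its console settings, under KVM use the SCLP VT220,
 * SCLP line-mode or HVC console, and fall back to SCLP elsewhere.
 */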
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (MACHINE_IS_KVM) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (OLDMEM_BASE)
		return;
	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

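/*
 * Allocate a kernel stack of THREAD_SIZE bytes: from the vmalloc area if
 * CONFIG_VMAP_STACK is enabled, otherwise from directly mapped pages.
 */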
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
			THREADINFO_GFP, NUMA_NO_NODE,
			__builtin_return_address(0));
#else
	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
	vfree((void *) stack);
#else
	free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

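/*
 * Set up the initial async (interrupt) stack. This runs before the vmalloc
 * area is usable, so the stack is taken from directly mapped pages.
 */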
int __init arch_early_irq_init(void)
{
	unsigned long stack;

	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	if (!stack)
		panic("Couldn't allocate async stack");
	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
	return 0;
}

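/*
 * Replace the early page-backed async stack with one from stack_alloc()
 * (vmalloc-backed when CONFIG_VMAP_STACK is enabled) and free the old
 * pages.
 */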
static int __init async_stack_realloc(void)
{
	unsigned long old, new;

	old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
	new = stack_alloc();
	if (!new)
		panic("Couldn't allocate async stack");
	S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
	free_pages(old, THREAD_SIZE_ORDER);
	return 0;
}
early_initcall(async_stack_realloc);

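/*
 * Switch the init task to a freshly allocated kernel stack and continue
 * with rest_init() on it.
 */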
void __init arch_call_rest_init(void)
{
	unsigned long stack;

	stack = stack_alloc();
	if (!stack)
		panic("Couldn't allocate kernel stack");
	current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
	current->stack_vm_area = (void *) stack;
#endif
	set_task_stack_end_magic(current);
	stack += STACK_INIT_OFFSET;
	S390_lowcore.kernel_stack = stack;
	CALL_ON_STACK_NORETURN(rest_init, stack);
}

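/*
 * Allocate and initialize the lowcore for the boot CPU while DAT is still
 * disabled: the new PSWs, the global restart stack, and the copies of the
 * restart data in the absolute zero lowcore.
 */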
static void __init setup_lowcore_dat_off(void)
{
	unsigned long int_psw_mask = PSW_KERNEL_BITS;
	struct lowcore *lc;

	if (IS_ENABLED(CONFIG_KASAN))
		int_psw_mask |= PSW_MASK_DAT;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->restart_psw.mask = PSW_KERNEL_BITS;
	lc->restart_psw.addr = (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->nodat_stack = ((unsigned long) &init_thread_union)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(lc->stfle_fac_list));
	memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
	       sizeof(lc->alt_stfle_fac_list));
	nmi_alloc_boot_cpu(lc);
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;

	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!restart_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	restart_stack += STACK_INIT_OFFSET;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1UL;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);

	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

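/*
 * Called after paging_init(): enable DAT in the interrupt PSWs now that
 * the kernel page tables exist.
 */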
static void __init setup_lowcore_dat_on(void)
{
	__ctl_clear_bit(0, 28);
	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
	__ctl_set_bit(0, 28);
}

static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

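/*
 * Register a "System RAM" resource for every memblock range and nest the
 * kernel code/data/bss resources inside the ranges that contain them.
 */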
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = (unsigned long) _text;
	code_resource.end = (unsigned long) _etext - 1;
	data_resource.start = (unsigned long) _etext;
	data_resource.end = (unsigned long) _edata - 1;
	bss_resource.start = (unsigned long) __bss_start;
	bss_resource.end = (unsigned long) __bss_stop - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc(sizeof(*res), 8);
		if (!res)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc(sizeof(*sub_res), 8);
				if (!sub_res)
					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
					      __func__, sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

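/*
 * Lay out the kernel virtual address space: choose between a 3-level and a
 * 4-level page table, place the module area, vmalloc area and vmemmap
 * array, and clamp the identity mapping size accordingly.
 */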
static void __init setup_ident_map_size(void)
{
	unsigned long vmax, tmp;

	/* Choose kernel address space layout: 3 or 4 levels. */
	tmp = ident_map_size / PAGE_SIZE;
	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
	if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
		vmax = _REGION2_SIZE; /* 3-level kernel page table */
	else
		vmax = _REGION1_SIZE; /* 4-level kernel page table */
	/* module area is at the end of the kernel address space. */
	MODULES_END = vmax;
	if (is_prot_virt_host())
		adjust_to_uv_max(&MODULES_END);
#ifdef CONFIG_KASAN
	vmax = _REGION1_SIZE;
	MODULES_END = kasan_vmax;
#endif
	MODULES_VADDR = MODULES_END - MODULES_LEN;
	VMALLOC_END = MODULES_VADDR;
	VMALLOC_START = VMALLOC_END - vmalloc_size;

	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
	tmp = SECTION_ALIGN_UP(tmp);
	tmp = VMALLOC_START - tmp * sizeof(struct page);
	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
	vmemmap = (struct page *) tmp;

	/* Take care that ident_map_size <= vmemmap */
	ident_map_size = min(ident_map_size, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN
	ident_map_size = min(ident_map_size, KASAN_SHADOW_START);
#endif
	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
#ifdef CONFIG_KASAN
	/* move vmemmap above the kasan shadow only if it stands in the way */
	if (KASAN_SHADOW_END > (unsigned long)vmemmap &&
	    (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START)
		vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	memblock_remove(ident_map_size, ULONG_MAX);

	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that the area above identity mapping is protected
 */
static void __init reserve_above_ident_map(void)
{
	memblock_reserve(ident_map_size, ULONG_MAX);
}

/*
 * Make sure that oldmem, where the dump is stored, is protected
 */
static void __init reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE)
		/* Forget all memory above the running kdump system */
		memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
#endif
}

/*
 * Remove all memory above the running kdump system from memblock
 */
static void __init remove_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE)
		/* Forget all memory above the running kdump system */
		memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
			       &crash_base);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: OLDMEM_BASE;
	high = low + crash_size;
	if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_find_in_range(low, high, crash_size,
						    KEXEC_CRASH_MEM_ALIGN);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (!INITRD_START || !INITRD_SIZE)
		return;
	initrd_start = INITRD_START;
	initrd_end = initrd_start + INITRD_SIZE;
	memblock_reserve(INITRD_START, INITRD_SIZE);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

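/*
 * The boot-time memory detection data is reserved while memblock is being
 * populated and freed again once it is no longer needed.
 */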
static void __init reserve_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_free(start, size);
}

static const char * __init get_mem_info_source(void)
{
	switch (mem_detect.info_source) {
	case MEM_DETECT_SCLP_STOR_INFO:
		return "sclp storage info";
	case MEM_DETECT_DIAG260:
		return "diag260";
	case MEM_DETECT_SCLP_READ_INFO:
		return "sclp read info";
	case MEM_DETECT_BIN_SEARCH:
		return "binary search";
	}
	return "none";
}

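/*
 * Add all memory ranges found by the boot-time memory detection to
 * memblock.
 */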
static void __init memblock_add_mem_detect_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_mem_info_source(), mem_detect.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_mem_detect_block(i, &start, &end) {
		memblock_add(start, end - start);
		memblock_physmem_add(start, end - start);
	}
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
	memblock_dump_all();
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (INITRD_START && INITRD_SIZE &&
	    !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
		pr_err("The initial RAM disk does not fit into the memory\n");
		memblock_free(INITRD_START, INITRD_SIZE);
		initrd_start = initrd_end = 0;
	}
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	unsigned long start_pfn = PFN_UP(__pa(_end));

	memblock_reserve(0, HEAD_END);
	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
			 - (unsigned long)_stext);
	memblock_reserve(__sdma, __edma - __sdma);
}

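/*
 * Initialize the storage key for every page of present memory and load the
 * default access key into the PSW.
 */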
static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);

	/* Only cosmetics */
	memblock_enforce_memory_limit(memblock_end_of_DRAM());
}

/*
 * Setup hardware capabilities.
 */
static int __init setup_hwcaps(void)
{
	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
	struct cpuid cpu_id;
	int i;

	/*
	 * The store-facility-list bit numbers as found in the Principles
	 * of Operation are numbered with bit 1UL<<31 as number 0 to
	 * bit 1UL<<0 as number 31.
	 *   Bit 0: instructions named N3, "backported" to esa-mode
	 *   Bit 2: z/Architecture mode is active
	 *   Bit 7: the store-facility-list-extended facility is installed
	 *   Bit 17: the message-security assist is installed
	 *   Bit 19: the long-displacement facility is installed
	 *   Bit 21: the extended-immediate facility is installed
	 *   Bit 22: extended-translation facility 3 is installed
	 *   Bit 30: extended-translation facility 3 enhancement facility
	 * These get translated to:
	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
	 */
	for (i = 0; i < 6; i++)
		if (test_facility(stfl_bits[i]))
			elf_hwcap |= 1UL << i;

	if (test_facility(22) && test_facility(30))
		elf_hwcap |= HWCAP_S390_ETF3EH;

	/*
	 * Check for additional facilities with store-facility-list-extended.
	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
	 * as stored by stfl, bits 32-xxx contain additional facilities.
	 * How many facility words are stored depends on the number of
	 * doublewords passed to the instruction. The additional facilities
	 * are:
	 *   Bit 42: decimal floating point facility is installed
	 *   Bit 44: perform floating point operation facility is installed
	 * translated to:
	 *   HWCAP_S390_DFP bit 6 (42 && 44).
	 */
	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
		elf_hwcap |= HWCAP_S390_DFP;

	/*
	 * Huge page support HWCAP_S390_HPAGE is bit 7.
	 */
	if (MACHINE_HAS_EDAT1)
		elf_hwcap |= HWCAP_S390_HPAGE;

	/*
	 * 64-bit register support for 31-bit processes
	 * HWCAP_S390_HIGH_GPRS is bit 9.
	 */
	elf_hwcap |= HWCAP_S390_HIGH_GPRS;

	/*
	 * Transactional execution support HWCAP_S390_TE is bit 10.
	 */
	if (MACHINE_HAS_TE)
		elf_hwcap |= HWCAP_S390_TE;

	/*
	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
	 * instead of facility bit 129.
	 */
	if (MACHINE_HAS_VX) {
		elf_hwcap |= HWCAP_S390_VXRS;
		if (test_facility(134))
			elf_hwcap |= HWCAP_S390_VXRS_EXT;
		if (test_facility(135))
			elf_hwcap |= HWCAP_S390_VXRS_BCD;
		if (test_facility(148))
			elf_hwcap |= HWCAP_S390_VXRS_EXT2;
		if (test_facility(152))
			elf_hwcap |= HWCAP_S390_VXRS_PDE;
	}
	if (test_facility(150))
		elf_hwcap |= HWCAP_S390_SORT;
	if (test_facility(151))
		elf_hwcap |= HWCAP_S390_DFLT;

	/*
	 * Guarded storage support HWCAP_S390_GS is bit 12.
	 */
	if (MACHINE_HAS_GS)
		elf_hwcap |= HWCAP_S390_GS;

	get_cpu_id(&cpu_id);
	add_device_randomness(&cpu_id, sizeof(cpu_id));
	switch (cpu_id.machine) {
	case 0x2064:
	case 0x2066:
	default:	/* Use "z900" as default for 64 bit kernels. */
		strcpy(elf_platform, "z900");
		break;
	case 0x2084:
	case 0x2086:
		strcpy(elf_platform, "z990");
		break;
	case 0x2094:
	case 0x2096:
		strcpy(elf_platform, "z9-109");
		break;
	case 0x2097:
	case 0x2098:
		strcpy(elf_platform, "z10");
		break;
	case 0x2817:
	case 0x2818:
		strcpy(elf_platform, "z196");
		break;
	case 0x2827:
	case 0x2828:
		strcpy(elf_platform, "zEC12");
		break;
	case 0x2964:
	case 0x2965:
		strcpy(elf_platform, "z13");
		break;
	case 0x3906:
	case 0x3907:
		strcpy(elf_platform, "z14");
		break;
	case 0x8561:
	case 0x8562:
		strcpy(elf_platform, "z15");
		break;
	}

	/*
	 * Virtualization support HWCAP_INT_SIE is bit 0.
	 */
	if (sclp.has_sief2)
		int_hwcap |= HWCAP_INT_SIE;

	return 0;
}
arch_initcall(setup_hwcaps);

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
							    PAGE_SIZE);
	if (!vmms)
		panic("Failed to allocate memory for sysinfo structure\n");

	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free((unsigned long) vmms, PAGE_SIZE);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
	int task_size = sizeof(struct task_struct);

	if (!MACHINE_HAS_VX) {
		task_size -= sizeof(__vector128) * __NUM_VXRS;
		task_size += sizeof(freg_t) * __NUM_FPRS;
	}
	arch_task_struct_size = task_size;
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = (void *) early_ipl_comp_list_addr;
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	init_mm.brk = (unsigned long) _end;

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_task_size();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_above_ident_map();
	reserve_oldmem();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_mem_detect_info();
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_mem_detect_info();

	free_mem_detect_info();
	remove_oldmem();

	setup_uv();
	setup_ident_map_size();
	setup_memory();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (MACHINE_HAS_EDAT2)
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	check_initrd();
	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_cpus();
#endif

	setup_resources();
	setup_lowcore_dat_off();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
	setup_lowcore_dat_on();

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}