linux/arch/s390/kernel/setup.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-contiguous.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>

#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];

unsigned long int_hwcap = 0;

#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
#endif

int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect);

struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
unsigned long __bootdata_preserved(__swsusp_reset_dma);
unsigned long __bootdata_preserved(__stext_dma);
unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * This is set up by the setup routine at boot time. For S390 we
 * need to find out what we have to set up, using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

static int __init condev_setup(char *str)
{
        int vdev;

        vdev = simple_strtoul(str, &str, 0);
        if (vdev >= 0 && vdev < 65536) {
                console_devno = vdev;
                console_irq = -1;
        }
        return 1;
}

__setup("condev=", condev_setup);

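/*
 * Register the preferred console matching the detected console mode.
 * The 3215 and SCLP line-mode consoles both register as ttyS0, the
 * SCLP VT220 console as ttyS1.
 */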
static void __init set_preferred_console(void)
{
        if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
                add_preferred_console("ttyS", 0, NULL);
        else if (CONSOLE_IS_3270)
                add_preferred_console("tty3270", 0, NULL);
        else if (CONSOLE_IS_VT220)
                add_preferred_console("ttyS", 1, NULL);
        else if (CONSOLE_IS_HVC)
                add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
        if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
                SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
        if (strncmp(str, "3215", 5) == 0)
                SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
        if (strncmp(str, "3270", 5) == 0)
                SET_CONSOLE_3270;
#endif
        set_preferred_console();
        return 1;
}

__setup("conmode=", conmode_setup);

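/*
 * Pick a default console mode when no conmode= parameter was given:
 * under z/VM the CP console settings are queried, under KVM the SCLP
 * facilities decide, and on everything else (LPAR included) the SCLP
 * console is used.
 */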
static void __init conmode_default(void)
{
        char query_buffer[1024];
        char *ptr;

        if (MACHINE_IS_VM) {
                cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
                console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
                ptr = strstr(query_buffer, "SUBCHANNEL =");
                console_irq = simple_strtoul(ptr + 13, NULL, 16);
                cpcmd("QUERY TERM", query_buffer, 1024, NULL);
                ptr = strstr(query_buffer, "CONMODE");
                /*
                 * Set the conmode to 3215 so that the device recognition
                 * will set the cu_type of the console to 3215. If the
                 * conmode is 3270 and we don't set it back then both
                 * 3215 and the 3270 driver will try to access the console
                 * device (3215 as console and 3270 as normal tty).
                 */
                cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
                if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                        return;
                }
                if (strncmp(ptr + 8, "3270", 4) == 0) {
#if defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                } else if (strncmp(ptr + 8, "3215", 4) == 0) {
#if defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                }
        } else if (MACHINE_IS_KVM) {
                if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
                        SET_CONSOLE_VT220;
                else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
                        SET_CONSOLE_SCLP;
                else
                        SET_CONSOLE_HVC;
        } else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                SET_CONSOLE_SCLP;
#endif
        }
        if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
                conswitchp = &dummy_con;
}

#ifdef CONFIG_CRASH_DUMP
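/*
 * When the kernel was IPLed to create an FCP (zfcp) dump, ignore all CIO
 * devices except the IPL and console devices and quiet down the console.
 */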
static void __init setup_zfcpdump(void)
{
        if (ipl_info.type != IPL_TYPE_FCP_DUMP)
                return;
        if (OLDMEM_BASE)
                return;
        strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
        console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
        if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_restart(command);
}

void machine_halt(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_halt();
}

void machine_power_off(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

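/*
 * vmalloc=<size>: override the size of the vmalloc area. The value is
 * kept in VMALLOC_END until setup_memory_end() computes the final
 * address space layout.
 */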
static int __init parse_vmalloc(char *arg)
{
        if (!arg)
                return -EINVAL;
        VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
        return 0;
}
early_param("vmalloc", parse_vmalloc);

void *restart_stack __section(.data);

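/*
 * Allocate a THREAD_SIZE stack: from the vmalloc area if CONFIG_VMAP_STACK
 * is enabled, otherwise from directly mapped contiguous pages.
 */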
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
        return (unsigned long)
                __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
                                     VMALLOC_START, VMALLOC_END,
                                     THREADINFO_GFP,
                                     PAGE_KERNEL, 0, NUMA_NO_NODE,
                                     __builtin_return_address(0));
#else
        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
        vfree((void *) stack);
#else
        free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

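/*
 * Set up the boot CPU's async (interrupt) stack. This runs before the
 * vmalloc infrastructure is usable, so the stack is taken from directly
 * mapped pages and replaced later by async_stack_realloc().
 */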
int __init arch_early_irq_init(void)
{
        unsigned long stack;

        stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
        if (!stack)
                panic("Couldn't allocate async stack");
        S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
        return 0;
}

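/*
 * Once the VM subsystem is up, replace the early async stack with one
 * from stack_alloc(), so that it is vmalloc-backed when
 * CONFIG_VMAP_STACK is enabled.
 */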
static int __init async_stack_realloc(void)
{
        unsigned long old, new;

        old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
        new = stack_alloc();
        if (!new)
                panic("Couldn't allocate async stack");
        S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
        free_pages(old, THREAD_SIZE_ORDER);
        return 0;
}
early_initcall(async_stack_realloc);

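/*
 * Continue booting on a properly allocated kernel stack: allocate it,
 * install it in the lowcore and the init task, then branch to
 * rest_init() on the new stack.
 */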
void __init arch_call_rest_init(void)
{
        struct stack_frame *frame;
        unsigned long stack;

        stack = stack_alloc();
        if (!stack)
                panic("Couldn't allocate kernel stack");
        current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
        current->stack_vm_area = (void *) stack;
#endif
        set_task_stack_end_magic(current);
        stack += STACK_INIT_OFFSET;
        S390_lowcore.kernel_stack = stack;
        frame = (struct stack_frame *) stack;
        memset(frame, 0, sizeof(*frame));
        /* Branch to rest_init on the new stack, never returns */
        asm volatile(
                "       la      15,0(%[_frame])\n"
                "       jg      rest_init\n"
                : : [_frame] "a" (frame));
}

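/*
 * Set up the boot CPU lowcore while DAT is still off: interrupt and
 * restart PSWs, stacks and timer state, plus the restart data that is
 * also copied to the absolute zero lowcore.
 */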
static void __init setup_lowcore_dat_off(void)
{
        struct lowcore *lc;

        /*
         * Setup lowcore for boot cpu
         */
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
        lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
        if (!lc)
                panic("%s: Failed to allocate %zu bytes align=%zx\n",
                      __func__, sizeof(*lc), sizeof(*lc));

        lc->restart_psw.mask = PSW_KERNEL_BITS;
        lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
        lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = clock_comparator_max;
        lc->nodat_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
        lc->current_task = (unsigned long)&init_task;
        lc->lpp = LPP_MAGIC;
        lc->machine_flags = S390_lowcore.machine_flags;
        lc->preempt_count = S390_lowcore.preempt_count;
        lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
        memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
               sizeof(lc->stfle_fac_list));
        memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
               sizeof(lc->alt_stfle_fac_list));
        nmi_alloc_boot_cpu(lc);
        vdso_alloc_boot_cpu(lc);
        lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
        lc->async_enter_timer = S390_lowcore.async_enter_timer;
        lc->exit_timer = S390_lowcore.exit_timer;
        lc->user_timer = S390_lowcore.user_timer;
        lc->system_timer = S390_lowcore.system_timer;
        lc->steal_timer = S390_lowcore.steal_timer;
        lc->last_update_timer = S390_lowcore.last_update_timer;
        lc->last_update_clock = S390_lowcore.last_update_clock;

        /*
         * Allocate the global restart stack which is the same for
         * all CPUs in case *one* of them does a PSW restart.
         */
        restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
        if (!restart_stack)
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, THREAD_SIZE, THREAD_SIZE);
        restart_stack += STACK_INIT_OFFSET;

        /*
         * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
         * restart data to the absolute zero lowcore. This is necessary if
         * PSW restart is done on an offline CPU that has lowcore zero.
         */
        lc->restart_stack = (unsigned long) restart_stack;
        lc->restart_fn = (unsigned long) do_restart;
        lc->restart_data = 0;
        lc->restart_source = -1UL;

        /* Setup absolute zero lowcore */
        mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
        mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
        mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
        mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
        mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

        lc->spinlock_lockval = arch_spin_lockval(0);
        lc->spinlock_index = 0;
        arch_spin_lock_setup(0);
        lc->br_r1_trampoline = 0x07f1;  /* br %r1 */

        set_prefix((u32)(unsigned long) lc);
        lowcore_ptr[0] = lc;
}

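/*
 * Once the kernel page tables exist, allow the external, svc,
 * program-check and I/O new PSWs to run with DAT enabled. Low-address
 * protection (control register 0 bit 28) is dropped while the lowcore
 * is written.
 */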
static void __init setup_lowcore_dat_on(void)
{
        __ctl_clear_bit(0, 28);
        S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
        S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
        __ctl_set_bit(0, 28);
}

static struct resource code_resource = {
        .name  = "Kernel code",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
        .name = "Kernel data",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
        .name = "Kernel bss",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
        &code_resource,
        &data_resource,
        &bss_resource,
};

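/*
 * Register a "System RAM" resource for every memblock region and nest
 * the kernel code/data/bss resources inside them, splitting resources
 * that span more than one region.
 */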
static void __init setup_resources(void)
{
        struct resource *res, *std_res, *sub_res;
        struct memblock_region *reg;
        int j;

        code_resource.start = (unsigned long) _text;
        code_resource.end = (unsigned long) _etext - 1;
        data_resource.start = (unsigned long) _etext;
        data_resource.end = (unsigned long) _edata - 1;
        bss_resource.start = (unsigned long) __bss_start;
        bss_resource.end = (unsigned long) __bss_stop - 1;

        for_each_memblock(memory, reg) {
                res = memblock_alloc(sizeof(*res), 8);
                if (!res)
                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                              __func__, sizeof(*res), 8);
                res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

                res->name = "System RAM";
                res->start = reg->base;
                res->end = reg->base + reg->size - 1;
                request_resource(&iomem_resource, res);

                for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
                        std_res = standard_resources[j];
                        if (std_res->start < res->start ||
                            std_res->start > res->end)
                                continue;
                        if (std_res->end > res->end) {
                                sub_res = memblock_alloc(sizeof(*sub_res), 8);
                                if (!sub_res)
                                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                                              __func__, sizeof(*sub_res), 8);
                                *sub_res = *std_res;
                                sub_res->end = res->end;
                                std_res->start = res->end + 1;
                                request_resource(res, sub_res);
                        } else {
                                request_resource(res, std_res);
                        }
                }
        }
#ifdef CONFIG_CRASH_DUMP
        /*
         * Re-add removed crash kernel memory as reserved memory. This makes
         * sure it will be mapped with the identity mapping and struct pages
         * will be created, so it can be resized later on.
         * However add it later since the crash kernel resource should not be
         * part of the System RAM resource.
         */
        if (crashk_res.end) {
                memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
                memblock_reserve(crashk_res.start, resource_size(&crashk_res));
                insert_resource(&iomem_resource, &crashk_res);
        }
#endif
}

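/*
 * Compute the kernel address space layout: pick 3- or 4-level page
 * tables, place the module and vmalloc areas below the top of the
 * address space, and limit memory_end so that the identity mapping,
 * vmemmap array and vmalloc area all fit.
 */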
static void __init setup_memory_end(void)
{
        unsigned long vmax, vmalloc_size, tmp;

        /* Choose kernel address space layout: 3 or 4 levels. */
        vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
        if (IS_ENABLED(CONFIG_KASAN)) {
                vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
                           ? _REGION1_SIZE
                           : _REGION2_SIZE;
        } else {
                tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
                tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
                if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
                        vmax = _REGION2_SIZE; /* 3-level kernel page table */
                else
                        vmax = _REGION1_SIZE; /* 4-level kernel page table */
        }

        /* module area is at the end of the kernel address space. */
        MODULES_END = vmax;
        MODULES_VADDR = MODULES_END - MODULES_LEN;
        VMALLOC_END = MODULES_VADDR;
        VMALLOC_START = VMALLOC_END - vmalloc_size;

        /* Split remaining virtual space between 1:1 mapping & vmemmap array */
        tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
        /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
        tmp = SECTION_ALIGN_UP(tmp);
        tmp = VMALLOC_START - tmp * sizeof(struct page);
        tmp &= ~((vmax >> 11) - 1);     /* align to page table level */
        tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
        vmemmap = (struct page *) tmp;

        /* Take care that memory_end is set and <= vmemmap */
        memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN
        /* fit in kasan shadow memory region between 1:1 and vmemmap */
        memory_end = min(memory_end, KASAN_SHADOW_START);
        vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif
        max_pfn = max_low_pfn = PFN_DOWN(memory_end);
        memblock_remove(memory_end, ULONG_MAX);

        pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from
 * the area [0 - crashkernel memory size] and
 * [crashk_res.start - crashk_res.end] is set offline.
 */
static int kdump_mem_notifier(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        struct memory_notify *arg = data;

        if (action != MEM_GOING_OFFLINE)
                return NOTIFY_OK;
        if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
                return NOTIFY_BAD;
        if (arg->start_pfn > PFN_DOWN(crashk_res.end))
                return NOTIFY_OK;
        if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
                return NOTIFY_OK;
        return NOTIFY_BAD;
}

static struct notifier_block kdump_mem_nb = {
        .notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that the area behind memory_end is protected
 */
static void reserve_memory_end(void)
{
        if (memory_end_set)
                memblock_reserve(memory_end, ULONG_MAX);
}

/*
 * Make sure that oldmem, where the dump is stored, is protected
 */
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
        if (OLDMEM_BASE)
                /* Forget all memory above the running kdump system */
                memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
#endif
}

/*
 * Remove all memory above the running kdump system from the memblock lists
 */
static void remove_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
        if (OLDMEM_BASE)
                /* Forget all memory above the running kdump system */
                memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
        unsigned long long crash_base, crash_size;
        phys_addr_t low, high;
        int rc;

        rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
                               &crash_base);

        crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
        crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
        if (rc || crash_size == 0)
                return;

        if (memblock.memory.regions[0].size < crash_size) {
                pr_info("crashkernel reservation failed: %s\n",
                        "first memory chunk must be at least crashkernel size");
                return;
        }

        low = crash_base ?: OLDMEM_BASE;
        high = low + crash_size;
        if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
                /* The crashkernel fits into OLDMEM, reuse OLDMEM */
                crash_base = low;
        } else {
                /* Find suitable area in free memory */
                low = max_t(unsigned long, crash_size, sclp.hsa_size);
                high = crash_base ? crash_base + crash_size : ULONG_MAX;

                if (crash_base && crash_base < low) {
                        pr_info("crashkernel reservation failed: %s\n",
                                "crash_base too low");
                        return;
                }
                low = crash_base ?: low;
                crash_base = memblock_find_in_range(low, high, crash_size,
                                                    KEXEC_CRASH_MEM_ALIGN);
        }

        if (!crash_base) {
                pr_info("crashkernel reservation failed: %s\n",
                        "no suitable area found");
                return;
        }

        if (register_memory_notifier(&kdump_mem_nb))
                return;

        if (!OLDMEM_BASE && MACHINE_IS_VM)
                diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
        memblock_remove(crash_base, crash_size);
        pr_info("Reserving %lluMB of memory at %lluMB "
                "for crashkernel (System RAM: %luMB)\n",
                crash_size >> 20, crash_base >> 20,
                (unsigned long)memblock.memory.total_size >> 20);
        os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
        if (!INITRD_START || !INITRD_SIZE)
                return;
        initrd_start = INITRD_START;
        initrd_end = initrd_start + INITRD_SIZE;
        memblock_reserve(INITRD_START, INITRD_SIZE);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
        if (ipl_cert_list_addr)
                memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_mem_detect_info(void)
{
        unsigned long start, size;

        get_mem_detect_reserved(&start, &size);
        if (size)
                memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
        unsigned long start, size;

        get_mem_detect_reserved(&start, &size);
        if (size)
                memblock_free(start, size);
}

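/*
 * Add a range to both the "memory" and "physmem" memblock types. The
 * physmem list keeps a record of all physically present memory, even
 * ranges that are later removed from "memory".
 */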
static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
{
        memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
                     start, start + size - 1);
        memblock_add_range(&memblock.memory, start, size, 0, 0);
        memblock_add_range(&memblock.physmem, start, size, 0, 0);
}

static const char * __init get_mem_info_source(void)
{
        switch (mem_detect.info_source) {
        case MEM_DETECT_SCLP_STOR_INFO:
                return "sclp storage info";
        case MEM_DETECT_DIAG260:
                return "diag260";
        case MEM_DETECT_SCLP_READ_INFO:
                return "sclp read info";
        case MEM_DETECT_BIN_SEARCH:
                return "binary search";
        }
        return "none";
}

static void __init memblock_add_mem_detect_info(void)
{
        unsigned long start, end;
        int i;

        memblock_dbg("physmem info source: %s (%hhd)\n",
                     get_mem_info_source(), mem_detect.info_source);
        /* keep memblock lists close to the kernel */
        memblock_set_bottom_up(true);
        for_each_mem_detect_block(i, &start, &end)
                memblock_physmem_add(start, end - start);
        memblock_set_bottom_up(false);
        memblock_dump_all();
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
        if (INITRD_START && INITRD_SIZE &&
            !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
                pr_err("The initial RAM disk does not fit into the memory\n");
                memblock_free(INITRD_START, INITRD_SIZE);
                initrd_start = initrd_end = 0;
        }
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
        unsigned long start_pfn = PFN_UP(__pa(_end));

        memblock_reserve(0, HEAD_END);
        memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
                         - (unsigned long)_stext);
        memblock_reserve(__sdma, __edma - __sdma);
}

static void __init setup_memory(void)
{
        struct memblock_region *reg;

        /*
         * Init storage key for present memory
         */
        for_each_memblock(memory, reg) {
                storage_key_init_range(reg->base, reg->base + reg->size);
        }
        psw_set_key(PAGE_DEFAULT_KEY);

        /* Only cosmetics */
        memblock_enforce_memory_limit(memblock_end_of_DRAM());
}

/*
 * Setup hardware capabilities.
 */
static int __init setup_hwcaps(void)
{
        static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
        struct cpuid cpu_id;
        int i;

        /*
         * The store facility list bit numbers as found in the principles
         * of operation are numbered with bit 1UL<<31 as number 0 to
         * bit 1UL<<0 as number 31.
         *   Bit 0: instructions named N3, "backported" to esa-mode
         *   Bit 2: z/Architecture mode is active
         *   Bit 7: the store-facility-list-extended facility is installed
         *   Bit 17: the message-security assist is installed
         *   Bit 19: the long-displacement facility is installed
         *   Bit 21: the extended-immediate facility is installed
         *   Bit 22: extended-translation facility 3 is installed
         *   Bit 30: extended-translation facility 3 enhancement facility
         * These get translated to:
         *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
         *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
         *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
         *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
         */
        for (i = 0; i < 6; i++)
                if (test_facility(stfl_bits[i]))
                        elf_hwcap |= 1UL << i;

        if (test_facility(22) && test_facility(30))
                elf_hwcap |= HWCAP_S390_ETF3EH;

        /*
         * Check for additional facilities with store-facility-list-extended.
         * stfle stores doublewords (8 bytes) with bit 1ULL<<63 as bit 0
         * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
         * as stored by stfl, bits 32-xxx contain additional facilities.
         * How many facility words are stored depends on the number of
         * doublewords passed to the instruction. The additional facilities
         * are:
         *   Bit 42: decimal floating point facility is installed
         *   Bit 44: perform floating point operation facility is installed
         * translated to:
         *   HWCAP_S390_DFP bit 6 (42 && 44).
         */
        if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
                elf_hwcap |= HWCAP_S390_DFP;

        /*
         * Huge page support HWCAP_S390_HPAGE is bit 7.
         */
        if (MACHINE_HAS_EDAT1)
                elf_hwcap |= HWCAP_S390_HPAGE;

        /*
         * 64-bit register support for 31-bit processes
         * HWCAP_S390_HIGH_GPRS is bit 9.
         */
        elf_hwcap |= HWCAP_S390_HIGH_GPRS;

        /*
         * Transactional execution support HWCAP_S390_TE is bit 10.
         */
        if (MACHINE_HAS_TE)
                elf_hwcap |= HWCAP_S390_TE;

        /*
         * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
         * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
         * instead of facility bit 129.
         */
        if (MACHINE_HAS_VX) {
                elf_hwcap |= HWCAP_S390_VXRS;
                if (test_facility(134))
                        elf_hwcap |= HWCAP_S390_VXRS_EXT;
                if (test_facility(135))
                        elf_hwcap |= HWCAP_S390_VXRS_BCD;
                if (test_facility(148))
                        elf_hwcap |= HWCAP_S390_VXRS_EXT2;
                if (test_facility(152))
                        elf_hwcap |= HWCAP_S390_VXRS_PDE;
        }
        if (test_facility(150))
                elf_hwcap |= HWCAP_S390_SORT;
        if (test_facility(151))
                elf_hwcap |= HWCAP_S390_DFLT;

        /*
         * Guarded storage support HWCAP_S390_GS is bit 12.
         */
        if (MACHINE_HAS_GS)
                elf_hwcap |= HWCAP_S390_GS;

        get_cpu_id(&cpu_id);
        add_device_randomness(&cpu_id, sizeof(cpu_id));
        switch (cpu_id.machine) {
        case 0x2064:
        case 0x2066:
        default:        /* Use "z900" as default for 64 bit kernels. */
                strcpy(elf_platform, "z900");
                break;
        case 0x2084:
        case 0x2086:
                strcpy(elf_platform, "z990");
                break;
        case 0x2094:
        case 0x2096:
                strcpy(elf_platform, "z9-109");
                break;
        case 0x2097:
        case 0x2098:
                strcpy(elf_platform, "z10");
                break;
        case 0x2817:
        case 0x2818:
                strcpy(elf_platform, "z196");
                break;
        case 0x2827:
        case 0x2828:
                strcpy(elf_platform, "zEC12");
                break;
        case 0x2964:
        case 0x2965:
                strcpy(elf_platform, "z13");
                break;
        case 0x3906:
        case 0x3907:
                strcpy(elf_platform, "z14");
                break;
        }

        /*
         * Virtualization support HWCAP_INT_SIE is bit 0.
         */
        if (sclp.has_sief2)
                int_hwcap |= HWCAP_INT_SIE;

        return 0;
}
arch_initcall(setup_hwcaps);

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
        struct sysinfo_3_2_2 *vmms;

        vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
                                                            PAGE_SIZE);
        if (!vmms)
                panic("Failed to allocate memory for sysinfo structure\n");

        if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
                add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
        memblock_free((unsigned long) vmms, PAGE_SIZE);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
        int task_size = sizeof(struct task_struct);

        if (!MACHINE_HAS_VX) {
                task_size -= sizeof(__vector128) * __NUM_VXRS;
                task_size += sizeof(freg_t) * __NUM_FPRS;
        }
        arch_task_struct_size = task_size;
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
        union diag318_info diag318_info = {
                .cpnc = CPNC_LINUX,
                .cpvc_linux = 0,
                .cpvc_distro = {0},
        };

        if (!sclp.has_diag318)
                return;

        diag_stat_inc(DIAG_STAT_X318);
        asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
        struct ipl_rb_component_entry *ptr, *end;
        char *str;

        if (!early_ipl_comp_list_addr)
                return;
        if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
                pr_info("Linux is running with Secure-IPL enabled\n");
        else
                pr_info("Linux is running with Secure-IPL disabled\n");
        ptr = (void *) early_ipl_comp_list_addr;
        end = (void *) ptr + early_ipl_comp_list_size;
        pr_info("The IPL report contains the following components:\n");
        while (ptr < end) {
                if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
                        if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
                                str = "signed, verified";
                        else
                                str = "signed, verification failed";
                } else {
                        str = "not signed";
                }
                pr_info("%016llx - %016llx (%s)\n",
                        ptr->addr, ptr->addr + ptr->len, str);
                ptr++;
        }
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
        /*
         * print what head.S has found out about the machine
         */
        if (MACHINE_IS_VM)
                pr_info("Linux is running as a z/VM "
                        "guest operating system in 64-bit mode\n");
        else if (MACHINE_IS_KVM)
                pr_info("Linux is running under KVM in 64-bit mode\n");
        else if (MACHINE_IS_LPAR)
                pr_info("Linux is running natively in 64-bit mode\n");
        else
                pr_info("Linux is running as a guest in 64-bit mode\n");

        log_component_list();

        /* Have one command line that is parsed and saved in /proc/cmdline */
        /* boot_command_line has already been set up in early.c */
        *cmdline_p = boot_command_line;

        ROOT_DEV = Root_RAM0;

        init_mm.start_code = (unsigned long) _text;
        init_mm.end_code = (unsigned long) _etext;
        init_mm.end_data = (unsigned long) _edata;
        init_mm.brk = (unsigned long) _end;

        if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
                nospec_auto_detect();

        parse_early_param();
#ifdef CONFIG_CRASH_DUMP
        /* Deactivate elfcorehdr= kernel parameter */
        elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

        os_info_init();
        setup_ipl();
        setup_task_size();
        setup_control_program_code();

        /* Do some memory reservations *before* memory is added to memblock */
        reserve_memory_end();
        reserve_oldmem();
        reserve_kernel();
        reserve_initrd();
        reserve_certificate_list();
        reserve_mem_detect_info();
        memblock_allow_resize();

        /* Get information about *all* installed memory */
        memblock_add_mem_detect_info();

        free_mem_detect_info();
        remove_oldmem();

        /*
         * Make sure all chunks are MAX_ORDER aligned so we don't need the
         * extra checks that HOLES_IN_ZONE would require.
         *
         * Is this still required?
         */
        memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));

        setup_memory_end();
        setup_memory();
        dma_contiguous_reserve(memory_end);
        vmcp_cma_reserve();

        check_initrd();
        reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
        /*
         * Be aware that smp_save_dump_cpus() triggers a system reset.
         * Therefore CPU and device initialization should be done afterwards.
         */
        smp_save_dump_cpus();
#endif

        setup_resources();
        setup_lowcore_dat_off();
        smp_fill_possible_mask();
        cpu_detect_mhz_feature();
        cpu_init();
        numa_setup();
        smp_detect_cpus();
        topology_init_early();

        /*
         * Create kernel page tables and switch to virtual addressing.
         */
        paging_init();

        /*
         * After paging_init created the kernel page table, the new PSWs
         * in lowcore can now run with DAT enabled.
         */
        setup_lowcore_dat_on();

        /* Setup default console */
        conmode_default();
        set_preferred_console();

        apply_alternative_instructions();
        if (IS_ENABLED(CONFIG_EXPOLINE))
                nospec_init_branches();

        /* Setup zfcpdump support */
        setup_zfcpdump();

        /* Add system specific data to the random pool */
        setup_randomness();
}