linux/init/main.c
<<
>>
Prefs
   1/*
   2 *  linux/init/main.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *
   6 *  GK 2/5/95  -  Changed to support mounting root fs via NFS
   7 *  Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
   8 *  Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
   9 *  Simplified starting of init:  Michael A. Griffith <grif@acm.org> 
  10 */
  11
  12#define DEBUG           /* Enable initcall_debug */
  13
  14#include <linux/types.h>
  15#include <linux/module.h>
  16#include <linux/proc_fs.h>
  17#include <linux/kernel.h>
  18#include <linux/syscalls.h>
  19#include <linux/stackprotector.h>
  20#include <linux/string.h>
  21#include <linux/ctype.h>
  22#include <linux/delay.h>
  23#include <linux/ioport.h>
  24#include <linux/init.h>
  25#include <linux/initrd.h>
  26#include <linux/bootmem.h>
  27#include <linux/acpi.h>
  28#include <linux/tty.h>
  29#include <linux/percpu.h>
  30#include <linux/kmod.h>
  31#include <linux/vmalloc.h>
  32#include <linux/kernel_stat.h>
  33#include <linux/start_kernel.h>
  34#include <linux/security.h>
  35#include <linux/smp.h>
  36#include <linux/profile.h>
  37#include <linux/rcupdate.h>
  38#include <linux/moduleparam.h>
  39#include <linux/kallsyms.h>
  40#include <linux/writeback.h>
  41#include <linux/cpu.h>
  42#include <linux/cpuset.h>
  43#include <linux/cgroup.h>
  44#include <linux/efi.h>
  45#include <linux/tick.h>
  46#include <linux/interrupt.h>
  47#include <linux/taskstats_kern.h>
  48#include <linux/delayacct.h>
  49#include <linux/unistd.h>
  50#include <linux/rmap.h>
  51#include <linux/mempolicy.h>
  52#include <linux/key.h>
  53#include <linux/buffer_head.h>
  54#include <linux/page_cgroup.h>
  55#include <linux/debug_locks.h>
  56#include <linux/debugobjects.h>
  57#include <linux/lockdep.h>
  58#include <linux/kmemleak.h>
  59#include <linux/pid_namespace.h>
  60#include <linux/device.h>
  61#include <linux/kthread.h>
  62#include <linux/sched.h>
  63#include <linux/signal.h>
  64#include <linux/idr.h>
  65#include <linux/kgdb.h>
  66#include <linux/ftrace.h>
  67#include <linux/async.h>
  68#include <linux/kmemcheck.h>
  69#include <linux/sfi.h>
  70#include <linux/shmem_fs.h>
  71#include <linux/slab.h>
  72#include <linux/perf_event.h>
  73#include <linux/file.h>
  74#include <linux/ptrace.h>
  75#include <linux/blkdev.h>
  76#include <linux/elevator.h>
  77#include <linux/sched_clock.h>
  78
  79#include <asm/io.h>
  80#include <asm/bugs.h>
  81#include <asm/setup.h>
  82#include <asm/sections.h>
  83#include <asm/cacheflush.h>
  84
  85#ifdef CONFIG_X86_LOCAL_APIC
  86#include <asm/smp.h>
  87#endif
  88
  89static int kernel_init(void *);
  90
  91extern void init_IRQ(void);
  92extern void fork_init(unsigned long);
  93extern void mca_init(void);
  94extern void sbus_init(void);
  95extern void radix_tree_init(void);
  96#ifndef CONFIG_DEBUG_RODATA
  97static inline void mark_rodata_ro(void) { }
  98#endif
  99
 100#ifdef CONFIG_TC
 101extern void tc_init(void);
 102#endif
 103
 104/*
 105 * Debug helper: via this flag we know that we are in 'early bootup code'
 106 * where only the boot processor is running with IRQ disabled.  This means
 107 * two things - IRQ must not be enabled before the flag is cleared and some
 108 * operations which are not allowed with IRQ disabled are allowed while the
 109 * flag is set.
 110 */
 111bool early_boot_irqs_disabled __read_mostly;
 112
 113enum system_states system_state __read_mostly;
 114EXPORT_SYMBOL(system_state);
 115
 116/*
 117 * Boot command-line arguments
 118 */
 119#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
 120#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
 121
 122extern void time_init(void);
 123/* Default late time init is NULL. archs can override this later. */
 124void (*__initdata late_time_init)(void);
 125extern void softirq_init(void);
 126
 127/* Untouched command line saved by arch-specific code. */
 128char __initdata boot_command_line[COMMAND_LINE_SIZE];
 129/* Untouched saved command line (eg. for /proc) */
 130char *saved_command_line;
 131/* Command line for parameter parsing */
 132static char *static_command_line;
 133
 134static char *execute_command;
 135static char *ramdisk_execute_command;
 136
 137/*
 138 * If set, this is an indication to the drivers that reset the underlying
 139 * device before going ahead with the initialization otherwise driver might
 140 * rely on the BIOS and skip the reset operation.
 141 *
 142 * This is useful if kernel is booting in an unreliable environment.
  143 * For ex. kdump situation where previous kernel has crashed, BIOS has been
 144 * skipped and devices will be in unknown state.
 145 */
 146unsigned int reset_devices;
 147EXPORT_SYMBOL(reset_devices);
 148
  /*
   * Boot-parameter handler for "reset_devices": raises the global
   * reset_devices flag. The argument string is not examined.
   *
   * Returns 1; obsolete_checksetup() treats a non-zero return from a
   * setup function as "option consumed".
   */
  149static int __init set_reset_devices(char *str)
  150{
  151        reset_devices = 1;
  152        return 1;
  153}
 154
 155__setup("reset_devices", set_reset_devices);
 156
 157static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 158const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 159static const char *panic_later, *panic_param;
 160
 161extern const struct obs_kernel_param __setup_start[], __setup_end[];
 162
 163static int __init obsolete_checksetup(char *line)
 164{
 165        const struct obs_kernel_param *p;
 166        int had_early_param = 0;
 167
 168        p = __setup_start;
 169        do {
 170                int n = strlen(p->str);
 171                if (parameqn(line, p->str, n)) {
 172                        if (p->early) {
 173                                /* Already done in parse_early_param?
 174                                 * (Needs exact match on param part).
 175                                 * Keep iterating, as we can have early
 176                                 * params and __setups of same names 8( */
 177                                if (line[n] == '\0' || line[n] == '=')
 178                                        had_early_param = 1;
 179                        } else if (!p->setup_func) {
 180                                pr_warn("Parameter %s is obsolete, ignored\n",
 181                                        p->str);
 182                                return 1;
 183                        } else if (p->setup_func(line + n))
 184                                return 1;
 185                }
 186                p++;
 187        } while (p < __setup_end);
 188
 189        return had_early_param;
 190}
 191
 192/*
 193 * This should be approx 2 Bo*oMips to start (note initial shift), and will
 194 * still work even if initially too large, it will just take slightly longer
 195 */
 196unsigned long loops_per_jiffy = (1<<12);
 197
 198EXPORT_SYMBOL(loops_per_jiffy);
 199
  /*
   * Early-parameter handler for "debug": sets console_loglevel to 10.
   * The argument is unused. Returns 0, so do_early_param() does not
   * report the option as malformed.
   */
  200static int __init debug_kernel(char *str)
  201{
  202        console_loglevel = 10;
  203        return 0;
  204}
 205
  /*
   * Early-parameter handler for "quiet": sets console_loglevel to 4.
   * The argument is unused. Returns 0, so do_early_param() does not
   * report the option as malformed.
   */
  206static int __init quiet_kernel(char *str)
  207{
  208        console_loglevel = 4;
  209        return 0;
  210}
 211
 212early_param("debug", debug_kernel);
 213early_param("quiet", quiet_kernel);
 214
 215static int __init loglevel(char *str)
 216{
 217        int newlevel;
 218
 219        /*
 220         * Only update loglevel value when a correct setting was passed,
 221         * to prevent blind crashes (when loglevel being set to 0) that
 222         * are quite hard to debug
 223         */
 224        if (get_option(&str, &newlevel)) {
 225                console_loglevel = newlevel;
 226                return 0;
 227        }
 228
 229        return -EINVAL;
 230}
 231
 232early_param("loglevel", loglevel);
 233
 234/* Change NUL term back to "=", to make "param" the whole string. */
 235static int __init repair_env_string(char *param, char *val, const char *unused)
 236{
 237        if (val) {
 238                /* param=val or param="val"? */
 239                if (val == param+strlen(param)+1)
 240                        val[-1] = '=';
 241                else if (val == param+strlen(param)+2) {
 242                        val[-2] = '=';
 243                        memmove(val-1, val, strlen(val)+1);
 244                        val--;
 245                } else
 246                        BUG();
 247        }
 248        return 0;
 249}
 250
  251/*
  252 * Unknown boot options get handed to init, unless they look like
  253 * unused parameters (modprobe will find them in /proc/cmdline).
      *
      * Options that carry a value are stored in envp_init[]; bare words are
      * appended to argv_init[]. When a table has reached its limit the
      * failure is only recorded in panic_later/panic_param; start_kernel()
      * panics with that message after console_init(). Always returns 0.
  254 */
  255static int __init unknown_bootoption(char *param, char *val, const char *unused)
  256{
             /* Rejoin param and val into a single "param=val" string. */
  257        repair_env_string(param, val, unused);
  258
  259        /* Handle obsolete-style parameters */
  260        if (obsolete_checksetup(param))
  261                return 0;
  262
  263        /* Unused module parameter. */
             /* (a '.' that appears in the name, i.e. before the value) */
  264        if (strchr(param, '.') && (!val || strchr(param, '.') < val))
  265                return 0;
  266
             /* An overflow has already been recorded: drop further options. */
  267        if (panic_later)
  268                return 0;
  269
  270        if (val) {
  271                /* Environment option */
  272                unsigned int i;
                     /*
                      * Find an existing entry whose first (val - param)
                      * characters match, or the first free slot.
                      *
                      * NOTE(review): repair_env_string() receives val by
                      * value, so for the quoted form (param="val") val is
                      * not moved back here and (val - param) covers one
                      * character of the value as well - confirm this is
                      * intended.
                      */
  273                for (i = 0; envp_init[i]; i++) {
  274                        if (i == MAX_INIT_ENVS) {
  275                                panic_later = "Too many boot env vars at `%s'";
  276                                panic_param = param;
  277                        }
  278                        if (!strncmp(param, envp_init[i], val - param))
  279                                break;
  280                }
                     /* Replace the matching entry or fill the free slot. */
  281                envp_init[i] = param;
  282        } else {
  283                /* Command line option */
  284                unsigned int i;
                     /* Walk to the first free argv slot, noting an overflow. */
  285                for (i = 0; argv_init[i]; i++) {
  286                        if (i == MAX_INIT_ARGS) {
  287                                panic_later = "Too many boot init vars at `%s'";
  288                                panic_param = param;
  289                        }
  290                }
  291                argv_init[i] = param;
  292        }
  293        return 0;
  294}
 295
 296static int __init init_setup(char *str)
 297{
 298        unsigned int i;
 299
 300        execute_command = str;
 301        /*
 302         * In case LILO is going to boot us with default command line,
 303         * it prepends "auto" before the whole cmdline which makes
 304         * the shell think it should execute a script with such name.
 305         * So we ignore all arguments entered _before_ init=... [MJ]
 306         */
 307        for (i = 1; i < MAX_INIT_ARGS; i++)
 308                argv_init[i] = NULL;
 309        return 1;
 310}
 311__setup("init=", init_setup);
 312
 313static int __init rdinit_setup(char *str)
 314{
 315        unsigned int i;
 316
 317        ramdisk_execute_command = str;
 318        /* See "auto" comment in init_setup */
 319        for (i = 1; i < MAX_INIT_ARGS; i++)
 320                argv_init[i] = NULL;
 321        return 1;
 322}
 323__setup("rdinit=", rdinit_setup);
 324
  /*
   * Definitions used when CONFIG_SMP is not set: setup_max_cpus is fixed at
   * NR_CPUS, smp_init() either calls APIC_init_uniprocessor() (with
   * CONFIG_X86_LOCAL_APIC) or expands to nothing, and setup_nr_cpu_ids() /
   * smp_prepare_cpus() are empty inline functions.
   */
  325#ifndef CONFIG_SMP
  326static const unsigned int setup_max_cpus = NR_CPUS;
  327#ifdef CONFIG_X86_LOCAL_APIC
  328static void __init smp_init(void)
  329{
  330        APIC_init_uniprocessor();
  331}
  332#else
  333#define smp_init()      do { } while (0)
  334#endif
  335
  336static inline void setup_nr_cpu_ids(void) { }
  337static inline void smp_prepare_cpus(unsigned int maxcpus) { }
  338#endif
 339
 340/*
 341 * We need to store the untouched command line for future reference.
 342 * We also need to store the touched command line since the parameter
 343 * parsing is performed in place, and we should allow a component to
 344 * store reference of name/value for future reference.
 345 */
 346static void __init setup_command_line(char *command_line)
 347{
 348        saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
 349        static_command_line = alloc_bootmem(strlen (command_line)+1);
 350        strcpy (saved_command_line, boot_command_line);
 351        strcpy (static_command_line, command_line);
 352}
 353
 354/*
 355 * We need to finalize in a non-__init function or else race conditions
 356 * between the root thread and the init thread may cause start_kernel to
 357 * be reaped by free_initmem before the root thread has proceeded to
 358 * cpu_idle.
 359 *
 360 * gcc-3.4 accidentally inlines this function, so use noinline.
 361 */
 362
 363static __initdata DECLARE_COMPLETION(kthreadd_done);
 364
  /*
   * Last step of start_kernel(): creates the kernel_init thread and
   * kthreadd, publishes kthreadd_task, signals kthreadd_done, and then
   * enters cpu_startup_entry() on the calling thread.
   */
  365static noinline void __init_refok rest_init(void)
  366{
  367        int pid;
  368
  369        rcu_scheduler_starting();
  370        /*
  371         * We need to spawn init first so that it obtains pid 1, however
  372         * the init task will end up wanting to create kthreads, which, if
  373         * we schedule it before we create kthreadd, will OOPS.
  374         */
  375        kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
  376        numa_default_policy();
  377        pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
             /* Resolve kthreadd's pid to its task under the RCU read lock. */
  378        rcu_read_lock();
  379        kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
  380        rcu_read_unlock();
             /* kernel_init_freeable() waits for this completion. */
  381        complete(&kthreadd_done);
  382
  383        /*
  384         * The boot idle thread must execute schedule()
  385         * at least once to get things moving:
  386         */
  387        init_idle_bootup_task(current);
  388        schedule_preempt_disabled();
  389        /* Call into cpu_idle with preempt disabled */
  390        cpu_startup_entry(CPUHP_ONLINE);
  391}
 392
 393/* Check for early params. */
 394static int __init do_early_param(char *param, char *val, const char *unused)
 395{
 396        const struct obs_kernel_param *p;
 397
 398        for (p = __setup_start; p < __setup_end; p++) {
 399                if ((p->early && parameq(param, p->str)) ||
 400                    (strcmp(param, "console") == 0 &&
 401                     strcmp(p->str, "earlycon") == 0)
 402                ) {
 403                        if (p->setup_func(val) != 0)
 404                                pr_warn("Malformed early option '%s'\n", param);
 405                }
 406        }
 407        /* We accept everything at this stage. */
 408        return 0;
 409}
 410
  /*
   * Runs parse_args() over @cmdline with no parameter table (NULL, 0), so
   * that every option is passed to do_early_param(). @cmdline is handed to
   * parse_args() as a non-const buffer.
   */
  411void __init parse_early_options(char *cmdline)
  412{
  413        parse_args("early options", cmdline, NULL, 0, 0, 0, do_early_param);
  414}
 415
 416/* Arch code calls this early on, or if not, just before other parsing. */
 417void __init parse_early_param(void)
 418{
 419        static __initdata int done = 0;
 420        static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
 421
 422        if (done)
 423                return;
 424
 425        /* All fall through to do_early_param. */
 426        strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
 427        parse_early_options(tmp_cmdline);
 428        done = 1;
 429}
 430
 431/*
 432 *      Activate the first processor.
 433 */
 434
  /*
   * Marks the CPU reported by smp_processor_id() as online, active,
   * present and possible.
   */
  435static void __init boot_cpu_init(void)
  436{
  437        int cpu = smp_processor_id();
  438        /* Mark the boot cpu "present", "online" etc for SMP and UP case */
  439        set_cpu_online(cpu, true);
  440        set_cpu_active(cpu, true);
  441        set_cpu_present(cpu, true);
  442        set_cpu_possible(cpu, true);
  443}
 444
  /*
   * Empty default, declared __weak so that another definition can replace
   * it at link time. Called near the top of start_kernel().
   */
  445void __init __weak smp_setup_processor_id(void)
  446{
  447}
 448
  /*
   * Empty __weak default, only compiled when THREAD_SIZE >= PAGE_SIZE.
   * NOTE(review): start_kernel() calls thread_info_cache_init()
   * unconditionally, so the other case is presumably provided by a header -
   * confirm.
   */
  449# if THREAD_SIZE >= PAGE_SIZE
  450void __init __weak thread_info_cache_init(void)
  451{
  452}
  453#endif
 454
  455/*
  456 * Set up kernel memory allocators
      *
      * Called from start_kernel() after the large bootmem users
      * (setup_log_buf(), pidhash_init(), vfs_caches_init_early()) have run;
      * the comment there requires them to precede kmem_cache_init().
  457 */
  458static void __init mm_init(void)
  459{
  460        /*
  461         * page_cgroup requires contiguous pages,
  462         * bigger than MAX_ORDER unless SPARSEMEM.
  463         */
  464        page_cgroup_init_flatmem();
  465        mem_init();
  466        kmem_cache_init();
  467        percpu_init_late();
  468        pgtable_cache_init();
  469        vmalloc_init();
  470}
 471
  /*
   * Runs the boot-time initialisation sequence and finishes by calling
   * rest_init().
   *
   * The order of the calls matters (see the inline comments). Interrupts
   * are disabled at local_irq_disable() below and stay disabled, with
   * early_boot_irqs_disabled set, until local_irq_enable() after
   * call_function_init().
   */
  472asmlinkage void __init start_kernel(void)
  473{
  474        char * command_line;
  475        extern const struct kernel_param __start___param[], __stop___param[];
  476
  477        /*
  478         * Need to run as early as possible, to initialize the
  479         * lockdep hash:
  480         */
  481        lockdep_init();
  482        smp_setup_processor_id();
  483        debug_objects_early_init();
  484
  485        /*
  486         * Set up the initial canary ASAP:
  487         */
  488        boot_init_stack_canary();
  489
  490        cgroup_init_early();
  491
  492        local_irq_disable();
  493        early_boot_irqs_disabled = true;
  494
  495/*
  496 * Interrupts are still disabled. Do necessary setups, then
  497 * enable them
  498 */
  499        boot_cpu_init();
  500        page_address_init();
  501        pr_notice("%s", linux_banner);
             /* setup_arch() supplies the command line used for parsing below. */
  502        setup_arch(&command_line);
  503        mm_init_owner(&init_mm, &init_task);
  504        mm_init_cpumask(&init_mm);
  505        setup_command_line(command_line);
  506        setup_nr_cpu_ids();
  507        setup_per_cpu_areas();
  508        smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  509
  510        build_all_zonelists(NULL, NULL);
  511        page_alloc_init();
  512
  513        pr_notice("Kernel command line: %s\n", boot_command_line);
  514        parse_early_param();
             /* Options matching no kernel_param go to unknown_bootoption(). */
  515        parse_args("Booting kernel", static_command_line, __start___param,
  516                   __stop___param - __start___param,
  517                   -1, -1, &unknown_bootoption);
  518
  519        jump_label_init();
  520
  521        /*
  522         * These use large bootmem allocations and must precede
  523         * kmem_cache_init()
  524         */
  525        setup_log_buf(0);
  526        pidhash_init();
  527        vfs_caches_init_early();
  528        sort_main_extable();
  529        trap_init();
  530        mm_init();
  531
  532        /*
  533         * Set up the scheduler prior starting any interrupts (such as the
  534         * timer interrupt). Full topology setup happens at smp_init()
  535         * time - but meanwhile we still have a functioning scheduler.
  536         */
  537        sched_init();
  538        /*
  539         * Disable preemption - early bootup scheduling is extremely
  540         * fragile until we cpu_idle() for the first time.
  541         */
  542        preempt_disable();
  543        if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
  544                local_irq_disable();
  545        idr_init_cache();
  546        rcu_init();
  547        tick_nohz_init();
  548        radix_tree_init();
  549        /* init some links before init_ISA_irqs() */
  550        early_irq_init();
  551        init_IRQ();
  552        tick_init();
  553        init_timers();
  554        hrtimers_init();
  555        softirq_init();
  556        timekeeping_init();
  557        time_init();
  558        sched_clock_postinit();
  559        perf_event_init();
  560        profile_init();
  561        call_function_init();
  562        WARN(!irqs_disabled(), "Interrupts were enabled early\n");
  563        early_boot_irqs_disabled = false;
  564        local_irq_enable();
  565
  566        kmem_cache_init_late();
  567
  568        /*
  569         * HACK ALERT! This is early. We're enabling the console before
  570         * we've done PCI setups etc, and console_init() must be aware of
  571         * this. But we do want output early, in case something goes wrong.
  572         */
  573        console_init();
             /* Overflow recorded by unknown_bootoption(); report it now. */
  574        if (panic_later)
  575                panic(panic_later, panic_param);
  576
  577        lockdep_info();
  578
  579        /*
  580         * Need to run this when irqs are enabled, because it wants
  581         * to self-test [hard/soft]-irqs on/off lock inversion bugs
  582         * too:
  583         */
  584        locking_selftest();
  585
  586#ifdef CONFIG_BLK_DEV_INITRD
             /* Drop an initrd whose first page lies below min_low_pfn. */
  587        if (initrd_start && !initrd_below_start_ok &&
  588            page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
  589                pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
  590                    page_to_pfn(virt_to_page((void *)initrd_start)),
  591                    min_low_pfn);
  592                initrd_start = 0;
  593        }
  594#endif
  595        page_cgroup_init();
  596        debug_objects_mem_init();
  597        kmemleak_init();
  598        setup_per_cpu_pageset();
  599        numa_policy_init();
             /* late_time_init is NULL unless something has set it. */
  600        if (late_time_init)
  601                late_time_init();
  602        sched_clock_init();
  603        calibrate_delay();
  604        pidmap_init();
  605        anon_vma_init();
  606#ifdef CONFIG_X86
  607        if (efi_enabled(EFI_RUNTIME_SERVICES))
  608                efi_enter_virtual_mode();
  609#endif
  610        thread_info_cache_init();
  611        cred_init();
  612        fork_init(totalram_pages);
  613        proc_caches_init();
  614        buffer_init();
  615        key_init();
  616        security_init();
  617        dbg_late_init();
  618        vfs_caches_init(totalram_pages);
  619        signals_init();
  620        /* rootfs populating might need page-writeback */
  621        page_writeback_init();
  622#ifdef CONFIG_PROC_FS
  623        proc_root_init();
  624#endif
  625        cgroup_init();
  626        cpuset_init();
  627        taskstats_init_early();
  628        delayacct_init();
  629
  630        check_bugs();
  631
  632        acpi_early_init(); /* before LAPIC and SMP init */
  633        sfi_init_late();
  634
             /*
              * NOTE(review): unlike the efi_enter_virtual_mode() call above,
              * this EFI block is not under #ifdef CONFIG_X86 - presumably
              * the helpers have stubs for other configurations; confirm.
              */
  635        if (efi_enabled(EFI_RUNTIME_SERVICES)) {
  636                efi_late_init();
  637                efi_free_boot_services();
  638        }
  639
  640        ftrace_init();
  641
  642        /* Do the rest non-__init'ed, we're now alive */
  643        rest_init();
  644}
 645
 646/* Call all constructor functions linked into the kernel. */
 647static void __init do_ctors(void)
 648{
 649#ifdef CONFIG_CONSTRUCTORS
 650        ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
 651
 652        for (; fn < (ctor_fn_t *) __ctors_end; fn++)
 653                (*fn)();
 654#endif
 655}
 656
 657bool initcall_debug;
 658core_param(initcall_debug, initcall_debug, bool, 0644);
 659
  /*
   * Runs @fn with timing: logs the call via pr_debug(), takes ktime_get()
   * before and after fn(), logs the return value together with the elapsed
   * time, and returns fn()'s result.
   */
  660static int __init_or_module do_one_initcall_debug(initcall_t fn)
  661{
  662        ktime_t calltime, delta, rettime;
  663        unsigned long long duration;
  664        int ret;
  665
  666        pr_debug("calling  %pF @ %i\n", fn, task_pid_nr(current));
  667        calltime = ktime_get();
  668        ret = fn();
  669        rettime = ktime_get();
  670        delta = ktime_sub(rettime, calltime);
             /*
              * NOTE(review): ns >> 10 divides by 1024, so the figure logged
              * as "usecs" is an approximation; duration is unsigned but is
              * printed with %lld.
              */
  671        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
  672        pr_debug("initcall %pF returned %d after %lld usecs\n",
  673                 fn, ret, duration);
  674
  675        return ret;
  676}
 677
  /*
   * Runs one initcall and repairs the state it may have left behind.
   *
   * @fn is called directly, or through do_one_initcall_debug() when
   * initcall_debug is set. Afterwards a changed preempt count is restored
   * to its value from before the call and disabled interrupts are
   * re-enabled; either condition triggers a WARN naming @fn.
   *
   * Returns the value returned by @fn.
   */
  678int __init_or_module do_one_initcall(initcall_t fn)
  679{
  680        int count = preempt_count();
  681        int ret;
  682        char msgbuf[64];
  683
  684        if (initcall_debug)
  685                ret = do_one_initcall_debug(fn);
  686        else
  687                ret = fn();
  688
             /* msgbuf stays empty unless a problem is detected below. */
  689        msgbuf[0] = 0;
  690
  691        if (preempt_count() != count) {
  692                sprintf(msgbuf, "preemption imbalance ");
  693                preempt_count() = count;
  694        }
  695        if (irqs_disabled()) {
  696                strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
  697                local_irq_enable();
  698        }
  699        WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
  700
  701        return ret;
  702}
 703
 704
 705extern initcall_t __initcall_start[];
 706extern initcall_t __initcall0_start[];
 707extern initcall_t __initcall1_start[];
 708extern initcall_t __initcall2_start[];
 709extern initcall_t __initcall3_start[];
 710extern initcall_t __initcall4_start[];
 711extern initcall_t __initcall5_start[];
 712extern initcall_t __initcall6_start[];
 713extern initcall_t __initcall7_start[];
 714extern initcall_t __initcall_end[];
 715
  /*
   * Start of each initcall level, followed by __initcall_end. The extra
   * last entry lets do_initcall_level() iterate level i as the range
   * [initcall_levels[i], initcall_levels[i + 1]).
   */
  716static initcall_t *initcall_levels[] __initdata = {
  717        __initcall0_start,
  718        __initcall1_start,
  719        __initcall2_start,
  720        __initcall3_start,
  721        __initcall4_start,
  722        __initcall5_start,
  723        __initcall6_start,
  724        __initcall7_start,
  725        __initcall_end,
  726};
 727
  728/* Keep these in sync with initcalls in include/linux/init.h */
  /*
   * Indexed by level; do_initcall_level() passes the entry to parse_args()
   * as the name of the parsing pass.
   */
  729static char *initcall_level_names[] __initdata = {
  730        "early",
  731        "core",
  732        "postcore",
  733        "arch",
  734        "subsys",
  735        "fs",
  736        "device",
  737        "late",
  738};
 739
  /*
   * Runs one initcall level: first re-parses the command line for the
   * parameters of exactly this level (min and max level are both @level),
   * then calls every initcall between initcall_levels[level] and
   * initcall_levels[level + 1].
   */
  740static void __init do_initcall_level(int level)
  741{
  742        extern const struct kernel_param __start___param[], __stop___param[];
  743        initcall_t *fn;
  744
             /*
              * Parsing modifies the buffer in place, so start from a fresh
              * copy of the saved command line each time.
              *
              * NOTE(review): static_command_line was sized from command_line
              * in setup_command_line(), while saved_command_line is a copy
              * of boot_command_line; this strcpy() assumes the former is at
              * least as long - confirm.
              */
  745        strcpy(static_command_line, saved_command_line);
  746        parse_args(initcall_level_names[level],
  747                   static_command_line, __start___param,
  748                   __stop___param - __start___param,
  749                   level, level,
  750                   &repair_env_string);
  751
  752        for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
  753                do_one_initcall(*fn);
  754}
 755
 756static void __init do_initcalls(void)
 757{
 758        int level;
 759
 760        for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
 761                do_initcall_level(level);
 762}
 763
  764/*
  765 * Ok, the machine is now initialized. None of the devices
  766 * have been touched yet, but the CPU subsystem is up and
  767 * running, and memory and process management works.
  768 *
  769 * Now we can finally start doing some real work..
      *
      * Called from kernel_init_freeable() after smp_init() and
      * sched_init_smp(); the initcalls are run last.
  770 */
  771static void __init do_basic_setup(void)
  772{
  773        cpuset_init_smp();
  774        usermodehelper_init();
  775        shmem_init();
  776        driver_init();
  777        init_irq_proc();
  778        do_ctors();
  779        usermodehelper_enable();
  780        do_initcalls();
  781}
 782
 783static void __init do_pre_smp_initcalls(void)
 784{
 785        initcall_t *fn;
 786
 787        for (fn = __initcall_start; fn < __initcall0_start; fn++)
 788                do_one_initcall(*fn);
 789}
 790
  791/*
  792 * This function requests modules which should be loaded by default and is
  793 * called twice right after initrd is mounted and right before init is
  794 * exec'd.  If such modules are on either initrd or rootfs, they will be
  795 * loaded before control is passed to userland.
      *
      * At present the only request made is load_default_elevator_module().
  796 */
  797void __init load_default_modules(void)
  798{
  799        load_default_elevator_module();
  800}
 801
  /*
   * Tries to exec @init_filename as the init program: stores it in
   * argv_init[0] and calls do_execve() with argv_init and envp_init.
   *
   * Returns do_execve()'s result; kernel_init() treats 0 as success.
   */
  802static int run_init_process(const char *init_filename)
  803{
  804        argv_init[0] = init_filename;
  805        return do_execve(init_filename,
  806                (const char __user *const __user *)argv_init,
  807                (const char __user *const __user *)envp_init);
  808}
 809
 810static noinline void __init kernel_init_freeable(void);
 811
  /*
   * Thread function started by rest_init() via kernel_thread().
   *
   * Finishes boot-time setup in kernel_init_freeable(), releases init
   * memory, switches system_state to SYSTEM_RUNNING and then tries to exec
   * an init program: ramdisk_execute_command, then execute_command, then
   * the built-in list of defaults.
   *
   * Returns 0 as soon as one run_init_process() call succeeds; panics if
   * none does.
   */
  812static int __ref kernel_init(void *unused)
  813{
  814        kernel_init_freeable();
  815        /* need to finish all async __init code before freeing the memory */
  816        async_synchronize_full();
  817        free_initmem();
  818        mark_rodata_ro();
  819        system_state = SYSTEM_RUNNING;
  820        numa_default_policy();
  821
  822        flush_delayed_fput();
  823
             /* Set by rdinit=, or defaulted to "/init" if that file is accessible. */
  824        if (ramdisk_execute_command) {
  825                if (!run_init_process(ramdisk_execute_command))
  826                        return 0;
  827                pr_err("Failed to execute %s\n", ramdisk_execute_command);
  828        }
  829
  830        /*
  831         * We try each of these until one succeeds.
  832         *
  833         * The Bourne shell can be used instead of init if we are
  834         * trying to recover a really broken machine.
  835         */
  836        if (execute_command) {
  837                if (!run_init_process(execute_command))
  838                        return 0;
  839                pr_err("Failed to execute %s.  Attempting defaults...\n",
  840                        execute_command);
  841        }
  842        if (!run_init_process("/sbin/init") ||
  843            !run_init_process("/etc/init") ||
  844            !run_init_process("/bin/init") ||
  845            !run_init_process("/bin/sh"))
  846                return 0;
  847
  848        panic("No init found.  Try passing init= option to kernel. "
  849              "See Linux Documentation/init.txt for guidance.");
  850}
 851
  /*
   * __init part of kernel_init(): waits for kthreadd, lifts the allocation
   * and placement restrictions on the init task, brings up SMP, runs the
   * initcalls via do_basic_setup(), opens the console on descriptors 0-2
   * and decides whether an early-userspace init will be used.
   */
  852static noinline void __init kernel_init_freeable(void)
  853{
  854        /*
  855         * Wait until kthreadd is all set-up.
  856         */
  857        wait_for_completion(&kthreadd_done);
  858
  859        /* Now the scheduler is fully set up and can do blocking allocations */
  860        gfp_allowed_mask = __GFP_BITS_MASK;
  861
  862        /*
  863         * init can allocate pages on any node
  864         */
  865        set_mems_allowed(node_states[N_MEMORY]);
  866        /*
  867         * init can run on any cpu.
  868         */
  869        set_cpus_allowed_ptr(current, cpu_all_mask);
  870
  871        cad_pid = task_pid(current);
  872
  873        smp_prepare_cpus(setup_max_cpus);
  874
  875        do_pre_smp_initcalls();
  876        lockup_detector_init();
  877
  878        smp_init();
  879        sched_init_smp();
  880
  881        do_basic_setup();
  882
  883        /* Open the /dev/console on the rootfs, this should never fail */
  884        if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
  885                pr_err("Warning: unable to open an initial console.\n");
  886
             /* Duplicate descriptor 0 twice (results are ignored). */
  887        (void) sys_dup(0);
  888        (void) sys_dup(0);
  889        /*
  890         * check if there is an early userspace init.  If yes, let it do all
  891         * the work
  892         */
  893
  894        if (!ramdisk_execute_command)
  895                ramdisk_execute_command = "/init";
  896
             /* Not accessible: forget it and call prepare_namespace() instead. */
  897        if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
  898                ramdisk_execute_command = NULL;
  899                prepare_namespace();
  900        }
  901
  902        /*
  903         * Ok, we have completed the initial bootup, and
  904         * we're essentially up and running. Get rid of the
  905         * initmem segments and start the user-mode stuff..
  906         */
  907
  908        /* rootfs is available now, try loading default modules */
  909        load_default_modules();
  910}
 911