linux/init/initramfs.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/init.h>
   3#include <linux/async.h>
   4#include <linux/fs.h>
   5#include <linux/slab.h>
   6#include <linux/types.h>
   7#include <linux/fcntl.h>
   8#include <linux/delay.h>
   9#include <linux/string.h>
  10#include <linux/dirent.h>
  11#include <linux/syscalls.h>
  12#include <linux/utime.h>
  13#include <linux/file.h>
  14#include <linux/memblock.h>
  15#include <linux/mm.h>
  16#include <linux/namei.h>
  17#include <linux/init_syscalls.h>
  18
  19static ssize_t __init xwrite(struct file *file, const char *p, size_t count,
  20                loff_t *pos)
  21{
  22        ssize_t out = 0;
  23
  24        /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
  25        while (count) {
  26                ssize_t rv = kernel_write(file, p, count, pos);
  27
  28                if (rv < 0) {
  29                        if (rv == -EINTR || rv == -EAGAIN)
  30                                continue;
  31                        return out ? out : rv;
  32                } else if (rv == 0)
  33                        break;
  34
  35                p += rv;
  36                out += rv;
  37                count -= rv;
  38        }
  39
  40        return out;
  41}
  42
  43static __initdata char *message;
  44static void __init error(char *x)
  45{
  46        if (!message)
  47                message = x;
  48}
  49
  50static void panic_show_mem(const char *fmt, ...)
  51{
  52        va_list args;
  53
  54        show_mem(0, NULL);
  55        va_start(args, fmt);
  56        panic(fmt, args);
  57        va_end(args);
  58}
  59
  60/* link hash */
  61
  62#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
  63
  64static __initdata struct hash {
  65        int ino, minor, major;
  66        umode_t mode;
  67        struct hash *next;
  68        char name[N_ALIGN(PATH_MAX)];
  69} *head[32];
  70
  71static inline int hash(int major, int minor, int ino)
  72{
  73        unsigned long tmp = ino + minor + (major << 3);
  74        tmp += tmp >> 5;
  75        return tmp & 31;
  76}
  77
  78static char __init *find_link(int major, int minor, int ino,
  79                              umode_t mode, char *name)
  80{
  81        struct hash **p, *q;
  82        for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
  83                if ((*p)->ino != ino)
  84                        continue;
  85                if ((*p)->minor != minor)
  86                        continue;
  87                if ((*p)->major != major)
  88                        continue;
  89                if (((*p)->mode ^ mode) & S_IFMT)
  90                        continue;
  91                return (*p)->name;
  92        }
  93        q = kmalloc(sizeof(struct hash), GFP_KERNEL);
  94        if (!q)
  95                panic_show_mem("can't allocate link hash entry");
  96        q->major = major;
  97        q->minor = minor;
  98        q->ino = ino;
  99        q->mode = mode;
 100        strcpy(q->name, name);
 101        q->next = NULL;
 102        *p = q;
 103        return NULL;
 104}
 105
 106static void __init free_hash(void)
 107{
 108        struct hash **p, *q;
 109        for (p = head; p < head + 32; p++) {
 110                while (*p) {
 111                        q = *p;
 112                        *p = q->next;
 113                        kfree(q);
 114                }
 115        }
 116}
 117
 118static long __init do_utime(char *filename, time64_t mtime)
 119{
 120        struct timespec64 t[2];
 121
 122        t[0].tv_sec = mtime;
 123        t[0].tv_nsec = 0;
 124        t[1].tv_sec = mtime;
 125        t[1].tv_nsec = 0;
 126        return init_utimes(filename, t);
 127}
 128
 129static __initdata LIST_HEAD(dir_list);
 130struct dir_entry {
 131        struct list_head list;
 132        char *name;
 133        time64_t mtime;
 134};
 135
 136static void __init dir_add(const char *name, time64_t mtime)
 137{
 138        struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
 139        if (!de)
 140                panic_show_mem("can't allocate dir_entry buffer");
 141        INIT_LIST_HEAD(&de->list);
 142        de->name = kstrdup(name, GFP_KERNEL);
 143        de->mtime = mtime;
 144        list_add(&de->list, &dir_list);
 145}
 146
 147static void __init dir_utime(void)
 148{
 149        struct dir_entry *de, *tmp;
 150        list_for_each_entry_safe(de, tmp, &dir_list, list) {
 151                list_del(&de->list);
 152                do_utime(de->name, de->mtime);
 153                kfree(de->name);
 154                kfree(de);
 155        }
 156}
 157
 158static __initdata time64_t mtime;
 159
 160/* cpio header parsing */
 161
 162static __initdata unsigned long ino, major, minor, nlink;
 163static __initdata umode_t mode;
 164static __initdata unsigned long body_len, name_len;
 165static __initdata uid_t uid;
 166static __initdata gid_t gid;
 167static __initdata unsigned rdev;
 168
 169static void __init parse_header(char *s)
 170{
 171        unsigned long parsed[12];
 172        char buf[9];
 173        int i;
 174
 175        buf[8] = '\0';
 176        for (i = 0, s += 6; i < 12; i++, s += 8) {
 177                memcpy(buf, s, 8);
 178                parsed[i] = simple_strtoul(buf, NULL, 16);
 179        }
 180        ino = parsed[0];
 181        mode = parsed[1];
 182        uid = parsed[2];
 183        gid = parsed[3];
 184        nlink = parsed[4];
 185        mtime = parsed[5]; /* breaks in y2106 */
 186        body_len = parsed[6];
 187        major = parsed[7];
 188        minor = parsed[8];
 189        rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
 190        name_len = parsed[11];
 191}
 192
 193/* FSM */
 194
 195static __initdata enum state {
 196        Start,
 197        Collect,
 198        GotHeader,
 199        SkipIt,
 200        GotName,
 201        CopyFile,
 202        GotSymlink,
 203        Reset
 204} state, next_state;
 205
 206static __initdata char *victim;
 207static unsigned long byte_count __initdata;
 208static __initdata loff_t this_header, next_header;
 209
 210static inline void __init eat(unsigned n)
 211{
 212        victim += n;
 213        this_header += n;
 214        byte_count -= n;
 215}
 216
 217static __initdata char *collected;
 218static long remains __initdata;
 219static __initdata char *collect;
 220
 221static void __init read_into(char *buf, unsigned size, enum state next)
 222{
 223        if (byte_count >= size) {
 224                collected = victim;
 225                eat(size);
 226                state = next;
 227        } else {
 228                collect = collected = buf;
 229                remains = size;
 230                next_state = next;
 231                state = Collect;
 232        }
 233}
 234
 235static __initdata char *header_buf, *symlink_buf, *name_buf;
 236
 237static int __init do_start(void)
 238{
 239        read_into(header_buf, 110, GotHeader);
 240        return 0;
 241}
 242
 243static int __init do_collect(void)
 244{
 245        unsigned long n = remains;
 246        if (byte_count < n)
 247                n = byte_count;
 248        memcpy(collect, victim, n);
 249        eat(n);
 250        collect += n;
 251        if ((remains -= n) != 0)
 252                return 1;
 253        state = next_state;
 254        return 0;
 255}
 256
 257static int __init do_header(void)
 258{
 259        if (memcmp(collected, "070707", 6)==0) {
 260                error("incorrect cpio method used: use -H newc option");
 261                return 1;
 262        }
 263        if (memcmp(collected, "070701", 6)) {
 264                error("no cpio magic");
 265                return 1;
 266        }
 267        parse_header(collected);
 268        next_header = this_header + N_ALIGN(name_len) + body_len;
 269        next_header = (next_header + 3) & ~3;
 270        state = SkipIt;
 271        if (name_len <= 0 || name_len > PATH_MAX)
 272                return 0;
 273        if (S_ISLNK(mode)) {
 274                if (body_len > PATH_MAX)
 275                        return 0;
 276                collect = collected = symlink_buf;
 277                remains = N_ALIGN(name_len) + body_len;
 278                next_state = GotSymlink;
 279                state = Collect;
 280                return 0;
 281        }
 282        if (S_ISREG(mode) || !body_len)
 283                read_into(name_buf, N_ALIGN(name_len), GotName);
 284        return 0;
 285}
 286
 287static int __init do_skip(void)
 288{
 289        if (this_header + byte_count < next_header) {
 290                eat(byte_count);
 291                return 1;
 292        } else {
 293                eat(next_header - this_header);
 294                state = next_state;
 295                return 0;
 296        }
 297}
 298
 299static int __init do_reset(void)
 300{
 301        while (byte_count && *victim == '\0')
 302                eat(1);
 303        if (byte_count && (this_header & 3))
 304                error("broken padding");
 305        return 1;
 306}
 307
 308static void __init clean_path(char *path, umode_t fmode)
 309{
 310        struct kstat st;
 311
 312        if (!init_stat(path, &st, AT_SYMLINK_NOFOLLOW) &&
 313            (st.mode ^ fmode) & S_IFMT) {
 314                if (S_ISDIR(st.mode))
 315                        init_rmdir(path);
 316                else
 317                        init_unlink(path);
 318        }
 319}
 320
 321static int __init maybe_link(void)
 322{
 323        if (nlink >= 2) {
 324                char *old = find_link(major, minor, ino, mode, collected);
 325                if (old) {
 326                        clean_path(collected, 0);
 327                        return (init_link(old, collected) < 0) ? -1 : 1;
 328                }
 329        }
 330        return 0;
 331}
 332
 333static __initdata struct file *wfile;
 334static __initdata loff_t wfile_pos;
 335
 336static int __init do_name(void)
 337{
 338        state = SkipIt;
 339        next_state = Reset;
 340        if (strcmp(collected, "TRAILER!!!") == 0) {
 341                free_hash();
 342                return 0;
 343        }
 344        clean_path(collected, mode);
 345        if (S_ISREG(mode)) {
 346                int ml = maybe_link();
 347                if (ml >= 0) {
 348                        int openflags = O_WRONLY|O_CREAT;
 349                        if (ml != 1)
 350                                openflags |= O_TRUNC;
 351                        wfile = filp_open(collected, openflags, mode);
 352                        if (IS_ERR(wfile))
 353                                return 0;
 354                        wfile_pos = 0;
 355
 356                        vfs_fchown(wfile, uid, gid);
 357                        vfs_fchmod(wfile, mode);
 358                        if (body_len)
 359                                vfs_truncate(&wfile->f_path, body_len);
 360                        state = CopyFile;
 361                }
 362        } else if (S_ISDIR(mode)) {
 363                init_mkdir(collected, mode);
 364                init_chown(collected, uid, gid, 0);
 365                init_chmod(collected, mode);
 366                dir_add(collected, mtime);
 367        } else if (S_ISBLK(mode) || S_ISCHR(mode) ||
 368                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
 369                if (maybe_link() == 0) {
 370                        init_mknod(collected, mode, rdev);
 371                        init_chown(collected, uid, gid, 0);
 372                        init_chmod(collected, mode);
 373                        do_utime(collected, mtime);
 374                }
 375        }
 376        return 0;
 377}
 378
 379static int __init do_copy(void)
 380{
 381        if (byte_count >= body_len) {
 382                struct timespec64 t[2] = { };
 383                if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
 384                        error("write error");
 385
 386                t[0].tv_sec = mtime;
 387                t[1].tv_sec = mtime;
 388                vfs_utimes(&wfile->f_path, t);
 389
 390                fput(wfile);
 391                eat(body_len);
 392                state = SkipIt;
 393                return 0;
 394        } else {
 395                if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count)
 396                        error("write error");
 397                body_len -= byte_count;
 398                eat(byte_count);
 399                return 1;
 400        }
 401}
 402
 403static int __init do_symlink(void)
 404{
 405        collected[N_ALIGN(name_len) + body_len] = '\0';
 406        clean_path(collected, 0);
 407        init_symlink(collected + N_ALIGN(name_len), collected);
 408        init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
 409        do_utime(collected, mtime);
 410        state = SkipIt;
 411        next_state = Reset;
 412        return 0;
 413}
 414
 415static __initdata int (*actions[])(void) = {
 416        [Start]         = do_start,
 417        [Collect]       = do_collect,
 418        [GotHeader]     = do_header,
 419        [SkipIt]        = do_skip,
 420        [GotName]       = do_name,
 421        [CopyFile]      = do_copy,
 422        [GotSymlink]    = do_symlink,
 423        [Reset]         = do_reset,
 424};
 425
 426static long __init write_buffer(char *buf, unsigned long len)
 427{
 428        byte_count = len;
 429        victim = buf;
 430
 431        while (!actions[state]())
 432                ;
 433        return len - byte_count;
 434}
 435
 436static long __init flush_buffer(void *bufv, unsigned long len)
 437{
 438        char *buf = (char *) bufv;
 439        long written;
 440        long origLen = len;
 441        if (message)
 442                return -1;
 443        while ((written = write_buffer(buf, len)) < len && !message) {
 444                char c = buf[written];
 445                if (c == '0') {
 446                        buf += written;
 447                        len -= written;
 448                        state = Start;
 449                } else if (c == 0) {
 450                        buf += written;
 451                        len -= written;
 452                        state = Reset;
 453                } else
 454                        error("junk within compressed archive");
 455        }
 456        return origLen;
 457}
 458
 459static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
 460
 461#include <linux/decompress/generic.h>
 462
 463static char * __init unpack_to_rootfs(char *buf, unsigned long len)
 464{
 465        long written;
 466        decompress_fn decompress;
 467        const char *compress_name;
 468        static __initdata char msg_buf[64];
 469
 470        header_buf = kmalloc(110, GFP_KERNEL);
 471        symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
 472        name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
 473
 474        if (!header_buf || !symlink_buf || !name_buf)
 475                panic_show_mem("can't allocate buffers");
 476
 477        state = Start;
 478        this_header = 0;
 479        message = NULL;
 480        while (!message && len) {
 481                loff_t saved_offset = this_header;
 482                if (*buf == '0' && !(this_header & 3)) {
 483                        state = Start;
 484                        written = write_buffer(buf, len);
 485                        buf += written;
 486                        len -= written;
 487                        continue;
 488                }
 489                if (!*buf) {
 490                        buf++;
 491                        len--;
 492                        this_header++;
 493                        continue;
 494                }
 495                this_header = 0;
 496                decompress = decompress_method(buf, len, &compress_name);
 497                pr_debug("Detected %s compressed data\n", compress_name);
 498                if (decompress) {
 499                        int res = decompress(buf, len, NULL, flush_buffer, NULL,
 500                                   &my_inptr, error);
 501                        if (res)
 502                                error("decompressor failed");
 503                } else if (compress_name) {
 504                        if (!message) {
 505                                snprintf(msg_buf, sizeof msg_buf,
 506                                         "compression method %s not configured",
 507                                         compress_name);
 508                                message = msg_buf;
 509                        }
 510                } else
 511                        error("invalid magic at start of compressed archive");
 512                if (state != Reset)
 513                        error("junk at the end of compressed archive");
 514                this_header = saved_offset + my_inptr;
 515                buf += my_inptr;
 516                len -= my_inptr;
 517        }
 518        dir_utime();
 519        kfree(name_buf);
 520        kfree(symlink_buf);
 521        kfree(header_buf);
 522        return message;
 523}
 524
 525static int __initdata do_retain_initrd;
 526
 527static int __init retain_initrd_param(char *str)
 528{
 529        if (*str)
 530                return 0;
 531        do_retain_initrd = 1;
 532        return 1;
 533}
 534__setup("retain_initrd", retain_initrd_param);
 535
 536#ifdef CONFIG_ARCH_HAS_KEEPINITRD
 537static int __init keepinitrd_setup(char *__unused)
 538{
 539        do_retain_initrd = 1;
 540        return 1;
 541}
 542__setup("keepinitrd", keepinitrd_setup);
 543#endif
 544
 545static bool __initdata initramfs_async = true;
 546static int __init initramfs_async_setup(char *str)
 547{
 548        strtobool(str, &initramfs_async);
 549        return 1;
 550}
 551__setup("initramfs_async=", initramfs_async_setup);
 552
 553extern char __initramfs_start[];
 554extern unsigned long __initramfs_size;
 555#include <linux/initrd.h>
 556#include <linux/kexec.h>
 557
 558void __init reserve_initrd_mem(void)
 559{
 560        phys_addr_t start;
 561        unsigned long size;
 562
 563        /* Ignore the virtul address computed during device tree parsing */
 564        initrd_start = initrd_end = 0;
 565
 566        if (!phys_initrd_size)
 567                return;
 568        /*
 569         * Round the memory region to page boundaries as per free_initrd_mem()
 570         * This allows us to detect whether the pages overlapping the initrd
 571         * are in use, but more importantly, reserves the entire set of pages
 572         * as we don't want these pages allocated for other purposes.
 573         */
 574        start = round_down(phys_initrd_start, PAGE_SIZE);
 575        size = phys_initrd_size + (phys_initrd_start - start);
 576        size = round_up(size, PAGE_SIZE);
 577
 578        if (!memblock_is_region_memory(start, size)) {
 579                pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
 580                       (u64)start, size);
 581                goto disable;
 582        }
 583
 584        if (memblock_is_region_reserved(start, size)) {
 585                pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
 586                       (u64)start, size);
 587                goto disable;
 588        }
 589
 590        memblock_reserve(start, size);
 591        /* Now convert initrd to virtual addresses */
 592        initrd_start = (unsigned long)__va(phys_initrd_start);
 593        initrd_end = initrd_start + phys_initrd_size;
 594        initrd_below_start_ok = 1;
 595
 596        return;
 597disable:
 598        pr_cont(" - disabling initrd\n");
 599        initrd_start = 0;
 600        initrd_end = 0;
 601}
 602
 603void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 604{
 605#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
 606        unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
 607        unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
 608
 609        memblock_free(__pa(aligned_start), aligned_end - aligned_start);
 610#endif
 611
 612        free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
 613                        "initrd");
 614}
 615
 616#ifdef CONFIG_KEXEC_CORE
 617static bool __init kexec_free_initrd(void)
 618{
 619        unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
 620        unsigned long crashk_end   = (unsigned long)__va(crashk_res.end);
 621
 622        /*
 623         * If the initrd region is overlapped with crashkernel reserved region,
 624         * free only memory that is not part of crashkernel region.
 625         */
 626        if (initrd_start >= crashk_end || initrd_end <= crashk_start)
 627                return false;
 628
 629        /*
 630         * Initialize initrd memory region since the kexec boot does not do.
 631         */
 632        memset((void *)initrd_start, 0, initrd_end - initrd_start);
 633        if (initrd_start < crashk_start)
 634                free_initrd_mem(initrd_start, crashk_start);
 635        if (initrd_end > crashk_end)
 636                free_initrd_mem(crashk_end, initrd_end);
 637        return true;
 638}
 639#else
 640static inline bool kexec_free_initrd(void)
 641{
 642        return false;
 643}
 644#endif /* CONFIG_KEXEC_CORE */
 645
 646#ifdef CONFIG_BLK_DEV_RAM
 647static void __init populate_initrd_image(char *err)
 648{
 649        ssize_t written;
 650        struct file *file;
 651        loff_t pos = 0;
 652
 653        unpack_to_rootfs(__initramfs_start, __initramfs_size);
 654
 655        printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
 656                        err);
 657        file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
 658        if (IS_ERR(file))
 659                return;
 660
 661        written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start,
 662                        &pos);
 663        if (written != initrd_end - initrd_start)
 664                pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
 665                       written, initrd_end - initrd_start);
 666        fput(file);
 667}
 668#endif /* CONFIG_BLK_DEV_RAM */
 669
 670static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
 671{
 672        /* Load the built in initramfs */
 673        char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
 674        if (err)
 675                panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
 676
 677        if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
 678                goto done;
 679
 680        if (IS_ENABLED(CONFIG_BLK_DEV_RAM))
 681                printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
 682        else
 683                printk(KERN_INFO "Unpacking initramfs...\n");
 684
 685        err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
 686        if (err) {
 687#ifdef CONFIG_BLK_DEV_RAM
 688                populate_initrd_image(err);
 689#else
 690                printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
 691#endif
 692        }
 693
 694done:
 695        /*
 696         * If the initrd region is overlapped with crashkernel reserved region,
 697         * free only memory that is not part of crashkernel region.
 698         */
 699        if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
 700                free_initrd_mem(initrd_start, initrd_end);
 701        initrd_start = 0;
 702        initrd_end = 0;
 703
 704        flush_delayed_fput();
 705}
 706
 707static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
 708static async_cookie_t initramfs_cookie;
 709
 710void wait_for_initramfs(void)
 711{
 712        if (!initramfs_cookie) {
 713                /*
 714                 * Something before rootfs_initcall wants to access
 715                 * the filesystem/initramfs. Probably a bug. Make a
 716                 * note, avoid deadlocking the machine, and let the
 717                 * caller's access fail as it used to.
 718                 */
 719                pr_warn_once("wait_for_initramfs() called before rootfs_initcalls\n");
 720                return;
 721        }
 722        async_synchronize_cookie_domain(initramfs_cookie + 1, &initramfs_domain);
 723}
 724EXPORT_SYMBOL_GPL(wait_for_initramfs);
 725
 726static int __init populate_rootfs(void)
 727{
 728        initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL,
 729                                                 &initramfs_domain);
 730        if (!initramfs_async)
 731                wait_for_initramfs();
 732        return 0;
 733}
 734rootfs_initcall(populate_rootfs);
 735