// SPDX-License-Identifier: GPL-2.0
/*
 * kaslr.c
 *
 * This contains the routines needed to generate a reasonable level of
 * entropy to choose a randomized kernel base address offset in support
 * of Kernel Address Space Layout Randomization (KASLR).
 */

/* Avoid the boot ctype definitions, which conflict with linux/ctype.h. */
#define BOOT_CTYPE_H

/* Keep EXPORT_SYMBOL() in the kernel sources included below inert. */
#define __DISABLE_EXPORTS

#include "misc.h"
#include "error.h"
#include "../string.h"

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <linux/efi.h>
#include <generated/utsrelease.h>
#include <asm/efi.h>

/* Macros used by the included decompressor code below. */
#define STATIC
#include <linux/decompress/mm.h>

#ifdef CONFIG_X86_5LEVEL
/* Defaults for 4-level paging; updated at runtime if 5-level paging is on. */
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif

extern unsigned long get_cmd_line_ptr(void);

/* Used by PAGE_KERN* macros: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;

/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
                LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
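
/* Fold a memory area into a running hash with rotate-and-XOR steps. */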
static unsigned long rotate_xor(unsigned long hash, const void *area,
                                size_t size)
{
        size_t i;
        unsigned long *ptr = (unsigned long *)area;

        for (i = 0; i < size / sizeof(hash); i++) {
                /* Rotate by odd number of bits and XOR. */
                hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
                hash ^= ptr[i];
        }

        return hash;
}
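
/* Attempt to create a simple but unpredictable starting entropy. */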
static unsigned long get_boot_seed(void)
{
        unsigned long hash = 0;

        hash = rotate_xor(hash, build_str, sizeof(build_str));
        hash = rotate_xor(hash, boot_params, sizeof(*boot_params));

        return hash;
}

#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
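
/* Only supporting at most 4 unusable memmap regions with kaslr */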
#define MAX_MEMMAP_REGIONS 4

static bool memmap_too_large;

/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
static unsigned long long mem_limit = ULLONG_MAX;
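
/* Number of immovable memory regions */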
static int num_immovable_mem;

enum mem_avoid_index {
        MEM_AVOID_ZO_RANGE = 0,
        MEM_AVOID_INITRD,
        MEM_AVOID_CMDLINE,
        MEM_AVOID_BOOTPARAMS,
        MEM_AVOID_MEMMAP_BEGIN,
        MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
        MEM_AVOID_MAX,
};

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
        /* Item one is entirely before item two. */
        if (one->start + one->size <= two->start)
                return false;
        /* Item one is entirely after item two. */
        if (one->start >= two->start + two->size)
                return false;
        return true;
}

char *skip_spaces(const char *str)
{
        while (isspace(*str))
                ++str;
        return (char *)str;
}
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"

enum parse_mode {
        PARSE_MEMMAP,
        PARSE_EFI,
};
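
/*
 * Parse one region spec of the form size[@#$!]start[:attr], as used by
 * the memmap= and efi_fake_mem= command line options.
 */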
static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
             enum parse_mode mode)
{
        char *oldp;

        if (!p)
                return -EINVAL;

        /* We don't care about this option here */
        if (!strncmp(p, "exactmap", 8))
                return -EINVAL;

        oldp = p;
        *size = memparse(p, &p);
        if (p == oldp)
                return -EINVAL;

        switch (*p) {
        case '#':
        case '$':
        case '!':
                *start = memparse(p + 1, &p);
                return 0;
        case '@':
                if (mode == PARSE_MEMMAP) {
                        /*
                         * memmap=nn@ss specifies usable region, should
                         * be skipped
                         */
                        *size = 0;
                } else {
                        unsigned long long flags;

                        /*
                         * efi_fake_mem=nn@ss:attr the attr specifies
                         * flags that might imply a soft-reservation.
                         */
                        *start = memparse(p + 1, &p);
                        if (p && *p == ':') {
                                p++;
                                if (kstrtoull(p, 0, &flags) < 0)
                                        *size = 0;
                                else if (flags & EFI_MEMORY_SP)
                                        return 0;
                        }
                        *size = 0;
                }
                /* Fall through */
        default:
                /*
                 * If w/o offset, only size specified, memmap=nn[KMG] has the
                 * same behaviour as mem=nn[KMG]. It limits the max address
                 * system can use. Region above the limit should be avoided.
                 */
                *start = 0;
                return 0;
        }

        return -EINVAL;
}

static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
        static int i;

        if (i >= MAX_MEMMAP_REGIONS)
                return;

        while (str && (i < MAX_MEMMAP_REGIONS)) {
                int rc;
                unsigned long long start, size;
                char *k = strchr(str, ',');

                if (k)
                        *k++ = 0;

                rc = parse_memmap(str, &start, &size, mode);
                if (rc < 0)
                        break;
                str = k;

                if (start == 0) {
                        /* Store the specified memory limit if size > 0 */
                        if (size > 0)
                                mem_limit = size;

                        continue;
                }

                mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
                mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
                i++;
        }

        /* More than 4 memmaps, fail kaslr */
        if ((i >= MAX_MEMMAP_REGIONS) && str)
                memmap_too_large = true;
}

/* Store the number of 1GB huge pages which users specified: */
static unsigned long max_gb_huge_pages;
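
/* Track "hugepagesz=1G" / "hugepages=N" pairs on the command line. */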
static void parse_gb_huge_pages(char *param, char *val)
{
        static bool gbpage_sz;
        char *p;

        if (!strcmp(param, "hugepagesz")) {
                p = val;
                if (memparse(p, &p) != PUD_SIZE) {
                        gbpage_sz = false;
                        return;
                }

                if (gbpage_sz)
                        warn("Repeatedly set hugeTLB page size of 1G!\n");
                gbpage_sz = true;
                return;
        }

        if (!strcmp(param, "hugepages") && gbpage_sz) {
                p = val;
                max_gb_huge_pages = simple_strtoull(p, &p, 0);
                return;
        }
}
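
/* Scan a copy of the kernel command line for memory placement options. */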
static void handle_mem_options(void)
{
        char *args = (char *)get_cmd_line_ptr();
        size_t len = strlen(args);
        char *tmp_cmdline;
        char *param, *val;
        u64 mem_size;

        if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
            !strstr(args, "hugepages"))
                return;

        tmp_cmdline = malloc(len + 1);
        if (!tmp_cmdline)
                error("Failed to allocate space for tmp_cmdline");

        memcpy(tmp_cmdline, args, len);
        tmp_cmdline[len] = 0;
        args = tmp_cmdline;

        /* Chew leading spaces */
        args = skip_spaces(args);

        while (*args) {
                args = next_arg(args, &param, &val);

                /* Stop at -- */
                if (!val && strcmp(param, "--") == 0) {
                        warn("Only '--' specified in cmdline");
                        goto out;
                }

                if (!strcmp(param, "memmap")) {
                        mem_avoid_memmap(PARSE_MEMMAP, val);
                } else if (strstr(param, "hugepages")) {
                        parse_gb_huge_pages(param, val);
                } else if (!strcmp(param, "mem")) {
                        char *p = val;

                        if (!strcmp(p, "nopentium"))
                                continue;
                        mem_size = memparse(p, &p);
                        if (mem_size == 0)
                                goto out;

                        mem_limit = mem_size;
                } else if (!strcmp(param, "efi_fake_mem")) {
                        mem_avoid_memmap(PARSE_EFI, val);
                }
        }

out:
        free(tmp_cmdline);
}
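
/*
 * The mem_avoid array is used to store the ranges that need to be avoided
 * when KASLR searches for an appropriate random address. mem_avoid_init()
 * records the ranges that are unsafe to place the kernel over: the
 * decompression work area (from the compressed image's load address through
 * the end of the output buffer plus init_size), the initrd, the kernel
 * command line, and boot_params itself; user-supplied memmap= and
 * efi_fake_mem= exclusions are folded in via handle_mem_options().
 */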
static void mem_avoid_init(unsigned long input, unsigned long input_size,
                           unsigned long output)
{
        unsigned long init_size = boot_params->hdr.init_size;
        u64 initrd_start, initrd_size;
        u64 cmd_line, cmd_line_size;
        char *ptr;

        /*
         * Avoid the region that is unsafe to overlap during
         * decompression.
         */
        mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
        mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
        add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
                         mem_avoid[MEM_AVOID_ZO_RANGE].size);

        /* Avoid initrd. */
        initrd_start  = (u64)boot_params->ext_ramdisk_image << 32;
        initrd_start |= boot_params->hdr.ramdisk_image;
        initrd_size  = (u64)boot_params->ext_ramdisk_size << 32;
        initrd_size |= boot_params->hdr.ramdisk_size;
        mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
        mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
        /* No need to set mapping for initrd, it will be handled in VO. */

        /* Avoid kernel command line. */
        cmd_line  = (u64)boot_params->ext_cmd_line_ptr << 32;
        cmd_line |= boot_params->hdr.cmd_line_ptr;
        /* Calculate size of cmd_line. */
        ptr = (char *)(unsigned long)cmd_line;
        for (cmd_line_size = 0; ptr[cmd_line_size++];)
                ;
        mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
        mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
        add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
                         mem_avoid[MEM_AVOID_CMDLINE].size);

        /* Avoid boot parameters. */
        mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
        mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
        add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
                         mem_avoid[MEM_AVOID_BOOTPARAMS].size);

        /* We don't need to set a mapping for setup_data. */

        /* Mark the memmap regions we need to avoid */
        handle_mem_options();

        /* Enumerate the immovable memory regions */
        num_immovable_mem = count_immovable_mem_regions();

#ifdef CONFIG_X86_VERBOSE_BOOTUP
        /* Make sure video RAM can be used. */
        add_identity_map(0, PMD_SIZE);
#endif
}
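
/*
 * Does this memory vector overlap a known avoided area? If so, record the
 * overlap region with the lowest address.
 */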
static bool mem_avoid_overlap(struct mem_vector *img,
                              struct mem_vector *overlap)
{
        int i;
        struct setup_data *ptr;
        unsigned long earliest = img->start + img->size;
        bool is_overlapping = false;

        for (i = 0; i < MEM_AVOID_MAX; i++) {
                if (mem_overlaps(img, &mem_avoid[i]) &&
                    mem_avoid[i].start < earliest) {
                        *overlap = mem_avoid[i];
                        earliest = overlap->start;
                        is_overlapping = true;
                }
        }

        /* Avoid all entries in the setup_data linked list. */
        ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
        while (ptr) {
                struct mem_vector avoid;

                avoid.start = (unsigned long)ptr;
                avoid.size = sizeof(*ptr) + ptr->len;

                if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
                        *overlap = avoid;
                        earliest = overlap->start;
                        is_overlapping = true;
                }

                /* An indirect entry describes data living elsewhere; avoid it too. */
                if (ptr->type == SETUP_INDIRECT &&
                    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
                        avoid.start = ((struct setup_indirect *)ptr->data)->addr;
                        avoid.size = ((struct setup_indirect *)ptr->data)->len;

                        if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
                                *overlap = avoid;
                                earliest = overlap->start;
                                is_overlapping = true;
                        }
                }

                ptr = (struct setup_data *)(unsigned long)ptr->next;
        }

        return is_overlapping;
}

struct slot_area {
        unsigned long addr;
        int num;
};

#define MAX_SLOT_AREA 100

static struct slot_area slot_areas[MAX_SLOT_AREA];

/* Total number of candidate slots across all stored areas. */
static unsigned long slot_max;

/* Next free entry in slot_areas[]. */
static unsigned long slot_area_index;
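
/* Record a usable region as a set of CONFIG_PHYSICAL_ALIGN-spaced slots. */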
static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
        struct slot_area slot_area;

        if (slot_area_index == MAX_SLOT_AREA)
                return;

        slot_area.addr = region->start;
        slot_area.num = (region->size - image_size) /
                        CONFIG_PHYSICAL_ALIGN + 1;

        if (slot_area.num > 0) {
                slot_areas[slot_area_index++] = slot_area;
                slot_max += slot_area.num;
        }
}
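
/*
 * Skip as many 1GB huge pages as possible in the passed region
 * according to the number which users specified:
 */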
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
        unsigned long addr, size = 0;
        struct mem_vector tmp;
        int i = 0;

        if (!max_gb_huge_pages) {
                store_slot_info(region, image_size);
                return;
        }

        addr = ALIGN(region->start, PUD_SIZE);
        /* Did we raise the address above the passed region? */
        if (addr < region->start + region->size)
                size = region->size - (addr - region->start);

        /* Check how many 1GB huge pages can be filtered out: */
        while (size > PUD_SIZE && max_gb_huge_pages) {
                size -= PUD_SIZE;
                max_gb_huge_pages--;
                i++;
        }

        /* No good 1GB huge pages found: */
        if (!i) {
                store_slot_info(region, image_size);
                return;
        }

        /*
         * Skip those 'i'*1GB good huge pages, and continue checking and
         * processing the remaining head or tail part of the passed region
         * if available.
         */

        if (addr >= region->start + image_size) {
                tmp.start = region->start;
                tmp.size = addr - region->start;
                store_slot_info(&tmp, image_size);
        }

        size = region->size - (addr - region->start) - i * PUD_SIZE;
        if (size >= image_size) {
                tmp.start = addr + i * PUD_SIZE;
                tmp.size = size;
                store_slot_info(&tmp, image_size);
        }
}

static unsigned long slots_fetch_random(void)
{
        unsigned long slot;
        int i;

        /* Handle case of no slots stored. */
        if (slot_max == 0)
                return 0;

        slot = kaslr_get_random_long("Physical") % slot_max;

        for (i = 0; i < slot_area_index; i++) {
                if (slot >= slot_areas[i].num) {
                        slot -= slot_areas[i].num;
                        continue;
                }
                return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
        }

        if (i == slot_area_index)
                debug_putstr("slots_fetch_random() failed!?\n");
        return 0;
}

static void __process_mem_region(struct mem_vector *entry,
                                 unsigned long minimum,
                                 unsigned long image_size)
{
        struct mem_vector region, overlap;
        unsigned long start_orig, end;
        struct mem_vector cur_entry;

        /* On 32-bit, ignore entries entirely above our maximum. */
        if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
                return;

        /* Ignore entries entirely below our minimum. */
        if (entry->start + entry->size < minimum)
                return;

        /* Ignore entries above memory limit */
        end = min(entry->size + entry->start, mem_limit);
        if (entry->start >= end)
                return;
        cur_entry.start = entry->start;
        cur_entry.size = end - entry->start;

        region.start = cur_entry.start;
        region.size = cur_entry.size;

        /* Give up if slot area array is full. */
        while (slot_area_index < MAX_SLOT_AREA) {
                start_orig = region.start;

                /* Potentially raise address to minimum location. */
                if (region.start < minimum)
                        region.start = minimum;

                /* Potentially raise address to meet alignment needs. */
                region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

                /* Did we raise the address above the passed in memory entry? */
                if (region.start > cur_entry.start + cur_entry.size)
                        return;

                /* Reduce size by any delta from the original address. */
                region.size -= region.start - start_orig;

                /* On 32-bit, reduce region size to fit within max size. */
                if (IS_ENABLED(CONFIG_X86_32) &&
                    region.start + region.size > KERNEL_IMAGE_SIZE)
                        region.size = KERNEL_IMAGE_SIZE - region.start;

                /* Return if region can't contain decompressed kernel */
                if (region.size < image_size)
                        return;

                /* If nothing overlaps, store the region and return. */
                if (!mem_avoid_overlap(&region, &overlap)) {
                        process_gb_huge_pages(&region, image_size);
                        return;
                }

                /* Store beginning of region if holds at least image_size. */
                if (overlap.start > region.start + image_size) {
                        struct mem_vector beginning;

                        beginning.start = region.start;
                        beginning.size = overlap.start - region.start;
                        process_gb_huge_pages(&beginning, image_size);
                }

                /* Return if overlap extends to or past end of region. */
                if (overlap.start + overlap.size >= region.start + region.size)
                        return;

                /* Clip off the overlapping region and begin again. */
                region.size -= overlap.start - region.start + overlap.size;
                region.start = overlap.start + overlap.size;
        }
}

static bool process_mem_region(struct mem_vector *region,
                               unsigned long long minimum,
                               unsigned long long image_size)
{
        int i;

        /*
         * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
         * use @region directly.
         */
        if (!num_immovable_mem) {
                __process_mem_region(region, minimum, image_size);

                if (slot_area_index == MAX_SLOT_AREA) {
                        debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
                        return true;
                }
                return false;
        }

#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
        /*
         * If immovable memory found, filter the intersection between
         * immovable memory and @region.
         */
        for (i = 0; i < num_immovable_mem; i++) {
                unsigned long long start, end, entry_end, region_end;
                struct mem_vector entry;

                if (!mem_overlaps(region, &immovable_mem[i]))
                        continue;

                start = immovable_mem[i].start;
                end = start + immovable_mem[i].size;
                region_end = region->start + region->size;

                entry.start = clamp(region->start, start, end);
                entry_end = clamp(region_end, start, end);
                entry.size = entry_end - entry.start;

                __process_mem_region(&entry, minimum, image_size);

                if (slot_area_index == MAX_SLOT_AREA) {
                        debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
                        return true;
                }
        }
#endif
        return false;
}

#ifdef CONFIG_EFI
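/*
 * Returns true if we processed the EFI memmap, which we prefer over the E820
 * table if it is available.
 */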
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
        struct efi_info *e = &boot_params->efi_info;
        bool efi_mirror_found = false;
        struct mem_vector region;
        efi_memory_desc_t *md;
        unsigned long pmap;
        char *signature;
        u32 nr_desc;
        int i;

        signature = (char *)&e->efi_loader_signature;
        if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
            strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
                return false;

#ifdef CONFIG_X86_32
        /* Can't handle an EFI memmap placed above 4GB on x86_32. */
        if (e->efi_memmap_hi) {
                warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
                return false;
        }
        pmap = e->efi_memmap;
#else
        pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif

        nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
        for (i = 0; i < nr_desc; i++) {
                md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
                if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
                        efi_mirror_found = true;
                        break;
                }
        }

        for (i = 0; i < nr_desc; i++) {
                md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

                /*
                 * Here we are more conservative in picking free memory than
                 * the EFI spec allows:
                 *
                 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
                 * free memory and thus available to place the kernel image into,
                 * but in practice there's firmware where using that memory leads
                 * to crashes.
                 *
                 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
                 */
                if (md->type != EFI_CONVENTIONAL_MEMORY)
                        continue;

                if (efi_soft_reserve_enabled() &&
                    (md->attribute & EFI_MEMORY_SP))
                        continue;

                if (efi_mirror_found &&
                    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
                        continue;

                region.start = md->phys_addr;
                region.size = md->num_pages << EFI_PAGE_SHIFT;
                if (process_mem_region(&region, minimum, image_size))
                        break;
        }
        return true;
}
#else
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
        return false;
}
#endif

static void process_e820_entries(unsigned long minimum,
                                 unsigned long image_size)
{
        int i;
        struct mem_vector region;
        struct boot_e820_entry *entry;

        /* Verify potential e820 positions, appending to slots list. */
        for (i = 0; i < boot_params->e820_entries; i++) {
                entry = &boot_params->e820_table[i];
                /* Skip non-RAM entries. */
                if (entry->type != E820_TYPE_RAM)
                        continue;
                region.start = entry->addr;
                region.size = entry->size;
                if (process_mem_region(&region, minimum, image_size))
                        break;
        }
}

static unsigned long find_random_phys_addr(unsigned long minimum,
                                           unsigned long image_size)
{
        /* Check if we had too many memmaps. */
        if (memmap_too_large) {
                debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
                return 0;
        }

        /* Make sure minimum is aligned. */
        minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);

        if (process_efi_entries(minimum, image_size))
                return slots_fetch_random();

        process_e820_entries(minimum, image_size);
        return slots_fetch_random();
}

static unsigned long find_random_virt_addr(unsigned long minimum,
                                           unsigned long image_size)
{
        unsigned long slots, random_addr;

        /* Make sure minimum is aligned. */
        minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
        /* Align image_size for easy slot calculations. */
        image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);

        /*
         * There are how many CONFIG_PHYSICAL_ALIGN-sized slots
         * that can hold image_size within the range of
         * minimum to KERNEL_IMAGE_SIZE?
         */
        slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
                 CONFIG_PHYSICAL_ALIGN + 1;

        random_addr = kaslr_get_random_long("Virtual") % slots;

        return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
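
/*
 * Since this function examines addresses much more numerically,
 * it takes the input and output pointers as 'unsigned long'.
 */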
void choose_random_location(unsigned long input,
                            unsigned long input_size,
                            unsigned long *output,
                            unsigned long output_size,
                            unsigned long *virt_addr)
{
        unsigned long random_addr, min_addr;

        if (cmdline_find_option_bool("nokaslr")) {
                warn("KASLR disabled: 'nokaslr' on cmdline.");
                return;
        }

#ifdef CONFIG_X86_5LEVEL
        if (__read_cr4() & X86_CR4_LA57) {
                __pgtable_l5_enabled = 1;
                pgdir_shift = 48;
                ptrs_per_p4d = 512;
        }
#endif

        boot_params->hdr.loadflags |= KASLR_FLAG;

        /* Prepare to add new identity pagetables on demand. */
        initialize_identity_maps();

        /* Record the various known unsafe memory ranges. */
        mem_avoid_init(input, input_size, *output);

        /*
         * Low end of the randomization range should be the
         * smaller of 512M or the initial kernel image
         * location:
         */
        min_addr = min(*output, 512UL << 20);

        /* Walk available memory entries to find a random address. */
        random_addr = find_random_phys_addr(min_addr, output_size);
        if (!random_addr) {
                warn("Physical KASLR disabled: no suitable memory region!");
        } else {
                /* Update the new physical address location. */
                if (*output != random_addr) {
                        add_identity_map(random_addr, output_size);
                        *output = random_addr;
                }

                /*
                 * This loads the identity mapping page table.
                 * This should only be done if a new physical address
                 * is found for the kernel, otherwise we should keep
                 * the old page table to make it be like the "nokaslr"
                 * case.
                 */
                finalize_identity_maps();
        }

        /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
        if (IS_ENABLED(CONFIG_X86_64))
                random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
        *virt_addr = random_addr;
}