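/*
 * kaslr.c
 *
 * Routines that gather boot-time entropy and choose a randomized physical
 * and virtual load address for the kernel, in support of Kernel Address
 * Space Layout Randomization (KASLR).
 */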
#define BOOT_CTYPE_H

#include "misc.h"
#include "error.h"
#include "../string.h"

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <linux/efi.h>
#include <generated/utsrelease.h>
#include <asm/efi.h>

#define STATIC
#include <linux/decompress/mm.h>

#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif

extern unsigned long get_cmd_line_ptr(void);

pteval_t __default_kernel_pte_mask __read_mostly = ~0;

static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

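/* Mix the words of @area into @hash with a rotate-and-XOR step per word. */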
static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	unsigned long *ptr = (unsigned long *)area;

	for (i = 0; i < size / sizeof(hash); i++) {
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}

	return hash;
}

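/* Attempt to create a simple but unpredictable starting entropy. */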
static unsigned long get_boot_seed(void)
{
	unsigned long hash = 0;

	hash = rotate_xor(hash, build_str, sizeof(build_str));
	hash = rotate_xor(hash, boot_params, sizeof(*boot_params));

	return hash;
}

#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"

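/* Only supporting at most 4 unusable memmap regions with KASLR. */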
#define MAX_MEMMAP_REGIONS 4

static bool memmap_too_large;

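/* Memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" on the cmdline. */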
static unsigned long long mem_limit = ULLONG_MAX;

static int num_immovable_mem;

enum mem_avoid_index {
	MEM_AVOID_ZO_RANGE = 0,
	MEM_AVOID_INITRD,
	MEM_AVOID_CMDLINE,
	MEM_AVOID_BOOTPARAMS,
	MEM_AVOID_MEMMAP_BEGIN,
	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
	MEM_AVOID_MAX,
};

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

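/* Return true if the two memory vectors overlap anywhere. */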
static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
	if (one->start + one->size <= two->start)
		return false;
	if (one->start >= two->start + two->size)
		return false;
	return true;
}

char *skip_spaces(const char *str)
{
	while (isspace(*str))
		++str;
	return (char *)str;
}
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"

enum parse_mode {
	PARSE_MEMMAP,
	PARSE_EFI,
};

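/*
 * Parse one "memmap="/"efi_fake_mem=" region of the form
 * nn[KMG][@|#|$|!]ss[KMG]; only the size and start are returned, and
 * usable ("@") regions are reported with size 0 so they are not avoided.
 */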
static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
	     enum parse_mode mode)
{
	char *oldp;

	if (!p)
		return -EINVAL;

	if (!strncmp(p, "exactmap", 8))
		return -EINVAL;

	oldp = p;
	*size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	switch (*p) {
	case '#':
	case '$':
	case '!':
		*start = memparse(p + 1, &p);
		return 0;
	case '@':
		if (mode == PARSE_MEMMAP) {
			*size = 0;
		} else {
			unsigned long long flags;

			*start = memparse(p + 1, &p);
			if (p && *p == ':') {
				p++;
				if (kstrtoull(p, 0, &flags) < 0)
					*size = 0;
				else if (flags & EFI_MEMORY_SP)
					return 0;
			}
			*size = 0;
		}
		fallthrough;
	default:
		*start = 0;
		return 0;
	}

	return -EINVAL;
}

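/*
 * Walk a comma-separated "memmap="/"efi_fake_mem=" string and record each
 * region in mem_avoid[] so KASLR will not place the kernel there; a bare
 * size ("memmap=nn[KMG]") becomes the memory limit instead.
 */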
static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
	static int i;

	if (i >= MAX_MEMMAP_REGIONS)
		return;

	while (str && (i < MAX_MEMMAP_REGIONS)) {
		int rc;
		unsigned long long start, size;
		char *k = strchr(str, ',');

		if (k)
			*k++ = 0;

		rc = parse_memmap(str, &start, &size, mode);
		if (rc < 0)
			break;
		str = k;

		if (start == 0) {
			if (size > 0)
				mem_limit = size;

			continue;
		}

		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
		i++;
	}

	if ((i >= MAX_MEMMAP_REGIONS) && str)
		memmap_too_large = true;
}

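/* Store the number of 1GB huge pages which the user specified: */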
static unsigned long max_gb_huge_pages;

static void parse_gb_huge_pages(char *param, char *val)
{
	static bool gbpage_sz;
	char *p;

	if (!strcmp(param, "hugepagesz")) {
		p = val;
		if (memparse(p, &p) != PUD_SIZE) {
			gbpage_sz = false;
			return;
		}

		if (gbpage_sz)
			warn("Repeatedly set hugeTLB page size of 1G!\n");
		gbpage_sz = true;
		return;
	}

	if (!strcmp(param, "hugepages") && gbpage_sz) {
		p = val;
		max_gb_huge_pages = simple_strtoull(p, &p, 0);
		return;
	}
}

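/*
 * Make a local copy of the boot command line and scan it for "mem=",
 * "memmap=", "hugepages*" and "efi_fake_mem=" options that restrict where
 * KASLR may place the kernel.
 */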
static void handle_mem_options(void)
{
	char *args = (char *)get_cmd_line_ptr();
	size_t len = strlen((char *)args);
	char *tmp_cmdline;
	char *param, *val;
	u64 mem_size;

	if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
	    !strstr(args, "hugepages"))
		return;

	tmp_cmdline = malloc(len + 1);
	if (!tmp_cmdline)
		error("Failed to allocate space for tmp_cmdline");

	memcpy(tmp_cmdline, args, len);
	tmp_cmdline[len] = 0;
	args = tmp_cmdline;

	args = skip_spaces(args);

	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val && strcmp(param, "--") == 0) {
			warn("Only '--' specified in cmdline");
			goto out;
		}

		if (!strcmp(param, "memmap")) {
			mem_avoid_memmap(PARSE_MEMMAP, val);
		} else if (strstr(param, "hugepages")) {
			parse_gb_huge_pages(param, val);
		} else if (!strcmp(param, "mem")) {
			char *p = val;

			if (!strcmp(p, "nopentium"))
				continue;
			mem_size = memparse(p, &p);
			if (mem_size == 0)
				goto out;

			mem_limit = mem_size;
		} else if (!strcmp(param, "efi_fake_mem")) {
			mem_avoid_memmap(PARSE_EFI, val);
		}
	}

out:
	free(tmp_cmdline);
	return;
}
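
/*
 * Record the ranges KASLR must not place the kernel over: the compressed
 * kernel plus its decompression buffer (the ZO range), the initrd, the
 * kernel command line, the boot_params structure, and any regions given via
 * "memmap="/"efi_fake_mem=". Identity mappings are added for the ranges the
 * decompressor itself still needs to access.
 */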
static void mem_avoid_init(unsigned long input, unsigned long input_size,
			   unsigned long output)
{
	unsigned long init_size = boot_params->hdr.init_size;
	u64 initrd_start, initrd_size;
	u64 cmd_line, cmd_line_size;
	char *ptr;

	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
	add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
			 mem_avoid[MEM_AVOID_ZO_RANGE].size);

	initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
	initrd_start |= boot_params->hdr.ramdisk_image;
	initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
	initrd_size |= boot_params->hdr.ramdisk_size;
	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;

	cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
	cmd_line |= boot_params->hdr.cmd_line_ptr;
	ptr = (char *)(unsigned long)cmd_line;
	for (cmd_line_size = 0; ptr[cmd_line_size++];)
		;
	mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
	mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
	add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
			 mem_avoid[MEM_AVOID_CMDLINE].size);

	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);

	handle_mem_options();

	num_immovable_mem = count_immovable_mem_regions();

#ifdef CONFIG_X86_VERBOSE_BOOTUP
	add_identity_map(0, PMD_SIZE);
#endif
}

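/*
 * Does this memory vector overlap a known avoided area? If so, record the
 * overlapping region with the lowest starting address.
 */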
static bool mem_avoid_overlap(struct mem_vector *img,
			      struct mem_vector *overlap)
{
	int i;
	struct setup_data *ptr;
	unsigned long earliest = img->start + img->size;
	bool is_overlapping = false;

	for (i = 0; i < MEM_AVOID_MAX; i++) {
		if (mem_overlaps(img, &mem_avoid[i]) &&
		    mem_avoid[i].start < earliest) {
			*overlap = mem_avoid[i];
			earliest = overlap->start;
			is_overlapping = true;
		}
	}

	ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
	while (ptr) {
		struct mem_vector avoid;

		avoid.start = (unsigned long)ptr;
		avoid.size = sizeof(*ptr) + ptr->len;

		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
			*overlap = avoid;
			earliest = overlap->start;
			is_overlapping = true;
		}

		if (ptr->type == SETUP_INDIRECT &&
		    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
			avoid.start = ((struct setup_indirect *)ptr->data)->addr;
			avoid.size = ((struct setup_indirect *)ptr->data)->len;

			if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
				*overlap = avoid;
				earliest = overlap->start;
				is_overlapping = true;
			}
		}

		ptr = (struct setup_data *)(unsigned long)ptr->next;
	}

	return is_overlapping;
}

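/*
 * Candidate regions are broken into "slot areas": each records the first
 * possible load address and how many CONFIG_PHYSICAL_ALIGN-spaced slots fit
 * before the region runs out of room for the image.
 */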
struct slot_area {
	unsigned long addr;
	int num;
};

#define MAX_SLOT_AREA 100

static struct slot_area slot_areas[MAX_SLOT_AREA];

static unsigned long slot_max;

static unsigned long slot_area_index;

static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
	struct slot_area slot_area;

	if (slot_area_index == MAX_SLOT_AREA)
		return;

	slot_area.addr = region->start;
	slot_area.num = (region->size - image_size) /
			CONFIG_PHYSICAL_ALIGN + 1;

	if (slot_area.num > 0) {
		slot_areas[slot_area_index++] = slot_area;
		slot_max += slot_area.num;
	}
}

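/*
 * Skip as many 1GB huge pages as possible in the passed region, according to
 * the number the user requested, then hand the leftover pieces to
 * store_slot_info().
 */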
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
	unsigned long addr, size = 0;
	struct mem_vector tmp;
	int i = 0;

	if (!max_gb_huge_pages) {
		store_slot_info(region, image_size);
		return;
	}

	addr = ALIGN(region->start, PUD_SIZE);
	if (addr < region->start + region->size)
		size = region->size - (addr - region->start);

	while (size > PUD_SIZE && max_gb_huge_pages) {
		size -= PUD_SIZE;
		max_gb_huge_pages--;
		i++;
	}

	if (!i) {
		store_slot_info(region, image_size);
		return;
	}

	if (addr >= region->start + image_size) {
		tmp.start = region->start;
		tmp.size = addr - region->start;
		store_slot_info(&tmp, image_size);
	}

	size = region->size - (addr - region->start) - i * PUD_SIZE;
	if (size >= image_size) {
		tmp.start = addr + i * PUD_SIZE;
		tmp.size = size;
		store_slot_info(&tmp, image_size);
	}
}

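/* Pick a random slot index and convert it back into a physical address. */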
static unsigned long slots_fetch_random(void)
{
	unsigned long slot;
	int i;

	if (slot_max == 0)
		return 0;

	slot = kaslr_get_random_long("Physical") % slot_max;

	for (i = 0; i < slot_area_index; i++) {
		if (slot >= slot_areas[i].num) {
			slot -= slot_areas[i].num;
			continue;
		}
		return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
	}

	if (i == slot_area_index)
		debug_putstr("slots_fetch_random() failed!?\n");
	return 0;
}

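/*
 * Clip one candidate memory region against the minimum address, the memory
 * limit and all avoided ranges, recording every surviving piece large enough
 * to hold the image as slot areas.
 */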
static void __process_mem_region(struct mem_vector *entry,
				 unsigned long minimum,
				 unsigned long image_size)
{
	struct mem_vector region, overlap;
	unsigned long start_orig, end;
	struct mem_vector cur_entry;

	if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
		return;

	if (entry->start + entry->size < minimum)
		return;

	end = min(entry->size + entry->start, mem_limit);
	if (entry->start >= end)
		return;
	cur_entry.start = entry->start;
	cur_entry.size = end - entry->start;

	region.start = cur_entry.start;
	region.size = cur_entry.size;

	while (slot_area_index < MAX_SLOT_AREA) {
		start_orig = region.start;

		if (region.start < minimum)
			region.start = minimum;

		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

		if (region.start > cur_entry.start + cur_entry.size)
			return;

		region.size -= region.start - start_orig;

		if (IS_ENABLED(CONFIG_X86_32) &&
		    region.start + region.size > KERNEL_IMAGE_SIZE)
			region.size = KERNEL_IMAGE_SIZE - region.start;

		if (region.size < image_size)
			return;

		if (!mem_avoid_overlap(&region, &overlap)) {
			process_gb_huge_pages(&region, image_size);
			return;
		}

		if (overlap.start > region.start + image_size) {
			struct mem_vector beginning;

			beginning.start = region.start;
			beginning.size = overlap.start - region.start;
			process_gb_huge_pages(&beginning, image_size);
		}

		if (overlap.start + overlap.size >= region.start + region.size)
			return;

		region.size -= overlap.start - region.start + overlap.size;
		region.start = overlap.start + overlap.size;
	}
}

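/*
 * Process a candidate region, restricting it to immovable memory when such
 * regions were enumerated, and report whether the slot_areas table filled up
 * so the caller can stop scanning.
 */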
static bool process_mem_region(struct mem_vector *region,
			       unsigned long long minimum,
			       unsigned long long image_size)
{
	int i;

	if (!num_immovable_mem) {
		__process_mem_region(region, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
			return 1;
		}
		return 0;
	}

#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
	for (i = 0; i < num_immovable_mem; i++) {
		unsigned long long start, end, entry_end, region_end;
		struct mem_vector entry;

		if (!mem_overlaps(region, &immovable_mem[i]))
			continue;

		start = immovable_mem[i].start;
		end = start + immovable_mem[i].size;
		region_end = region->start + region->size;

		entry.start = clamp(region->start, start, end);
		entry_end = clamp(region_end, start, end);
		entry.size = entry_end - entry.start;

		__process_mem_region(&entry, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
			return 1;
		}
	}
#endif
	return 0;
}

#ifdef CONFIG_EFI
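/*
 * Walk the EFI memory map supplied by the boot loader, if any. Returns true
 * if EFI entries were processed, in which case the e820 table is skipped.
 */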
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	struct efi_info *e = &boot_params->efi_info;
	bool efi_mirror_found = false;
	struct mem_vector region;
	efi_memory_desc_t *md;
	unsigned long pmap;
	char *signature;
	u32 nr_desc;
	int i;

	signature = (char *)&e->efi_loader_signature;
	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
		return false;

#ifdef CONFIG_X86_32
	if (e->efi_memmap_hi) {
		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
		return false;
	}
	pmap = e->efi_memmap;
#else
	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif

	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
			efi_mirror_found = true;
			break;
		}
	}

	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

		if (md->type != EFI_CONVENTIONAL_MEMORY)
			continue;

		if (efi_soft_reserve_enabled() &&
		    (md->attribute & EFI_MEMORY_SP))
			continue;

		if (efi_mirror_found &&
		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
			continue;

		region.start = md->phys_addr;
		region.size = md->num_pages << EFI_PAGE_SHIFT;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
	return true;
}
#else
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	return false;
}
#endif

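/* Walk the e820 table and feed every RAM entry to process_mem_region(). */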
static void process_e820_entries(unsigned long minimum,
				 unsigned long image_size)
{
	int i;
	struct mem_vector region;
	struct boot_e820_entry *entry;

	for (i = 0; i < boot_params->e820_entries; i++) {
		entry = &boot_params->e820_table[i];

		if (entry->type != E820_TYPE_RAM)
			continue;
		region.start = entry->addr;
		region.size = entry->size;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
}

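/*
 * Choose a random physical load address from the slot areas built from the
 * EFI memory map (preferred) or the e820 table.
 */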
static unsigned long find_random_phys_addr(unsigned long minimum,
					   unsigned long image_size)
{
	if (memmap_too_large) {
		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
		return 0;
	}

	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);

	if (process_efi_entries(minimum, image_size))
		return slots_fetch_random();

	process_e820_entries(minimum, image_size);
	return slots_fetch_random();
}

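/*
 * Pick a random, suitably aligned virtual load address within the
 * KERNEL_IMAGE_SIZE window; used only on x86-64.
 */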
static unsigned long find_random_virt_addr(unsigned long minimum,
					   unsigned long image_size)
{
	unsigned long slots, random_addr;

	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
	image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);

	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
		CONFIG_PHYSICAL_ALIGN + 1;

	random_addr = kaslr_get_random_long("Virtual") % slots;

	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}

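/*
 * Entry point from the decompressor: pick randomized physical and virtual
 * load addresses for the kernel, honouring "nokaslr" and the regions
 * recorded in mem_avoid[].
 */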
void choose_random_location(unsigned long input,
			    unsigned long input_size,
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr)
{
	unsigned long random_addr, min_addr;

	if (cmdline_find_option_bool("nokaslr")) {
		warn("KASLR disabled: 'nokaslr' on cmdline.");
		return;
	}

#ifdef CONFIG_X86_5LEVEL
	if (__read_cr4() & X86_CR4_LA57) {
		__pgtable_l5_enabled = 1;
		pgdir_shift = 48;
		ptrs_per_p4d = 512;
	}
#endif

	boot_params->hdr.loadflags |= KASLR_FLAG;

	initialize_identity_maps();

	mem_avoid_init(input, input_size, *output);

	min_addr = min(*output, 512UL << 20);

	random_addr = find_random_phys_addr(min_addr, output_size);
	if (!random_addr) {
		warn("Physical KASLR disabled: no suitable memory region!");
	} else {
		if (*output != random_addr) {
			add_identity_map(random_addr, output_size);
			*output = random_addr;
		}

		finalize_identity_maps();
	}

	if (IS_ENABLED(CONFIG_X86_64))
		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
	*virt_addr = random_addr;
}