1
2
3
4
5
6
7
8
9#include <linux/signal.h>
10#include <linux/sched.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/types.h>
15#include <linux/ptrace.h>
16#include <linux/mman.h>
17#include <linux/mm.h>
18#include <linux/swap.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/initrd.h>
22#include <linux/pagemap.h>
23#include <linux/bootmem.h>
24#include <linux/memblock.h>
25#include <linux/proc_fs.h>
26#include <linux/pci.h>
27#include <linux/pfn.h>
28#include <linux/poison.h>
29#include <linux/dma-mapping.h>
30#include <linux/module.h>
31#include <linux/memory.h>
32#include <linux/memory_hotplug.h>
33#include <linux/memremap.h>
34#include <linux/nmi.h>
35#include <linux/gfp.h>
36#include <linux/kcore.h>
37
38#include <asm/processor.h>
39#include <asm/bios_ebda.h>
40#include <asm/uaccess.h>
41#include <asm/pgtable.h>
42#include <asm/pgalloc.h>
43#include <asm/dma.h>
44#include <asm/fixmap.h>
45#include <asm/e820.h>
46#include <asm/apic.h>
47#include <asm/tlb.h>
48#include <asm/mmu_context.h>
49#include <asm/proto.h>
50#include <asm/smp.h>
51#include <asm/sections.h>
52#include <asm/kdebug.h>
53#include <asm/numa.h>
54#include <asm/cacheflush.h>
55#include <asm/init.h>
56#include <asm/uv/uv.h>
57#include <asm/setup.h>
58
59#include "mm_internal.h"
60
61#include "ident_map.c"
62
63static int __init parse_direct_gbpages_off(char *arg)
64{
65 direct_gbpages = 0;
66 return 0;
67}
68early_param("nogbpages", parse_direct_gbpages_off);
69
70static int __init parse_direct_gbpages_on(char *arg)
71{
72 direct_gbpages = 1;
73 return 0;
74}
75early_param("gbpages", parse_direct_gbpages_on);
76
77
78
79
80
81
82
/*
 * PTE bit mask, initialised to every bit except _PAGE_IOMAP.
 * Exported to GPL modules.
 */
pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
EXPORT_SYMBOL_GPL(__supported_pte_mask);

/*
 * Personality flag bits; the "noexec32=" boot option handler in this file
 * sets or clears READ_IMPLIES_EXEC here.
 */
int force_personality32;
87
88
89
90
91
92
93
94
95
96static int __init nonx32_setup(char *str)
97{
98 if (!strcmp(str, "on"))
99 force_personality32 &= ~READ_IMPLIES_EXEC;
100 else if (!strcmp(str, "off"))
101 force_personality32 |= READ_IMPLIES_EXEC;
102 return 1;
103}
104__setup("noexec32=", nonx32_setup);
105
106
107
108
109
110void sync_global_pgds(unsigned long start, unsigned long end, int removed)
111{
112 unsigned long addr;
113
114 for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
115 const pgd_t *pgd_ref = pgd_offset_k(addr);
116 struct page *page;
117
118
119
120
121
122
123 if (pgd_none(*pgd_ref) && !removed)
124 continue;
125
126 spin_lock(&pgd_lock);
127 list_for_each_entry(page, &pgd_list, lru) {
128 pgd_t *pgd;
129 spinlock_t *pgt_lock;
130
131 pgd = (pgd_t *)page_address(page) + pgd_index(addr);
132
133 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
134 spin_lock(pgt_lock);
135
136 if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
137 BUG_ON(pgd_page_vaddr(*pgd)
138 != pgd_page_vaddr(*pgd_ref));
139
140 if (removed) {
141 if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
142 pgd_clear(pgd);
143 } else {
144 if (pgd_none(*pgd))
145 set_pgd(pgd, *pgd_ref);
146 }
147
148 spin_unlock(pgt_lock);
149 }
150 spin_unlock(&pgd_lock);
151 }
152}
153
154
155
156
157
158static __ref void *spp_getpage(void)
159{
160 void *ptr;
161
162 if (after_bootmem)
163 ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
164 else
165 ptr = alloc_bootmem_pages(PAGE_SIZE);
166
167 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
168 panic("set_pte_phys: cannot allocate page data %s\n",
169 after_bootmem ? "after bootmem" : "");
170 }
171
172 pr_debug("spp_getpage %p\n", ptr);
173
174 return ptr;
175}
176
177static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
178{
179 if (pgd_none(*pgd)) {
180 pud_t *pud = (pud_t *)spp_getpage();
181 pgd_populate(&init_mm, pgd, pud);
182 if (pud != pud_offset(pgd, 0))
183 printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
184 pud, pud_offset(pgd, 0));
185 }
186 return pud_offset(pgd, vaddr);
187}
188
189static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
190{
191 if (pud_none(*pud)) {
192 pmd_t *pmd = (pmd_t *) spp_getpage();
193 pud_populate(&init_mm, pud, pmd);
194 if (pmd != pmd_offset(pud, 0))
195 printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
196 pmd, pmd_offset(pud, 0));
197 }
198 return pmd_offset(pud, vaddr);
199}
200
201static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
202{
203 if (pmd_none(*pmd)) {
204 pte_t *pte = (pte_t *) spp_getpage();
205 pmd_populate_kernel(&init_mm, pmd, pte);
206 if (pte != pte_offset_kernel(pmd, 0))
207 printk(KERN_ERR "PAGETABLE BUG #02!\n");
208 }
209 return pte_offset_kernel(pmd, vaddr);
210}
211
212void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
213{
214 pud_t *pud;
215 pmd_t *pmd;
216 pte_t *pte;
217
218 pud = pud_page + pud_index(vaddr);
219 pmd = fill_pmd(pud, vaddr);
220 pte = fill_pte(pmd, vaddr);
221
222 set_pte(pte, new_pte);
223
224
225
226
227
228 __flush_tlb_one(vaddr);
229}
230
231void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
232{
233 pgd_t *pgd;
234 pud_t *pud_page;
235
236 pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
237
238 pgd = pgd_offset_k(vaddr);
239 if (pgd_none(*pgd)) {
240 printk(KERN_ERR
241 "PGD FIXMAP MISSING, it should be setup in head.S!\n");
242 return;
243 }
244 pud_page = (pud_t*)pgd_page_vaddr(*pgd);
245 set_pte_vaddr_pud(pud_page, vaddr, pteval);
246}
247
248pmd_t * __init populate_extra_pmd(unsigned long vaddr)
249{
250 pgd_t *pgd;
251 pud_t *pud;
252
253 pgd = pgd_offset_k(vaddr);
254 pud = fill_pud(pgd, vaddr);
255 return fill_pmd(pud, vaddr);
256}
257
258pte_t * __init populate_extra_pte(unsigned long vaddr)
259{
260 pmd_t *pmd;
261
262 pmd = populate_extra_pmd(vaddr);
263 return fill_pte(pmd, vaddr);
264}
265
266
267
268
269static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
270 pgprot_t prot)
271{
272 pgd_t *pgd;
273 pud_t *pud;
274 pmd_t *pmd;
275
276 BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
277 for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
278 pgd = pgd_offset_k((unsigned long)__va(phys));
279 if (pgd_none(*pgd)) {
280 pud = (pud_t *) spp_getpage();
281 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
282 _PAGE_USER));
283 }
284 pud = pud_offset(pgd, (unsigned long)__va(phys));
285 if (pud_none(*pud)) {
286 pmd = (pmd_t *) spp_getpage();
287 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
288 _PAGE_USER));
289 }
290 pmd = pmd_offset(pud, phys);
291 BUG_ON(!pmd_none(*pmd));
292 set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
293 }
294}
295
296void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
297{
298 __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
299}
300
301void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
302{
303 __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
304}
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319void __init cleanup_highmap(void)
320{
321 unsigned long vaddr = __START_KERNEL_map;
322 unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
323 unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
324 pmd_t *pmd = level2_kernel_pgt;
325
326
327
328
329
330
331 if (max_pfn_mapped)
332 vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
333
334 for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
335 if (pmd_none(*pmd))
336 continue;
337 if (vaddr < (unsigned long) _text || vaddr > end)
338 set_pmd(pmd, __pmd(0));
339 }
340}
341
342
343
344
345
346static unsigned long __meminit
347phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
348 pgprot_t prot)
349{
350 unsigned long pages = 0, paddr_next;
351 unsigned long paddr_last = paddr_end;
352 pte_t *pte;
353 int i;
354
355 pte = pte_page + pte_index(paddr);
356 i = pte_index(paddr);
357
358 for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
359 paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
360 if (paddr >= paddr_end) {
361 if (!after_bootmem &&
362 !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
363 E820_RAM) &&
364 !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
365 E820_RESERVED_KERN))
366 set_pte(pte, __pte(0));
367 continue;
368 }
369
370
371
372
373
374
375
376 if (!pte_none(*pte)) {
377 if (!after_bootmem)
378 pages++;
379 continue;
380 }
381
382 if (0)
383 pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
384 pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
385 pages++;
386 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
387 paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
388 }
389
390 update_page_count(PG_LEVEL_4K, pages);
391
392 return paddr_last;
393}
394
395
396
397
398
399
400static unsigned long __meminit
401phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
402 unsigned long page_size_mask, pgprot_t prot)
403{
404 unsigned long pages = 0, paddr_next;
405 unsigned long paddr_last = paddr_end;
406
407 int i = pmd_index(paddr);
408
409 for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
410 pmd_t *pmd = pmd_page + pmd_index(paddr);
411 pte_t *pte;
412 pgprot_t new_prot = prot;
413
414 paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
415 if (paddr >= paddr_end) {
416 if (!after_bootmem &&
417 !e820_any_mapped(paddr & PMD_MASK, paddr_next,
418 E820_RAM) &&
419 !e820_any_mapped(paddr & PMD_MASK, paddr_next,
420 E820_RESERVED_KERN))
421 set_pmd(pmd, __pmd(0));
422 continue;
423 }
424
425 if (!pmd_none(*pmd)) {
426 if (!pmd_large(*pmd)) {
427 spin_lock(&init_mm.page_table_lock);
428 pte = (pte_t *)pmd_page_vaddr(*pmd);
429 paddr_last = phys_pte_init(pte, paddr,
430 paddr_end, prot);
431 spin_unlock(&init_mm.page_table_lock);
432 continue;
433 }
434
435
436
437
438
439
440
441
442
443
444
445
446 if (page_size_mask & (1 << PG_LEVEL_2M)) {
447 if (!after_bootmem)
448 pages++;
449 paddr_last = paddr_next;
450 continue;
451 }
452 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
453 }
454
455 if (page_size_mask & (1<<PG_LEVEL_2M)) {
456 pages++;
457 spin_lock(&init_mm.page_table_lock);
458 set_pte((pte_t *)pmd,
459 pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
460 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
461 spin_unlock(&init_mm.page_table_lock);
462 paddr_last = paddr_next;
463 continue;
464 }
465
466 pte = alloc_low_page();
467 paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
468
469 spin_lock(&init_mm.page_table_lock);
470 pmd_populate_kernel(&init_mm, pmd, pte);
471 spin_unlock(&init_mm.page_table_lock);
472 }
473 update_page_count(PG_LEVEL_2M, pages);
474 return paddr_last;
475}
476
477
478
479
480
481
482
483static unsigned long __meminit
484phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
485 unsigned long page_size_mask)
486{
487 unsigned long pages = 0, paddr_next;
488 unsigned long paddr_last = paddr_end;
489 unsigned long vaddr = (unsigned long)__va(paddr);
490 int i = pud_index(vaddr);
491
492 for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
493 pud_t *pud;
494 pmd_t *pmd;
495 pgprot_t prot = PAGE_KERNEL;
496
497 vaddr = (unsigned long)__va(paddr);
498 pud = pud_page + pud_index(vaddr);
499 paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
500
501 if (paddr >= paddr_end) {
502 if (!after_bootmem &&
503 !e820_any_mapped(paddr & PUD_MASK, paddr_next,
504 E820_RAM) &&
505 !e820_any_mapped(paddr & PUD_MASK, paddr_next,
506 E820_RESERVED_KERN))
507 set_pud(pud, __pud(0));
508 continue;
509 }
510
511 if (!pud_none(*pud)) {
512 if (!pud_large(*pud)) {
513 pmd = pmd_offset(pud, 0);
514 paddr_last = phys_pmd_init(pmd, paddr,
515 paddr_end,
516 page_size_mask,
517 prot);
518 __flush_tlb_all();
519 continue;
520 }
521
522
523
524
525
526
527
528
529
530
531
532
533 if (page_size_mask & (1 << PG_LEVEL_1G)) {
534 if (!after_bootmem)
535 pages++;
536 paddr_last = paddr_next;
537 continue;
538 }
539 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
540 }
541
542 if (page_size_mask & (1<<PG_LEVEL_1G)) {
543 pages++;
544 spin_lock(&init_mm.page_table_lock);
545 set_pte((pte_t *)pud,
546 pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
547 PAGE_KERNEL_LARGE));
548 spin_unlock(&init_mm.page_table_lock);
549 paddr_last = paddr_next;
550 continue;
551 }
552
553 pmd = alloc_low_page();
554 paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
555 page_size_mask, prot);
556
557 spin_lock(&init_mm.page_table_lock);
558 pud_populate(&init_mm, pud, pmd);
559 spin_unlock(&init_mm.page_table_lock);
560 }
561 __flush_tlb_all();
562
563 update_page_count(PG_LEVEL_1G, pages);
564
565 return paddr_last;
566}
567
568
569
570
571
572
573unsigned long __meminit
574kernel_physical_mapping_init(unsigned long paddr_start,
575 unsigned long paddr_end,
576 unsigned long page_size_mask)
577{
578 bool pgd_changed = false;
579 unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
580
581 paddr_last = paddr_end;
582 vaddr = (unsigned long)__va(paddr_start);
583 vaddr_end = (unsigned long)__va(paddr_end);
584 vaddr_start = vaddr;
585
586 for (; vaddr < vaddr_end; vaddr = vaddr_next) {
587 pgd_t *pgd = pgd_offset_k(vaddr);
588 pud_t *pud;
589
590 vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
591
592 if (pgd_val(*pgd)) {
593 pud = (pud_t *)pgd_page_vaddr(*pgd);
594 paddr_last = phys_pud_init(pud, __pa(vaddr),
595 __pa(vaddr_end),
596 page_size_mask);
597 continue;
598 }
599
600 pud = alloc_low_page();
601 paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
602 page_size_mask);
603
604 spin_lock(&init_mm.page_table_lock);
605 pgd_populate(&init_mm, pgd, pud);
606 spin_unlock(&init_mm.page_table_lock);
607 pgd_changed = true;
608 }
609
610 if (pgd_changed)
611 sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
612
613 __flush_tlb_all();
614
615 return paddr_last;
616}
617
618#ifndef CONFIG_NUMA
619void __init initmem_init(void)
620{
621 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
622}
623#endif
624
625void __init paging_init(void)
626{
627 sparse_memory_present_with_active_regions(MAX_NUMNODES);
628 sparse_init();
629
630
631
632
633
634
635
636 node_clear_state(0, N_MEMORY);
637 if (N_MEMORY != N_NORMAL_MEMORY)
638 node_clear_state(0, N_NORMAL_MEMORY);
639
640 zone_sizes_init();
641}
642
643
644
645
646#ifdef CONFIG_MEMORY_HOTPLUG
647
648
649
650
651static void update_end_of_memory_vars(u64 start, u64 size)
652{
653 unsigned long end_pfn = PFN_UP(start + size);
654
655 if (end_pfn > max_pfn) {
656 max_pfn = end_pfn;
657 max_low_pfn = end_pfn;
658 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
659 }
660}
661
662
663
664
665
666int add_pages(int nid, unsigned long start,
667 unsigned long size, bool for_device)
668{
669 struct pglist_data *pgdat = NODE_DATA(nid);
670 int zoneid = zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
671 struct zone *zone = pgdat->node_zones + zoneid;
672 int ret;
673
674#ifdef CONFIG_ZONE_DEVICE
675 if (zoneid == ZONE_DEVICE)
676 zone = pgdat->zone_device;
677#endif
678
679 ret = __add_pages(nid, zone, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
680 WARN_ON_ONCE(ret);
681
682
683 update_end_of_memory_vars(start, size);
684
685 return ret;
686}
687
688int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
689{
690 init_memory_mapping(start, start + size);
691
692 return add_pages(nid, start, size, for_device);
693}
694EXPORT_SYMBOL_GPL(arch_add_memory);
695
/*
 * Fill byte written over the released part of a partially removed page by
 * the remove_*_table() helpers; a page consisting only of this byte is
 * then freed.
 */
#define PAGE_INUSE 0xFD
697
698static void __meminit free_pagetable(struct page *page, int order)
699{
700 unsigned long magic;
701 unsigned int nr_pages = 1 << order;
702 struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
703
704 if (altmap) {
705 vmem_altmap_free(altmap, nr_pages);
706 return;
707 }
708
709
710 if (PageReserved(page)) {
711 __ClearPageReserved(page);
712
713 magic = (unsigned long)page->freelist;
714 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
715 while (nr_pages--)
716 put_page_bootmem(page++);
717 } else
718 while (nr_pages--)
719 free_reserved_page(page++);
720 } else
721 free_pages((unsigned long)page_address(page), order);
722}
723
724static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
725{
726 pte_t *pte;
727 int i;
728
729 for (i = 0; i < PTRS_PER_PTE; i++) {
730 pte = pte_start + i;
731 if (!pte_none(*pte))
732 return;
733 }
734
735
736 free_pagetable(pmd_page(*pmd), 0);
737 spin_lock(&init_mm.page_table_lock);
738 pmd_clear(pmd);
739 spin_unlock(&init_mm.page_table_lock);
740}
741
742static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
743{
744 pmd_t *pmd;
745 int i;
746
747 for (i = 0; i < PTRS_PER_PMD; i++) {
748 pmd = pmd_start + i;
749 if (!pmd_none(*pmd))
750 return;
751 }
752
753
754 free_pagetable(pud_page(*pud), 0);
755 spin_lock(&init_mm.page_table_lock);
756 pud_clear(pud);
757 spin_unlock(&init_mm.page_table_lock);
758}
759
760static void __meminit
761remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
762 bool direct)
763{
764 unsigned long next, pages = 0;
765 pte_t *pte;
766 void *page_addr;
767 phys_addr_t phys_addr;
768
769 pte = pte_start + pte_index(addr);
770 for (; addr < end; addr = next, pte++) {
771 next = (addr + PAGE_SIZE) & PAGE_MASK;
772 if (next > end)
773 next = end;
774
775 if (!pte_present(*pte))
776 continue;
777
778
779
780
781
782
783 phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
784 if (phys_addr < (phys_addr_t)0x40000000)
785 return;
786
787 if (IS_ALIGNED(addr, PAGE_SIZE) &&
788 IS_ALIGNED(next, PAGE_SIZE)) {
789
790
791
792
793 if (!direct)
794 free_pagetable(pte_page(*pte), 0);
795
796 spin_lock(&init_mm.page_table_lock);
797 pte_clear(&init_mm, addr, pte);
798 spin_unlock(&init_mm.page_table_lock);
799
800
801 pages++;
802 } else {
803
804
805
806
807
808
809
810
811
812
813 memset((void *)addr, PAGE_INUSE, next - addr);
814
815 page_addr = page_address(pte_page(*pte));
816 if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
817 free_pagetable(pte_page(*pte), 0);
818
819 spin_lock(&init_mm.page_table_lock);
820 pte_clear(&init_mm, addr, pte);
821 spin_unlock(&init_mm.page_table_lock);
822 }
823 }
824 }
825
826
827 flush_tlb_all();
828 if (direct)
829 update_page_count(PG_LEVEL_4K, -pages);
830}
831
832static void __meminit
833remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
834 bool direct)
835{
836 unsigned long next, pages = 0;
837 pte_t *pte_base;
838 pmd_t *pmd;
839 void *page_addr;
840
841 pmd = pmd_start + pmd_index(addr);
842 for (; addr < end; addr = next, pmd++) {
843 next = pmd_addr_end(addr, end);
844
845 if (!pmd_present(*pmd))
846 continue;
847
848 if (pmd_large(*pmd)) {
849 if (IS_ALIGNED(addr, PMD_SIZE) &&
850 IS_ALIGNED(next, PMD_SIZE)) {
851 if (!direct)
852 free_pagetable(pmd_page(*pmd),
853 get_order(PMD_SIZE));
854
855 spin_lock(&init_mm.page_table_lock);
856 pmd_clear(pmd);
857 spin_unlock(&init_mm.page_table_lock);
858 pages++;
859 } else {
860
861 memset((void *)addr, PAGE_INUSE, next - addr);
862
863 page_addr = page_address(pmd_page(*pmd));
864 if (!memchr_inv(page_addr, PAGE_INUSE,
865 PMD_SIZE)) {
866 free_pagetable(pmd_page(*pmd),
867 get_order(PMD_SIZE));
868
869 spin_lock(&init_mm.page_table_lock);
870 pmd_clear(pmd);
871 spin_unlock(&init_mm.page_table_lock);
872 }
873 }
874
875 continue;
876 }
877
878 pte_base = (pte_t *)pmd_page_vaddr(*pmd);
879 remove_pte_table(pte_base, addr, next, direct);
880 free_pte_table(pte_base, pmd);
881 }
882
883
884 if (direct)
885 update_page_count(PG_LEVEL_2M, -pages);
886}
887
888static void __meminit
889remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
890 bool direct)
891{
892 unsigned long next, pages = 0;
893 pmd_t *pmd_base;
894 pud_t *pud;
895 void *page_addr;
896
897 pud = pud_start + pud_index(addr);
898 for (; addr < end; addr = next, pud++) {
899 next = pud_addr_end(addr, end);
900
901 if (!pud_present(*pud))
902 continue;
903
904 if (pud_large(*pud)) {
905 if (IS_ALIGNED(addr, PUD_SIZE) &&
906 IS_ALIGNED(next, PUD_SIZE)) {
907 if (!direct)
908 free_pagetable(pud_page(*pud),
909 get_order(PUD_SIZE));
910
911 spin_lock(&init_mm.page_table_lock);
912 pud_clear(pud);
913 spin_unlock(&init_mm.page_table_lock);
914 pages++;
915 } else {
916
917 memset((void *)addr, PAGE_INUSE, next - addr);
918
919 page_addr = page_address(pud_page(*pud));
920 if (!memchr_inv(page_addr, PAGE_INUSE,
921 PUD_SIZE)) {
922 free_pagetable(pud_page(*pud),
923 get_order(PUD_SIZE));
924
925 spin_lock(&init_mm.page_table_lock);
926 pud_clear(pud);
927 spin_unlock(&init_mm.page_table_lock);
928 }
929 }
930
931 continue;
932 }
933
934 pmd_base = (pmd_t *)pud_page_vaddr(*pud);
935 remove_pmd_table(pmd_base, addr, next, direct);
936 free_pmd_table(pmd_base, pud);
937 }
938
939 if (direct)
940 update_page_count(PG_LEVEL_1G, -pages);
941}
942
943
944static void __meminit
945remove_pagetable(unsigned long start, unsigned long end, bool direct)
946{
947 unsigned long next;
948 unsigned long addr;
949 pgd_t *pgd;
950 pud_t *pud;
951
952 for (addr = start; addr < end; addr = next) {
953 next = pgd_addr_end(addr, end);
954
955 pgd = pgd_offset_k(addr);
956 if (!pgd_present(*pgd))
957 continue;
958
959 pud = (pud_t *)pgd_page_vaddr(*pgd);
960 remove_pud_table(pud, addr, next, direct);
961 }
962
963 flush_tlb_all();
964}
965
966void __ref vmemmap_free(unsigned long start, unsigned long end)
967{
968 remove_pagetable(start, end, false);
969}
970
971#ifdef CONFIG_MEMORY_HOTREMOVE
972static void __meminit
973kernel_physical_mapping_remove(unsigned long start, unsigned long end)
974{
975 start = (unsigned long)__va(start);
976 end = (unsigned long)__va(end);
977
978 remove_pagetable(start, end, true);
979}
980
981int __ref arch_remove_memory(u64 start, u64 size)
982{
983 unsigned long start_pfn = start >> PAGE_SHIFT;
984 unsigned long nr_pages = size >> PAGE_SHIFT;
985 struct page *page = pfn_to_page(start_pfn);
986 struct vmem_altmap *altmap;
987 struct zone *zone;
988 int ret;
989
990
991 altmap = to_vmem_altmap((unsigned long) page);
992 if (altmap)
993 page += vmem_altmap_offset(altmap);
994 zone = page_zone(page);
995 ret = __remove_pages(zone, start_pfn, nr_pages);
996 WARN_ON_ONCE(ret);
997 kernel_physical_mapping_remove(start, start + size);
998
999 return ret;
1000}
1001#endif
1002#endif
1003
/* kcore list entry for the vsyscall range; registered in mem_init(). */
static struct kcore_list kcore_vsyscall;
1005
1006static void __init register_page_bootmem_info(void)
1007{
1008#ifdef CONFIG_NUMA
1009 int i;
1010
1011 for_each_online_node(i)
1012 register_page_bootmem_info_node(NODE_DATA(i));
1013#endif
1014}
1015
1016void __init mem_init(void)
1017{
1018 long codesize, reservedpages, datasize, initsize;
1019 unsigned long absent_pages;
1020
1021 pci_iommu_alloc();
1022
1023
1024
1025 register_page_bootmem_info();
1026
1027
1028 totalram_pages = free_all_bootmem();
1029
1030 absent_pages = absent_pages_in_range(0, max_pfn);
1031 reservedpages = max_pfn - totalram_pages - absent_pages;
1032 after_bootmem = 1;
1033
1034 codesize = (unsigned long) &_etext - (unsigned long) &_text;
1035 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
1036 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
1037
1038
1039 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
1040 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
1041
1042 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
1043 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
1044 nr_free_pages() << (PAGE_SHIFT-10),
1045 max_pfn << (PAGE_SHIFT-10),
1046 codesize >> 10,
1047 absent_pages << (PAGE_SHIFT-10),
1048 reservedpages << (PAGE_SHIFT-10),
1049 datasize >> 10,
1050 initsize >> 10);
1051}
1052
1053#ifdef CONFIG_DEBUG_RODATA
/* Constant object exported (GPL) under the name rodata_test_data. */
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

/*
 * Set to 1 by mark_rodata_ro(); set_kernel_text_rw()/set_kernel_text_ro()
 * do nothing while it is still 0.
 */
int kernel_set_to_readonly;
1058
1059void set_kernel_text_rw(void)
1060{
1061 unsigned long start = PFN_ALIGN(_text);
1062 unsigned long end = PFN_ALIGN(__stop___ex_table);
1063
1064 if (!kernel_set_to_readonly)
1065 return;
1066
1067 pr_debug("Set kernel text: %lx - %lx for read write\n",
1068 start, end);
1069
1070
1071
1072
1073
1074
1075 set_memory_rw(start, (end - start) >> PAGE_SHIFT);
1076}
1077
1078void set_kernel_text_ro(void)
1079{
1080 unsigned long start = PFN_ALIGN(_text);
1081 unsigned long end = PFN_ALIGN(__stop___ex_table);
1082
1083 if (!kernel_set_to_readonly)
1084 return;
1085
1086 pr_debug("Set kernel text: %lx - %lx for read only\n",
1087 start, end);
1088
1089
1090
1091
1092 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
1093}
1094
1095void mark_rodata_ro(void)
1096{
1097 unsigned long start = PFN_ALIGN(_text);
1098 unsigned long rodata_start = PFN_ALIGN(__start_rodata);
1099 unsigned long end = (unsigned long) &__end_rodata_hpage_align;
1100 unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
1101 unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
1102 unsigned long all_end = PFN_ALIGN(&_end);
1103
1104 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
1105 (end - start) >> 10);
1106 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
1107
1108 kernel_set_to_readonly = 1;
1109
1110
1111
1112
1113
1114 set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
1115
1116 rodata_test();
1117
1118#ifdef CONFIG_CPA_DEBUG
1119 printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
1120 set_memory_rw(start, (end-start) >> PAGE_SHIFT);
1121
1122 printk(KERN_INFO "Testing CPA: again\n");
1123 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
1124#endif
1125
1126 free_init_pages("unused kernel memory",
1127 (unsigned long) __va(__pa_symbol(text_end)),
1128 (unsigned long) __va(__pa_symbol(rodata_start)));
1129
1130 free_init_pages("unused kernel memory",
1131 (unsigned long) __va(__pa_symbol(rodata_end)),
1132 (unsigned long) __va(__pa_symbol(_sdata)));
1133}
1134
1135#endif
1136
1137int kern_addr_valid(unsigned long addr)
1138{
1139 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
1140 pgd_t *pgd;
1141 pud_t *pud;
1142 pmd_t *pmd;
1143 pte_t *pte;
1144
1145 if (above != 0 && above != -1UL)
1146 return 0;
1147
1148 pgd = pgd_offset_k(addr);
1149 if (pgd_none(*pgd))
1150 return 0;
1151
1152 pud = pud_offset(pgd, addr);
1153 if (pud_none(*pud))
1154 return 0;
1155
1156 if (pud_large(*pud))
1157 return pfn_valid(pud_pfn(*pud));
1158
1159 pmd = pmd_offset(pud, addr);
1160 if (pmd_none(*pmd))
1161 return 0;
1162
1163 if (pmd_large(*pmd))
1164 return pfn_valid(pmd_pfn(*pmd));
1165
1166 pte = pte_offset_kernel(pmd, addr);
1167 if (pte_none(*pte))
1168 return 0;
1169
1170 return pfn_valid(pte_pfn(*pte));
1171}
1172
1173
1174
1175
1176
1177
/*
 * Static VMA describing the vsyscall range: VSYSCALL_MAPPED_PAGES pages
 * starting at VSYSCALL_START, readable and executable.  Returned by
 * get_gate_vma() and named "[vsyscall]" by arch_vma_name().
 */
static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};
1184
1185struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
1186{
1187#ifdef CONFIG_IA32_EMULATION
1188 if (!mm || mm->context.ia32_compat)
1189 return NULL;
1190#endif
1191 return &gate_vma;
1192}
1193
1194int in_gate_area(struct mm_struct *mm, unsigned long addr)
1195{
1196 struct vm_area_struct *vma = get_gate_vma(mm);
1197
1198 if (!vma)
1199 return 0;
1200
1201 return (addr >= vma->vm_start) && (addr < vma->vm_end);
1202}
1203
1204
1205
1206
1207
1208
1209int in_gate_area_no_mm(unsigned long addr)
1210{
1211 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
1212}
1213
1214const char *arch_vma_name(struct vm_area_struct *vma)
1215{
1216 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
1217 return "[vdso]";
1218 if (vma == &gate_vma)
1219 return "[vsyscall]";
1220 return NULL;
1221}
1222
1223#ifdef CONFIG_X86_UV
1224unsigned long memory_block_size_bytes(void)
1225{
1226 if (is_uv_system()) {
1227 printk(KERN_INFO "UV: memory block size 2GB\n");
1228 return 2UL * 1024 * 1024 * 1024;
1229 }
1230 return MIN_MEMORY_BLOCK_SIZE;
1231}
1232#endif
1233
1234#ifdef CONFIG_SPARSEMEM_VMEMMAP
1235
1236
1237
/*
 * Bookkeeping for the vmemmap debug output: the virtual range
 * [addr_start, addr_end), the backing buffer range [p_start, p_end) and
 * the node of the current run of contiguous PMD-sized vmemmap blocks.
 * Updated by vmemmap_populate_hugepages(), reset by
 * vmemmap_populate_print_last().
 */
static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
1241
1242static int __meminit vmemmap_populate_hugepages(unsigned long start,
1243 unsigned long end, int node, struct vmem_altmap *altmap)
1244{
1245 unsigned long addr;
1246 unsigned long next;
1247 pgd_t *pgd;
1248 pud_t *pud;
1249 pmd_t *pmd;
1250
1251 for (addr = start; addr < end; addr = next) {
1252 next = pmd_addr_end(addr, end);
1253
1254 pgd = vmemmap_pgd_populate(addr, node);
1255 if (!pgd)
1256 return -ENOMEM;
1257
1258 pud = vmemmap_pud_populate(pgd, addr, node);
1259 if (!pud)
1260 return -ENOMEM;
1261
1262 pmd = pmd_offset(pud, addr);
1263 if (pmd_none(*pmd)) {
1264 void *p;
1265
1266 p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
1267 if (p) {
1268 pte_t entry;
1269
1270 entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
1271 PAGE_KERNEL_LARGE);
1272 set_pmd(pmd, __pmd(pte_val(entry)));
1273
1274
1275 if (p_end != p || node_start != node) {
1276 if (p_start)
1277 pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
1278 addr_start, addr_end-1, p_start, p_end-1, node_start);
1279 addr_start = addr;
1280 node_start = node;
1281 p_start = p;
1282 }
1283
1284 addr_end = addr + PMD_SIZE;
1285 p_end = p + PMD_SIZE;
1286 continue;
1287 } else if (altmap)
1288 return -ENOMEM;
1289 } else if (pmd_large(*pmd)) {
1290 vmemmap_verify((pte_t *)pmd, node, addr, next);
1291 continue;
1292 }
1293 pr_warn_once("vmemmap: falling back to regular page backing\n");
1294 if (vmemmap_populate_basepages(addr, next, node))
1295 return -ENOMEM;
1296 }
1297 return 0;
1298}
1299
1300int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
1301{
1302 struct vmem_altmap *altmap = to_vmem_altmap(start);
1303 int err;
1304
1305 if (cpu_has_pse)
1306 err = vmemmap_populate_hugepages(start, end, node, altmap);
1307 else if (altmap) {
1308 pr_err_once("%s: no cpu support for altmap allocations\n",
1309 __func__);
1310 err = -ENOMEM;
1311 } else
1312 err = vmemmap_populate_basepages(start, end, node);
1313 if (!err)
1314 sync_global_pgds(start, end - 1, 0);
1315 return err;
1316}
1317
1318#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1319void register_page_bootmem_memmap(unsigned long section_nr,
1320 struct page *start_page, unsigned long size)
1321{
1322 unsigned long addr = (unsigned long)start_page;
1323 unsigned long end = (unsigned long)(start_page + size);
1324 unsigned long next;
1325 pgd_t *pgd;
1326 pud_t *pud;
1327 pmd_t *pmd;
1328 unsigned int nr_pages;
1329 struct page *page;
1330
1331 for (; addr < end; addr = next) {
1332 pte_t *pte = NULL;
1333
1334 pgd = pgd_offset_k(addr);
1335 if (pgd_none(*pgd)) {
1336 next = (addr + PAGE_SIZE) & PAGE_MASK;
1337 continue;
1338 }
1339 get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
1340
1341 pud = pud_offset(pgd, addr);
1342 if (pud_none(*pud)) {
1343 next = (addr + PAGE_SIZE) & PAGE_MASK;
1344 continue;
1345 }
1346 get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
1347
1348 if (!cpu_has_pse) {
1349 next = (addr + PAGE_SIZE) & PAGE_MASK;
1350 pmd = pmd_offset(pud, addr);
1351 if (pmd_none(*pmd))
1352 continue;
1353 get_page_bootmem(section_nr, pmd_page(*pmd),
1354 MIX_SECTION_INFO);
1355
1356 pte = pte_offset_kernel(pmd, addr);
1357 if (pte_none(*pte))
1358 continue;
1359 get_page_bootmem(section_nr, pte_page(*pte),
1360 SECTION_INFO);
1361 } else {
1362 next = pmd_addr_end(addr, end);
1363
1364 pmd = pmd_offset(pud, addr);
1365 if (pmd_none(*pmd))
1366 continue;
1367
1368 nr_pages = 1 << (get_order(PMD_SIZE));
1369 page = pmd_page(*pmd);
1370 while (nr_pages--)
1371 get_page_bootmem(section_nr, page++,
1372 SECTION_INFO);
1373 }
1374 }
1375}
1376#endif
1377
1378void __meminit vmemmap_populate_print_last(void)
1379{
1380 if (p_start) {
1381 pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
1382 addr_start, addr_end-1, p_start, p_end-1, node_start);
1383 p_start = NULL;
1384 p_end = NULL;
1385 node_start = 0;
1386 }
1387}
1388#endif
1389