linux/arch/x86/mm/dump_pagetables.c
<<
>>
Prefs
   1/*
   2 * Debug helper to dump the current kernel pagetables of the system
   3 * so that we can see what the various memory ranges are set to.
   4 *
   5 * (C) Copyright 2008 Intel Corporation
   6 *
   7 * Author: Arjan van de Ven <arjan@linux.intel.com>
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License
  11 * as published by the Free Software Foundation; version 2
  12 * of the License.
  13 */
  14
  15#include <linux/debugfs.h>
  16#include <linux/mm.h>
  17#include <linux/module.h>
  18#include <linux/seq_file.h>
  19
  20#include <asm/pgtable.h>
  21
  22/*
  23 * The dumper groups pagetable entries of the same type into one, and for
  24 * that it needs to keep some state when walking, and flush this state
  25 * when a "break" in the continuity is found.
  26 */
  27struct pg_state {
  28        int level;
  29        pgprot_t current_prot;
  30        unsigned long start_address;
  31        unsigned long current_address;
  32        const struct addr_marker *marker;
  33};
  34
  35struct addr_marker {
  36        unsigned long start_address;
  37        const char *name;
  38};
  39
  40/* indices for address_markers; keep sync'd w/ address_markers below */
  41enum address_markers_idx {
  42        USER_SPACE_NR = 0,
  43#ifdef CONFIG_X86_64
  44        KERNEL_SPACE_NR,
  45        LOW_KERNEL_NR,
  46        VMALLOC_START_NR,
  47        VMEMMAP_START_NR,
  48        HIGH_KERNEL_NR,
  49        MODULES_VADDR_NR,
  50        MODULES_END_NR,
  51#else
  52        KERNEL_SPACE_NR,
  53        VMALLOC_START_NR,
  54        VMALLOC_END_NR,
  55# ifdef CONFIG_HIGHMEM
  56        PKMAP_BASE_NR,
  57# endif
  58        FIXADDR_START_NR,
  59#endif
  60};
  61
  62/* Address space markers hints */
  63static struct addr_marker address_markers[] = {
  64        { 0, "User Space" },
  65#ifdef CONFIG_X86_64
  66        { 0x8000000000000000UL, "Kernel Space" },
  67        { PAGE_OFFSET,          "Low Kernel Mapping" },
  68        { VMALLOC_START,        "vmalloc() Area" },
  69        { VMEMMAP_START,        "Vmemmap" },
  70        { __START_KERNEL_map,   "High Kernel Mapping" },
  71        { MODULES_VADDR,        "Modules" },
  72        { MODULES_END,          "End Modules" },
  73#else
  74        { PAGE_OFFSET,          "Kernel Mapping" },
  75        { 0/* VMALLOC_START */, "vmalloc() Area" },
  76        { 0/*VMALLOC_END*/,     "vmalloc() End" },
  77# ifdef CONFIG_HIGHMEM
  78        { 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
  79# endif
  80        { 0/*FIXADDR_START*/,   "Fixmap Area" },
  81#endif
  82        { -1, NULL }            /* End of list */
  83};
  84
  85/* Multipliers for offsets within the PTEs */
  86#define PTE_LEVEL_MULT (PAGE_SIZE)
  87#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
  88#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
  89#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
  90
  91/*
  92 * Print a readable form of a pgprot_t to the seq_file
  93 */
  94static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
  95{
  96        pgprotval_t pr = pgprot_val(prot);
  97        static const char * const level_name[] =
  98                { "cr3", "pgd", "pud", "pmd", "pte" };
  99
 100        if (!pgprot_val(prot)) {
 101                /* Not present */
 102                seq_printf(m, "                          ");
 103        } else {
 104                if (pr & _PAGE_USER)
 105                        seq_printf(m, "USR ");
 106                else
 107                        seq_printf(m, "    ");
 108                if (pr & _PAGE_RW)
 109                        seq_printf(m, "RW ");
 110                else
 111                        seq_printf(m, "ro ");
 112                if (pr & _PAGE_PWT)
 113                        seq_printf(m, "PWT ");
 114                else
 115                        seq_printf(m, "    ");
 116                if (pr & _PAGE_PCD)
 117                        seq_printf(m, "PCD ");
 118                else
 119                        seq_printf(m, "    ");
 120
 121                /* Bit 9 has a different meaning on level 3 vs 4 */
 122                if (level <= 3) {
 123                        if (pr & _PAGE_PSE)
 124                                seq_printf(m, "PSE ");
 125                        else
 126                                seq_printf(m, "    ");
 127                } else {
 128                        if (pr & _PAGE_PAT)
 129                                seq_printf(m, "pat ");
 130                        else
 131                                seq_printf(m, "    ");
 132                }
 133                if (pr & _PAGE_GLOBAL)
 134                        seq_printf(m, "GLB ");
 135                else
 136                        seq_printf(m, "    ");
 137                if (pr & _PAGE_NX)
 138                        seq_printf(m, "NX ");
 139                else
 140                        seq_printf(m, "x  ");
 141        }
 142        seq_printf(m, "%s\n", level_name[level]);
 143}
 144
 145/*
 146 * On 64 bits, sign-extend the 48 bit address to 64 bit
 147 */
 148static unsigned long normalize_addr(unsigned long u)
 149{
 150#ifdef CONFIG_X86_64
 151        return (signed long)(u << 16) >> 16;
 152#else
 153        return u;
 154#endif
 155}
 156
 157/*
 158 * This function gets called on a break in a continuous series
 159 * of PTE entries; the next one is different so we need to
 160 * print what we collected so far.
 161 */
 162static void note_page(struct seq_file *m, struct pg_state *st,
 163                      pgprot_t new_prot, int level)
 164{
 165        pgprotval_t prot, cur;
 166        static const char units[] = "KMGTPE";
 167
 168        /*
 169         * If we have a "break" in the series, we need to flush the state that
 170         * we have now. "break" is either changing perms, levels or
 171         * address space marker.
 172         */
 173        prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
 174        cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 175
 176        if (!st->level) {
 177                /* First entry */
 178                st->current_prot = new_prot;
 179                st->level = level;
 180                st->marker = address_markers;
 181                seq_printf(m, "---[ %s ]---\n", st->marker->name);
 182        } else if (prot != cur || level != st->level ||
 183                   st->current_address >= st->marker[1].start_address) {
 184                const char *unit = units;
 185                unsigned long delta;
 186                int width = sizeof(unsigned long) * 2;
 187
 188                /*
 189                 * Now print the actual finished series
 190                 */
 191                seq_printf(m, "0x%0*lx-0x%0*lx   ",
 192                           width, st->start_address,
 193                           width, st->current_address);
 194
 195                delta = (st->current_address - st->start_address) >> 10;
 196                while (!(delta & 1023) && unit[1]) {
 197                        delta >>= 10;
 198                        unit++;
 199                }
 200                seq_printf(m, "%9lu%c ", delta, *unit);
 201                printk_prot(m, st->current_prot, st->level);
 202
 203                /*
 204                 * We print markers for special areas of address space,
 205                 * such as the start of vmalloc space etc.
 206                 * This helps in the interpretation.
 207                 */
 208                if (st->current_address >= st->marker[1].start_address) {
 209                        st->marker++;
 210                        seq_printf(m, "---[ %s ]---\n", st->marker->name);
 211                }
 212
 213                st->start_address = st->current_address;
 214                st->current_prot = new_prot;
 215                st->level = level;
 216        }
 217}
 218
 219static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 220                                                        unsigned long P)
 221{
 222        int i;
 223        pte_t *start;
 224
 225        start = (pte_t *) pmd_page_vaddr(addr);
 226        for (i = 0; i < PTRS_PER_PTE; i++) {
 227                pgprot_t prot = pte_pgprot(*start);
 228
 229                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
 230                note_page(m, st, prot, 4);
 231                start++;
 232        }
 233}
 234
 235#if PTRS_PER_PMD > 1
 236
 237static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 238                                                        unsigned long P)
 239{
 240        int i;
 241        pmd_t *start;
 242
 243        start = (pmd_t *) pud_page_vaddr(addr);
 244        for (i = 0; i < PTRS_PER_PMD; i++) {
 245                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 246                if (!pmd_none(*start)) {
 247                        pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
 248
 249                        if (pmd_large(*start) || !pmd_present(*start))
 250                                note_page(m, st, __pgprot(prot), 3);
 251                        else
 252                                walk_pte_level(m, st, *start,
 253                                               P + i * PMD_LEVEL_MULT);
 254                } else
 255                        note_page(m, st, __pgprot(0), 3);
 256                start++;
 257        }
 258}
 259
 260#else
 261#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
 262#define pud_large(a) pmd_large(__pmd(pud_val(a)))
 263#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
 264#endif
 265
 266#if PTRS_PER_PUD > 1
 267
 268static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 269                                                        unsigned long P)
 270{
 271        int i;
 272        pud_t *start;
 273
 274        start = (pud_t *) pgd_page_vaddr(addr);
 275
 276        for (i = 0; i < PTRS_PER_PUD; i++) {
 277                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 278                if (!pud_none(*start)) {
 279                        pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
 280
 281                        if (pud_large(*start) || !pud_present(*start))
 282                                note_page(m, st, __pgprot(prot), 2);
 283                        else
 284                                walk_pmd_level(m, st, *start,
 285                                               P + i * PUD_LEVEL_MULT);
 286                } else
 287                        note_page(m, st, __pgprot(0), 2);
 288
 289                start++;
 290        }
 291}
 292
 293#else
 294#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
 295#define pgd_large(a) pud_large(__pud(pgd_val(a)))
 296#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 297#endif
 298
 299static void walk_pgd_level(struct seq_file *m)
 300{
 301#ifdef CONFIG_X86_64
 302        pgd_t *start = (pgd_t *) &init_level4_pgt;
 303#else
 304        pgd_t *start = swapper_pg_dir;
 305#endif
 306        int i;
 307        struct pg_state st;
 308
 309        memset(&st, 0, sizeof(st));
 310
 311        for (i = 0; i < PTRS_PER_PGD; i++) {
 312                st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
 313                if (!pgd_none(*start)) {
 314                        pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
 315
 316                        if (pgd_large(*start) || !pgd_present(*start))
 317                                note_page(m, &st, __pgprot(prot), 1);
 318                        else
 319                                walk_pud_level(m, &st, *start,
 320                                               i * PGD_LEVEL_MULT);
 321                } else
 322                        note_page(m, &st, __pgprot(0), 1);
 323
 324                start++;
 325        }
 326
 327        /* Flush out the last page */
 328        st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
 329        note_page(m, &st, __pgprot(0), 0);
 330}
 331
 332static int ptdump_show(struct seq_file *m, void *v)
 333{
 334        walk_pgd_level(m);
 335        return 0;
 336}
 337
 338static int ptdump_open(struct inode *inode, struct file *filp)
 339{
 340        return single_open(filp, ptdump_show, NULL);
 341}
 342
 343static const struct file_operations ptdump_fops = {
 344        .open           = ptdump_open,
 345        .read           = seq_read,
 346        .llseek         = seq_lseek,
 347        .release        = single_release,
 348};
 349
 350static int pt_dump_init(void)
 351{
 352        struct dentry *pe;
 353
 354#ifdef CONFIG_X86_32
 355        /* Not a compile-time constant on x86-32 */
 356        address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
 357        address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
 358# ifdef CONFIG_HIGHMEM
 359        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 360# endif
 361        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
 362#endif
 363
 364        pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
 365                                 &ptdump_fops);
 366        if (!pe)
 367                return -ENOMEM;
 368
 369        return 0;
 370}
 371
 372__initcall(pt_dump_init);
 373MODULE_LICENSE("GPL");
 374MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
 375MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
 376