linux/arch/powerpc/mm/dump_linuxpagetables.c
/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_pagetables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <linux/const.h>
#include <asm/page.h>
#include <asm/pgalloc.h>

/*
 * To visualise what is happening,
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how far we shift an address right to find its index in
 *    the corresponding P**
 *  - P**_SIZE = how much memory we can access through the table - not the
 *    size of the table itself.
 * P** = {PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry, which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - i.e. PTEs that are virtually
 * contiguous with the same PTE flags are chunked together. This makes it
 * clear how different areas of the kernel virtual memory are used.
 */
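
/*
 * As an illustrative example (the actual numbers depend on the configured
 * page size and MMU, so treat these as hypothetical): with 4K pages,
 * PAGE_SHIFT = 12, so one PTE maps a 4K page. If PTRS_PER_PTE = 512, one
 * PTE table spans PMD_SIZE = 512 * 4K = 2M of virtual address space; with
 * PTRS_PER_PMD = 512, one PMD table spans PUD_SIZE = 512 * 2M = 1G; and so
 * on, up to PGDIR_SIZE at the top level.
 */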
struct pg_state {
        struct seq_file *seq;
        const struct addr_marker *marker;
        unsigned long start_address;
        unsigned int level;
        u64 current_flags;
};

struct addr_marker {
        unsigned long start_address;
        const char *name;
};

static struct addr_marker address_markers[] = {
        { 0,    "Start of kernel VM" },
        { 0,    "vmalloc() Area" },
        { 0,    "vmalloc() End" },
        { 0,    "isa I/O start" },
        { 0,    "isa I/O end" },
        { 0,    "phb I/O start" },
        { 0,    "phb I/O end" },
        { 0,    "I/O remap start" },
        { 0,    "I/O remap end" },
        { 0,    "vmemmap start" },
        { -1,   NULL },
};
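
/*
 * The zeroed start_address fields above are filled in at init time by
 * populate_markers() below, keeping this table itself static.
 */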

struct flag_info {
        u64             mask;
        u64             val;
        const char      *set;
        const char      *clear;
        bool            is_val;
        int             shift;
};

static const struct flag_info flag_array[] = {
        {
#ifdef CONFIG_PPC_STD_MMU_64
                .mask   = _PAGE_PRIVILEGED,
                .val    = 0,
#else
                .mask   = _PAGE_USER,
                .val    = _PAGE_USER,
#endif
                .set    = "user",
                .clear  = "    ",
        }, {
                .mask   = _PAGE_RW,
                .val    = _PAGE_RW,
                .set    = "rw",
                .clear  = "ro",
        }, {
                .mask   = _PAGE_EXEC,
                .val    = _PAGE_EXEC,
                .set    = " X ",
                .clear  = "   ",
        }, {
                .mask   = _PAGE_PTE,
                .val    = _PAGE_PTE,
                .set    = "pte",
                .clear  = "   ",
        }, {
                .mask   = _PAGE_PRESENT,
                .val    = _PAGE_PRESENT,
                .set    = "present",
                .clear  = "       ",
        }, {
#ifdef CONFIG_PPC_STD_MMU_64
                .mask   = H_PAGE_HASHPTE,
                .val    = H_PAGE_HASHPTE,
#else
                .mask   = _PAGE_HASHPTE,
                .val    = _PAGE_HASHPTE,
#endif
                .set    = "hpte",
                .clear  = "    ",
        }, {
#ifndef CONFIG_PPC_STD_MMU_64
                .mask   = _PAGE_GUARDED,
                .val    = _PAGE_GUARDED,
                .set    = "guarded",
                .clear  = "       ",
        }, {
#endif
                .mask   = _PAGE_DIRTY,
                .val    = _PAGE_DIRTY,
                .set    = "dirty",
                .clear  = "     ",
        }, {
                .mask   = _PAGE_ACCESSED,
                .val    = _PAGE_ACCESSED,
                .set    = "accessed",
                .clear  = "        ",
        }, {
#ifndef CONFIG_PPC_STD_MMU_64
                .mask   = _PAGE_WRITETHRU,
                .val    = _PAGE_WRITETHRU,
                .set    = "write through",
                .clear  = "             ",
        }, {
#endif
                .mask   = _PAGE_NO_CACHE,
                .val    = _PAGE_NO_CACHE,
                .set    = "no cache",
                .clear  = "        ",
        }, {
#ifdef CONFIG_PPC_BOOK3S_64
                .mask   = H_PAGE_BUSY,
                .val    = H_PAGE_BUSY,
                .set    = "busy",
        }, {
#ifdef CONFIG_PPC_64K_PAGES
                .mask   = H_PAGE_COMBO,
                .val    = H_PAGE_COMBO,
                .set    = "combo",
        }, {
                .mask   = H_PAGE_4K_PFN,
                .val    = H_PAGE_4K_PFN,
                .set    = "4K_pfn",
        }, {
#endif
                .mask   = H_PAGE_F_GIX,
                .val    = H_PAGE_F_GIX,
                .set    = "f_gix",
                .is_val = true,
                .shift  = H_PAGE_F_GIX_SHIFT,
        }, {
                .mask   = H_PAGE_F_SECOND,
                .val    = H_PAGE_F_SECOND,
                .set    = "f_second",
        }, {
#endif
                .mask   = _PAGE_SPECIAL,
                .val    = _PAGE_SPECIAL,
                .set    = "special",
        }
};
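
/*
 * For example (hypothetical PTE): if _PAGE_RW is set but _PAGE_EXEC is
 * clear, dump_flag_info() prints the .set string "rw" for the first entry
 * and the blank .clear string for the second, so flag columns stay aligned
 * across output lines. Entries with .is_val are printed as "name:value"
 * instead, with the value shifted down by .shift.
 */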

struct pgtable_level {
        const struct flag_info *flag;
        size_t num;
        u64 mask;
};

static struct pgtable_level pg_level[] = {
        {
        }, { /* pgd */
                .flag   = flag_array,
                .num    = ARRAY_SIZE(flag_array),
        }, { /* pud */
                .flag   = flag_array,
                .num    = ARRAY_SIZE(flag_array),
        }, { /* pmd */
                .flag   = flag_array,
                .num    = ARRAY_SIZE(flag_array),
        }, { /* pte */
                .flag   = flag_array,
                .num    = ARRAY_SIZE(flag_array),
        },
};

static void dump_flag_info(struct pg_state *st, const struct flag_info *flag,
                           u64 pte, int num)
{
        unsigned int i;

        for (i = 0; i < num; i++, flag++) {
                const char *s = NULL;
                u64 val;

                /* flag not defined so don't check it */
                if (flag->mask == 0)
                        continue;
                /* Some 'flags' are actually values */
                if (flag->is_val) {
                        val = pte & flag->val;
                        if (flag->shift)
                                val = val >> flag->shift;
                        seq_printf(st->seq, "  %s:%llx", flag->set, val);
                } else {
                        if ((pte & flag->mask) == flag->val)
                                s = flag->set;
                        else
                                s = flag->clear;
                        if (s)
                                seq_printf(st->seq, "  %s", s);
                }
                st->current_flags &= ~flag->mask;
        }
        if (st->current_flags != 0)
                seq_printf(st->seq, "  unknown flags:%llx", st->current_flags);
}
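
/*
 * Example (illustrative): if a PTE carries a bit that no flag_array entry
 * claims, that bit survives in st->current_flags after the loop and is
 * reported as "unknown flags:<hex>", which helps catch new or misdeclared
 * PTE bits.
 */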

static void dump_addr(struct pg_state *st, unsigned long addr)
{
        static const char units[] = "KMGTPE";
        const char *unit = units;
        unsigned long delta;

        seq_printf(st->seq, "0x%016lx-0x%016lx   ", st->start_address, addr-1);
        delta = (addr - st->start_address) >> 10;
        /* Work out the appropriate unit to use */
        while (!(delta & 1023) && unit[1]) {
                delta >>= 10;
                unit++;
        }
        seq_printf(st->seq, "%9lu%c", delta, *unit);
}
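
/*
 * Example (illustrative): a 16M region gives delta = 16384 once shifted
 * into KiB; 16384 is a multiple of 1024, so the loop divides again and
 * advances the unit, printing "       16M" rather than "    16384K".
 */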

static void note_page(struct pg_state *st, unsigned long addr,
               unsigned int level, u64 val)
{
        u64 flag = val & pg_level[level].mask;

        /* At first no level is set */
        if (!st->level) {
                st->level = level;
                st->current_flags = flag;
                st->start_address = addr;
                seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
        /*
         * Dump the section of virtual memory when:
         *   - the PTE flags from one entry to the next differ.
         *   - we change levels in the tree.
         *   - the address is in a different section of memory and is thus
         *     used for a different purpose, regardless of the flags.
         */
        } else if (flag != st->current_flags || level != st->level ||
                   addr >= st->marker[1].start_address) {

                /* Check the PTE flags */
                if (st->current_flags) {
                        dump_addr(st, addr);

                        /* Dump all the flags */
                        if (pg_level[st->level].flag)
                                dump_flag_info(st, pg_level[st->level].flag,
                                          st->current_flags,
                                          pg_level[st->level].num);

                        seq_puts(st->seq, "\n");
                }

                /*
                 * Address indicates we have passed the end of the
                 * current section of virtual memory
                 */
                while (addr >= st->marker[1].start_address) {
                        st->marker++;
                        seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
                }
                st->start_address = addr;
                st->current_flags = flag;
                st->level = level;
        }
}
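
/*
 * A hypothetical fragment of the resulting output (addresses, size and
 * flags depend entirely on the platform and configuration):
 *
 * ---[ Start of kernel VM ]---
 * 0xc000000000000000-0xc0000000000fffff         1M  rw  pte  present  dirty  accessed
 *
 * i.e. a run of entries with identical flags collapses into one line.
 */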

static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
        pte_t *pte = pte_offset_kernel(pmd, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
                addr = start + i * PAGE_SIZE;
                note_page(st, addr, 4, pte_val(*pte));
        }
}

static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
                addr = start + i * PMD_SIZE;
                if (!pmd_none(*pmd))
                        /* pmd exists */
                        walk_pte(st, pmd, addr);
                else
                        note_page(st, addr, 3, pmd_val(*pmd));
        }
}

static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
{
        pud_t *pud = pud_offset(pgd, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                addr = start + i * PUD_SIZE;
                if (!pud_none(*pud))
                        /* pud exists */
                        walk_pmd(st, pud, addr);
                else
                        note_page(st, addr, 2, pud_val(*pud));
        }
}

static void walk_pagetables(struct pg_state *st)
{
        pgd_t *pgd = pgd_offset_k(0UL);
        unsigned int i;
        unsigned long addr;

        /*
         * Traverse the Linux pagetable structure and note each mapping
         * it describes, so note_page() can coalesce and dump them.
         */
        for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
                addr = KERN_VIRT_START + i * PGDIR_SIZE;
                if (!pgd_none(*pgd))
                        /* pgd exists */
                        walk_pud(st, pgd, addr);
                else
                        note_page(st, addr, 1, pgd_val(*pgd));
        }
}

static void populate_markers(void)
{
        address_markers[0].start_address = PAGE_OFFSET;
        address_markers[1].start_address = VMALLOC_START;
        address_markers[2].start_address = VMALLOC_END;
        address_markers[3].start_address = ISA_IO_BASE;
        address_markers[4].start_address = ISA_IO_END;
        address_markers[5].start_address = PHB_IO_BASE;
        address_markers[6].start_address = PHB_IO_END;
        address_markers[7].start_address = IOREMAP_BASE;
        address_markers[8].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_STD_MMU_64
        address_markers[9].start_address = H_VMEMMAP_BASE;
#else
        address_markers[9].start_address = VMEMMAP_BASE;
#endif
}

static int ptdump_show(struct seq_file *m, void *v)
{
        struct pg_state st = {
                .seq = m,
                .start_address = KERN_VIRT_START,
                .marker = address_markers,
        };
        /* Traverse kernel page tables */
        walk_pagetables(&st);
        note_page(&st, 0, 0, 0);
        return 0;
}

static int ptdump_open(struct inode *inode, struct file *file)
{
        return single_open(file, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
        .open           = ptdump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static void build_pgtable_complete_mask(void)
{
        unsigned int i, j;

        for (i = 0; i < ARRAY_SIZE(pg_level); i++)
                if (pg_level[i].flag)
                        for (j = 0; j < pg_level[i].num; j++)
                                pg_level[i].mask |= pg_level[i].flag[j].mask;
}
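
/*
 * For instance (illustrative): if flag_array covered only _PAGE_RW and
 * _PAGE_EXEC, each level's mask would become _PAGE_RW | _PAGE_EXEC, and
 * note_page() would then ignore all other PTE bits when deciding whether
 * two neighbouring entries share the same flags.
 */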

static int ptdump_init(void)
{
        struct dentry *debugfs_file;

        populate_markers();
        build_pgtable_complete_mask();
        debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL,
                        NULL, &ptdump_fops);
        return debugfs_file ? 0 : -ENOMEM;
}
device_initcall(ptdump_init);
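
/*
 * With this built in, the dump can be read from userspace, e.g.:
 *
 *   # cat /sys/kernel/debug/kernel_pagetables
 *
 * (assuming debugfs is mounted at /sys/kernel/debug).
 */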