linux/arch/powerpc/mm/ptdump/ptdump.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps information
 * about the used sections of memory to
 * /sys/kernel/debug/kernel_page_tables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <linux/const.h>
#include <asm/page.h>
#include <asm/hugetlb.h>

#include <mm/mmu_decl.h>

#include "ptdump.h"

/*
 * To visualise what is happening:
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how far an address is shifted right to get the index
 * into the corresponding P**
 *  - P**_SIZE = how much address space a single entry of the corresponding
 * P** maps - not the size of the table itself.
 * P**={PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a table of PTEs, and every PTE entry
 * points to a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry, which points to a PMD.
 *
 * The page dumper groups page table entries with the same flags into a
 * single description. It uses pg_state to track the range information
 * while iterating over the PTE entries. When the continuity is broken it
 * dumps out a description of the range - ie PTEs that are virtually
 * contiguous with the same PTE flags are chunked together. This makes it
 * clear how different areas of the kernel virtual memory are used.
 */
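/*
 * Illustrative example of the macros above (values are configuration
 * dependent; assuming 4K pages with a 9-bit PTE index): PAGE_SHIFT = 12
 * and PTRS_PER_PTE = 512, so PMD_SHIFT = 12 + 9 = 21, one PMD entry maps
 * PMD_SIZE = 1 << 21 = 2M of virtual address space, and
 * pmd_index(addr) = (addr >> 21) & (PTRS_PER_PMD - 1).
 */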
struct pg_state {
        struct seq_file *seq;
        const struct addr_marker *marker;
        unsigned long start_address;
        unsigned long start_pa;
        unsigned int level;
        u64 current_flags;
        bool check_wx;
        unsigned long wx_pages;
};

struct addr_marker {
        unsigned long start_address;
        const char *name;
};

static struct addr_marker address_markers[] = {
        { 0,    "Start of kernel VM" },
#ifdef MODULES_VADDR
        { 0,    "modules start" },
        { 0,    "modules end" },
#endif
        { 0,    "vmalloc() Area" },
        { 0,    "vmalloc() End" },
#ifdef CONFIG_PPC64
        { 0,    "isa I/O start" },
        { 0,    "isa I/O end" },
        { 0,    "phb I/O start" },
        { 0,    "phb I/O end" },
        { 0,    "I/O remap start" },
        { 0,    "I/O remap end" },
        { 0,    "vmemmap start" },
#else
        { 0,    "Early I/O remap start" },
        { 0,    "Early I/O remap end" },
#ifdef CONFIG_HIGHMEM
        { 0,    "Highmem PTEs start" },
        { 0,    "Highmem PTEs end" },
#endif
        { 0,    "Fixmap start" },
        { 0,    "Fixmap end" },
#endif
#ifdef CONFIG_KASAN
        { 0,    "kasan shadow mem start" },
        { 0,    "kasan shadow mem end" },
#endif
        { -1,   NULL },
};

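/*
 * Printing helpers that become no-ops when there is no seq_file, so the
 * same walker can serve both the debugfs dump (seq != NULL) and the
 * silent W+X check in ptdump_check_wx(), which passes seq = NULL.
 */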
#define pt_dump_seq_printf(m, fmt, args...)     \
({                                              \
        if (m)                                  \
                seq_printf(m, fmt, ##args);     \
})

#define pt_dump_seq_putc(m, c)          \
({                                      \
        if (m)                          \
                seq_putc(m, c);         \
})

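/*
 * Print a size using the largest unit that divides it exactly. dump_addr()
 * passes the range size already scaled to KiB, so e.g. a 16M range arrives
 * as 16384, which is a multiple of 1024 and gets reduced to 16 and printed
 * as "16M", while an 8K range arrives as 8 and is printed as "8K".
 */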
void pt_dump_size(struct seq_file *m, unsigned long size)
{
        static const char units[] = "KMGTPE";
        const char *unit = units;

        /* Work out the appropriate unit to use */
        while (!(size & 1023) && unit[1]) {
                size >>= 10;
                unit++;
        }
        pt_dump_seq_printf(m, "%9lu%c ", size, *unit);
}

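/*
 * Print the decoded flags of an entry. Each flag_info either describes a
 * boolean flag (print ->set when (pte & mask) == val, otherwise ->clear if
 * one is given) or, when ->is_val is set, a multi-bit field extracted with
 * ->val/->shift and printed as "<set>:<value>". Bits covered by a flag_info
 * are cleared from current_flags; whatever remains is reported as
 * "unknown flags".
 */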
static void dump_flag_info(struct pg_state *st, const struct flag_info
                *flag, u64 pte, int num)
{
        unsigned int i;

        for (i = 0; i < num; i++, flag++) {
                const char *s = NULL;
                u64 val;

                /* flag not defined so don't check it */
                if (flag->mask == 0)
                        continue;
                /* Some 'flags' are actually values */
                if (flag->is_val) {
                        val = pte & flag->val;
                        if (flag->shift)
                                val = val >> flag->shift;
                        pt_dump_seq_printf(st->seq, "  %s:%llx", flag->set, val);
                } else {
                        if ((pte & flag->mask) == flag->val)
                                s = flag->set;
                        else
                                s = flag->clear;
                        if (s)
                                pt_dump_seq_printf(st->seq, "  %s", s);
                }
                st->current_flags &= ~flag->mask;
        }
        if (st->current_flags != 0)
                pt_dump_seq_printf(st->seq, "  unknown flags:%llx", st->current_flags);
}

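/*
 * Print the virtual range, the physical address it starts at and its size,
 * e.g. (illustrative only, exact columns depend on the platform; the
 * decoded flags follow on the same line):
 * 0xc000000000000000-0xc0000000003fffff  0x0000000000000000         4M
 */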
static void dump_addr(struct pg_state *st, unsigned long addr)
{
#ifdef CONFIG_PPC64
#define REG             "0x%016lx"
#else
#define REG             "0x%08lx"
#endif

        pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
        pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
        pt_dump_size(st->seq, (addr - st->start_address) >> 10);
}

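/*
 * When the W+X check is enabled, warn (once) about any range that is both
 * writable and executable and account its size so ptdump_check_wx() can
 * report the total number of offending pages.
 */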
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
        pte_t pte = __pte(st->current_flags);

        if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
                return;

        if (!pte_write(pte) || !pte_exec(pte))
                return;

        WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
                  (void *)st->start_address, (void *)st->start_address);

        st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}

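/*
 * Start tracking a new range at @addr: remember its level, flags and
 * physical address, and advance past any address markers the walk has
 * crossed, printing a "---[ ... ]---" header for each of them.
 */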
static void note_page_update_state(struct pg_state *st, unsigned long addr,
                                   unsigned int level, u64 val, unsigned long page_size)
{
        u64 flag = val & pg_level[level].mask;
        u64 pa = val & PTE_RPN_MASK;

        st->level = level;
        st->current_flags = flag;
        st->start_address = addr;
        st->start_pa = pa;

        while (addr >= st->marker[1].start_address) {
                st->marker++;
                pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
        }
}

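/*
 * Core of the state machine: called for every entry visited by the walk.
 * The current range is flushed (printed) as soon as the flags or the level
 * change, or a marker boundary is crossed, and a new range is started.
 */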
static void note_page(struct pg_state *st, unsigned long addr,
               unsigned int level, u64 val, unsigned long page_size)
{
        u64 flag = val & pg_level[level].mask;

        /* At first no level is set */
        if (!st->level) {
                pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
                note_page_update_state(st, addr, level, val, page_size);
        /*
         * Dump the section of virtual memory when:
         *   - the PTE flags from one entry to the next differ.
         *   - we change levels in the tree.
         *   - the address is in a different section of memory and is thus
         *   used for a different purpose, regardless of the flags.
         */
        } else if (flag != st->current_flags || level != st->level ||
                   addr >= st->marker[1].start_address) {

                /* Check the PTE flags */
                if (st->current_flags) {
                        note_prot_wx(st, addr);
                        dump_addr(st, addr);

                        /* Dump all the flags */
                        if (pg_level[st->level].flag)
                                dump_flag_info(st, pg_level[st->level].flag,
                                          st->current_flags,
                                          pg_level[st->level].num);

                        pt_dump_seq_putc(st->seq, '\n');
                }

                /*
                 * Address indicates we have passed the end of the
                 * current section of virtual memory
                 */
                note_page_update_state(st, addr, level, val, page_size);
        }
}

static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
        pte_t *pte = pte_offset_kernel(pmd, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
                addr = start + i * PAGE_SIZE;
                note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
        }
}

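/*
 * Walk a huge page directory. Each of its entries maps 1 << hugepd_shift()
 * bytes; when that is larger than the area covered by the slot it sits in
 * (pdshift), the same hugepd is referenced from several consecutive slots
 * and is only dumped once, at its aligned start address. The body is
 * compiled only on platforms with CONFIG_ARCH_HAS_HUGEPD.
 */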
static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
                        int pdshift, int level)
{
#ifdef CONFIG_ARCH_HAS_HUGEPD
        unsigned int i;
        int shift = hugepd_shift(*phpd);
        int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;

        if (start & ((1 << shift) - 1))
                return;

        for (i = 0; i < ptrs_per_hpd; i++) {
                unsigned long addr = start + (i << shift);
                pte_t *pte = hugepte_offset(*phpd, addr, pdshift);

                note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
        }
#endif
}

static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
                addr = start + i * PMD_SIZE;
                if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
                        /* pmd exists */
                        walk_pte(st, pmd, addr);
                else
                        note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
        }
}

static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
{
        pud_t *pud = pud_offset(p4d, 0);
        unsigned long addr;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                addr = start + i * PUD_SIZE;
                if (!pud_none(*pud) && !pud_is_leaf(*pud))
                        /* pud exists */
                        walk_pmd(st, pud, addr);
                else
                        note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
        }
}

static void walk_pagetables(struct pg_state *st)
{
        unsigned int i;
        unsigned long addr = st->start_address & PGDIR_MASK;
        pgd_t *pgd = pgd_offset_k(addr);

        /*
         * Traverse the Linux page table structure and dump the mappings
         * found at each level.
         */
        for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
                p4d_t *p4d = p4d_offset(pgd, 0);

                if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
                        note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
                else if (is_hugepd(__hugepd(p4d_val(*p4d))))
                        walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
                else
                        /* p4d exists */
                        walk_pud(st, p4d, addr);
        }
}

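/*
 * Fill in the start addresses of address_markers[] at init time. The
 * entries are assigned positionally, so the assignments must stay in the
 * same order as the names listed in address_markers[] above.
 */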
static void populate_markers(void)
{
        int i = 0;

#ifdef CONFIG_PPC64
        address_markers[i++].start_address = PAGE_OFFSET;
#else
        address_markers[i++].start_address = TASK_SIZE;
#endif
#ifdef MODULES_VADDR
        address_markers[i++].start_address = MODULES_VADDR;
        address_markers[i++].start_address = MODULES_END;
#endif
        address_markers[i++].start_address = VMALLOC_START;
        address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
        address_markers[i++].start_address = ISA_IO_BASE;
        address_markers[i++].start_address = ISA_IO_END;
        address_markers[i++].start_address = PHB_IO_BASE;
        address_markers[i++].start_address = PHB_IO_END;
        address_markers[i++].start_address = IOREMAP_BASE;
        address_markers[i++].start_address = IOREMAP_END;
        /* What is the ifdef about? */
#ifdef CONFIG_PPC_BOOK3S_64
        address_markers[i++].start_address =  H_VMEMMAP_START;
#else
        address_markers[i++].start_address =  VMEMMAP_BASE;
#endif
#else /* !CONFIG_PPC64 */
        address_markers[i++].start_address = ioremap_bot;
        address_markers[i++].start_address = IOREMAP_TOP;
#ifdef CONFIG_HIGHMEM
        address_markers[i++].start_address = PKMAP_BASE;
        address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
#endif
        address_markers[i++].start_address = FIXADDR_START;
        address_markers[i++].start_address = FIXADDR_TOP;
#ifdef CONFIG_KASAN
        address_markers[i++].start_address = KASAN_SHADOW_START;
        address_markers[i++].start_address = KASAN_SHADOW_END;
#endif
#endif /* CONFIG_PPC64 */
}

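/*
 * seq_file show callback behind the debugfs file. The walk starts at the
 * lowest kernel virtual address for the configuration; the final
 * note_page() call with level 0 flushes the last pending range.
 */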
static int ptdump_show(struct seq_file *m, void *v)
{
        struct pg_state st = {
                .seq = m,
                .marker = address_markers,
                .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
        };

#ifdef CONFIG_PPC64
        if (!radix_enabled())
                st.start_address = KERN_VIRT_START;
#endif

        /* Traverse kernel page tables */
        walk_pagetables(&st);
        note_page(&st, 0, 0, 0, 0);
        return 0;
}

static int ptdump_open(struct inode *inode, struct file *file)
{
        return single_open(file, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
        .open           = ptdump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

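/*
 * Pre-compute, for each page table level, the union of all the flag masks
 * described in pg_level[], so that note_page() only tracks the bits the
 * dumper knows how to decode.
 */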
static void build_pgtable_complete_mask(void)
{
        unsigned int i, j;

        for (i = 0; i < ARRAY_SIZE(pg_level); i++)
                if (pg_level[i].flag)
                        for (j = 0; j < pg_level[i].num; j++)
                                pg_level[i].mask |= pg_level[i].flag[j].mask;
}

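/*
 * Boot-time W+X check. It reuses the same page table walker with a NULL
 * seq_file, so nothing is printed while walking; only the number of pages
 * that are both writable and executable is counted and reported. The
 * caller is expected to invoke this once the kernel mappings are final
 * (typically mark_rodata_ro(), outside this file).
 */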
#ifdef CONFIG_PPC_DEBUG_WX
void ptdump_check_wx(void)
{
        struct pg_state st = {
                .seq = NULL,
                .marker = address_markers,
                .check_wx = true,
                .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
        };

#ifdef CONFIG_PPC64
        if (!radix_enabled())
                st.start_address = KERN_VIRT_START;
#endif

        walk_pagetables(&st);

        if (st.wx_pages)
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
                        st.wx_pages);
        else
                pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
#endif

static int ptdump_init(void)
{
        populate_markers();
        build_pgtable_complete_mask();
        debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
                            &ptdump_fops);
        return 0;
}
device_initcall(ptdump_init);