linux/arch/powerpc/mm/ptdump/hashpagetable.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright 2016, Rashmica Gupta, IBM Corp.
   4 *
   5 * This traverses the kernel virtual memory and dumps the pages that are in
   6 * the hash pagetable, along with their flags to
   7 * /sys/kernel/debug/kernel_hash_pagetable.
   8 *
   9 * If radix is enabled then there is no hash page table and so no debugfs file
  10 * is generated.
  11 */
  12#include <linux/debugfs.h>
  13#include <linux/fs.h>
  14#include <linux/io.h>
  15#include <linux/mm.h>
  16#include <linux/sched.h>
  17#include <linux/seq_file.h>
  18#include <asm/pgtable.h>
  19#include <linux/const.h>
  20#include <asm/page.h>
  21#include <asm/pgalloc.h>
  22#include <asm/plpar_wrappers.h>
  23#include <linux/memblock.h>
  24#include <asm/firmware.h>
  25
  26struct pg_state {
  27        struct seq_file *seq;
  28        const struct addr_marker *marker;
  29        unsigned long start_address;
  30        unsigned int level;
  31        u64 current_flags;
  32};
  33
  34struct addr_marker {
  35        unsigned long start_address;
  36        const char *name;
  37};
  38
  39static struct addr_marker address_markers[] = {
  40        { 0,    "Start of kernel VM" },
  41        { 0,    "vmalloc() Area" },
  42        { 0,    "vmalloc() End" },
  43        { 0,    "isa I/O start" },
  44        { 0,    "isa I/O end" },
  45        { 0,    "phb I/O start" },
  46        { 0,    "phb I/O end" },
  47        { 0,    "I/O remap start" },
  48        { 0,    "I/O remap end" },
  49        { 0,    "vmemmap start" },
  50        { -1,   NULL },
  51};
  52
  53struct flag_info {
  54        u64             mask;
  55        u64             val;
  56        const char      *set;
  57        const char      *clear;
  58        bool            is_val;
  59        int             shift;
  60};
  61
  62static const struct flag_info v_flag_array[] = {
  63        {
  64                .mask   = SLB_VSID_B,
  65                .val    = SLB_VSID_B_256M,
  66                .set    = "ssize: 256M",
  67                .clear  = "ssize: 1T  ",
  68        }, {
  69                .mask   = HPTE_V_SECONDARY,
  70                .val    = HPTE_V_SECONDARY,
  71                .set    = "secondary",
  72                .clear  = "primary  ",
  73        }, {
  74                .mask   = HPTE_V_VALID,
  75                .val    = HPTE_V_VALID,
  76                .set    = "valid  ",
  77                .clear  = "invalid",
  78        }, {
  79                .mask   = HPTE_V_BOLTED,
  80                .val    = HPTE_V_BOLTED,
  81                .set    = "bolted",
  82                .clear  = "",
  83        }
  84};
  85
  86static const struct flag_info r_flag_array[] = {
  87        {
  88                .mask   = HPTE_R_PP0 | HPTE_R_PP,
  89                .val    = PP_RWXX,
  90                .set    = "prot:RW--",
  91        }, {
  92                .mask   = HPTE_R_PP0 | HPTE_R_PP,
  93                .val    = PP_RWRX,
  94                .set    = "prot:RWR-",
  95        }, {
  96                .mask   = HPTE_R_PP0 | HPTE_R_PP,
  97                .val    = PP_RWRW,
  98                .set    = "prot:RWRW",
  99        }, {
 100                .mask   = HPTE_R_PP0 | HPTE_R_PP,
 101                .val    = PP_RXRX,
 102                .set    = "prot:R-R-",
 103        }, {
 104                .mask   = HPTE_R_PP0 | HPTE_R_PP,
 105                .val    = PP_RXXX,
 106                .set    = "prot:R---",
 107        }, {
 108                .mask   = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
 109                .val    = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
 110                .set    = "key",
 111                .clear  = "",
 112                .is_val = true,
 113        }, {
 114                .mask   = HPTE_R_R,
 115                .val    = HPTE_R_R,
 116                .set    = "ref",
 117                .clear  = "   ",
 118        }, {
 119                .mask   = HPTE_R_C,
 120                .val    = HPTE_R_C,
 121                .set    = "changed",
 122                .clear  = "       ",
 123        }, {
 124                .mask   = HPTE_R_N,
 125                .val    = HPTE_R_N,
 126                .set    = "no execute",
 127        }, {
 128                .mask   = HPTE_R_WIMG,
 129                .val    = HPTE_R_W,
 130                .set    = "writethru",
 131        }, {
 132                .mask   = HPTE_R_WIMG,
 133                .val    = HPTE_R_I,
 134                .set    = "no cache",
 135        }, {
 136                .mask   = HPTE_R_WIMG,
 137                .val    = HPTE_R_G,
 138                .set    = "guarded",
 139        }
 140};
 141
 142static int calculate_pagesize(struct pg_state *st, int ps, char s[])
 143{
 144        static const char units[] = "BKMGTPE";
 145        const char *unit = units;
 146
 147        while (ps > 9 && unit[1]) {
 148                ps -= 10;
 149                unit++;
 150        }
 151        seq_printf(st->seq, "  %s_ps: %i%c\t", s, 1<<ps, *unit);
 152        return ps;
 153}
 154
 155static void dump_flag_info(struct pg_state *st, const struct flag_info
 156                *flag, u64 pte, int num)
 157{
 158        unsigned int i;
 159
 160        for (i = 0; i < num; i++, flag++) {
 161                const char *s = NULL;
 162                u64 val;
 163
 164                /* flag not defined so don't check it */
 165                if (flag->mask == 0)
 166                        continue;
 167                /* Some 'flags' are actually values */
 168                if (flag->is_val) {
 169                        val = pte & flag->val;
 170                        if (flag->shift)
 171                                val = val >> flag->shift;
 172                        seq_printf(st->seq, "  %s:%llx", flag->set, val);
 173                } else {
 174                        if ((pte & flag->mask) == flag->val)
 175                                s = flag->set;
 176                        else
 177                                s = flag->clear;
 178                        if (s)
 179                                seq_printf(st->seq, "  %s", s);
 180                }
 181        }
 182}
 183
 184static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
 185                unsigned long rpn, int bps, int aps, unsigned long lp)
 186{
 187        int aps_index;
 188
 189        while (ea >= st->marker[1].start_address) {
 190                st->marker++;
 191                seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 192        }
 193        seq_printf(st->seq, "0x%lx:\t", ea);
 194        seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v));
 195        dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array));
 196        seq_printf(st->seq, "  rpn: %lx\t", rpn);
 197        dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array));
 198
 199        calculate_pagesize(st, bps, "base");
 200        aps_index = calculate_pagesize(st, aps, "actual");
 201        if (aps_index != 2)
 202                seq_printf(st->seq, "LP enc: %lx", lp);
 203        seq_putc(st->seq, '\n');
 204}
 205
 206
 207static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
 208                *r)
 209{
 210        struct hash_pte *hptep;
 211        unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v;
 212        int i, ssize = mmu_kernel_ssize;
 213        unsigned long shift = mmu_psize_defs[psize].shift;
 214
 215        /* calculate hash */
 216        vsid = get_kernel_vsid(ea, ssize);
 217        vpn  = hpt_vpn(ea, vsid, ssize);
 218        hash = hpt_hash(vpn, shift, ssize);
 219        want_v = hpte_encode_avpn(vpn, psize, ssize);
 220
 221        /* to check in the secondary hash table, we invert the hash */
 222        if (!primary)
 223                hash = ~hash;
 224        hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 225        for (i = 0; i < HPTES_PER_GROUP; i++) {
 226                hptep = htab_address + hpte_group;
 227                hpte_v = be64_to_cpu(hptep->v);
 228
 229                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
 230                        /* HPTE matches */
 231                        *v = be64_to_cpu(hptep->v);
 232                        *r = be64_to_cpu(hptep->r);
 233                        return 0;
 234                }
 235                ++hpte_group;
 236        }
 237        return -1;
 238}
 239
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Look up the HPTE for a kernel address by asking the hypervisor, four
  * entries at a time, via plpar_pte_read_4().
  *
  * @ea:      effective address to look up
  * @psize:   index into mmu_psize_defs[] for the base page size
  * @primary: search the primary hash group when true, else the secondary
  * @v, @r:   receive the two HPTE doublewords on success
  *
  * Returns 0 when a valid matching entry was found, -1 otherwise.  A
  * failed hypervisor read just skips that batch of four entries.
  */
 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
 {
         int ssize = mmu_kernel_ssize;
         unsigned long shift = mmu_psize_defs[psize].shift;
         unsigned long vsid, vpn, hash, hpte_group, want_v;
         struct hash_pte ptes[4];
         int base, k;

         /* derive the hash and the AVPN we expect to see in the entry */
         vsid = get_kernel_vsid(ea, ssize);
         vpn  = hpt_vpn(ea, vsid, ssize);
         hash = hpt_hash(vpn, shift, ssize);
         want_v = hpte_encode_avpn(vpn, psize, ssize);

         /* the secondary group is addressed by the inverted hash */
         hash = primary ? hash : ~hash;
         hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;

         /* walk the group in batches of four slots */
         for (base = 0; base < HPTES_PER_GROUP; base += 4, hpte_group += 4) {
                 if (plpar_pte_read_4(0, hpte_group, (void *)ptes) != H_SUCCESS)
                         continue;

                 for (k = 0; k < 4; k++) {
                         const struct hash_pte *cand = &ptes[k];

                         if (!HPTE_V_COMPARE(cand->v, want_v))
                                 continue;
                         if (!(cand->v & HPTE_V_VALID))
                                 continue;

                         /* HPTE matches */
                         *v = cand->v;
                         *r = cand->r;
                         return 0;
                 }
         }
         return -1;
 }
 #endif
 278
 279static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
 280                unsigned long *lp_bits)
 281{
 282        struct mmu_psize_def entry;
 283        unsigned long arpn, mask, lp;
 284        int penc = -2, idx = 0, shift;
 285
 286        /*.
 287         * The LP field has 8 bits. Depending on the actual page size, some of
 288         * these bits are concatenated with the APRN to get the RPN. The rest
 289         * of the bits in the LP field is the LP value and is an encoding for
 290         * the base page size and the actual page size.
 291         *
 292         *  -   find the mmu entry for our base page size
 293         *  -   go through all page encodings and use the associated mask to
 294         *      find an encoding that matches our encoding in the LP field.
 295         */
 296        arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
 297        lp = arpn & 0xff;
 298
 299        entry = mmu_psize_defs[bps];
 300        while (idx < MMU_PAGE_COUNT) {
 301                penc = entry.penc[idx];
 302                if ((penc != -1) && (mmu_psize_defs[idx].shift)) {
 303                        shift = mmu_psize_defs[idx].shift -  HPTE_R_RPN_SHIFT;
 304                        mask = (0x1 << (shift)) - 1;
 305                        if ((lp & mask) == penc) {
 306                                *aps = mmu_psize_to_shift(idx);
 307                                *lp_bits = lp & mask;
 308                                *rpn = arpn >> shift;
 309                                return;
 310                        }
 311                }
 312                idx++;
 313        }
 314}
 315
 316static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
 317                          u64 *r)
 318{
 319#ifdef CONFIG_PPC_PSERIES
 320        if (firmware_has_feature(FW_FEATURE_LPAR))
 321                return pseries_find(ea, psize, primary, v, r);
 322#endif
 323        return native_find(ea, psize, primary, v, r);
 324}
 325
 326static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
 327{
 328        unsigned long slot;
 329        u64 v  = 0, r = 0;
 330        unsigned long rpn, lp_bits;
 331        int base_psize = 0, actual_psize = 0;
 332
 333        if (ea < PAGE_OFFSET)
 334                return -1;
 335
 336        /* Look in primary table */
 337        slot = base_hpte_find(ea, psize, true, &v, &r);
 338
 339        /* Look in secondary table */
 340        if (slot == -1)
 341                slot = base_hpte_find(ea, psize, false, &v, &r);
 342
 343        /* No entry found */
 344        if (slot == -1)
 345                return -1;
 346
 347        /*
 348         * We found an entry in the hash page table:
 349         *  - check that this has the same base page
 350         *  - find the actual page size
 351         *  - find the RPN
 352         */
 353        base_psize = mmu_psize_to_shift(psize);
 354
 355        if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) {
 356                decode_r(psize, r, &rpn, &actual_psize, &lp_bits);
 357        } else {
 358                /* 4K actual page size */
 359                actual_psize = 12;
 360                rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
 361                /* In this case there are no LP bits */
 362                lp_bits = -1;
 363        }
 364        /*
 365         * We didn't find a matching encoding, so the PTE we found isn't for
 366         * this address.
 367         */
 368        if (actual_psize == -1)
 369                return -1;
 370
 371        dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits);
 372        return 0;
 373}
 374
 375static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 376{
 377        pte_t *pte = pte_offset_kernel(pmd, 0);
 378        unsigned long addr, pteval, psize;
 379        int i, status;
 380
 381        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
 382                addr = start + i * PAGE_SIZE;
 383                pteval = pte_val(*pte);
 384
 385                if (addr < VMALLOC_END)
 386                        psize = mmu_vmalloc_psize;
 387                else
 388                        psize = mmu_io_psize;
 389#ifdef CONFIG_PPC_64K_PAGES
 390                /* check for secret 4K mappings */
 391                if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
 392                        ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
 393                        psize = mmu_io_psize;
 394#endif
 395                /* check for hashpte */
 396                status = hpte_find(st, addr, psize);
 397
 398                if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE)
 399                                && (status != -1)) {
 400                /* found a hpte that is not in the linux page tables */
 401                        seq_printf(st->seq, "page probably bolted before linux"
 402                                " pagetables were set: addr:%lx, pteval:%lx\n",
 403                                addr, pteval);
 404                }
 405        }
 406}
 407
 408static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 409{
 410        pmd_t *pmd = pmd_offset(pud, 0);
 411        unsigned long addr;
 412        unsigned int i;
 413
 414        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 415                addr = start + i * PMD_SIZE;
 416                if (!pmd_none(*pmd))
 417                        /* pmd exists */
 418                        walk_pte(st, pmd, addr);
 419        }
 420}
 421
 422static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 423{
 424        pud_t *pud = pud_offset(pgd, 0);
 425        unsigned long addr;
 426        unsigned int i;
 427
 428        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 429                addr = start + i * PUD_SIZE;
 430                if (!pud_none(*pud))
 431                        /* pud exists */
 432                        walk_pmd(st, pud, addr);
 433        }
 434}
 435
 436static void walk_pagetables(struct pg_state *st)
 437{
 438        pgd_t *pgd = pgd_offset_k(0UL);
 439        unsigned int i;
 440        unsigned long addr;
 441
 442        /*
 443         * Traverse the linux pagetable structure and dump pages that are in
 444         * the hash pagetable.
 445         */
 446        for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 447                addr = KERN_VIRT_START + i * PGDIR_SIZE;
 448                if (!pgd_none(*pgd))
 449                        /* pgd exists */
 450                        walk_pud(st, pgd, addr);
 451        }
 452}
 453
 454
 455static void walk_linearmapping(struct pg_state *st)
 456{
 457        unsigned long addr;
 458
 459        /*
 460         * Traverse the linear mapping section of virtual memory and dump pages
 461         * that are in the hash pagetable.
 462         */
 463        unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift;
 464
 465        for (addr = PAGE_OFFSET; addr < PAGE_OFFSET +
 466                        memblock_end_of_DRAM(); addr += psize)
 467                hpte_find(st, addr, mmu_linear_psize);
 468}
 469
 /*
  * Traverse the vmemmaped memory and dump pages that are in the hash
  * pagetable, then print the closing "vmemmap end" banner.
  *
  * Does nothing unless built with CONFIG_SPARSEMEM_VMEMMAP.
  */
 static void walk_vmemmap(struct pg_state *st)
 {
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
         struct vmemmap_backing *ptr;

         /*
          * Iterate on the node itself rather than on its ->list link, so
          * that an empty list is not dereferenced and the final node (whose
          * ->list is NULL) is dumped as well.
          */
         for (ptr = vmemmap_list; ptr; ptr = ptr->list)
                 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);

         seq_puts(st->seq, "---[ vmemmap end ]---\n");
 #endif
 }
 486
 487static void populate_markers(void)
 488{
 489        address_markers[0].start_address = PAGE_OFFSET;
 490        address_markers[1].start_address = VMALLOC_START;
 491        address_markers[2].start_address = VMALLOC_END;
 492        address_markers[3].start_address = ISA_IO_BASE;
 493        address_markers[4].start_address = ISA_IO_END;
 494        address_markers[5].start_address = PHB_IO_BASE;
 495        address_markers[6].start_address = PHB_IO_END;
 496        address_markers[7].start_address = IOREMAP_BASE;
 497        address_markers[8].start_address = IOREMAP_END;
 498#ifdef CONFIG_PPC_BOOK3S_64
 499        address_markers[9].start_address =  H_VMEMMAP_START;
 500#else
 501        address_markers[9].start_address =  VMEMMAP_BASE;
 502#endif
 503}
 504
 505static int ptdump_show(struct seq_file *m, void *v)
 506{
 507        struct pg_state st = {
 508                .seq = m,
 509                .start_address = PAGE_OFFSET,
 510                .marker = address_markers,
 511        };
 512        /*
 513         * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and
 514         * dump pages that are in the hash pagetable.
 515         */
 516        walk_linearmapping(&st);
 517        walk_pagetables(&st);
 518        walk_vmemmap(&st);
 519        return 0;
 520}
 521
 /*
  * Generate ptdump_open() and ptdump_fops for the single-record seq_file
  * backed by ptdump_show(), instead of spelling out the single_open()
  * boilerplate by hand.
  */
 DEFINE_SHOW_ATTRIBUTE(ptdump);
 533
 534static int ptdump_init(void)
 535{
 536        struct dentry *debugfs_file;
 537
 538        if (!radix_enabled()) {
 539                populate_markers();
 540                debugfs_file = debugfs_create_file("kernel_hash_pagetable",
 541                                0400, NULL, NULL, &ptdump_fops);
 542                return debugfs_file ? 0 : -ENOMEM;
 543        }
 544        return 0;
 545}
 546device_initcall(ptdump_init);
 547