linux/arch/riscv/mm/fault.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/kfence.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

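/*
 * Print an oops-style message for an unrecoverable kernel-mode fault and
 * kill the offending task; there is no way to recover at this point.
 */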
static void die_kernel_fault(const char *msg, unsigned long addr,
                struct pt_regs *regs)
{
        bust_spinlocks(1);

        pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
                addr);

        bust_spinlocks(0);
        die(regs, "Oops");
        do_exit(SIGKILL);
}

static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
        const char *msg;

        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        if (addr < PAGE_SIZE)
                msg = "NULL pointer dereference";
        else {
                if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs))
                        return;

                msg = "paging request";
        }

        die_kernel_fault(msg, addr, regs);
}

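/*
 * Handle the error codes returned by handle_mm_fault(): invoke the OOM
 * killer for VM_FAULT_OOM, raise SIGBUS for VM_FAULT_SIGBUS, and fall back
 * to the kernel exception path for faults taken in kernel mode.
 */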
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed).
                 */
                if (!user_mode(regs)) {
                        no_context(regs, addr);
                        return;
                }
                pagefault_out_of_memory();
                return;
        } else if (fault & VM_FAULT_SIGBUS) {
                /* Kernel mode? Handle exceptions or die */
                if (!user_mode(regs)) {
                        no_context(regs, addr);
                        return;
                }
                do_trap(regs, SIGBUS, BUS_ADRERR, addr);
                return;
        }
        BUG();
}

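/*
 * The access hit an address outside any VMA (or one the VMA disallows):
 * drop the mmap lock, then deliver SIGSEGV to user tasks or take the
 * kernel fault path.
 */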
static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
        /*
         * Something tried to access memory that isn't in our memory map.
         * Fix it, but check if it's kernel or user first.
         */
        mmap_read_unlock(mm);
        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs)) {
                do_trap(regs, SIGSEGV, code, addr);
                return;
        }

        no_context(regs, addr);
}

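/*
 * Handle a fault in the vmalloc (or, on 64-bit, module) region by copying
 * the relevant kernel page-table entries from init_mm.pgd into the current
 * task's page table. No locks may be taken here, as this can run from
 * interrupt context.
 */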
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        p4d_t *p4d, *p4d_k;
        pmd_t *pmd, *pmd_k;
        pte_t *pte_k;
        int index;
        unsigned long pfn;

        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs))
                return do_trap(regs, SIGSEGV, code, addr);

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "tsk->active_mm->pgd" here.
         * We might be inside an interrupt in the middle
         * of a task switch.
         */
        index = pgd_index(addr);
        pfn = csr_read(CSR_SATP) & SATP_PPN;
        pgd = (pgd_t *)pfn_to_virt(pfn) + index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k)) {
                no_context(regs, addr);
                return;
        }
        set_pgd(pgd, *pgd_k);

        p4d = p4d_offset(pgd, addr);
        p4d_k = p4d_offset(pgd_k, addr);
        if (!p4d_present(*p4d_k)) {
                no_context(regs, addr);
                return;
        }

        pud = pud_offset(p4d, addr);
        pud_k = pud_offset(p4d_k, addr);
        if (!pud_present(*pud_k)) {
                no_context(regs, addr);
                return;
        }

        /*
         * Since the vmalloc area is global, it is unnecessary
         * to copy individual PTEs
         */
        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);
        if (!pmd_present(*pmd_k)) {
                no_context(regs, addr);
                return;
        }
        set_pmd(pmd, *pmd_k);

        /*
         * Make sure the actual PTE exists as well to
         * catch kernel vmalloc-area accesses to non-mapped
         * addresses. If we don't do this, this will just
         * silently loop forever.
         */
        pte_k = pte_offset_kernel(pmd_k, addr);
        if (!pte_present(*pte_k)) {
                no_context(regs, addr);
                return;
        }

        /*
         * The kernel assumes that TLBs don't cache invalid
         * entries, but in RISC-V, SFENCE.VMA specifies an
         * ordering constraint, not a cache flush; it is
         * necessary even after writing invalid entries.
         */
        local_flush_tlb_page(addr);
}

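/*
 * Check whether the faulting access is permitted by the VMA's protection
 * flags: instruction fetch requires VM_EXEC, a load requires VM_READ and a
 * store requires VM_WRITE. Returns true if the access is not allowed.
 */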
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
        switch (cause) {
        case EXC_INST_PAGE_FAULT:
                if (!(vma->vm_flags & VM_EXEC))
                        return true;
                break;
        case EXC_LOAD_PAGE_FAULT:
                if (!(vma->vm_flags & VM_READ))
                        return true;
                break;
        case EXC_STORE_PAGE_FAULT:
                if (!(vma->vm_flags & VM_WRITE))
                        return true;
                break;
        default:
                panic("%s: unhandled cause %lu", __func__, cause);
        }
        return false;
}

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long addr, cause;
        unsigned int flags = FAULT_FLAG_DEFAULT;
        int code = SEGV_MAPERR;
        vm_fault_t fault;

        cause = regs->cause;
        addr = regs->badaddr;

        tsk = current;
        mm = tsk->mm;

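        /* Let kprobes handle the fault first if it was raised while a kprobe was active. */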
        if (kprobe_page_fault(regs, cause))
                return;

        /*
         * Fault-in kernel-space virtual memory on-demand.
         * The 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
                vmalloc_fault(regs, code, addr);
                return;
        }

#ifdef CONFIG_64BIT
        /*
         * Modules in 64-bit kernels lie in their own virtual region which is
         * not in the vmalloc region, but dealing with page faults in this
         * region or the vmalloc region amounts to doing the same thing:
         * checking that the mapping exists in init_mm.pgd and updating the
         * user page table, so just use vmalloc_fault.
         */
        if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
                vmalloc_fault(regs, code, addr);
                return;
        }
#endif
        /* Enable interrupts if they were enabled in the parent context. */
        if (likely(regs->status & SR_PIE))
                local_irq_enable();

        /*
         * If we're in an interrupt, have no user context, or are running
         * in an atomic region, then we must not take the fault.
         */
        if (unlikely(faulthandler_disabled() || !mm)) {
                tsk->thread.bad_cause = cause;
                no_context(regs, addr);
                return;
        }

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;

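        /*
         * A kernel-mode access to a user address with SR_SUM clear means the
         * access did not go through the uaccess helpers; treat it as fatal.
         */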
        if (!user_mode(regs) && addr < TASK_SIZE &&
                        unlikely(!(regs->status & SR_SUM)))
                die_kernel_fault("access to user memory without uaccess routines",
                                addr, regs);

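        /* Account the fault with the PERF_COUNT_SW_PAGE_FAULTS software event. */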
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

        if (cause == EXC_STORE_PAGE_FAULT)
                flags |= FAULT_FLAG_WRITE;
        else if (cause == EXC_INST_PAGE_FAULT)
                flags |= FAULT_FLAG_INSTRUCTION;
retry:
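        /*
         * Take the mmap lock for reading and look up the VMA covering the
         * faulting address, expanding a stack VMA downwards if needed.
         */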
        mmap_read_lock(mm);
        vma = find_vma(mm, addr);
        if (unlikely(!vma)) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (likely(vma->vm_start <= addr))
                goto good_area;
        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (unlikely(expand_stack(vma, addr))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it.
         */
good_area:
        code = SEGV_ACCERR;

        if (unlikely(access_error(cause, vma))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }

        /*
         * If for any reason at all we could not handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(vma, addr, flags, regs);

        /*
         * If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs))
                return;

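        /*
         * handle_mm_fault() could not finish and dropped the mmap lock;
         * mark the fault as tried and take another pass.
         */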
        if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
                flags |= FAULT_FLAG_TRIED;

                /*
                 * No need to mmap_read_unlock(mm) as we would
                 * have already released it in __lock_page_or_retry
                 * in mm/filemap.c.
                 */
                goto retry;
        }

        mmap_read_unlock(mm);

        if (unlikely(fault & VM_FAULT_ERROR)) {
                tsk->thread.bad_cause = cause;
                mm_fault_error(regs, addr, fault);
                return;
        }
        return;
}
NOKPROBE_SYMBOL(do_page_fault);