linux/arch/riscv/mm/fault.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */


#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

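/* Print an oops for an unrecoverable kernel-mode fault and kill the task. */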
static void die_kernel_fault(const char *msg, unsigned long addr,
                struct pt_regs *regs)
{
        bust_spinlocks(1);

        pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
                addr);

        bust_spinlocks(0);
        die(regs, "Oops");
        do_exit(SIGKILL);
}

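/*
 * A fault taken in kernel mode that cannot be handed back to user space:
 * first try the exception fixup table (e.g. for faulting uaccess helpers),
 * otherwise oops.
 */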
static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
        const char *msg;

        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
        die_kernel_fault(msg, addr, regs);
}

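/*
 * Handle VM_FAULT_ERROR results from handle_mm_fault(): out-of-memory faults
 * go to the OOM killer (or to no_context() for kernel mode), SIGBUS faults
 * are delivered to the task, and anything else is a bug.
 */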
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we got
                 * oom-killed).
                 */
                if (!user_mode(regs)) {
                        no_context(regs, addr);
                        return;
                }
                pagefault_out_of_memory();
                return;
        } else if (fault & VM_FAULT_SIGBUS) {
                /* Kernel mode? Handle exceptions or die. */
                if (!user_mode(regs)) {
                        no_context(regs, addr);
                        return;
                }
                do_trap(regs, SIGBUS, BUS_ADRERR, addr);
                return;
        }
        BUG();
}

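/*
 * Called with mmap_lock held for read; drop it, then deliver SIGSEGV for
 * user-mode accesses or fall back to the kernel fault path.
 */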
static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
        /*
         * Something tried to access memory that isn't in our memory map.
         * Fix it, but check if it's kernel or user first.
         */
        mmap_read_unlock(mm);
        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs)) {
                do_trap(regs, SIGSEGV, code, addr);
                return;
        }

        no_context(regs, addr);
}

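/*
 * Faults in the vmalloc/ioremap region are resolved by copying the missing
 * kernel page-table entries from the reference table (init_mm.pgd) into the
 * currently active page table, without taking any locks.
 */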
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        p4d_t *p4d, *p4d_k;
        pmd_t *pmd, *pmd_k;
        pte_t *pte_k;
        int index;
        unsigned long pfn;

        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs))
                return do_trap(regs, SIGSEGV, code, addr);

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "tsk->active_mm->pgd" here.
         * We might be inside an interrupt in the middle
         * of a task switch.
         */
        index = pgd_index(addr);
        pfn = csr_read(CSR_SATP) & SATP_PPN;
        pgd = (pgd_t *)pfn_to_virt(pfn) + index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k)) {
                no_context(regs, addr);
                return;
        }
        set_pgd(pgd, *pgd_k);

        p4d = p4d_offset(pgd, addr);
        p4d_k = p4d_offset(pgd_k, addr);
        if (!p4d_present(*p4d_k)) {
                no_context(regs, addr);
                return;
        }

        pud = pud_offset(p4d, addr);
        pud_k = pud_offset(p4d_k, addr);
        if (!pud_present(*pud_k)) {
                no_context(regs, addr);
                return;
        }

        /*
         * Since the vmalloc area is global, it is unnecessary
         * to copy individual PTEs
         */
        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);
        if (!pmd_present(*pmd_k)) {
                no_context(regs, addr);
                return;
        }
        set_pmd(pmd, *pmd_k);

        /*
         * Make sure the actual PTE exists as well to
         * catch kernel vmalloc-area accesses to non-mapped
         * addresses. If we don't do this, this will just
         * silently loop forever.
         */
        pte_k = pte_offset_kernel(pmd_k, addr);
        if (!pte_present(*pte_k)) {
                no_context(regs, addr);
                return;
        }

        /*
         * The kernel assumes that TLBs don't cache invalid
         * entries, but in RISC-V, SFENCE.VMA specifies an
         * ordering constraint, not a cache flush; it is
         * necessary even after writing invalid entries.
         */
        local_flush_tlb_page(addr);
}

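/*
 * Check the faulting access against the VMA protections: instruction fetches
 * need VM_EXEC, loads need VM_READ and stores need VM_WRITE.
 */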
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
        switch (cause) {
        case EXC_INST_PAGE_FAULT:
                if (!(vma->vm_flags & VM_EXEC))
                        return true;
                break;
        case EXC_LOAD_PAGE_FAULT:
                if (!(vma->vm_flags & VM_READ))
                        return true;
                break;
        case EXC_STORE_PAGE_FAULT:
                if (!(vma->vm_flags & VM_WRITE))
                        return true;
                break;
        default:
                panic("%s: unhandled cause %lu", __func__, cause);
        }
        return false;
}

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long addr, cause;
        unsigned int flags = FAULT_FLAG_DEFAULT;
        int code = SEGV_MAPERR;
        vm_fault_t fault;

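        /*
         * regs->cause and regs->badaddr were filled in at trap entry from the
         * CSRs describing the trap cause and the faulting virtual address.
         */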
        cause = regs->cause;
        addr = regs->badaddr;

        tsk = current;
        mm = tsk->mm;

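        /* Give kprobes a chance to handle faults taken while a kprobe is active. */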
        if (kprobe_page_fault(regs, cause))
                return;

        /*
         * Fault-in kernel-space virtual memory on-demand.
         * The 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
                vmalloc_fault(regs, code, addr);
                return;
        }

        /* Enable interrupts if they were enabled in the parent context. */
        if (likely(regs->status & SR_PIE))
                local_irq_enable();

        /*
         * If we're in an interrupt, have no user context, or are running
         * in an atomic region, then we must not take the fault.
         */
        if (unlikely(faulthandler_disabled() || !mm)) {
                tsk->thread.bad_cause = cause;
                no_context(regs, addr);
                return;
        }

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;

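        /*
         * A kernel-mode access to a user address is only legitimate when the
         * SUM bit is set in sstatus, which the uaccess routines arrange;
         * anything else is a kernel bug.
         */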
        if (!user_mode(regs) && addr < TASK_SIZE &&
                        unlikely(!(regs->status & SR_SUM)))
                die_kernel_fault("access to user memory without uaccess routines",
                                addr, regs);

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

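        /* Map the RISC-V fault cause onto the generic fault flags. */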
        if (cause == EXC_STORE_PAGE_FAULT)
                flags |= FAULT_FLAG_WRITE;
        else if (cause == EXC_INST_PAGE_FAULT)
                flags |= FAULT_FLAG_INSTRUCTION;
retry:
        mmap_read_lock(mm);
        vma = find_vma(mm, addr);
        if (unlikely(!vma)) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (likely(vma->vm_start <= addr))
                goto good_area;
        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (unlikely(expand_stack(vma, addr))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it.
         */
good_area:
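        /*
         * The address is covered by a VMA, so any remaining failure is a
         * permission problem rather than a missing mapping.
         */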
        code = SEGV_ACCERR;

        if (unlikely(access_error(cause, vma))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }

        /*
         * If for any reason at all we could not handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(vma, addr, flags, regs);

        /*
         * If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs))
                return;

        if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
                flags |= FAULT_FLAG_TRIED;

                /*
                 * No need to mmap_read_unlock(mm) as we would
                 * have already released it in __lock_page_or_retry
                 * in mm/filemap.c.
                 */
                goto retry;
        }

        mmap_read_unlock(mm);

        if (unlikely(fault & VM_FAULT_ERROR)) {
                tsk->thread.bad_cause = cause;
                mm_fault_error(regs, addr, fault);
                return;
        }
        return;
}
NOKPROBE_SYMBOL(do_page_fault);