linux/arch/nds32/mm/fault.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        if (!mm)
                mm = &init_mm;

        pr_alert("pgd = %p\n", mm->pgd);
        pgd = pgd_offset(mm, addr);
        pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

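        /*
         * Walk the remaining levels in a do { } while (0) block so we can
         * bail out at the first missing or bad entry.
         */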
        do {
                p4d_t *p4d;
                pud_t *pud;
                pmd_t *pmd;

                if (pgd_none(*pgd))
                        break;

                if (pgd_bad(*pgd)) {
                        pr_alert("(bad)");
                        break;
                }

                p4d = p4d_offset(pgd, addr);
                pud = pud_offset(p4d, addr);
                pmd = pmd_offset(pud, addr);
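                /*
                 * With a two-level table PTRS_PER_PMD is 1 and the pmd is
                 * folded into the pgd, so its value would only repeat the
                 * pgd entry printed above.
                 */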
#if PTRS_PER_PMD != 1
                pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif

                if (pmd_none(*pmd))
                        break;

                if (pmd_bad(*pmd)) {
                        pr_alert("(bad)");
                        break;
                }

                if (IS_ENABLED(CONFIG_HIGHMEM))
                {
                        pte_t *pte;
                        /* We must not map this if we have highmem enabled */
                        pte = pte_offset_map(pmd, addr);
                        pr_alert(", *pte=%08lx", pte_val(*pte));
                        pte_unmap(pte);
                }
        } while (0);

        pr_alert("\n");
}

void do_page_fault(unsigned long entry, unsigned long addr,
                   unsigned int error_code, struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        int si_code;
        vm_fault_t fault;
        unsigned int mask = VM_ACCESS_FLAGS;
        unsigned int flags = FAULT_FLAG_DEFAULT;

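        /*
         * Keep only the bits of the reported error code that this handler
         * uses: the instruction-fetch flag (ITYPE_mskINST) and the
         * exception type field (ITYPE_mskETYPE).
         */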
        error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
        tsk = current;
        mm = tsk->mm;
        si_code = SEGV_MAPERR;
        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (addr >= TASK_SIZE) {
                if (user_mode(regs))
                        goto bad_area_nosemaphore;

                if (addr >= TASK_SIZE && addr < VMALLOC_END
                    && (entry == ENTRY_PTE_NOT_PRESENT))
                        goto vmalloc_fault;
                else
                        goto no_context;
        }

        /* Send a signal to the task for handling the unaligned access. */
        if (entry == ENTRY_GENERAL_EXCPETION
            && error_code == ETYPE_ALIGNMENT_CHECK) {
                if (user_mode(regs))
                        goto bad_area_nosemaphore;
                else
                        goto no_context;
        }

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (unlikely(faulthandler_disabled() || !mm))
                goto no_context;

        /*
         * As per x86, we may deadlock here. However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
                if (!user_mode(regs) &&
                    !search_exception_tables(instruction_pointer(regs)))
                        goto no_context;
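/*
 * A retry jumps back here so that mmap_sem is re-taken with a full,
 * sleeping down_read() before the vma lookup is repeated.
 */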
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in which
                 * case, we'll have missed the might_sleep() from down_read().
                 */
                might_sleep();
                if (IS_ENABLED(CONFIG_DEBUG_VM)) {
                        if (!user_mode(regs) &&
                            !search_exception_tables(instruction_pointer(regs)))
                                goto no_context;
                }
        }

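        /*
         * Look up the vma covering the faulting address. If the address
         * falls just below a VM_GROWSDOWN vma, try to expand the stack to
         * cover it; otherwise the access hit an unmapped area.
         */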
        vma = find_vma(mm, addr);

        if (unlikely(!vma))
                goto bad_area;

        if (vma->vm_start <= addr)
                goto good_area;

        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
                goto bad_area;

        if (unlikely(expand_stack(vma, addr)))
                goto bad_area;

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */

good_area:
        si_code = SEGV_ACCERR;

        /* first do some preliminary protection checks */
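        /*
         * Work out which vm_flags the vma must grant: a not-present fault
         * needs VM_EXEC for an instruction fetch and VM_READ | VM_WRITE
         * otherwise, while a TLB misc exception maps its exception type to
         * the specific permission that was violated.
         */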
        if (entry == ENTRY_PTE_NOT_PRESENT) {
                if (error_code & ITYPE_mskINST)
                        mask = VM_EXEC;
                else {
                        mask = VM_READ | VM_WRITE;
                }
        } else if (entry == ENTRY_TLB_MISC) {
                switch (error_code & ITYPE_mskETYPE) {
                case RD_PROT:
                        mask = VM_READ;
                        break;
                case WRT_PROT:
                        mask = VM_WRITE;
                        flags |= FAULT_FLAG_WRITE;
                        break;
                case NOEXEC:
                        mask = VM_EXEC;
                        break;
                case PAGE_MODIFY:
                        mask = VM_WRITE;
                        flags |= FAULT_FLAG_WRITE;
                        break;
                case ACC_BIT:
                        BUG();
                default:
                        break;
                }

        }
        if (!(vma->vm_flags & mask))
                goto bad_area;

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */

        fault = handle_mm_fault(vma, addr, flags);

        /*
         * If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_sem because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;
                return;
        }

        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                else
                        goto bad_area;
        }

        /*
         * Major/minor page fault accounting is only done on the initial
         * attempt. If we go through a retry, it is extremely likely that the
         * page will be found in page cache at that point.
         */
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
                                      1, regs, addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
                                      1, regs, addr);
                }
                if (fault & VM_FAULT_RETRY) {
                        flags |= FAULT_FLAG_TRIED;

                        /* No need to up_read(&mm->mmap_sem) as we would
                         * have already released it in __lock_page_or_retry
                         * in mm/filemap.c.
                         */
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);
        return;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:

        /* User mode accesses just cause a SIGSEGV */

        if (user_mode(regs)) {
                tsk->thread.address = addr;
                tsk->thread.error_code = error_code;
                tsk->thread.trap_no = entry;
                force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
                return;
        }

no_context:

        /* Are we prepared to handle this kernel fault?
         *
         * (The kernel has valid exception points in the source
         *  where it accesses user memory. When it fails at one
         *  of those points, we find it in a table and jump
         *  to some fixup code that loads an appropriate error
         *  code.)
         */

        {
                const struct exception_table_entry *entry;

                if ((entry =
                     search_exception_tables(instruction_pointer(regs))) !=
                    NULL) {
                        /* Adjust the instruction pointer in the stackframe */
                        instruction_pointer(regs) = entry->fixup;
                        return;
                }
        }

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */

        bust_spinlocks(1);
        pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
                 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
                 "paging request", addr);

        show_pte(mm, addr);
        die("Oops", regs, error_code);
        bust_spinlocks(0);
        do_exit(SIGKILL);

        return;

        /*
         * We ran out of memory, or some other thing happened to us that made
         * us unable to handle the page fault gracefully.
         */

out_of_memory:
        up_read(&mm->mmap_sem);
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
        return;

do_sigbus:
        up_read(&mm->mmap_sem);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;

        /*
         * Send a sigbus
         */
        tsk->thread.address = addr;
        tsk->thread.error_code = error_code;
        tsk->thread.trap_no = entry;
        force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);

        return;

vmalloc_fault:
        {
                /*
                 * Synchronize this task's top level page-table
                 * with the 'reference' page table.
                 *
                 * Use current_pgd instead of tsk->active_mm->pgd
                 * since the latter might be unavailable if this
                 * code is executed in an unluckily timed irq
                 * (like inside schedule() between switch_mm and
                 *  switch_to...).
                 */

                unsigned int index = pgd_index(addr);
                pgd_t *pgd, *pgd_k;
                p4d_t *p4d, *p4d_k;
                pud_t *pud, *pud_k;
                pmd_t *pmd, *pmd_k;
                pte_t *pte_k;

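                /*
                 * NDS32_SR_L1_PPTB holds the physical address of the first
                 * level page table currently in use; turn it back into a
                 * virtual pointer and index the entry for the faulting
                 * address.
                 */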
                pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
                pgd_k = init_mm.pgd + index;

                if (!pgd_present(*pgd_k))
                        goto no_context;

                p4d = p4d_offset(pgd, addr);
                p4d_k = p4d_offset(pgd_k, addr);
                if (!p4d_present(*p4d_k))
                        goto no_context;

                pud = pud_offset(p4d, addr);
                pud_k = pud_offset(p4d_k, addr);
                if (!pud_present(*pud_k))
                        goto no_context;

                pmd = pmd_offset(pud, addr);
                pmd_k = pmd_offset(pud_k, addr);
                if (!pmd_present(*pmd_k))
                        goto no_context;

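                /*
                 * Copy the kernel's pmd entry into this task's page table.
                 * If an entry is already present it must reference the same
                 * page table page as the kernel's, or something is corrupt.
                 */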
                if (!pmd_present(*pmd))
                        set_pmd(pmd, *pmd_k);
                else
                        BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

                /*
                 * Since the vmalloc area is global, we don't
                 * need to copy individual PTEs; it is enough to
                 * copy the pgd pointer into the pte page of the
                 * root task. If that is there, we'll find our pte if
                 * it exists.
                 */

                /* Make sure the actual PTE exists as well to
                 * catch kernel vmalloc-area accesses to non-mapped
                 * addresses. If we don't do this, this will just
                 * silently loop forever.
                 */

                pte_k = pte_offset_kernel(pmd_k, addr);
                if (!pte_present(*pte_k))
                        goto no_context;

                return;
        }
}