/*
 *  linux/arch/arm/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Modifications for ARM processor (c) 1995-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>

#include <asm/exception.h>
#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>

#include "fault.h"

#ifdef CONFIG_MMU

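/*
 * Give kprobes a first look at faults taken in kernel mode: if a probed
 * instruction faulted, kprobe_fault_handler() may be able to fix it up
 * before the normal page fault path runs.  Preemption is disabled around
 * kprobe_running() because it relies on smp_processor_id().
 */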
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
        int ret = 0;

        if (!user_mode(regs)) {
                /* kprobe_running() needs smp_processor_id() */
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, fsr))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
        return 0;
}
#endif

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;

        if (!mm)
                mm = &init_mm;

        pr_alert("pgd = %p\n", mm->pgd);
        pgd = pgd_offset(mm, addr);
        pr_alert("[%08lx] *pgd=%08llx",
                        addr, (long long)pgd_val(*pgd));

        do {
                pud_t *pud;
                pmd_t *pmd;
                pte_t *pte;

                if (pgd_none(*pgd))
                        break;

                if (pgd_bad(*pgd)) {
                        pr_cont("(bad)");
                        break;
                }

                pud = pud_offset(pgd, addr);
                if (PTRS_PER_PUD != 1)
                        pr_cont(", *pud=%08llx", (long long)pud_val(*pud));

                if (pud_none(*pud))
                        break;

                if (pud_bad(*pud)) {
                        pr_cont("(bad)");
                        break;
                }

                pmd = pmd_offset(pud, addr);
                if (PTRS_PER_PMD != 1)
                        pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));

                if (pmd_none(*pmd))
                        break;

                if (pmd_bad(*pmd)) {
                        pr_cont("(bad)");
                        break;
                }

                /* We must not map this if we have highmem enabled */
                if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
                        break;

                pte = pte_offset_map(pmd, addr);
                pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
                pr_cont(", *ppte=%08llx",
                       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
#endif
                pte_unmap(pte);
        } while (0);

        pr_cont("\n");
}
#else                                   /* CONFIG_MMU */
void show_pte(struct mm_struct *mm, unsigned long addr)
{ }
#endif                                  /* CONFIG_MMU */

/*
 * Oops.  The kernel tried to access some page that wasn't present.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
                  struct pt_regs *regs)
{
        /*
         * Are we prepared to handle this kernel fault?
         */
        if (fixup_exception(regs))
                return;

        /*
         * No handler, we'll have to terminate things with extreme prejudice.
         */
        bust_spinlocks(1);
        pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
                 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
                 "paging request", addr);

        show_pte(mm, addr);
        die("Oops", regs, fsr);
        bust_spinlocks(0);
        do_exit(SIGKILL);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
                unsigned int fsr, unsigned int sig, int code,
                struct pt_regs *regs)
{
        struct siginfo si;

#ifdef CONFIG_DEBUG_USER
        if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
            ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
                printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
                       tsk->comm, sig, addr, fsr);
                show_pte(tsk->mm, addr);
                show_regs(regs);
        }
#endif

        tsk->thread.address = addr;
        tsk->thread.error_code = fsr;
        tsk->thread.trap_no = 14;
        si.si_signo = sig;
        si.si_errno = 0;
        si.si_code = code;
        si.si_addr = (void __user *)addr;
        force_sig_info(sig, &si, tsk);
}

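/*
 * Handle a fault on an address for which no valid mapping can be set up:
 * user-mode faults get a SIGSEGV/SEGV_MAPERR, while kernel-mode faults
 * fall back to the exception-table fixup / oops path above.
 */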
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->active_mm;

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (user_mode(regs))
                __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
        else
                __do_kernel_fault(mm, addr, fsr, regs);
}

#ifdef CONFIG_MMU
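/*
 * Fault codes private to this file, chosen so that they do not collide
 * with the generic VM_FAULT_* flags: BADMAP means no VMA covers the
 * address, BADACCESS means a VMA exists but its permissions forbid the
 * access.  The tail of do_page_fault() maps them to SEGV_MAPERR and
 * SEGV_ACCERR respectively.
 */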
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000

/*
 * Check that the permissions on the VMA allow for the fault which occurred.
 * If we encountered a write fault, we must have write permission, otherwise
 * we allow any permission.
 */
static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
{
        unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;

        if (fsr & FSR_WRITE)
                mask = VM_WRITE;
        if (fsr & FSR_LNX_PF)
                mask = VM_EXEC;

        return vma->vm_flags & mask ? false : true;
}

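/*
 * Worked example for access_error(): a store to a page mapped read-only
 * arrives with FSR_WRITE set, so the mask is narrowed to VM_WRITE; the VMA
 * lacks VM_WRITE, access_error() returns true, and the fault is reported
 * as VM_FAULT_BADACCESS (eventually SIGSEGV/SEGV_ACCERR).
 */

/*
 * Look up the VMA covering 'addr' (growing a VM_GROWSDOWN stack VMA if
 * necessary), check its permissions against the fault type and let
 * handle_mm_fault() do the real work.  Returns a VM_FAULT_* code or one
 * of the local BADMAP/BADACCESS values; called with mm->mmap_sem held
 * for read.
 */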
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
                unsigned int flags, struct task_struct *tsk)
{
        struct vm_area_struct *vma;
        int fault;

        vma = find_vma(mm, addr);
        fault = VM_FAULT_BADMAP;
        if (unlikely(!vma))
                goto out;
        if (unlikely(vma->vm_start > addr))
                goto check_stack;

        /*
         * Ok, we have a good vm_area for this
         * memory access, so we can handle it.
         */
good_area:
        if (access_error(fsr, vma)) {
                fault = VM_FAULT_BADACCESS;
                goto out;
        }

        return handle_mm_fault(vma, addr & PAGE_MASK, flags);

check_stack:
        /* Don't allow expansion below FIRST_USER_ADDRESS */
        if (vma->vm_flags & VM_GROWSDOWN &&
            addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
                goto good_area;
out:
        return fault;
}

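/*
 * Top-level fault handler for aborts that reach the MM layer: set up the
 * fault flags, take mmap_sem (carefully for kernel-mode faults), call
 * __do_page_fault(), handle retries and perf accounting, and turn any
 * remaining failure into either a user signal or a kernel oops.
 */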
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        int fault, sig, code;
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

        if (notify_page_fault(regs, fsr))
                return 0;

        tsk = current;
        mm  = tsk->mm;

        /* Enable interrupts if they were enabled in the parent context. */
        if (interrupts_enabled(regs))
                local_irq_enable();

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (faulthandler_disabled() || !mm)
                goto no_context;

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
        if (fsr & FSR_WRITE)
                flags |= FAULT_FLAG_WRITE;

        /*
         * As per x86, we may deadlock here.  However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                        goto no_context;
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in
                 * which case, we'll have missed the might_sleep() from
                 * down_read()
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) &&
                    !search_exception_tables(regs->ARM_pc))
                        goto no_context;
#endif
        }

        fault = __do_page_fault(mm, addr, fsr, flags, tsk);

        /* If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_sem because
         * it would already be released in __lock_page_or_retry in
         * mm/filemap.c. */
        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
                if (!user_mode(regs))
                        goto no_context;
                return 0;
        }

        /*
         * Major/minor page fault accounting is only done on the
         * initial attempt. If we go through a retry, it is extremely
         * likely that the page will be found in page cache at that point.
         */

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                        regs, addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                        regs, addr);
                }
                if (fault & VM_FAULT_RETRY) {
                        /*
                         * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
                         * of starvation.
                         */
                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
                        flags |= FAULT_FLAG_TRIED;
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);

        /*
         * Handle the "normal" case first - VM_FAULT_MAJOR
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed)
                 */
                pagefault_out_of_memory();
                return 0;
        }

        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
                 * successfully fix up this page fault.
                 */
                sig = SIGBUS;
                code = BUS_ADRERR;
        } else {
                /*
                 * Something tried to access memory that
                 * isn't in our memory map..
                 */
                sig = SIGSEGV;
                code = fault == VM_FAULT_BADACCESS ?
                        SEGV_ACCERR : SEGV_MAPERR;
        }

        __do_user_fault(tsk, addr, fsr, sig, code, regs);
        return 0;

no_context:
        __do_kernel_fault(mm, addr, fsr, regs);
        return 0;
}
#else                                   /* CONFIG_MMU */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 0;
}
#endif                                  /* CONFIG_MMU */

/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the init_task's first level page table contains the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fix up the exception, or oops the kernel.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
#ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        unsigned int index;
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        if (addr < TASK_SIZE)
                return do_page_fault(addr, fsr, regs);

        if (user_mode(regs))
                goto bad_area;

        index = pgd_index(addr);

        pgd = cpu_get_pgd() + index;
        pgd_k = init_mm.pgd + index;

        if (pgd_none(*pgd_k))
                goto bad_area;
        if (!pgd_present(*pgd))
                set_pgd(pgd, *pgd_k);

        pud = pud_offset(pgd, addr);
        pud_k = pud_offset(pgd_k, addr);

        if (pud_none(*pud_k))
                goto bad_area;
        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
        /*
         * Only one hardware entry per PMD with LPAE.
         */
        index = 0;
#else
        /*
         * On ARM one Linux PGD entry contains two hardware entries (see page
         * tables layout in pgtable.h). We normally guarantee that we always
         * fill both L1 entries. But create_mapping() doesn't follow the rule.
         * It can create individual L1 entries, so here we have to do the
         * pmd_none() check on the entry that really corresponds to the
         * address, not on the first entry of the pair.
         */
        index = (addr >> SECTION_SHIFT) & 1;
#endif
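        /*
         * Illustration (assuming the usual non-LPAE SECTION_SHIFT of 20):
         * one Linux PGD entry spans 2MiB, i.e. two 1MiB hardware sections,
         * so for addr 0xc0123456 bit 20 is set and index is 1 (the odd
         * entry), while 0xc0023456 would give index 0.
         */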
        if (pmd_none(pmd_k[index]))
                goto bad_area;

        copy_pmd(pmd, pmd_k);
        return 0;

bad_area:
        do_bad_area(addr, fsr, regs);
        return 0;
}
#else                                   /* CONFIG_MMU */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        return 0;
}
#endif                                  /* CONFIG_MMU */

/*
 * Some section permission faults need to be handled gracefully.
 * They can happen due to a __{get,put}_user during an oops.
 */
#ifndef CONFIG_ARM_LPAE
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        do_bad_area(addr, fsr, regs);
        return 0;
}
#endif /* CONFIG_ARM_LPAE */

/*
 * This abort handler always returns "fault".
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 1;
}

struct fsr_info {
        int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
        int     sig;
        int     code;
        const char *name;
};

/* FSR definition */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

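/*
 * Let other code (platform or errata handlers, for instance) override the
 * default entry in the fsr_info[] table for a given data abort fault
 * status; an out-of-range index is a BUG.
 */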
void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
                int sig, int code, const char *name)
{
        if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
                BUG();

        fsr_info[nr].fn   = fn;
        fsr_info[nr].sig  = sig;
        fsr_info[nr].code = code;
        fsr_info[nr].name = name;
}

/*
 * Dispatch a data abort to the relevant handler.
 */
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
        struct siginfo info;

        if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
                return;

        pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
                inf->name, fsr, addr);
        show_pte(current->mm, addr);

        info.si_signo = inf->sig;
        info.si_errno = 0;
        info.si_code  = inf->code;
        info.si_addr  = (void __user *)addr;
        arm_notify_die("", regs, &info, fsr, 0);
}

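/*
 * As hook_fault_code(), but for the instruction-side table (ifsr_info[])
 * used by the prefetch abort path below.
 */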
void __init
hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
                 int sig, int code, const char *name)
{
        if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
                BUG();

        ifsr_info[nr].fn   = fn;
        ifsr_info[nr].sig  = sig;
        ifsr_info[nr].code = code;
        ifsr_info[nr].name = name;
}

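/*
 * Dispatch a prefetch (instruction) abort.  FSR_LNX_PF is ORed into the
 * status so that shared handlers such as do_page_fault() can tell this
 * was an instruction fetch and check VM_EXEC rather than read/write
 * permissions.
 */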
asmlinkage void __exception
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
        const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
        struct siginfo info;

        if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
                return;

        pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
                inf->name, ifsr, addr);

        info.si_signo = inf->sig;
        info.si_errno = 0;
        info.si_code  = inf->code;
        info.si_addr  = (void __user *)addr;
        arm_notify_die("", regs, &info, ifsr, 0);
}

/*
 * Abort handler to be used only during first unmasking of asynchronous aborts
 * on the boot CPU. This makes sure that the machine will not die if the
 * firmware/bootloader left an imprecise abort pending for us to trip over.
 */
static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
                                      struct pt_regs *regs)
{
        pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
                "first unmask, this is most likely caused by a "
                "firmware/bootloader bug.\n", fsr);

        return 0;
}

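/*
 * Install the lenient handler above, unmask asynchronous aborts so that
 * any abort left pending by the firmware/bootloader is consumed (and only
 * warned about), then restore the default do_bad() handler.
 */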
void __init early_abt_enable(void)
{
        fsr_info[FSR_FS_AEA].fn = early_abort_handler;
        local_abt_enable();
        fsr_info[FSR_FS_AEA].fn = do_bad;
}

#ifndef CONFIG_ARM_LPAE
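/*
 * Boot-time hook-up of fault codes whose handling depends on the CPU
 * architecture version: I-cache maintenance faults (ARMv6+) are treated
 * as translation faults, and the ARMv7 access flag faults are simply
 * routed to do_bad() until a runtime check for the 'K' extension exists
 * (see the TODO below).
 */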
static int __init exceptions_init(void)
{
        if (cpu_architecture() >= CPU_ARCH_ARMv6) {
                hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
                                "I-cache maintenance fault");
        }

        if (cpu_architecture() >= CPU_ARCH_ARMv7) {
                /*
                 * TODO: Access flag faults introduced in ARMv6K.
                 * Runtime check for 'K' extension is needed
                 */
                hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
                                "section access flag fault");
                hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
                                "section access flag fault");
        }

        return 0;
}

arch_initcall(exceptions_init);
#endif