linux/arch/arm64/mm/fault.c
<<
>>
Prefs
   1/*
   2 * Based on arch/arm/mm/fault.c
   3 *
   4 * Copyright (C) 1995  Linus Torvalds
   5 * Copyright (C) 1995-2004 Russell King
   6 * Copyright (C) 2012 ARM Ltd.
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/signal.h>
  23#include <linux/mm.h>
  24#include <linux/hardirq.h>
  25#include <linux/init.h>
  26#include <linux/kprobes.h>
  27#include <linux/uaccess.h>
  28#include <linux/page-flags.h>
  29#include <linux/sched.h>
  30#include <linux/highmem.h>
  31#include <linux/perf_event.h>
  32
  33#include <asm/cpufeature.h>
  34#include <asm/exception.h>
  35#include <asm/debug-monitors.h>
  36#include <asm/esr.h>
  37#include <asm/sysreg.h>
  38#include <asm/system_misc.h>
  39#include <asm/pgtable.h>
  40#include <asm/tlbflush.h>
  41
  42static const char *fault_name(unsigned int esr);
  43
  44/*
  45 * Dump out the page tables associated with 'addr' in mm 'mm'.
  46 */
  47void show_pte(struct mm_struct *mm, unsigned long addr)
  48{
  49        pgd_t *pgd;
  50
  51        if (!mm)
  52                mm = &init_mm;
  53
  54        pr_alert("pgd = %p\n", mm->pgd);
  55        pgd = pgd_offset(mm, addr);
  56        pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
  57
  58        do {
  59                pud_t *pud;
  60                pmd_t *pmd;
  61                pte_t *pte;
  62
  63                if (pgd_none(*pgd) || pgd_bad(*pgd))
  64                        break;
  65
  66                pud = pud_offset(pgd, addr);
  67                printk(", *pud=%016llx", pud_val(*pud));
  68                if (pud_none(*pud) || pud_bad(*pud))
  69                        break;
  70
  71                pmd = pmd_offset(pud, addr);
  72                printk(", *pmd=%016llx", pmd_val(*pmd));
  73                if (pmd_none(*pmd) || pmd_bad(*pmd))
  74                        break;
  75
  76                pte = pte_offset_map(pmd, addr);
  77                printk(", *pte=%016llx", pte_val(*pte));
  78                pte_unmap(pte);
  79        } while(0);
  80
  81        printk("\n");
  82}
  83
  84/*
  85 * The kernel tried to access some page that wasn't present.
  86 */
  87static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
  88                              unsigned int esr, struct pt_regs *regs)
  89{
  90        /*
  91         * Are we prepared to handle this kernel fault?
  92         */
  93        if (fixup_exception(regs))
  94                return;
  95
  96        /*
  97         * No handler, we'll have to terminate things with extreme prejudice.
  98         */
  99        bust_spinlocks(1);
 100        pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 101                 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 102                 "paging request", addr);
 103
 104        show_pte(mm, addr);
 105        die("Oops", regs, esr);
 106        bust_spinlocks(0);
 107        do_exit(SIGKILL);
 108}
 109
 110/*
 111 * Something tried to access memory that isn't in our memory map. User mode
 112 * accesses just cause a SIGSEGV
 113 */
 114static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
 115                            unsigned int esr, unsigned int sig, int code,
 116                            struct pt_regs *regs)
 117{
 118        struct siginfo si;
 119
 120        if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
 121                pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
 122                        tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
 123                        addr, esr);
 124                show_pte(tsk->mm, addr);
 125                show_regs(regs);
 126        }
 127
 128        tsk->thread.fault_address = addr;
 129        tsk->thread.fault_code = esr;
 130        si.si_signo = sig;
 131        si.si_errno = 0;
 132        si.si_code = code;
 133        si.si_addr = (void __user *)addr;
 134        force_sig_info(sig, &si, tsk);
 135}
 136
 137static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 138{
 139        struct task_struct *tsk = current;
 140        struct mm_struct *mm = tsk->active_mm;
 141
 142        /*
 143         * If we are in kernel mode at this point, we have no context to
 144         * handle this fault with.
 145         */
 146        if (user_mode(regs))
 147                __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
 148        else
 149                __do_kernel_fault(mm, addr, esr, regs);
 150}
 151
/*
 * Private fault results returned by __do_page_fault() in addition to
 * whatever handle_mm_fault() returns:
 *
 *   VM_FAULT_BADMAP    - no usable VMA was found for the address.
 *   VM_FAULT_BADACCESS - a VMA was found but its vm_flags do not allow
 *                        the attempted access.
 */
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000

/*
 * Bit tested in the 'esr' value by do_page_fault() to select an
 * exec-permission check (VM_EXEC).
 * NOTE(review): presumably set by the instruction abort entry path, which
 * is not part of this file - confirm where it is set.
 */
#define ESR_LNX_EXEC            (1 << 24)
 156
 157static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 158                           unsigned int mm_flags, unsigned long vm_flags,
 159                           struct task_struct *tsk)
 160{
 161        struct vm_area_struct *vma;
 162        int fault;
 163
 164        vma = find_vma(mm, addr);
 165        fault = VM_FAULT_BADMAP;
 166        if (unlikely(!vma))
 167                goto out;
 168        if (unlikely(vma->vm_start > addr))
 169                goto check_stack;
 170
 171        /*
 172         * Ok, we have a good vm_area for this memory access, so we can handle
 173         * it.
 174         */
 175good_area:
 176        /*
 177         * Check that the permissions on the VMA allow for the fault which
 178         * occurred. If we encountered a write or exec fault, we must have
 179         * appropriate permissions, otherwise we allow any permission.
 180         */
 181        if (!(vma->vm_flags & vm_flags)) {
 182                fault = VM_FAULT_BADACCESS;
 183                goto out;
 184        }
 185
 186        return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
 187
 188check_stack:
 189        if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
 190                goto good_area;
 191out:
 192        return fault;
 193}
 194
 195static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 196                                   struct pt_regs *regs)
 197{
 198        struct task_struct *tsk;
 199        struct mm_struct *mm;
 200        int fault, sig, code;
 201        unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 202        unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 203
 204        tsk = current;
 205        mm  = tsk->mm;
 206
 207        /* Enable interrupts if they were enabled in the parent context. */
 208        if (interrupts_enabled(regs))
 209                local_irq_enable();
 210
 211        /*
 212         * If we're in an interrupt or have no user context, we must not take
 213         * the fault.
 214         */
 215        if (faulthandler_disabled() || !mm)
 216                goto no_context;
 217
 218        if (user_mode(regs))
 219                mm_flags |= FAULT_FLAG_USER;
 220
 221        if (esr & ESR_LNX_EXEC) {
 222                vm_flags = VM_EXEC;
 223        } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
 224                vm_flags = VM_WRITE;
 225                mm_flags |= FAULT_FLAG_WRITE;
 226        }
 227
 228        /*
 229         * PAN bit set implies the fault happened in kernel space, but not
 230         * in the arch's user access functions.
 231         */
 232        if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
 233                goto no_context;
 234
 235        /*
 236         * As per x86, we may deadlock here. However, since the kernel only
 237         * validly references user space from well defined areas of the code,
 238         * we can bug out early if this is from code which shouldn't.
 239         */
 240        if (!down_read_trylock(&mm->mmap_sem)) {
 241                if (!user_mode(regs) && !search_exception_tables(regs->pc))
 242                        goto no_context;
 243retry:
 244                down_read(&mm->mmap_sem);
 245        } else {
 246                /*
 247                 * The above down_read_trylock() might have succeeded in which
 248                 * case, we'll have missed the might_sleep() from down_read().
 249                 */
 250                might_sleep();
 251#ifdef CONFIG_DEBUG_VM
 252                if (!user_mode(regs) && !search_exception_tables(regs->pc))
 253                        goto no_context;
 254#endif
 255        }
 256
 257        fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
 258
 259        /*
 260         * If we need to retry but a fatal signal is pending, handle the
 261         * signal first. We do not need to release the mmap_sem because it
 262         * would already be released in __lock_page_or_retry in mm/filemap.c.
 263         */
 264        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 265                return 0;
 266
 267        /*
 268         * Major/minor page fault accounting is only done on the initial
 269         * attempt. If we go through a retry, it is extremely likely that the
 270         * page will be found in page cache at that point.
 271         */
 272
 273        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 274        if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
 275                if (fault & VM_FAULT_MAJOR) {
 276                        tsk->maj_flt++;
 277                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
 278                                      addr);
 279                } else {
 280                        tsk->min_flt++;
 281                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
 282                                      addr);
 283                }
 284                if (fault & VM_FAULT_RETRY) {
 285                        /*
 286                         * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
 287                         * starvation.
 288                         */
 289                        mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 290                        mm_flags |= FAULT_FLAG_TRIED;
 291                        goto retry;
 292                }
 293        }
 294
 295        up_read(&mm->mmap_sem);
 296
 297        /*
 298         * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
 299         */
 300        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
 301                              VM_FAULT_BADACCESS))))
 302                return 0;
 303
 304        /*
 305         * If we are in kernel mode at this point, we have no context to
 306         * handle this fault with.
 307         */
 308        if (!user_mode(regs))
 309                goto no_context;
 310
 311        if (fault & VM_FAULT_OOM) {
 312                /*
 313                 * We ran out of memory, call the OOM killer, and return to
 314                 * userspace (which will retry the fault, or kill us if we got
 315                 * oom-killed).
 316                 */
 317                pagefault_out_of_memory();
 318                return 0;
 319        }
 320
 321        if (fault & VM_FAULT_SIGBUS) {
 322                /*
 323                 * We had some memory, but were unable to successfully fix up
 324                 * this page fault.
 325                 */
 326                sig = SIGBUS;
 327                code = BUS_ADRERR;
 328        } else {
 329                /*
 330                 * Something tried to access memory that isn't in our memory
 331                 * map.
 332                 */
 333                sig = SIGSEGV;
 334                code = fault == VM_FAULT_BADACCESS ?
 335                        SEGV_ACCERR : SEGV_MAPERR;
 336        }
 337
 338        __do_user_fault(tsk, addr, esr, sig, code, regs);
 339        return 0;
 340
 341no_context:
 342        __do_kernel_fault(mm, addr, esr, regs);
 343        return 0;
 344}
 345
 346/*
 347 * First Level Translation Fault Handler
 348 *
 349 * We enter here because the first level page table doesn't contain a valid
 350 * entry for the address.
 351 *
 352 * If the address is in kernel space (>= TASK_SIZE), then we are probably
 353 * faulting in the vmalloc() area.
 354 *
 355 * If the init_task's first level page tables contains the relevant entry, we
 356 * copy the it to this task.  If not, we send the process a signal, fixup the
 357 * exception, or oops the kernel.
 358 *
 359 * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
 360 * or a critical region, and should only copy the information from the master
 361 * page table, nothing more.
 362 */
 363static int __kprobes do_translation_fault(unsigned long addr,
 364                                          unsigned int esr,
 365                                          struct pt_regs *regs)
 366{
 367        if (addr < TASK_SIZE)
 368                return do_page_fault(addr, esr, regs);
 369
 370        do_bad_area(addr, esr, regs);
 371        return 0;
 372}
 373
 374/*
 375 * This abort handler always returns "fault".
 376 */
 377static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 378{
 379        return 1;
 380}
 381
 382static struct fault_info {
 383        int     (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 384        int     sig;
 385        int     code;
 386        const char *name;
 387} fault_info[] = {
 388        { do_bad,               SIGBUS,  0,             "ttbr address size fault"       },
 389        { do_bad,               SIGBUS,  0,             "level 1 address size fault"    },
 390        { do_bad,               SIGBUS,  0,             "level 2 address size fault"    },
 391        { do_bad,               SIGBUS,  0,             "level 3 address size fault"    },
 392        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 0 translation fault"     },
 393        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
 394        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
 395        { do_page_fault,        SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
 396        { do_bad,               SIGBUS,  0,             "unknown 8"                     },
 397        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },
 398        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 access flag fault"     },
 399        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 access flag fault"     },
 400        { do_bad,               SIGBUS,  0,             "unknown 12"                    },
 401        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 permission fault"      },
 402        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 permission fault"      },
 403        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 permission fault"      },
 404        { do_bad,               SIGBUS,  0,             "synchronous external abort"    },
 405        { do_bad,               SIGBUS,  0,             "unknown 17"                    },
 406        { do_bad,               SIGBUS,  0,             "unknown 18"                    },
 407        { do_bad,               SIGBUS,  0,             "unknown 19"                    },
 408        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 409        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 410        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 411        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 412        { do_bad,               SIGBUS,  0,             "synchronous parity error"      },
 413        { do_bad,               SIGBUS,  0,             "unknown 25"                    },
 414        { do_bad,               SIGBUS,  0,             "unknown 26"                    },
 415        { do_bad,               SIGBUS,  0,             "unknown 27"                    },
 416        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
 417        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
 418        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
 419        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
 420        { do_bad,               SIGBUS,  0,             "unknown 32"                    },
 421        { do_bad,               SIGBUS,  BUS_ADRALN,    "alignment fault"               },
 422        { do_bad,               SIGBUS,  0,             "unknown 34"                    },
 423        { do_bad,               SIGBUS,  0,             "unknown 35"                    },
 424        { do_bad,               SIGBUS,  0,             "unknown 36"                    },
 425        { do_bad,               SIGBUS,  0,             "unknown 37"                    },
 426        { do_bad,               SIGBUS,  0,             "unknown 38"                    },
 427        { do_bad,               SIGBUS,  0,             "unknown 39"                    },
 428        { do_bad,               SIGBUS,  0,             "unknown 40"                    },
 429        { do_bad,               SIGBUS,  0,             "unknown 41"                    },
 430        { do_bad,               SIGBUS,  0,             "unknown 42"                    },
 431        { do_bad,               SIGBUS,  0,             "unknown 43"                    },
 432        { do_bad,               SIGBUS,  0,             "unknown 44"                    },
 433        { do_bad,               SIGBUS,  0,             "unknown 45"                    },
 434        { do_bad,               SIGBUS,  0,             "unknown 46"                    },
 435        { do_bad,               SIGBUS,  0,             "unknown 47"                    },
 436        { do_bad,               SIGBUS,  0,             "TLB conflict abort"            },
 437        { do_bad,               SIGBUS,  0,             "unknown 49"                    },
 438        { do_bad,               SIGBUS,  0,             "unknown 50"                    },
 439        { do_bad,               SIGBUS,  0,             "unknown 51"                    },
 440        { do_bad,               SIGBUS,  0,             "implementation fault (lockdown abort)" },
 441        { do_bad,               SIGBUS,  0,             "implementation fault (unsupported exclusive)" },
 442        { do_bad,               SIGBUS,  0,             "unknown 54"                    },
 443        { do_bad,               SIGBUS,  0,             "unknown 55"                    },
 444        { do_bad,               SIGBUS,  0,             "unknown 56"                    },
 445        { do_bad,               SIGBUS,  0,             "unknown 57"                    },
 446        { do_bad,               SIGBUS,  0,             "unknown 58"                    },
 447        { do_bad,               SIGBUS,  0,             "unknown 59"                    },
 448        { do_bad,               SIGBUS,  0,             "unknown 60"                    },
 449        { do_bad,               SIGBUS,  0,             "section domain fault"          },
 450        { do_bad,               SIGBUS,  0,             "page domain fault"             },
 451        { do_bad,               SIGBUS,  0,             "unknown 63"                    },
 452};
 453
 454static const char *fault_name(unsigned int esr)
 455{
 456        const struct fault_info *inf = fault_info + (esr & 63);
 457        return inf->name;
 458}
 459
 460/*
 461 * Dispatch a data abort to the relevant handler.
 462 */
 463asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 464                                         struct pt_regs *regs)
 465{
 466        const struct fault_info *inf = fault_info + (esr & 63);
 467        struct siginfo info;
 468
 469        if (!inf->fn(addr, esr, regs))
 470                return;
 471
 472        pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
 473                 inf->name, esr, addr);
 474
 475        info.si_signo = inf->sig;
 476        info.si_errno = 0;
 477        info.si_code  = inf->code;
 478        info.si_addr  = (void __user *)addr;
 479        arm64_notify_die("", regs, &info, esr);
 480}
 481
 482/*
 483 * Handle stack alignment exceptions.
 484 */
 485asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 486                                           unsigned int esr,
 487                                           struct pt_regs *regs)
 488{
 489        struct siginfo info;
 490        struct task_struct *tsk = current;
 491
 492        if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
 493                pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
 494                                    tsk->comm, task_pid_nr(tsk),
 495                                    esr_get_class_string(esr), (void *)regs->pc,
 496                                    (void *)regs->sp);
 497
 498        info.si_signo = SIGBUS;
 499        info.si_errno = 0;
 500        info.si_code  = BUS_ADRALN;
 501        info.si_addr  = (void __user *)addr;
 502        arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr);
 503}
 504
 505int __init early_brk64(unsigned long addr, unsigned int esr,
 506                       struct pt_regs *regs);
 507
 508/*
 509 * __refdata because early_brk64 is __init, but the reference to it is
 510 * clobbered at arch_initcall time.
 511 * See traps.c and debug-monitors.c:debug_traps_init().
 512 */
 513static struct fault_info __refdata debug_fault_info[] = {
 514        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware breakpoint"   },
 515        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware single-step"  },
 516        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware watchpoint"   },
 517        { do_bad,       SIGBUS,         0,              "unknown 3"             },
 518        { do_bad,       SIGTRAP,        TRAP_BRKPT,     "aarch32 BKPT"          },
 519        { do_bad,       SIGTRAP,        0,              "aarch32 vector catch"  },
 520        { early_brk64,  SIGTRAP,        TRAP_BRKPT,     "aarch64 BRK"           },
 521        { do_bad,       SIGBUS,         0,              "unknown 7"             },
 522};
 523
 524void __init hook_debug_fault_code(int nr,
 525                                  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 526                                  int sig, int code, const char *name)
 527{
 528        BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
 529
 530        debug_fault_info[nr].fn         = fn;
 531        debug_fault_info[nr].sig        = sig;
 532        debug_fault_info[nr].code       = code;
 533        debug_fault_info[nr].name       = name;
 534}
 535
 536asmlinkage int __exception do_debug_exception(unsigned long addr,
 537                                              unsigned int esr,
 538                                              struct pt_regs *regs)
 539{
 540        const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
 541        struct siginfo info;
 542
 543        if (!inf->fn(addr, esr, regs))
 544                return 1;
 545
 546        pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
 547                 inf->name, esr, addr);
 548
 549        info.si_signo = inf->sig;
 550        info.si_errno = 0;
 551        info.si_code  = inf->code;
 552        info.si_addr  = (void __user *)addr;
 553        arm64_notify_die("", regs, &info, 0);
 554
 555        return 0;
 556}
 557
 558#ifdef CONFIG_ARM64_PAN
 559void cpu_enable_pan(void *__unused)
 560{
 561        config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 562}
 563#endif /* CONFIG_ARM64_PAN */
 564