linux/arch/arm64/mm/fault.c
<<
>>
Prefs
   1/*
   2 * Based on arch/arm/mm/fault.c
   3 *
   4 * Copyright (C) 1995  Linus Torvalds
   5 * Copyright (C) 1995-2004 Russell King
   6 * Copyright (C) 2012 ARM Ltd.
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/signal.h>
  23#include <linux/mm.h>
  24#include <linux/hardirq.h>
  25#include <linux/init.h>
  26#include <linux/kprobes.h>
  27#include <linux/uaccess.h>
  28#include <linux/page-flags.h>
  29#include <linux/sched.h>
  30#include <linux/highmem.h>
  31#include <linux/perf_event.h>
  32
  33#include <asm/exception.h>
  34#include <asm/debug-monitors.h>
  35#include <asm/system_misc.h>
  36#include <asm/pgtable.h>
  37#include <asm/tlbflush.h>
  38
/*
 * Forward declaration: fault_name() is defined after the fault_info table
 * further down in this file, but __do_user_fault() calls it before then.
 */
static const char *fault_name(unsigned int esr);
  40
  41/*
  42 * Dump out the page tables associated with 'addr' in mm 'mm'.
  43 */
  44void show_pte(struct mm_struct *mm, unsigned long addr)
  45{
  46        pgd_t *pgd;
  47
  48        if (!mm)
  49                mm = &init_mm;
  50
  51        pr_alert("pgd = %p\n", mm->pgd);
  52        pgd = pgd_offset(mm, addr);
  53        pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
  54
  55        do {
  56                pud_t *pud;
  57                pmd_t *pmd;
  58                pte_t *pte;
  59
  60                if (pgd_none(*pgd) || pgd_bad(*pgd))
  61                        break;
  62
  63                pud = pud_offset(pgd, addr);
  64                if (pud_none(*pud) || pud_bad(*pud))
  65                        break;
  66
  67                pmd = pmd_offset(pud, addr);
  68                printk(", *pmd=%016llx", pmd_val(*pmd));
  69                if (pmd_none(*pmd) || pmd_bad(*pmd))
  70                        break;
  71
  72                pte = pte_offset_map(pmd, addr);
  73                printk(", *pte=%016llx", pte_val(*pte));
  74                pte_unmap(pte);
  75        } while(0);
  76
  77        printk("\n");
  78}
  79
  80/*
  81 * The kernel tried to access some page that wasn't present.
  82 */
  83static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
  84                              unsigned int esr, struct pt_regs *regs)
  85{
  86        /*
  87         * Are we prepared to handle this kernel fault?
  88         */
  89        if (fixup_exception(regs))
  90                return;
  91
  92        /*
  93         * No handler, we'll have to terminate things with extreme prejudice.
  94         */
  95        bust_spinlocks(1);
  96        pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
  97                 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
  98                 "paging request", addr);
  99
 100        show_pte(mm, addr);
 101        die("Oops", regs, esr);
 102        bust_spinlocks(0);
 103        do_exit(SIGKILL);
 104}
 105
 106/*
 107 * Something tried to access memory that isn't in our memory map. User mode
 108 * accesses just cause a SIGSEGV
 109 */
 110static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
 111                            unsigned int esr, unsigned int sig, int code,
 112                            struct pt_regs *regs)
 113{
 114        struct siginfo si;
 115
 116        if (show_unhandled_signals && unhandled_signal(tsk, sig) &&
 117            printk_ratelimit()) {
 118                pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
 119                        tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
 120                        addr, esr);
 121                show_pte(tsk->mm, addr);
 122                show_regs(regs);
 123        }
 124
 125        tsk->thread.fault_address = addr;
 126        si.si_signo = sig;
 127        si.si_errno = 0;
 128        si.si_code = code;
 129        si.si_addr = (void __user *)addr;
 130        force_sig_info(sig, &si, tsk);
 131}
 132
 133void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 134{
 135        struct task_struct *tsk = current;
 136        struct mm_struct *mm = tsk->active_mm;
 137
 138        /*
 139         * If we are in kernel mode at this point, we have no context to
 140         * handle this fault with.
 141         */
 142        if (user_mode(regs))
 143                __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
 144        else
 145                __do_kernel_fault(mm, addr, esr, regs);
 146}
 147
 148#define VM_FAULT_BADMAP         0x010000
 149#define VM_FAULT_BADACCESS      0x020000
 150
 151#define ESR_WRITE               (1 << 6)
 152#define ESR_CM                  (1 << 8)
 153#define ESR_LNX_EXEC            (1 << 24)
 154
 155static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 156                           unsigned int mm_flags, unsigned long vm_flags,
 157                           struct task_struct *tsk)
 158{
 159        struct vm_area_struct *vma;
 160        int fault;
 161
 162        vma = find_vma(mm, addr);
 163        fault = VM_FAULT_BADMAP;
 164        if (unlikely(!vma))
 165                goto out;
 166        if (unlikely(vma->vm_start > addr))
 167                goto check_stack;
 168
 169        /*
 170         * Ok, we have a good vm_area for this memory access, so we can handle
 171         * it.
 172         */
 173good_area:
 174        /*
 175         * Check that the permissions on the VMA allow for the fault which
 176         * occurred. If we encountered a write or exec fault, we must have
 177         * appropriate permissions, otherwise we allow any permission.
 178         */
 179        if (!(vma->vm_flags & vm_flags)) {
 180                fault = VM_FAULT_BADACCESS;
 181                goto out;
 182        }
 183
 184        return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
 185
 186check_stack:
 187        if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
 188                goto good_area;
 189out:
 190        return fault;
 191}
 192
 193static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 194                                   struct pt_regs *regs)
 195{
 196        struct task_struct *tsk;
 197        struct mm_struct *mm;
 198        int fault, sig, code;
 199        unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 200        unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 201
 202        if (esr & ESR_LNX_EXEC) {
 203                vm_flags = VM_EXEC;
 204        } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
 205                vm_flags = VM_WRITE;
 206                mm_flags |= FAULT_FLAG_WRITE;
 207        }
 208
 209        tsk = current;
 210        mm  = tsk->mm;
 211
 212        /* Enable interrupts if they were enabled in the parent context. */
 213        if (interrupts_enabled(regs))
 214                local_irq_enable();
 215
 216        /*
 217         * If we're in an interrupt or have no user context, we must not take
 218         * the fault.
 219         */
 220        if (in_atomic() || !mm)
 221                goto no_context;
 222
 223        /*
 224         * As per x86, we may deadlock here. However, since the kernel only
 225         * validly references user space from well defined areas of the code,
 226         * we can bug out early if this is from code which shouldn't.
 227         */
 228        if (!down_read_trylock(&mm->mmap_sem)) {
 229                if (!user_mode(regs) && !search_exception_tables(regs->pc))
 230                        goto no_context;
 231retry:
 232                down_read(&mm->mmap_sem);
 233        } else {
 234                /*
 235                 * The above down_read_trylock() might have succeeded in which
 236                 * case, we'll have missed the might_sleep() from down_read().
 237                 */
 238                might_sleep();
 239#ifdef CONFIG_DEBUG_VM
 240                if (!user_mode(regs) && !search_exception_tables(regs->pc))
 241                        goto no_context;
 242#endif
 243        }
 244
 245        fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
 246
 247        /*
 248         * If we need to retry but a fatal signal is pending, handle the
 249         * signal first. We do not need to release the mmap_sem because it
 250         * would already be released in __lock_page_or_retry in mm/filemap.c.
 251         */
 252        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 253                return 0;
 254
 255        /*
 256         * Major/minor page fault accounting is only done on the initial
 257         * attempt. If we go through a retry, it is extremely likely that the
 258         * page will be found in page cache at that point.
 259         */
 260
 261        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 262        if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
 263                if (fault & VM_FAULT_MAJOR) {
 264                        tsk->maj_flt++;
 265                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
 266                                      addr);
 267                } else {
 268                        tsk->min_flt++;
 269                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
 270                                      addr);
 271                }
 272                if (fault & VM_FAULT_RETRY) {
 273                        /*
 274                         * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
 275                         * starvation.
 276                         */
 277                        mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 278                        goto retry;
 279                }
 280        }
 281
 282        up_read(&mm->mmap_sem);
 283
 284        /*
 285         * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
 286         */
 287        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
 288                              VM_FAULT_BADACCESS))))
 289                return 0;
 290
 291        if (fault & VM_FAULT_OOM) {
 292                /*
 293                 * We ran out of memory, call the OOM killer, and return to
 294                 * userspace (which will retry the fault, or kill us if we got
 295                 * oom-killed).
 296                 */
 297                pagefault_out_of_memory();
 298                return 0;
 299        }
 300
 301        /*
 302         * If we are in kernel mode at this point, we have no context to
 303         * handle this fault with.
 304         */
 305        if (!user_mode(regs))
 306                goto no_context;
 307
 308        if (fault & VM_FAULT_SIGBUS) {
 309                /*
 310                 * We had some memory, but were unable to successfully fix up
 311                 * this page fault.
 312                 */
 313                sig = SIGBUS;
 314                code = BUS_ADRERR;
 315        } else {
 316                /*
 317                 * Something tried to access memory that isn't in our memory
 318                 * map.
 319                 */
 320                sig = SIGSEGV;
 321                code = fault == VM_FAULT_BADACCESS ?
 322                        SEGV_ACCERR : SEGV_MAPERR;
 323        }
 324
 325        __do_user_fault(tsk, addr, esr, sig, code, regs);
 326        return 0;
 327
 328no_context:
 329        __do_kernel_fault(mm, addr, esr, regs);
 330        return 0;
 331}
 332
 333/*
 334 * First Level Translation Fault Handler
 335 *
 336 * We enter here because the first level page table doesn't contain a valid
 337 * entry for the address.
 338 *
 339 * If the address is in kernel space (>= TASK_SIZE), then we are probably
 340 * faulting in the vmalloc() area.
 341 *
 342 * If the init_task's first level page tables contains the relevant entry, we
 343 * copy the it to this task.  If not, we send the process a signal, fixup the
 344 * exception, or oops the kernel.
 345 *
 346 * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
 347 * or a critical region, and should only copy the information from the master
 348 * page table, nothing more.
 349 */
 350static int __kprobes do_translation_fault(unsigned long addr,
 351                                          unsigned int esr,
 352                                          struct pt_regs *regs)
 353{
 354        if (addr < TASK_SIZE)
 355                return do_page_fault(addr, esr, regs);
 356
 357        do_bad_area(addr, esr, regs);
 358        return 0;
 359}
 360
 361/*
 362 * This abort handler always returns "fault".
 363 */
 364static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 365{
 366        return 1;
 367}
 368
 369static struct fault_info {
 370        int     (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 371        int     sig;
 372        int     code;
 373        const char *name;
 374} fault_info[] = {
 375        { do_bad,               SIGBUS,  0,             "ttbr address size fault"       },
 376        { do_bad,               SIGBUS,  0,             "level 1 address size fault"    },
 377        { do_bad,               SIGBUS,  0,             "level 2 address size fault"    },
 378        { do_bad,               SIGBUS,  0,             "level 3 address size fault"    },
 379        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "input address range fault"     },
 380        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
 381        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
 382        { do_page_fault,        SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
 383        { do_bad,               SIGBUS,  0,             "reserved access flag fault"    },
 384        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },
 385        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 access flag fault"     },
 386        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 access flag fault"     },
 387        { do_bad,               SIGBUS,  0,             "reserved permission fault"     },
 388        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 permission fault"      },
 389        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 permission fault"      },
 390        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 permission fault"      },
 391        { do_bad,               SIGBUS,  0,             "synchronous external abort"    },
 392        { do_bad,               SIGBUS,  0,             "asynchronous external abort"   },
 393        { do_bad,               SIGBUS,  0,             "unknown 18"                    },
 394        { do_bad,               SIGBUS,  0,             "unknown 19"                    },
 395        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 396        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 397        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 398        { do_bad,               SIGBUS,  0,             "synchronous abort (translation table walk)" },
 399        { do_bad,               SIGBUS,  0,             "synchronous parity error"      },
 400        { do_bad,               SIGBUS,  0,             "asynchronous parity error"     },
 401        { do_bad,               SIGBUS,  0,             "unknown 26"                    },
 402        { do_bad,               SIGBUS,  0,             "unknown 27"                    },
 403        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk" },
 404        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk" },
 405        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk" },
 406        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk" },
 407        { do_bad,               SIGBUS,  0,             "unknown 32"                    },
 408        { do_bad,               SIGBUS,  BUS_ADRALN,    "alignment fault"               },
 409        { do_bad,               SIGBUS,  0,             "debug event"                   },
 410        { do_bad,               SIGBUS,  0,             "unknown 35"                    },
 411        { do_bad,               SIGBUS,  0,             "unknown 36"                    },
 412        { do_bad,               SIGBUS,  0,             "unknown 37"                    },
 413        { do_bad,               SIGBUS,  0,             "unknown 38"                    },
 414        { do_bad,               SIGBUS,  0,             "unknown 39"                    },
 415        { do_bad,               SIGBUS,  0,             "unknown 40"                    },
 416        { do_bad,               SIGBUS,  0,             "unknown 41"                    },
 417        { do_bad,               SIGBUS,  0,             "unknown 42"                    },
 418        { do_bad,               SIGBUS,  0,             "unknown 43"                    },
 419        { do_bad,               SIGBUS,  0,             "unknown 44"                    },
 420        { do_bad,               SIGBUS,  0,             "unknown 45"                    },
 421        { do_bad,               SIGBUS,  0,             "unknown 46"                    },
 422        { do_bad,               SIGBUS,  0,             "unknown 47"                    },
 423        { do_bad,               SIGBUS,  0,             "unknown 48"                    },
 424        { do_bad,               SIGBUS,  0,             "unknown 49"                    },
 425        { do_bad,               SIGBUS,  0,             "unknown 50"                    },
 426        { do_bad,               SIGBUS,  0,             "unknown 51"                    },
 427        { do_bad,               SIGBUS,  0,             "implementation fault (lockdown abort)" },
 428        { do_bad,               SIGBUS,  0,             "unknown 53"                    },
 429        { do_bad,               SIGBUS,  0,             "unknown 54"                    },
 430        { do_bad,               SIGBUS,  0,             "unknown 55"                    },
 431        { do_bad,               SIGBUS,  0,             "unknown 56"                    },
 432        { do_bad,               SIGBUS,  0,             "unknown 57"                    },
 433        { do_bad,               SIGBUS,  0,             "implementation fault (coprocessor abort)" },
 434        { do_bad,               SIGBUS,  0,             "unknown 59"                    },
 435        { do_bad,               SIGBUS,  0,             "unknown 60"                    },
 436        { do_bad,               SIGBUS,  0,             "unknown 61"                    },
 437        { do_bad,               SIGBUS,  0,             "unknown 62"                    },
 438        { do_bad,               SIGBUS,  0,             "unknown 63"                    },
 439};
 440
 441static const char *fault_name(unsigned int esr)
 442{
 443        const struct fault_info *inf = fault_info + (esr & 63);
 444        return inf->name;
 445}
 446
 447/*
 448 * Dispatch a data abort to the relevant handler.
 449 */
 450asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 451                                         struct pt_regs *regs)
 452{
 453        const struct fault_info *inf = fault_info + (esr & 63);
 454        struct siginfo info;
 455
 456        if (!inf->fn(addr, esr, regs))
 457                return;
 458
 459        pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
 460                 inf->name, esr, addr);
 461
 462        info.si_signo = inf->sig;
 463        info.si_errno = 0;
 464        info.si_code  = inf->code;
 465        info.si_addr  = (void __user *)addr;
 466        arm64_notify_die("", regs, &info, esr);
 467}
 468
 469/*
 470 * Handle stack alignment exceptions.
 471 */
 472asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 473                                           unsigned int esr,
 474                                           struct pt_regs *regs)
 475{
 476        struct siginfo info;
 477
 478        info.si_signo = SIGBUS;
 479        info.si_errno = 0;
 480        info.si_code  = BUS_ADRALN;
 481        info.si_addr  = (void __user *)addr;
 482        arm64_notify_die("", regs, &info, esr);
 483}
 484
 485static struct fault_info debug_fault_info[] = {
 486        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware breakpoint"   },
 487        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware single-step"  },
 488        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware watchpoint"   },
 489        { do_bad,       SIGBUS,         0,              "unknown 3"             },
 490        { do_bad,       SIGTRAP,        TRAP_BRKPT,     "aarch32 BKPT"          },
 491        { do_bad,       SIGTRAP,        0,              "aarch32 vector catch"  },
 492        { do_bad,       SIGTRAP,        TRAP_BRKPT,     "aarch64 BRK"           },
 493        { do_bad,       SIGBUS,         0,              "unknown 7"             },
 494};
 495
 496void __init hook_debug_fault_code(int nr,
 497                                  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 498                                  int sig, int code, const char *name)
 499{
 500        BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
 501
 502        debug_fault_info[nr].fn         = fn;
 503        debug_fault_info[nr].sig        = sig;
 504        debug_fault_info[nr].code       = code;
 505        debug_fault_info[nr].name       = name;
 506}
 507
 508asmlinkage int __exception do_debug_exception(unsigned long addr,
 509                                              unsigned int esr,
 510                                              struct pt_regs *regs)
 511{
 512        const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
 513        struct siginfo info;
 514
 515        if (!inf->fn(addr, esr, regs))
 516                return 1;
 517
 518        pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
 519                 inf->name, esr, addr);
 520
 521        info.si_signo = inf->sig;
 522        info.si_errno = 0;
 523        info.si_code  = inf->code;
 524        info.si_addr  = (void __user *)addr;
 525        arm64_notify_die("", regs, &info, esr);
 526
 527        return 0;
 528}
 529