/*
 * arch/sh/mm/fault_32.c - Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2009  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

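/*
 * Give kprobes a chance to claim the fault before the regular handler
 * runs; this only applies to faults taken in kernel mode while a kprobe
 * is active.
 */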
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
        int ret = 0;

        if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, trap))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}

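/*
 * Synchronise the pgd/pud/pmd entries covering 'address' in the faulting
 * page table with the reference kernel page table (init_mm.pgd). Returns
 * the kernel pmd on success, or NULL if the fault cannot be resolved by
 * copying entries (either the kernel tables have no mapping here, or the
 * tables are already in sync and the fault has some other cause).
 */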
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        pgd += index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
                return NULL;

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
                return NULL;

        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;

        if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
        else {
                /*
                 * The page tables are fully synchronised so there must
                 * be another reason for the fault. Return NULL here to
                 * signal that we have not taken care of the fault.
                 */
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
                return NULL;
        }

        return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd_k;
        pmd_t *pmd_k;
        pte_t *pte_k;

        /* Make sure we are in vmalloc/module/P3 area: */
        if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
                return -1;

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "current" here. We might be inside
         * an interrupt in the middle of a task switch..
         */
        pgd_k = get_TTB();
        pmd_k = vmalloc_sync_one(pgd_k, address);
        if (!pmd_k)
                return -1;

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;

        return 0;
}

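/*
 * Any address at or above TASK_SIZE lies outside the user mapping and is
 * treated as kernel space for fault handling purposes.
 */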
static int fault_in_kernel_space(unsigned long address)
{
        return address >= TASK_SIZE;
}

/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long writeaccess,
                                        unsigned long address)
{
        unsigned long vec;
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        int si_code;
        int fault;
        siginfo_t info;

        tsk = current;
        mm = tsk->mm;
        si_code = SEGV_MAPERR;
        vec = lookup_exception_vector();

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely(fault_in_kernel_space(address))) {
                if (vmalloc_fault(address) >= 0)
                        return;
                if (notify_page_fault(regs, vec))
                        return;

                goto bad_area_nosemaphore;
        }

        if (unlikely(notify_page_fault(regs, vec)))
                return;

        /* Only enable interrupts if they were on before the fault */
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);

        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
         */
        if (in_atomic() || !mm)
                goto no_context;

        down_read(&mm->mmap_sem);

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (vma->vm_start <= address)
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, address))
                goto bad_area;

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        si_code = SEGV_ACCERR;
        if (writeaccess) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                              regs, address);
        } else {
                tsk->min_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                              regs, address);
        }

        up_read(&mm->mmap_sem);
        return;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = si_code;
                info.si_addr = (void *)address;
                force_sig_info(SIGSEGV, &info, tsk);
                return;
        }

no_context:
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        if (handle_trapped_io(regs, address))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */

        bust_spinlocks(1);

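        /*
         * Dump the program counter and the relevant page table entries
         * to help pinpoint the offending access before we die.
         */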
        if (oops_may_print()) {
                unsigned long page;

                if (address < PAGE_SIZE)
                        printk(KERN_ALERT "Unable to handle kernel NULL "
                                          "pointer dereference");
                else
                        printk(KERN_ALERT "Unable to handle kernel paging "
                                          "request");
                printk(" at virtual address %08lx\n", address);
                printk(KERN_ALERT "pc = %08lx\n", regs->pc);
                page = (unsigned long)get_TTB();
                if (page) {
                        page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
                        printk(KERN_ALERT "*pde = %08lx\n", page);
                        if (page & _PAGE_PRESENT) {
                                page &= PAGE_MASK;
                                address &= 0x003ff000;
                                page = ((__typeof__(page) *)
                                                __va(page))[address >>
                                                            PAGE_SHIFT];
                                printk(KERN_ALERT "*pte = %08lx\n", page);
                        }
                }
        }

        die("Oops", regs, writeaccess);
        bust_spinlocks(0);
        do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
        return;

do_sigbus:
        up_read(&mm->mmap_sem);

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRERR;
        info.si_addr = (void *)address;
        force_sig_info(SIGBUS, &info, tsk);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;
}

/*
 * Called with interrupts disabled. This is the fast-path TLB miss
 * handler: it walks the software page tables and, if a usable pte is
 * found, updates it and loads the translation into the TLB. A return
 * value of 1 means the miss could not be resolved here, and the caller
 * falls back to do_page_fault().
 */
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
               unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;

        /*
         * We don't take page faults for P1, P2, and parts of P4; these
         * are always mapped, whether due to legacy behaviour in 29-bit
         * mode or to PMB configuration in 32-bit mode.
         */
        if (address >= P3SEG && address < P3_ADDR_MAX) {
                pgd = pgd_offset_k(address);
        } else {
                if (unlikely(address >= TASK_SIZE || !current->mm))
                        return 1;

                pgd = pgd_offset(current->mm, address);
        }

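        /* Walk the software page tables; bail out to the slow path on any hole. */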
        pud = pud_offset(pgd, address);
        if (pud_none_or_clear_bad(pud))
                return 1;
        pmd = pmd_offset(pud, address);
        if (pmd_none_or_clear_bad(pmd))
                return 1;
        pte = pte_offset_kernel(pmd, address);
        entry = *pte;
        if (unlikely(pte_none(entry) || pte_not_present(entry)))
                return 1;
        if (unlikely(writeaccess && !pte_write(entry)))
                return 1;

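        /* Mark the page young (and dirty on a write) before reloading the TLB entry. */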
        if (writeaccess)
                entry = pte_mkdirty(entry);
        entry = pte_mkyoung(entry);

        set_pte(pte, entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
        /*
         * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
         * the case of an initial page write exception, so we need to
         * flush it in order to avoid potential TLB entry duplication.
         */
        if (writeaccess == 2)
                local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

        update_mmu_cache(NULL, address, pte);

        return 0;
}