linux/arch/sh/mm/fault_32.c
/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2009  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

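/*
 * Give a registered kprobe fault handler first shot at a kernel-mode fault.
 * Preemption is disabled around the check because kprobe_running() relies
 * on per-CPU state. Returns non-zero if kprobes consumed the fault.
 */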
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
        int ret = 0;

        if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, trap))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}

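/*
 * Copy the kernel mapping covering a vmalloc/P3 address from the reference
 * page table (init_mm.pgd) into the page table 'pgd' that the faulting
 * context is actually using. Returns the kernel pmd entry on success, or
 * NULL if the reference tables have no mapping either, i.e. the fault is
 * genuine and must be handled elsewhere.
 */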
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        pgd += index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
                return NULL;

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
                return NULL;

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;

        if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
        else {
                /*
                 * The page tables are fully synchronised so there must
                 * be another reason for the fault. Return NULL here to
                 * signal that we have not taken care of the fault.
                 */
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
                return NULL;
        }

        return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd_k;
        pmd_t *pmd_k;
        pte_t *pte_k;

        /* Make sure we are in vmalloc/module/P3 area: */
        if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
                return -1;

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "current" here. We might be inside
         * an interrupt in the middle of a task switch..
         */
        pgd_k = get_TTB();
        pmd_k = vmalloc_sync_one(pgd_k, address);
        if (!pmd_k)
                return -1;

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;

        return 0;
}

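/*
 * Anything at or above TASK_SIZE lies outside the user address range and
 * is resolved against the kernel page tables.
 */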
static int fault_in_kernel_space(unsigned long address)
{
        return address >= TASK_SIZE;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long writeaccess,
                                        unsigned long address)
{
        unsigned long vec;
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        int si_code;
        int fault;
        siginfo_t info;

        tsk = current;
        mm = tsk->mm;
        si_code = SEGV_MAPERR;
        vec = lookup_exception_vector();

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely(fault_in_kernel_space(address))) {
                if (vmalloc_fault(address) >= 0)
                        return;
                if (notify_page_fault(regs, vec))
                        return;

                goto bad_area_nosemaphore;
        }

        if (unlikely(notify_page_fault(regs, vec)))
                return;

        /* Only enable interrupts if they were on before the fault */
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);

        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
         */
        if (in_atomic() || !mm)
                goto no_context;

        down_read(&mm->mmap_sem);

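        /*
         * find_vma() returns the first VMA that ends above 'address', so
         * the address may still lie below vma->vm_start. In that case the
         * access is only valid if the VMA is a stack that is allowed to
         * grow down far enough to cover it.
         */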
        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (vma->vm_start <= address)
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, address))
                goto bad_area;

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        si_code = SEGV_ACCERR;
        if (writeaccess) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
survive:
        fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }
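        /*
         * Account the fault against the task and the perf subsystem. A
         * major fault is one that required I/O to satisfy (e.g. reading
         * the page in from the backing store); a minor fault did not.
         */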
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                                     regs, address);
        } else {
                tsk->min_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                                     regs, address);
        }

        up_read(&mm->mmap_sem);
        return;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = si_code;
                info.si_addr = (void *) address;
                force_sig_info(SIGSEGV, &info, tsk);
                return;
        }

no_context:
        /* Are we prepared to handle this kernel fault?  */
        if (fixup_exception(regs))
                return;

        if (handle_trapped_io(regs, address))
                return;
/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

        bust_spinlocks(1);

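        /*
         * Dump the page table entries covering the faulting address by
         * walking the page directory referenced by the TTB register by
         * hand, to give the oops some context.
         */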
        if (oops_may_print()) {
                unsigned long page;

                if (address < PAGE_SIZE)
                        printk(KERN_ALERT "Unable to handle kernel NULL "
                                          "pointer dereference");
                else
                        printk(KERN_ALERT "Unable to handle kernel paging "
                                          "request");
                printk(" at virtual address %08lx\n", address);
                printk(KERN_ALERT "pc = %08lx\n", regs->pc);
                page = (unsigned long)get_TTB();
                if (page) {
                        page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
                        printk(KERN_ALERT "*pde = %08lx\n", page);
                        if (page & _PAGE_PRESENT) {
                                page &= PAGE_MASK;
                                address &= 0x003ff000;
                                page = ((__typeof__(page) *)
                                                __va(page))[address >>
                                                            PAGE_SHIFT];
                                printk(KERN_ALERT "*pte = %08lx\n", page);
                        }
                }
        }

        die("Oops", regs, writeaccess);
        bust_spinlocks(0);
        do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
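/*
 * The init task is never killed here; it yields the CPU and retries the
 * fault via the 'survive' label. A fault taken in kernel mode falls
 * through to the no_context path rather than killing the task directly.
 */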
out_of_memory:
        up_read(&mm->mmap_sem);
        if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk("VM: killing process %s\n", tsk->comm);
        if (user_mode(regs))
                do_group_exit(SIGKILL);
        goto no_context;

do_sigbus:
        up_read(&mm->mmap_sem);

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRERR;
        info.si_addr = (void *)address;
        force_sig_info(SIGBUS, &info, tsk);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;
}

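/*
 * Fast path for software TLB refill: walk the page tables for the faulting
 * address and, if a valid and sufficiently permissive translation already
 * exists, load it and return 0. A non-zero return means the miss could not
 * be resolved here and the full page fault path is taken instead.
 */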
/*
 * Called with interrupts disabled.
 */
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
               unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;

        /*
         * We don't take page faults for P1, P2, and parts of P4; these
         * are always mapped, whether it be due to legacy behaviour in
         * 29-bit mode, or due to PMB configuration in 32-bit mode.
         */
        if (address >= P3SEG && address < P3_ADDR_MAX) {
                pgd = pgd_offset_k(address);
        } else {
                if (unlikely(address >= TASK_SIZE || !current->mm))
                        return 1;

                pgd = pgd_offset(current->mm, address);
        }

        pud = pud_offset(pgd, address);
        if (pud_none_or_clear_bad(pud))
                return 1;
        pmd = pmd_offset(pud, address);
        if (pmd_none_or_clear_bad(pmd))
                return 1;
        pte = pte_offset_kernel(pmd, address);
        entry = *pte;
        if (unlikely(pte_none(entry) || pte_not_present(entry)))
                return 1;
        if (unlikely(writeaccess && !pte_write(entry)))
                return 1;

        if (writeaccess)
                entry = pte_mkdirty(entry);
        entry = pte_mkyoung(entry);

        set_pte(pte, entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
        /*
         * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
         * the case of an initial page write exception, so we need to
         * flush it in order to avoid potential TLB entry duplication.
         */
        if (writeaccess == 2)
                local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

        update_mmu_cache(NULL, address, entry);

        return 0;
}