linux/arch/um/kernel/trap.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
   3 * Licensed under the GPL
   4 */
   5
   6#include <linux/mm.h>
   7#include <linux/sched.h>
   8#include <linux/hardirq.h>
   9#include <linux/module.h>
  10#include <linux/uaccess.h>
  11#include <asm/current.h>
  12#include <asm/pgtable.h>
  13#include <asm/tlbflush.h>
  14#include <arch.h>
  15#include <as-layout.h>
  16#include <kern_util.h>
  17#include <os.h>
  18#include <skas.h>
  19
  20/*
  21 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
  22 * segv().
  23 */
  24int handle_page_fault(unsigned long address, unsigned long ip,
  25                      int is_write, int is_user, int *code_out)
  26{
  27        struct mm_struct *mm = current->mm;
  28        struct vm_area_struct *vma;
  29        pgd_t *pgd;
  30        pud_t *pud;
  31        pmd_t *pmd;
  32        pte_t *pte;
  33        int err = -EFAULT;
  34        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
  35
  36        *code_out = SEGV_MAPERR;
  37
  38        /*
  39         * If the fault was with pagefaults disabled, don't take the fault, just
  40         * fail.
  41         */
  42        if (faulthandler_disabled())
  43                goto out_nosemaphore;
  44
  45        if (is_user)
  46                flags |= FAULT_FLAG_USER;
  47retry:
  48        down_read(&mm->mmap_sem);
  49        vma = find_vma(mm, address);
  50        if (!vma)
  51                goto out;
  52        else if (vma->vm_start <= address)
  53                goto good_area;
  54        else if (!(vma->vm_flags & VM_GROWSDOWN))
  55                goto out;
  56        else if (is_user && !ARCH_IS_STACKGROW(address))
  57                goto out;
  58        else if (expand_stack(vma, address))
  59                goto out;
  60
  61good_area:
  62        *code_out = SEGV_ACCERR;
  63        if (is_write) {
  64                if (!(vma->vm_flags & VM_WRITE))
  65                        goto out;
  66                flags |= FAULT_FLAG_WRITE;
  67        } else {
  68                /* Don't require VM_READ|VM_EXEC for write faults! */
  69                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
  70                        goto out;
  71        }
  72
  73        do {
  74                int fault;
  75
  76                fault = handle_mm_fault(mm, vma, address, flags);
  77
  78                if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
  79                        goto out_nosemaphore;
  80
  81                if (unlikely(fault & VM_FAULT_ERROR)) {
  82                        if (fault & VM_FAULT_OOM) {
  83                                goto out_of_memory;
  84                        } else if (fault & VM_FAULT_SIGSEGV) {
  85                                goto out;
  86                        } else if (fault & VM_FAULT_SIGBUS) {
  87                                err = -EACCES;
  88                                goto out;
  89                        }
  90                        BUG();
  91                }
  92                if (flags & FAULT_FLAG_ALLOW_RETRY) {
  93                        if (fault & VM_FAULT_MAJOR)
  94                                current->maj_flt++;
  95                        else
  96                                current->min_flt++;
  97                        if (fault & VM_FAULT_RETRY) {
  98                                flags &= ~FAULT_FLAG_ALLOW_RETRY;
  99                                flags |= FAULT_FLAG_TRIED;
 100
 101                                goto retry;
 102                        }
 103                }
 104
 105                pgd = pgd_offset(mm, address);
 106                pud = pud_offset(pgd, address);
 107                pmd = pmd_offset(pud, address);
 108                pte = pte_offset_kernel(pmd, address);
 109        } while (!pte_present(*pte));
 110        err = 0;
 111        /*
 112         * The below warning was added in place of
 113         *      pte_mkyoung(); if (is_write) pte_mkdirty();
 114         * If it's triggered, we'd see normally a hang here (a clean pte is
 115         * marked read-only to emulate the dirty bit).
 116         * However, the generic code can mark a PTE writable but clean on a
 117         * concurrent read fault, triggering this harmlessly. So comment it out.
 118         */
 119#if 0
 120        WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 121#endif
 122        flush_tlb_page(vma, address);
 123out:
 124        up_read(&mm->mmap_sem);
 125out_nosemaphore:
 126        return err;
 127
 128out_of_memory:
 129        /*
 130         * We ran out of memory, call the OOM killer, and return the userspace
 131         * (which will retry the fault, or kill us if we got oom-killed).
 132         */
 133        up_read(&mm->mmap_sem);
 134        if (!is_user)
 135                goto out_nosemaphore;
 136        pagefault_out_of_memory();
 137        return 0;
 138}
 139EXPORT_SYMBOL(handle_page_fault);
 140
 141static void show_segv_info(struct uml_pt_regs *regs)
 142{
 143        struct task_struct *tsk = current;
 144        struct faultinfo *fi = UPT_FAULTINFO(regs);
 145
 146        if (!unhandled_signal(tsk, SIGSEGV))
 147                return;
 148
 149        if (!printk_ratelimit())
 150                return;
 151
 152        printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
 153                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 154                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
 155                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
 156                fi->error_code);
 157
 158        print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
 159        printk(KERN_CONT "\n");
 160}
 161
 162static void bad_segv(struct faultinfo fi, unsigned long ip)
 163{
 164        struct siginfo si;
 165
 166        si.si_signo = SIGSEGV;
 167        si.si_code = SEGV_ACCERR;
 168        si.si_addr = (void __user *) FAULT_ADDRESS(fi);
 169        current->thread.arch.faultinfo = fi;
 170        force_sig_info(SIGSEGV, &si, current);
 171}
 172
 173void fatal_sigsegv(void)
 174{
 175        force_sigsegv(SIGSEGV, current);
 176        do_signal(&current->thread.regs);
 177        /*
 178         * This is to tell gcc that we're not returning - do_signal
 179         * can, in general, return, but in this case, it's not, since
 180         * we just got a fatal SIGSEGV queued.
 181         */
 182        os_dump_core();
 183}
 184
 185void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 186{
 187        struct faultinfo * fi = UPT_FAULTINFO(regs);
 188
 189        if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
 190                show_segv_info(regs);
 191                bad_segv(*fi, UPT_IP(regs));
 192                return;
 193        }
 194        segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
 195}
 196
 197/*
 198 * We give a *copy* of the faultinfo in the regs to segv.
 199 * This must be done, since nesting SEGVs could overwrite
 200 * the info in the regs. A pointer to the info then would
 201 * give us bad data!
 202 */
 203unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 204                   struct uml_pt_regs *regs)
 205{
 206        struct siginfo si;
 207        jmp_buf *catcher;
 208        int err;
 209        int is_write = FAULT_WRITE(fi);
 210        unsigned long address = FAULT_ADDRESS(fi);
 211
 212        if (!is_user && regs)
 213                current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 214
 215        if (!is_user && (address >= start_vm) && (address < end_vm)) {
 216                flush_tlb_kernel_vm();
 217                goto out;
 218        }
 219        else if (current->mm == NULL) {
 220                show_regs(container_of(regs, struct pt_regs, regs));
 221                panic("Segfault with no mm");
 222        }
 223        else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) {
 224                show_regs(container_of(regs, struct pt_regs, regs));
 225                panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx",
 226                       address, ip);
 227        }
 228
 229        if (SEGV_IS_FIXABLE(&fi))
 230                err = handle_page_fault(address, ip, is_write, is_user,
 231                                        &si.si_code);
 232        else {
 233                err = -EFAULT;
 234                /*
 235                 * A thread accessed NULL, we get a fault, but CR2 is invalid.
 236                 * This code is used in __do_copy_from_user() of TT mode.
 237                 * XXX tt mode is gone, so maybe this isn't needed any more
 238                 */
 239                address = 0;
 240        }
 241
 242        catcher = current->thread.fault_catcher;
 243        if (!err)
 244                goto out;
 245        else if (catcher != NULL) {
 246                current->thread.fault_addr = (void *) address;
 247                UML_LONGJMP(catcher, 1);
 248        }
 249        else if (current->thread.fault_addr != NULL)
 250                panic("fault_addr set but no fault catcher");
 251        else if (!is_user && arch_fixup(ip, regs))
 252                goto out;
 253
 254        if (!is_user) {
 255                show_regs(container_of(regs, struct pt_regs, regs));
 256                panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
 257                      address, ip);
 258        }
 259
 260        show_segv_info(regs);
 261
 262        if (err == -EACCES) {
 263                si.si_signo = SIGBUS;
 264                si.si_errno = 0;
 265                si.si_code = BUS_ADRERR;
 266                si.si_addr = (void __user *)address;
 267                current->thread.arch.faultinfo = fi;
 268                force_sig_info(SIGBUS, &si, current);
 269        } else {
 270                BUG_ON(err != -EFAULT);
 271                si.si_signo = SIGSEGV;
 272                si.si_addr = (void __user *) address;
 273                current->thread.arch.faultinfo = fi;
 274                force_sig_info(SIGSEGV, &si, current);
 275        }
 276
 277out:
 278        if (regs)
 279                current->thread.segv_regs = NULL;
 280
 281        return 0;
 282}
 283
 284void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
 285{
 286        struct faultinfo *fi;
 287        struct siginfo clean_si;
 288
 289        if (!UPT_IS_USER(regs)) {
 290                if (sig == SIGBUS)
 291                        printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
 292                               "mount likely just ran out of space\n");
 293                panic("Kernel mode signal %d", sig);
 294        }
 295
 296        arch_examine_signal(sig, regs);
 297
 298        memset(&clean_si, 0, sizeof(clean_si));
 299        clean_si.si_signo = si->si_signo;
 300        clean_si.si_errno = si->si_errno;
 301        clean_si.si_code = si->si_code;
 302        switch (sig) {
 303        case SIGILL:
 304        case SIGFPE:
 305        case SIGSEGV:
 306        case SIGBUS:
 307        case SIGTRAP:
 308                fi = UPT_FAULTINFO(regs);
 309                clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
 310                current->thread.arch.faultinfo = *fi;
 311#ifdef __ARCH_SI_TRAPNO
 312                clean_si.si_trapno = si->si_trapno;
 313#endif
 314                break;
 315        default:
 316                printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
 317                        sig, si->si_code);
 318        }
 319
 320        force_sig_info(sig, &clean_si, current);
 321}
 322
 323void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
 324{
 325        if (current->thread.fault_catcher != NULL)
 326                UML_LONGJMP(current->thread.fault_catcher, 1);
 327        else
 328                relay_signal(sig, si, regs);
 329}
 330
 331void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 332{
 333        do_IRQ(WINCH_IRQ, regs);
 334}
 335
 336void trap_init(void)
 337{
 338}
 339