linux/arch/x86/kernel/entry_32.S
   1/*
   2 *
   3 *  Copyright (C) 1991, 1992  Linus Torvalds
   4 */
   5
   6/*
   7 * entry.S contains the system-call and fault low-level handling routines.
   8 * This also contains the timer-interrupt handler, as well as all interrupts
   9 * and faults that can result in a task-switch.
  10 *
  11 * NOTE: This code handles signal-recognition, which happens every time
  12 * after a timer-interrupt and after each system call.
  13 *
  14 * I changed all the .align's to 4 (16 byte alignment), as that's faster
  15 * on a 486.
  16 *
  17 * Stack layout in 'syscall_exit':
  18 *      ptrace needs to have all regs on the stack.
  19 *      if the order here is changed, it needs to be
  20 *      updated in fork.c:copy_process, signal.c:do_signal,
  21 *      ptrace.c and ptrace.h
  22 *
  23 *       0(%esp) - %ebx
  24 *       4(%esp) - %ecx
  25 *       8(%esp) - %edx
  26 *       C(%esp) - %esi
  27 *      10(%esp) - %edi
  28 *      14(%esp) - %ebp
  29 *      18(%esp) - %eax
  30 *      1C(%esp) - %ds
  31 *      20(%esp) - %es
  32 *      24(%esp) - %fs
  33 *      28(%esp) - %gs          saved iff !CONFIG_X86_32_LAZY_GS
  34 *      2C(%esp) - orig_eax
  35 *      30(%esp) - %eip
  36 *      34(%esp) - %cs
  37 *      38(%esp) - %eflags
  38 *      3C(%esp) - %oldesp
  39 *      40(%esp) - %oldss
  40 *
  41 * "current" is in register %ebx during any slow entries.
  42 */
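/*
 * These offsets mirror struct pt_regs; the PT_EBX..PT_OLDSS constants used
 * throughout this file (generated by asm-offsets.c) name the same slots,
 * e.g. PT_EBX is 0(%esp) and PT_OLDSS is 0x40(%esp) in this layout.
 */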
  43
  44#include <linux/linkage.h>
  45#include <linux/err.h>
  46#include <asm/thread_info.h>
  47#include <asm/irqflags.h>
  48#include <asm/errno.h>
  49#include <asm/segment.h>
  50#include <asm/smp.h>
  51#include <asm/page_types.h>
  52#include <asm/percpu.h>
  53#include <asm/dwarf2.h>
  54#include <asm/processor-flags.h>
  55#include <asm/ftrace.h>
  56#include <asm/irq_vectors.h>
  57#include <asm/cpufeature.h>
  58#include <asm/alternative-asm.h>
  59#include <asm/asm.h>
  60#include <asm/smap.h>
  61
  62/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  63#include <linux/elf-em.h>
  64#define AUDIT_ARCH_I386         (EM_386|__AUDIT_ARCH_LE)
  65#define __AUDIT_ARCH_LE    0x40000000
  66
  67#ifndef CONFIG_AUDITSYSCALL
  68#define sysenter_audit  syscall_trace_entry
  69#define sysexit_audit   syscall_exit_work
  70#endif
  71
  72        .section .entry.text, "ax"
  73
  74/*
  75 * We use macros for low-level operations which need to be overridden
  76 * for paravirtualization.  The following will never clobber any registers:
  77 *   INTERRUPT_RETURN (aka. "iret")
  78 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  79 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  80 *
  81 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  82 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  83 * Allowing a register to be clobbered can shrink the paravirt replacement
  84 * enough to patch inline, increasing performance.
  85 */
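/*
 * (Without CONFIG_PARAVIRT these macros come straight from asm/irqflags.h
 * as the plain instructions named above, and the clobber argument is
 * ignored; with CONFIG_PARAVIRT they become paravirt call sites that may
 * be patched at boot.)
 */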
  86
  87#ifdef CONFIG_PREEMPT
  88#define preempt_stop(clobbers)  DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
  89#else
  90#define preempt_stop(clobbers)
  91#define resume_kernel           restore_all
  92#endif
  93
  94.macro TRACE_IRQS_IRET
  95#ifdef CONFIG_TRACE_IRQFLAGS
  96        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
  97        jz 1f
  98        TRACE_IRQS_ON
  991:
 100#endif
 101.endm
 102
 103/*
 104 * User gs save/restore
 105 *
  106 * %gs is used for userland TLS and the kernel only uses it for the stack
  107 * canary, which is required to be at %gs:20 by gcc.  Read the comment
 108 * at the top of stackprotector.h for more info.
 109 *
 110 * Local labels 98 and 99 are used.
 111 */
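/*
 * Roughly: CONFIG_X86_32_LAZY_GS is set when the kernel is built without
 * stack protector; in that case entry/exit leave the user's %gs alone and
 * only reserve its pt_regs slot.  With stack protector enabled, %gs must be
 * reloaded with the per-cpu canary segment on kernel entry, so the full
 * save/restore variants below are used instead.
 */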
 112#ifdef CONFIG_X86_32_LAZY_GS
 113
 114 /* unfortunately push/pop can't be no-op */
 115.macro PUSH_GS
 116        pushl_cfi $0
 117.endm
 118.macro POP_GS pop=0
 119        addl $(4 + \pop), %esp
 120        CFI_ADJUST_CFA_OFFSET -(4 + \pop)
 121.endm
 122.macro POP_GS_EX
 123.endm
 124
 125 /* all the rest are no-op */
 126.macro PTGS_TO_GS
 127.endm
 128.macro PTGS_TO_GS_EX
 129.endm
 130.macro GS_TO_REG reg
 131.endm
 132.macro REG_TO_PTGS reg
 133.endm
 134.macro SET_KERNEL_GS reg
 135.endm
 136
 137#else   /* CONFIG_X86_32_LAZY_GS */
 138
 139.macro PUSH_GS
 140        pushl_cfi %gs
 141        /*CFI_REL_OFFSET gs, 0*/
 142.endm
 143
 144.macro POP_GS pop=0
 14598:     popl_cfi %gs
 146        /*CFI_RESTORE gs*/
 147  .if \pop <> 0
 148        add $\pop, %esp
 149        CFI_ADJUST_CFA_OFFSET -\pop
 150  .endif
 151.endm
 152.macro POP_GS_EX
 153.pushsection .fixup, "ax"
 15499:     movl $0, (%esp)
 155        jmp 98b
 156.popsection
 157        _ASM_EXTABLE(98b,99b)
 158.endm
 159
 160.macro PTGS_TO_GS
 16198:     mov PT_GS(%esp), %gs
 162.endm
 163.macro PTGS_TO_GS_EX
 164.pushsection .fixup, "ax"
 16599:     movl $0, PT_GS(%esp)
 166        jmp 98b
 167.popsection
 168        _ASM_EXTABLE(98b,99b)
 169.endm
 170
 171.macro GS_TO_REG reg
 172        movl %gs, \reg
 173        /*CFI_REGISTER gs, \reg*/
 174.endm
 175.macro REG_TO_PTGS reg
 176        movl \reg, PT_GS(%esp)
 177        /*CFI_REL_OFFSET gs, PT_GS*/
 178.endm
 179.macro SET_KERNEL_GS reg
 180        movl $(__KERNEL_STACK_CANARY), \reg
 181        movl \reg, %gs
 182.endm
 183
 184#endif  /* CONFIG_X86_32_LAZY_GS */
 185
 186.macro SAVE_ALL
 187        cld
 188        PUSH_GS
 189        pushl_cfi %fs
 190        /*CFI_REL_OFFSET fs, 0;*/
 191        pushl_cfi %es
 192        /*CFI_REL_OFFSET es, 0;*/
 193        pushl_cfi %ds
 194        /*CFI_REL_OFFSET ds, 0;*/
 195        pushl_cfi %eax
 196        CFI_REL_OFFSET eax, 0
 197        pushl_cfi %ebp
 198        CFI_REL_OFFSET ebp, 0
 199        pushl_cfi %edi
 200        CFI_REL_OFFSET edi, 0
 201        pushl_cfi %esi
 202        CFI_REL_OFFSET esi, 0
 203        pushl_cfi %edx
 204        CFI_REL_OFFSET edx, 0
 205        pushl_cfi %ecx
 206        CFI_REL_OFFSET ecx, 0
 207        pushl_cfi %ebx
 208        CFI_REL_OFFSET ebx, 0
 209        movl $(__USER_DS), %edx
 210        movl %edx, %ds
 211        movl %edx, %es
 212        movl $(__KERNEL_PERCPU), %edx
 213        movl %edx, %fs
 214        SET_KERNEL_GS %edx
 215.endm
 216
 217.macro RESTORE_INT_REGS
 218        popl_cfi %ebx
 219        CFI_RESTORE ebx
 220        popl_cfi %ecx
 221        CFI_RESTORE ecx
 222        popl_cfi %edx
 223        CFI_RESTORE edx
 224        popl_cfi %esi
 225        CFI_RESTORE esi
 226        popl_cfi %edi
 227        CFI_RESTORE edi
 228        popl_cfi %ebp
 229        CFI_RESTORE ebp
 230        popl_cfi %eax
 231        CFI_RESTORE eax
 232.endm
 233
 234.macro RESTORE_REGS pop=0
 235        RESTORE_INT_REGS
 2361:      popl_cfi %ds
 237        /*CFI_RESTORE ds;*/
 2382:      popl_cfi %es
 239        /*CFI_RESTORE es;*/
 2403:      popl_cfi %fs
 241        /*CFI_RESTORE fs;*/
 242        POP_GS \pop
 243.pushsection .fixup, "ax"
 2444:      movl $0, (%esp)
 245        jmp 1b
 2465:      movl $0, (%esp)
 247        jmp 2b
 2486:      movl $0, (%esp)
 249        jmp 3b
 250.popsection
 251        _ASM_EXTABLE(1b,4b)
 252        _ASM_EXTABLE(2b,5b)
 253        _ASM_EXTABLE(3b,6b)
 254        POP_GS_EX
 255.endm
 256
 257.macro RING0_INT_FRAME
 258        CFI_STARTPROC simple
 259        CFI_SIGNAL_FRAME
 260        CFI_DEF_CFA esp, 3*4
 261        /*CFI_OFFSET cs, -2*4;*/
 262        CFI_OFFSET eip, -3*4
 263.endm
 264
 265.macro RING0_EC_FRAME
 266        CFI_STARTPROC simple
 267        CFI_SIGNAL_FRAME
 268        CFI_DEF_CFA esp, 4*4
 269        /*CFI_OFFSET cs, -2*4;*/
 270        CFI_OFFSET eip, -3*4
 271.endm
 272
 273.macro RING0_PTREGS_FRAME
 274        CFI_STARTPROC simple
 275        CFI_SIGNAL_FRAME
 276        CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
 277        /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
 278        CFI_OFFSET eip, PT_EIP-PT_OLDESP
 279        /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
 280        /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
 281        CFI_OFFSET eax, PT_EAX-PT_OLDESP
 282        CFI_OFFSET ebp, PT_EBP-PT_OLDESP
 283        CFI_OFFSET edi, PT_EDI-PT_OLDESP
 284        CFI_OFFSET esi, PT_ESI-PT_OLDESP
 285        CFI_OFFSET edx, PT_EDX-PT_OLDESP
 286        CFI_OFFSET ecx, PT_ECX-PT_OLDESP
 287        CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 288.endm
 289
 290ENTRY(ret_from_fork)
 291        CFI_STARTPROC
 292        pushl_cfi %eax
 293        call schedule_tail
 294        GET_THREAD_INFO(%ebp)
 295        popl_cfi %eax
 296        pushl_cfi $0x0202               # Reset kernel eflags
 297        popfl_cfi
 298        jmp syscall_exit
 299        CFI_ENDPROC
 300END(ret_from_fork)
 301
 302ENTRY(ret_from_kernel_thread)
 303        CFI_STARTPROC
 304        pushl_cfi %eax
 305        call schedule_tail
 306        GET_THREAD_INFO(%ebp)
 307        popl_cfi %eax
 308        pushl_cfi $0x0202               # Reset kernel eflags
 309        popfl_cfi
 310        movl PT_EBP(%esp),%eax
 311        call *PT_EBX(%esp)
 312        movl $0,PT_EAX(%esp)
 313        jmp syscall_exit
 314        CFI_ENDPROC
 315ENDPROC(ret_from_kernel_thread)
 316
 317/*
 318 * Return to user mode is not as complex as all this looks,
 319 * but we want the default path for a system call return to
 320 * go as quickly as possible which is why some of this is
 321 * less clear than it otherwise should be.
 322 */
 323
 324        # userspace resumption stub bypassing syscall exit tracing
 325        ALIGN
 326        RING0_PTREGS_FRAME
 327ret_from_exception:
 328        preempt_stop(CLBR_ANY)
 329ret_from_intr:
 330        GET_THREAD_INFO(%ebp)
 331#ifdef CONFIG_VM86
 332        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS and CS
 333        movb PT_CS(%esp), %al
 334        andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 335#else
 336        /*
  337         * We can be coming here from a child spawned by kernel_thread().
 338         */
 339        movl PT_CS(%esp), %eax
 340        andl $SEGMENT_RPL_MASK, %eax
 341#endif
 342        cmpl $USER_RPL, %eax
 343        jb resume_kernel                # not returning to v8086 or userspace
 344
 345ENTRY(resume_userspace)
 346        LOCKDEP_SYS_EXIT
 347        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 348                                        # setting need_resched or sigpending
 349                                        # between sampling and the iret
 350        TRACE_IRQS_OFF
 351        movl TI_flags(%ebp), %ecx
 352        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done on
 353                                        # int/exception return?
 354        jne work_pending
 355        jmp restore_all
 356END(ret_from_exception)
 357
 358#ifdef CONFIG_PREEMPT
 359ENTRY(resume_kernel)
 360        DISABLE_INTERRUPTS(CLBR_ANY)
 361need_resched:
 362        cmpl $0,PER_CPU_VAR(__preempt_count)
 363        jnz restore_all
 364        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
 365        jz restore_all
 366        call preempt_schedule_irq
 367        jmp need_resched
 368END(resume_kernel)
 369#endif
 370        CFI_ENDPROC
 371
 372/* SYSENTER_RETURN points to after the "sysenter" instruction in
 373   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 374
 375        # sysenter call handler stub
 376ENTRY(ia32_sysenter_target)
 377        CFI_STARTPROC simple
 378        CFI_SIGNAL_FRAME
 379        CFI_DEF_CFA esp, 0
 380        CFI_REGISTER esp, ebp
 381        movl TSS_sysenter_sp0(%esp),%esp
 382sysenter_past_esp:
 383        /*
  384         * Interrupts are disabled here, but we can't trace that until
  385         * enough kernel state has been set up for TRACE_IRQS_OFF to be
  386         * called - and we immediately enable interrupts again anyway.
 387         */
 388        pushl_cfi $__USER_DS
 389        /*CFI_REL_OFFSET ss, 0*/
 390        pushl_cfi %ebp
 391        CFI_REL_OFFSET esp, 0
 392        pushfl_cfi
 393        orl $X86_EFLAGS_IF, (%esp)
 394        pushl_cfi $__USER_CS
 395        /*CFI_REL_OFFSET cs, 0*/
 396        /*
 397         * Push current_thread_info()->sysenter_return to the stack.
 398         * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 399         * pushed above; +8 corresponds to copy_thread's esp0 setting.
 400         */
 401        pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
 402        CFI_REL_OFFSET eip, 0
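        /*
         * Worked through (a sketch): copy_thread() sets sp0 eight bytes
         * below the top of the THREAD_SIZE region whose bottom holds
         * thread_info, so at the push above
         *   %esp == thread_info + THREAD_SIZE - 8 - 4*4
         * and its operand evaluates to
         *   %esp + TI_sysenter_return - THREAD_SIZE + 8 + 4*4
         *        == thread_info + TI_sysenter_return,
         * i.e. current_thread_info()->sysenter_return.
         */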
 403
 404        pushl_cfi %eax
 405        SAVE_ALL
 406        ENABLE_INTERRUPTS(CLBR_NONE)
 407
 408/*
 409 * Load the potential sixth argument from user stack.
 410 * Careful about security.
 411 */
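/*
 * Roughly: %ebp still holds the user stack pointer here, and the 4-byte
 * load at 1: below must lie entirely below __PAGE_OFFSET (the start of
 * kernel mappings); the "-3" makes the check cover the last byte of that
 * access.
 */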
 412        cmpl $__PAGE_OFFSET-3,%ebp
 413        jae syscall_fault
 414        ASM_STAC
 4151:      movl (%ebp),%ebp
 416        ASM_CLAC
 417        movl %ebp,PT_EBP(%esp)
 418        _ASM_EXTABLE(1b,syscall_fault)
 419
 420        GET_THREAD_INFO(%ebp)
 421
 422        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 423        jnz sysenter_audit
 424sysenter_do_call:
 425        cmpl $(NR_syscalls), %eax
 426        jae sysenter_badsys
 427        call *sys_call_table(,%eax,4)
 428sysenter_after_call:
 429        movl %eax,PT_EAX(%esp)
 430        LOCKDEP_SYS_EXIT
 431        DISABLE_INTERRUPTS(CLBR_ANY)
 432        TRACE_IRQS_OFF
 433        movl TI_flags(%ebp), %ecx
 434        testl $_TIF_ALLWORK_MASK, %ecx
 435        jne sysexit_audit
 436sysenter_exit:
 437/* if something modifies registers it must also disable sysexit */
 438        movl PT_EIP(%esp), %edx
 439        movl PT_OLDESP(%esp), %ecx
 440        xorl %ebp,%ebp
 441        TRACE_IRQS_ON
 4421:      mov  PT_FS(%esp), %fs
 443        PTGS_TO_GS
 444        ENABLE_INTERRUPTS_SYSEXIT
 445
 446#ifdef CONFIG_AUDITSYSCALL
 447sysenter_audit:
 448        testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 449        jnz syscall_trace_entry
 450        addl $4,%esp
 451        CFI_ADJUST_CFA_OFFSET -4
 452        /* %esi already in 8(%esp)         6th arg: 4th syscall arg */
 453        /* %edx already in 4(%esp)         5th arg: 3rd syscall arg */
 454        /* %ecx already in 0(%esp)         4th arg: 2nd syscall arg */
 455        movl %ebx,%ecx                  /* 3rd arg: 1st syscall arg */
 456        movl %eax,%edx                  /* 2nd arg: syscall number */
 457        movl $AUDIT_ARCH_I386,%eax      /* 1st arg: audit arch */
 458        call __audit_syscall_entry
 459        pushl_cfi %ebx
 460        movl PT_EAX(%esp),%eax          /* reload syscall number */
 461        jmp sysenter_do_call
 462
 463sysexit_audit:
 464        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 465        jne syscall_exit_work
 466        TRACE_IRQS_ON
 467        ENABLE_INTERRUPTS(CLBR_ANY)
 468        movl %eax,%edx          /* second arg, syscall return value */
 469        cmpl $-MAX_ERRNO,%eax   /* is it an error ? */
 470        setbe %al               /* 1 if so, 0 if not */
 471        movzbl %al,%eax         /* zero-extend that */
 472        call __audit_syscall_exit
 473        DISABLE_INTERRUPTS(CLBR_ANY)
 474        TRACE_IRQS_OFF
 475        movl TI_flags(%ebp), %ecx
 476        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 477        jne syscall_exit_work
 478        movl PT_EAX(%esp),%eax  /* reload syscall return value */
 479        jmp sysenter_exit
 480#endif
 481
 482        CFI_ENDPROC
 483.pushsection .fixup,"ax"
 4842:      movl $0,PT_FS(%esp)
 485        jmp 1b
 486.popsection
 487        _ASM_EXTABLE(1b,2b)
 488        PTGS_TO_GS_EX
 489ENDPROC(ia32_sysenter_target)
 490
 491        # system call handler stub
 492ENTRY(system_call)
 493        RING0_INT_FRAME                 # can't unwind into user space anyway
 494        ASM_CLAC
 495        pushl_cfi %eax                  # save orig_eax
 496        SAVE_ALL
 497        GET_THREAD_INFO(%ebp)
 498                                        # system call tracing in operation / emulation
 499        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 500        jnz syscall_trace_entry
 501        cmpl $(NR_syscalls), %eax
 502        jae syscall_badsys
 503syscall_call:
 504        call *sys_call_table(,%eax,4)
 505syscall_after_call:
 506        movl %eax,PT_EAX(%esp)          # store the return value
 507syscall_exit:
 508        LOCKDEP_SYS_EXIT
 509        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 510                                        # setting need_resched or sigpending
 511                                        # between sampling and the iret
 512        TRACE_IRQS_OFF
 513        movl TI_flags(%ebp), %ecx
 514        testl $_TIF_ALLWORK_MASK, %ecx  # current->work
 515        jne syscall_exit_work
 516
 517restore_all:
 518        TRACE_IRQS_IRET
 519restore_all_notrace:
 520#ifdef CONFIG_X86_ESPFIX32
 521        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS, SS and CS
 522        # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 523        # are returning to the kernel.
 524        # See comments in process.c:copy_thread() for details.
 525        movb PT_OLDSS(%esp), %ah
 526        movb PT_CS(%esp), %al
 527        andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 528        cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 529        CFI_REMEMBER_STATE
 530        je ldt_ss                       # returning to user-space with LDT SS
 531#endif
 532restore_nocheck:
 533        RESTORE_REGS 4                  # skip orig_eax/error_code
 534irq_return:
 535        INTERRUPT_RETURN
 536.section .fixup,"ax"
 537ENTRY(iret_exc)
 538        pushl $0                        # no error code
 539        pushl $do_iret_error
 540        jmp error_code
 541.previous
 542        _ASM_EXTABLE(irq_return,iret_exc)
 543
 544#ifdef CONFIG_X86_ESPFIX32
 545        CFI_RESTORE_STATE
 546ldt_ss:
 547#ifdef CONFIG_PARAVIRT
 548        /*
 549         * The kernel can't run on a non-flat stack if paravirt mode
 550         * is active.  Rather than try to fixup the high bits of
 551         * ESP, bypass this code entirely.  This may break DOSemu
 552         * and/or Wine support in a paravirt VM, although the option
 553         * is still available to implement the setting of the high
 554         * 16-bits in the INTERRUPT_RETURN paravirt-op.
 555         */
 556        cmpl $0, pv_info+PARAVIRT_enabled
 557        jne restore_nocheck
 558#endif
 559
 560/*
 561 * Setup and switch to ESPFIX stack
 562 *
 563 * We're returning to userspace with a 16 bit stack. The CPU will not
 564 * restore the high word of ESP for us on executing iret... This is an
 565 * "official" bug of all the x86-compatible CPUs, which we can work
 566 * around to make dosemu and wine happy. We do this by preloading the
 567 * high word of ESP with the high word of the userspace ESP while
 568 * compensating for the offset by changing to the ESPFIX segment with
  569 * a base address that makes up for the difference.
 570 */
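/*
 * For illustration only, with made-up addresses: if the kernel %esp here is
 * 0xc1234f00 and the userspace ESP is 0x5678abcd, the code below builds the
 * new %esp 0x56784f00 and programs the ESPFIX segment base (bits 16..31) to
 * 0xc1234f00 - 0x56784f00 = 0x6aab0000, so ss:esp still addresses the real
 * kernel stack while the ESP register's high word already matches userspace.
 */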
 571#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
 572        mov %esp, %edx                  /* load kernel esp */
 573        mov PT_OLDESP(%esp), %eax       /* load userspace esp */
 574        mov %dx, %ax                    /* eax: new kernel esp */
 575        sub %eax, %edx                  /* offset (low word is 0) */
 576        shr $16, %edx
 577        mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
 578        mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
 579        pushl_cfi $__ESPFIX_SS
 580        pushl_cfi %eax                  /* new kernel esp */
 581        /* Disable interrupts, but do not irqtrace this section: we
 582         * will soon execute iret and the tracer was already set to
 583         * the irqstate after the iret */
 584        DISABLE_INTERRUPTS(CLBR_EAX)
 585        lss (%esp), %esp                /* switch to espfix segment */
 586        CFI_ADJUST_CFA_OFFSET -8
 587        jmp restore_nocheck
 588#endif
 589        CFI_ENDPROC
 590ENDPROC(system_call)
 591
 592        # perform work that needs to be done immediately before resumption
 593        ALIGN
 594        RING0_PTREGS_FRAME              # can't unwind into user space anyway
 595work_pending:
 596        testb $_TIF_NEED_RESCHED, %cl
 597        jz work_notifysig
 598work_resched:
 599        call schedule
 600        LOCKDEP_SYS_EXIT
 601        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 602                                        # setting need_resched or sigpending
 603                                        # between sampling and the iret
 604        TRACE_IRQS_OFF
 605        movl TI_flags(%ebp), %ecx
 606        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done other
 607                                        # than syscall tracing?
 608        jz restore_all
 609        testb $_TIF_NEED_RESCHED, %cl
 610        jnz work_resched
 611
 612work_notifysig:                         # deal with pending signals and
 613                                        # notify-resume requests
 614#ifdef CONFIG_VM86
 615        testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 616        movl %esp, %eax
 617        jne work_notifysig_v86          # returning to kernel-space or
 618                                        # vm86-space
 6191:
 620#else
 621        movl %esp, %eax
 622#endif
 623        TRACE_IRQS_ON
 624        ENABLE_INTERRUPTS(CLBR_NONE)
 625        movb PT_CS(%esp), %bl
 626        andb $SEGMENT_RPL_MASK, %bl
 627        cmpb $USER_RPL, %bl
 628        jb resume_kernel
 629        xorl %edx, %edx
 630        call do_notify_resume
 631        jmp resume_userspace
 632
 633#ifdef CONFIG_VM86
 634        ALIGN
 635work_notifysig_v86:
 636        pushl_cfi %ecx                  # save ti_flags for do_notify_resume
 637        call save_v86_state             # %eax contains pt_regs pointer
 638        popl_cfi %ecx
 639        movl %eax, %esp
 640        jmp 1b
 641#endif
 642END(work_pending)
 643
 644        # perform syscall exit tracing
 645        ALIGN
 646syscall_trace_entry:
 647        movl $-ENOSYS,PT_EAX(%esp)
 648        movl %esp, %eax
 649        call syscall_trace_enter
 650        /* What it returned is what we'll actually use.  */
 651        cmpl $(NR_syscalls), %eax
 652        jnae syscall_call
 653        jmp syscall_exit
 654END(syscall_trace_entry)
 655
 656        # perform syscall exit tracing
 657        ALIGN
 658syscall_exit_work:
 659        testl $_TIF_WORK_SYSCALL_EXIT, %ecx
 660        jz work_pending
 661        TRACE_IRQS_ON
 662        ENABLE_INTERRUPTS(CLBR_ANY)     # could let syscall_trace_leave() call
 663                                        # schedule() instead
 664        movl %esp, %eax
 665        call syscall_trace_leave
 666        jmp resume_userspace
 667END(syscall_exit_work)
 668        CFI_ENDPROC
 669
 670        RING0_INT_FRAME                 # can't unwind into user space anyway
 671syscall_fault:
 672        ASM_CLAC
 673        GET_THREAD_INFO(%ebp)
 674        movl $-EFAULT,PT_EAX(%esp)
 675        jmp resume_userspace
 676END(syscall_fault)
 677
 678syscall_badsys:
 679        movl $-ENOSYS,%eax
 680        jmp syscall_after_call
 681END(syscall_badsys)
 682
 683sysenter_badsys:
 684        movl $-ENOSYS,%eax
 685        jmp sysenter_after_call
  686END(sysenter_badsys)
 687        CFI_ENDPROC
 688
 689.macro FIXUP_ESPFIX_STACK
 690/*
  691 * Switch back from the ESPFIX stack to the normal zero-based stack
 692 *
 693 * We can't call C functions using the ESPFIX stack. This code reads
  694 * the high word of the segment base from the GDT and switches to the
 695 * normal stack and adjusts ESP with the matching offset.
 696 */
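/*
 * In other words (a sketch): read back the base bits 16..31 that ldt_ss
 * wrote into the ESPFIX GDT entry, add them to the espfix-relative %esp to
 * recover the flat-address kernel stack pointer, then reload ss:esp with
 * __KERNEL_DS and that address.
 */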
 697#ifdef CONFIG_X86_ESPFIX32
 698        /* fixup the stack */
 699        mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
 700        mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
 701        shl $16, %eax
 702        addl %esp, %eax                 /* the adjusted stack pointer */
 703        pushl_cfi $__KERNEL_DS
 704        pushl_cfi %eax
 705        lss (%esp), %esp                /* switch to the normal stack segment */
 706        CFI_ADJUST_CFA_OFFSET -8
 707#endif
 708.endm
 709.macro UNWIND_ESPFIX_STACK
 710#ifdef CONFIG_X86_ESPFIX32
 711        movl %ss, %eax
 712        /* see if on espfix stack */
 713        cmpw $__ESPFIX_SS, %ax
 714        jne 27f
 715        movl $__KERNEL_DS, %eax
 716        movl %eax, %ds
 717        movl %eax, %es
 718        /* switch to normal stack */
 719        FIXUP_ESPFIX_STACK
 72027:
 721#endif
 722.endm
 723
 724/*
 725 * Build the entry stubs and pointer table with some assembler magic.
 726 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 727 * single cache line on all modern x86 implementations.
 728 */
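/*
 * A rough size breakdown of one chunk: each "pushl $imm8" is 2 bytes and
 * each short "jmp 2f" is 2 bytes, so 7 pushes, 6 short jumps and one final
 * 5-byte "jmp common_interrupt" come to 31 bytes, fitting the 32-byte
 * alignment used below.
 */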
 729.section .init.rodata,"a"
 730ENTRY(interrupt)
 731.section .entry.text, "ax"
 732        .p2align 5
 733        .p2align CONFIG_X86_L1_CACHE_SHIFT
 734ENTRY(irq_entries_start)
 735        RING0_INT_FRAME
 736vector=FIRST_EXTERNAL_VECTOR
 737.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
 738        .balign 32
 739  .rept 7
 740    .if vector < NR_VECTORS
 741      .if vector <> FIRST_EXTERNAL_VECTOR
 742        CFI_ADJUST_CFA_OFFSET -4
 743      .endif
 7441:      pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
 745      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 746        jmp 2f
 747      .endif
 748      .previous
 749        .long 1b
 750      .section .entry.text, "ax"
 751vector=vector+1
 752    .endif
 753  .endr
 7542:      jmp common_interrupt
 755.endr
 756END(irq_entries_start)
 757
 758.previous
 759END(interrupt)
 760.previous
 761
 762/*
 763 * the CPU automatically disables interrupts when executing an IRQ vector,
 764 * so IRQ-flags tracing has to follow that:
 765 */
 766        .p2align CONFIG_X86_L1_CACHE_SHIFT
 767common_interrupt:
 768        ASM_CLAC
 769        addl $-0x80,(%esp)      /* Adjust vector into the [-256,-1] range */
 770        SAVE_ALL
 771        TRACE_IRQS_OFF
 772        movl %esp,%eax
 773        call do_IRQ
 774        jmp ret_from_intr
 775ENDPROC(common_interrupt)
 776        CFI_ENDPROC
 777
 778#define BUILD_INTERRUPT3(name, nr, fn)  \
 779ENTRY(name)                             \
 780        RING0_INT_FRAME;                \
 781        ASM_CLAC;                       \
 782        pushl_cfi $~(nr);               \
 783        SAVE_ALL;                       \
 784        TRACE_IRQS_OFF                  \
 785        movl %esp,%eax;                 \
 786        call fn;                        \
 787        jmp ret_from_intr;              \
 788        CFI_ENDPROC;                    \
 789ENDPROC(name)
 790
 791
 792#ifdef CONFIG_TRACING
 793#define TRACE_BUILD_INTERRUPT(name, nr)         \
 794        BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
 795#else
 796#define TRACE_BUILD_INTERRUPT(name, nr)
 797#endif
 798
 799#define BUILD_INTERRUPT(name, nr) \
 800        BUILD_INTERRUPT3(name, nr, smp_##name); \
 801        TRACE_BUILD_INTERRUPT(name, nr)
 802
 803/* The include is where all of the SMP etc. interrupts come from */
 804#include <asm/entry_arch.h>
 805
 806ENTRY(coprocessor_error)
 807        RING0_INT_FRAME
 808        ASM_CLAC
 809        pushl_cfi $0
 810        pushl_cfi $do_coprocessor_error
 811        jmp error_code
 812        CFI_ENDPROC
 813END(coprocessor_error)
 814
 815ENTRY(simd_coprocessor_error)
 816        RING0_INT_FRAME
 817        ASM_CLAC
 818        pushl_cfi $0
 819#ifdef CONFIG_X86_INVD_BUG
 820        /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
 821661:    pushl_cfi $do_general_protection
 822662:
 823.section .altinstructions,"a"
 824        altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 825.previous
 826.section .altinstr_replacement,"ax"
 827663:    pushl $do_simd_coprocessor_error
 828664:
 829.previous
 830#else
 831        pushl_cfi $do_simd_coprocessor_error
 832#endif
 833        jmp error_code
 834        CFI_ENDPROC
 835END(simd_coprocessor_error)
 836
 837ENTRY(device_not_available)
 838        RING0_INT_FRAME
 839        ASM_CLAC
 840        pushl_cfi $-1                   # mark this as an int
 841        pushl_cfi $do_device_not_available
 842        jmp error_code
 843        CFI_ENDPROC
 844END(device_not_available)
 845
 846#ifdef CONFIG_PARAVIRT
 847ENTRY(native_iret)
 848        iret
 849        _ASM_EXTABLE(native_iret, iret_exc)
 850END(native_iret)
 851
 852ENTRY(native_irq_enable_sysexit)
 853        sti
 854        sysexit
 855END(native_irq_enable_sysexit)
 856#endif
 857
 858ENTRY(overflow)
 859        RING0_INT_FRAME
 860        ASM_CLAC
 861        pushl_cfi $0
 862        pushl_cfi $do_overflow
 863        jmp error_code
 864        CFI_ENDPROC
 865END(overflow)
 866
 867ENTRY(bounds)
 868        RING0_INT_FRAME
 869        ASM_CLAC
 870        pushl_cfi $0
 871        pushl_cfi $do_bounds
 872        jmp error_code
 873        CFI_ENDPROC
 874END(bounds)
 875
 876ENTRY(invalid_op)
 877        RING0_INT_FRAME
 878        ASM_CLAC
 879        pushl_cfi $0
 880        pushl_cfi $do_invalid_op
 881        jmp error_code
 882        CFI_ENDPROC
 883END(invalid_op)
 884
 885ENTRY(coprocessor_segment_overrun)
 886        RING0_INT_FRAME
 887        ASM_CLAC
 888        pushl_cfi $0
 889        pushl_cfi $do_coprocessor_segment_overrun
 890        jmp error_code
 891        CFI_ENDPROC
 892END(coprocessor_segment_overrun)
 893
 894ENTRY(invalid_TSS)
 895        RING0_EC_FRAME
 896        ASM_CLAC
 897        pushl_cfi $do_invalid_TSS
 898        jmp error_code
 899        CFI_ENDPROC
 900END(invalid_TSS)
 901
 902ENTRY(segment_not_present)
 903        RING0_EC_FRAME
 904        ASM_CLAC
 905        pushl_cfi $do_segment_not_present
 906        jmp error_code
 907        CFI_ENDPROC
 908END(segment_not_present)
 909
 910ENTRY(stack_segment)
 911        RING0_EC_FRAME
 912        ASM_CLAC
 913        pushl_cfi $do_stack_segment
 914        jmp error_code
 915        CFI_ENDPROC
 916END(stack_segment)
 917
 918ENTRY(alignment_check)
 919        RING0_EC_FRAME
 920        ASM_CLAC
 921        pushl_cfi $do_alignment_check
 922        jmp error_code
 923        CFI_ENDPROC
 924END(alignment_check)
 925
 926ENTRY(divide_error)
 927        RING0_INT_FRAME
 928        ASM_CLAC
 929        pushl_cfi $0                    # no error code
 930        pushl_cfi $do_divide_error
 931        jmp error_code
 932        CFI_ENDPROC
 933END(divide_error)
 934
 935#ifdef CONFIG_X86_MCE
 936ENTRY(machine_check)
 937        RING0_INT_FRAME
 938        ASM_CLAC
 939        pushl_cfi $0
 940        pushl_cfi machine_check_vector
 941        jmp error_code
 942        CFI_ENDPROC
 943END(machine_check)
 944#endif
 945
 946ENTRY(spurious_interrupt_bug)
 947        RING0_INT_FRAME
 948        ASM_CLAC
 949        pushl_cfi $0
 950        pushl_cfi $do_spurious_interrupt_bug
 951        jmp error_code
 952        CFI_ENDPROC
 953END(spurious_interrupt_bug)
 954
 955#ifdef CONFIG_XEN
 956/* Xen doesn't set %esp to be precisely what the normal sysenter
 957   entrypoint expects, so fix it up before using the normal path. */
 958ENTRY(xen_sysenter_target)
 959        RING0_INT_FRAME
 960        addl $5*4, %esp         /* remove xen-provided frame */
 961        CFI_ADJUST_CFA_OFFSET -5*4
 962        jmp sysenter_past_esp
 963        CFI_ENDPROC
 964
 965ENTRY(xen_hypervisor_callback)
 966        CFI_STARTPROC
 967        pushl_cfi $-1 /* orig_ax = -1 => not a system call */
 968        SAVE_ALL
 969        TRACE_IRQS_OFF
 970
 971        /* Check to see if we got the event in the critical
 972           region in xen_iret_direct, after we've reenabled
 973           events and checked for pending events.  This simulates
  974           the iret instruction's behaviour, where it delivers a
 975           pending interrupt when enabling interrupts. */
 976        movl PT_EIP(%esp),%eax
 977        cmpl $xen_iret_start_crit,%eax
 978        jb   1f
 979        cmpl $xen_iret_end_crit,%eax
 980        jae  1f
 981
 982        jmp  xen_iret_crit_fixup
 983
 984ENTRY(xen_do_upcall)
 9851:      mov %esp, %eax
 986        call xen_evtchn_do_upcall
 987        jmp  ret_from_intr
 988        CFI_ENDPROC
 989ENDPROC(xen_hypervisor_callback)
 990
 991# Hypervisor uses this for application faults while it executes.
 992# We get here for two reasons:
 993#  1. Fault while reloading DS, ES, FS or GS
 994#  2. Fault while executing IRET
 995# Category 1 we fix up by reattempting the load, and zeroing the segment
 996# register if the load fails.
 997# Category 2 we fix up by jumping to do_iret_error. We cannot use the
 998# normal Linux return path in this case because if we use the IRET hypercall
 999# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1000# We distinguish between categories by maintaining a status value in EAX.
1001ENTRY(xen_failsafe_callback)
1002        CFI_STARTPROC
1003        pushl_cfi %eax
1004        movl $1,%eax
10051:      mov 4(%esp),%ds
10062:      mov 8(%esp),%es
10073:      mov 12(%esp),%fs
10084:      mov 16(%esp),%gs
1009        /* EAX == 0 => Category 1 (Bad segment)
1010           EAX != 0 => Category 2 (Bad IRET) */
1011        testl %eax,%eax
1012        popl_cfi %eax
1013        lea 16(%esp),%esp
1014        CFI_ADJUST_CFA_OFFSET -16
1015        jz 5f
1016        jmp iret_exc
10175:      pushl_cfi $-1 /* orig_ax = -1 => not a system call */
1018        SAVE_ALL
1019        jmp ret_from_exception
1020        CFI_ENDPROC
1021
1022.section .fixup,"ax"
10236:      xorl %eax,%eax
1024        movl %eax,4(%esp)
1025        jmp 1b
10267:      xorl %eax,%eax
1027        movl %eax,8(%esp)
1028        jmp 2b
10298:      xorl %eax,%eax
1030        movl %eax,12(%esp)
1031        jmp 3b
10329:      xorl %eax,%eax
1033        movl %eax,16(%esp)
1034        jmp 4b
1035.previous
1036        _ASM_EXTABLE(1b,6b)
1037        _ASM_EXTABLE(2b,7b)
1038        _ASM_EXTABLE(3b,8b)
1039        _ASM_EXTABLE(4b,9b)
1040ENDPROC(xen_failsafe_callback)
1041
1042BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1043                xen_evtchn_do_upcall)
1044
1045#endif  /* CONFIG_XEN */
1046
1047#if IS_ENABLED(CONFIG_HYPERV)
1048
1049BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1050        hyperv_vector_handler)
1051
1052#endif /* CONFIG_HYPERV */
1053
1054#ifdef CONFIG_FUNCTION_TRACER
1055#ifdef CONFIG_DYNAMIC_FTRACE
1056
1057ENTRY(mcount)
1058        ret
1059END(mcount)
1060
1061ENTRY(ftrace_caller)
1062        cmpl $0, function_trace_stop
1063        jne  ftrace_stub
1064
1065        pushl %eax
1066        pushl %ecx
1067        pushl %edx
1068        pushl $0        /* Pass NULL as regs pointer */
1069        movl 4*4(%esp), %eax
1070        movl 0x4(%ebp), %edx
1071        movl function_trace_op, %ecx
1072        subl $MCOUNT_INSN_SIZE, %eax
1073
1074.globl ftrace_call
1075ftrace_call:
1076        call ftrace_stub
1077
1078        addl $4,%esp    /* skip NULL pointer */
1079        popl %edx
1080        popl %ecx
1081        popl %eax
1082ftrace_ret:
1083#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1084.globl ftrace_graph_call
1085ftrace_graph_call:
1086        jmp ftrace_stub
1087#endif
1088
1089.globl ftrace_stub
1090ftrace_stub:
1091        ret
1092END(ftrace_caller)
1093
1094ENTRY(ftrace_regs_caller)
1095        pushf   /* push flags before compare (in cs location) */
1096        cmpl $0, function_trace_stop
1097        jne ftrace_restore_flags
1098
1099        /*
1100         * i386 does not save SS and ESP when coming from kernel.
1101         * Instead, to get sp, &regs->sp is used (see ptrace.h).
1102         * Unfortunately, that means eflags must be at the same location
1103         * as the current return ip is. We move the return ip into the
1104         * ip location, and move flags into the return ip location.
1105         */
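        /*
         * Resulting frame (a sketch, word offsets from the final %esp):
         *   12*4  regs->ip     <- copy of the return address pushed below
         *   13*4  regs->cs     <- currently holds the pushf'd flags
         *   14*4  regs->flags  <- currently holds the original return address
         * The movl pair further down copies the flags into 14*4 and writes
         * __KERNEL_CS into 13*4, giving a proper pt_regs layout.
         */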
1106        pushl 4(%esp)   /* save return ip into ip slot */
1107
1108        pushl $0        /* Load 0 into orig_ax */
1109        pushl %gs
1110        pushl %fs
1111        pushl %es
1112        pushl %ds
1113        pushl %eax
1114        pushl %ebp
1115        pushl %edi
1116        pushl %esi
1117        pushl %edx
1118        pushl %ecx
1119        pushl %ebx
1120
1121        movl 13*4(%esp), %eax   /* Get the saved flags */
1122        movl %eax, 14*4(%esp)   /* Move saved flags into regs->flags location */
1123                                /* clobbering return ip */
1124        movl $__KERNEL_CS,13*4(%esp)
1125
1126        movl 12*4(%esp), %eax   /* Load ip (1st parameter) */
1127        subl $MCOUNT_INSN_SIZE, %eax    /* Adjust ip */
1128        movl 0x4(%ebp), %edx    /* Load parent ip (2nd parameter) */
1129        movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
1130        pushl %esp              /* Save pt_regs as 4th parameter */
1131
1132GLOBAL(ftrace_regs_call)
1133        call ftrace_stub
1134
1135        addl $4, %esp           /* Skip pt_regs */
1136        movl 14*4(%esp), %eax   /* Move flags back into cs */
1137        movl %eax, 13*4(%esp)   /* Needed to keep addl from modifying flags */
1138        movl 12*4(%esp), %eax   /* Get return ip from regs->ip */
1139        movl %eax, 14*4(%esp)   /* Put return ip back for ret */
1140
1141        popl %ebx
1142        popl %ecx
1143        popl %edx
1144        popl %esi
1145        popl %edi
1146        popl %ebp
1147        popl %eax
1148        popl %ds
1149        popl %es
1150        popl %fs
1151        popl %gs
1152        addl $8, %esp           /* Skip orig_ax and ip */
1153        popf                    /* Pop flags at end (no addl to corrupt flags) */
1154        jmp ftrace_ret
1155
1156ftrace_restore_flags:
1157        popf
1158        jmp  ftrace_stub
1159#else /* ! CONFIG_DYNAMIC_FTRACE */
1160
1161ENTRY(mcount)
1162        cmpl $__PAGE_OFFSET, %esp
1163        jb ftrace_stub          /* Paging not enabled yet? */
1164
1165        cmpl $0, function_trace_stop
1166        jne  ftrace_stub
1167
1168        cmpl $ftrace_stub, ftrace_trace_function
1169        jnz trace
1170#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1171        cmpl $ftrace_stub, ftrace_graph_return
1172        jnz ftrace_graph_caller
1173
1174        cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1175        jnz ftrace_graph_caller
1176#endif
1177.globl ftrace_stub
1178ftrace_stub:
1179        ret
1180
1181        /* taken from glibc */
1182trace:
1183        pushl %eax
1184        pushl %ecx
1185        pushl %edx
1186        movl 0xc(%esp), %eax
1187        movl 0x4(%ebp), %edx
1188        subl $MCOUNT_INSN_SIZE, %eax
1189
1190        call *ftrace_trace_function
1191
1192        popl %edx
1193        popl %ecx
1194        popl %eax
1195        jmp ftrace_stub
1196END(mcount)
1197#endif /* CONFIG_DYNAMIC_FTRACE */
1198#endif /* CONFIG_FUNCTION_TRACER */
1199
1200#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1201ENTRY(ftrace_graph_caller)
1202        pushl %eax
1203        pushl %ecx
1204        pushl %edx
1205        movl 0xc(%esp), %edx
1206        lea 0x4(%ebp), %eax
1207        movl (%ebp), %ecx
1208        subl $MCOUNT_INSN_SIZE, %edx
1209        call prepare_ftrace_return
1210        popl %edx
1211        popl %ecx
1212        popl %eax
1213        ret
1214END(ftrace_graph_caller)
1215
1216.globl return_to_handler
1217return_to_handler:
1218        pushl %eax
1219        pushl %edx
1220        movl %ebp, %eax
1221        call ftrace_return_to_handler
1222        movl %eax, %ecx
1223        popl %edx
1224        popl %eax
1225        jmp *%ecx
1226#endif
1227
1228#ifdef CONFIG_TRACING
1229ENTRY(trace_page_fault)
1230        RING0_EC_FRAME
1231        ASM_CLAC
1232        pushl_cfi $trace_do_page_fault
1233        jmp error_code
1234        CFI_ENDPROC
1235END(trace_page_fault)
1236#endif
1237
1238ENTRY(page_fault)
1239        RING0_EC_FRAME
1240        ASM_CLAC
1241        pushl_cfi $do_page_fault
1242        ALIGN
1243error_code:
1244        /* the function address is in %gs's slot on the stack */
1245        pushl_cfi %fs
1246        /*CFI_REL_OFFSET fs, 0*/
1247        pushl_cfi %es
1248        /*CFI_REL_OFFSET es, 0*/
1249        pushl_cfi %ds
1250        /*CFI_REL_OFFSET ds, 0*/
1251        pushl_cfi %eax
1252        CFI_REL_OFFSET eax, 0
1253        pushl_cfi %ebp
1254        CFI_REL_OFFSET ebp, 0
1255        pushl_cfi %edi
1256        CFI_REL_OFFSET edi, 0
1257        pushl_cfi %esi
1258        CFI_REL_OFFSET esi, 0
1259        pushl_cfi %edx
1260        CFI_REL_OFFSET edx, 0
1261        pushl_cfi %ecx
1262        CFI_REL_OFFSET ecx, 0
1263        pushl_cfi %ebx
1264        CFI_REL_OFFSET ebx, 0
1265        cld
1266        movl $(__KERNEL_PERCPU), %ecx
1267        movl %ecx, %fs
1268        UNWIND_ESPFIX_STACK
1269        GS_TO_REG %ecx
1270        movl PT_GS(%esp), %edi          # get the function address
1271        movl PT_ORIG_EAX(%esp), %edx    # get the error code
1272        movl $-1, PT_ORIG_EAX(%esp)     # no syscall to restart
1273        REG_TO_PTGS %ecx
1274        SET_KERNEL_GS %ecx
1275        movl $(__USER_DS), %ecx
1276        movl %ecx, %ds
1277        movl %ecx, %es
1278        TRACE_IRQS_OFF
1279        movl %esp,%eax                  # pt_regs pointer
1280        call *%edi
1281        jmp ret_from_exception
1282        CFI_ENDPROC
1283END(page_fault)
1284
1285/*
1286 * Debug traps and NMI can happen at the one SYSENTER instruction
1287 * that sets up the real kernel stack. Check here, since we can't
1288 * allow the wrong stack to be used.
1289 *
1290 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1291 * already pushed 3 words if it hits on the sysenter instruction:
1292 * eflags, cs and eip.
1293 *
1294 * We just load the right stack, and push the three (known) values
1295 * by hand onto the new stack - while updating the return eip past
1296 * the instruction that would have done it for sysenter.
1297 */
1298.macro FIX_STACK offset ok label
1299        cmpw $__KERNEL_CS, 4(%esp)
1300        jne \ok
1301\label:
1302        movl TSS_sysenter_sp0 + \offset(%esp), %esp
1303        CFI_DEF_CFA esp, 0
1304        CFI_UNDEFINED eip
1305        pushfl_cfi
1306        pushl_cfi $__KERNEL_CS
1307        pushl_cfi $sysenter_past_esp
1308        CFI_REL_OFFSET eip, 0
1309.endm
1310
1311ENTRY(debug)
1312        RING0_INT_FRAME
1313        ASM_CLAC
1314        cmpl $ia32_sysenter_target,(%esp)
1315        jne debug_stack_correct
1316        FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1317debug_stack_correct:
1318        pushl_cfi $-1                   # mark this as an int
1319        SAVE_ALL
1320        TRACE_IRQS_OFF
1321        xorl %edx,%edx                  # error code 0
1322        movl %esp,%eax                  # pt_regs pointer
1323        call do_debug
1324        jmp ret_from_exception
1325        CFI_ENDPROC
1326END(debug)
1327
1328/*
1329 * NMI is doubly nasty. It can happen _while_ we're handling
1330 * a debug fault, and the debug fault hasn't yet been able to
 1331 * clear up the stack. So we first check whether we got an
1332 * NMI on the sysenter entry path, but after that we need to
1333 * check whether we got an NMI on the debug path where the debug
1334 * fault happened on the sysenter path.
1335 */
1336ENTRY(nmi)
1337        RING0_INT_FRAME
1338        ASM_CLAC
1339#ifdef CONFIG_X86_ESPFIX32
1340        pushl_cfi %eax
1341        movl %ss, %eax
1342        cmpw $__ESPFIX_SS, %ax
1343        popl_cfi %eax
1344        je nmi_espfix_stack
1345#endif
1346        cmpl $ia32_sysenter_target,(%esp)
1347        je nmi_stack_fixup
1348        pushl_cfi %eax
1349        movl %esp,%eax
1350        /* Do not access memory above the end of our stack page,
1351         * it might not exist.
1352         */
1353        andl $(THREAD_SIZE-1),%eax
1354        cmpl $(THREAD_SIZE-20),%eax
1355        popl_cfi %eax
1356        jae nmi_stack_correct
1357        cmpl $ia32_sysenter_target,12(%esp)
1358        je nmi_debug_stack_check
1359nmi_stack_correct:
1360        /* We have a RING0_INT_FRAME here */
1361        pushl_cfi %eax
1362        SAVE_ALL
1363        xorl %edx,%edx          # zero error code
1364        movl %esp,%eax          # pt_regs pointer
1365        call do_nmi
1366        jmp restore_all_notrace
1367        CFI_ENDPROC
1368
1369nmi_stack_fixup:
1370        RING0_INT_FRAME
1371        FIX_STACK 12, nmi_stack_correct, 1
1372        jmp nmi_stack_correct
1373
1374nmi_debug_stack_check:
1375        /* We have a RING0_INT_FRAME here */
1376        cmpw $__KERNEL_CS,16(%esp)
1377        jne nmi_stack_correct
1378        cmpl $debug,(%esp)
1379        jb nmi_stack_correct
1380        cmpl $debug_esp_fix_insn,(%esp)
1381        ja nmi_stack_correct
1382        FIX_STACK 24, nmi_stack_correct, 1
1383        jmp nmi_stack_correct
1384
1385#ifdef CONFIG_X86_ESPFIX32
1386nmi_espfix_stack:
1387        /* We have a RING0_INT_FRAME here.
1388         *
 1389         * create the ss:esp pointer that the lss below uses to switch back
1390         */
1391        pushl_cfi %ss
1392        pushl_cfi %esp
1393        addl $4, (%esp)
1394        /* copy the iret frame of 12 bytes */
1395        .rept 3
1396        pushl_cfi 16(%esp)
1397        .endr
1398        pushl_cfi %eax
1399        SAVE_ALL
1400        FIXUP_ESPFIX_STACK              # %eax == %esp
1401        xorl %edx,%edx                  # zero error code
1402        call do_nmi
1403        RESTORE_REGS
1404        lss 12+4(%esp), %esp            # back to espfix stack
1405        CFI_ADJUST_CFA_OFFSET -24
1406        jmp irq_return
1407#endif
1408        CFI_ENDPROC
1409END(nmi)
1410
1411ENTRY(int3)
1412        RING0_INT_FRAME
1413        ASM_CLAC
1414        pushl_cfi $-1                   # mark this as an int
1415        SAVE_ALL
1416        TRACE_IRQS_OFF
1417        xorl %edx,%edx          # zero error code
1418        movl %esp,%eax          # pt_regs pointer
1419        call do_int3
1420        jmp ret_from_exception
1421        CFI_ENDPROC
1422END(int3)
1423
1424ENTRY(general_protection)
1425        RING0_EC_FRAME
1426        pushl_cfi $do_general_protection
1427        jmp error_code
1428        CFI_ENDPROC
1429END(general_protection)
1430
1431#ifdef CONFIG_KVM_GUEST
1432ENTRY(async_page_fault)
1433        RING0_EC_FRAME
1434        ASM_CLAC
1435        pushl_cfi $do_async_page_fault
1436        jmp error_code
1437        CFI_ENDPROC
1438END(async_page_fault)
1439#endif
1440
1441