linux/arch/x86/entry/calling.h
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities) rflags is
   clobbered. Leftover arguments are passed over the stack frame.)

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] For struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

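 For example, following the table above, a hypothetical call like

        long foo(long a, long b, long c);

 passes a in rdi, b in rsi and c in rdx, may freely clobber r10, r11 and the
 argument registers, and returns its result in rax.
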
For 32-bit we have the following conventions - kernel is built with
-mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed over the stack frame. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

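 For example, with -mregparm=3 a hypothetical call like

        long foo(long a, long b, long c, long d);

 passes a in eax, b in edx, c in ecx, d on the stack, and returns its result
 in eax.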
*/

#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

/* The layout forms the "struct pt_regs" on the stack: */
/*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
 * unless syscall needs a complete, fully filled "struct pt_regs".
 */
#define R15             0*8
#define R14             1*8
#define R13             2*8
#define R12             3*8
#define RBP             4*8
#define RBX             5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11             6*8
#define R10             7*8
#define R9              8*8
#define R8              9*8
#define RAX             10*8
#define RCX             11*8
#define RDX             12*8
#define RSI             13*8
#define RDI             14*8
/*
 * On syscall entry, this is syscall#. On CPU exception, this is error code.
 * On hw interrupt, it's IRQ number:
 */
#define ORIG_RAX        15*8
/* Return frame for iretq */
#define RIP             16*8
#define CS              17*8
#define EFLAGS          18*8
#define RSP             19*8
#define SS              20*8

#define SIZEOF_PTREGS   21*8

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
        /*
         * Push registers and sanitize registers of values that a
         * speculation attack might otherwise want to exploit. The
         * lower registers are likely clobbered well before they
         * could be put to use in a speculative execution gadget.
         * Interleave XOR with PUSH for better uop scheduling:
         */
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
        movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
        .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
        xorl    %edx, %edx      /* nospec   dx */
        pushq   %rcx            /* pt_regs->cx */
        xorl    %ecx, %ecx      /* nospec   cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        xorl    %r8d, %r8d      /* nospec   r8 */
        pushq   %r9             /* pt_regs->r9 */
        xorl    %r9d, %r9d      /* nospec   r9 */
        pushq   %r10            /* pt_regs->r10 */
        xorl    %r10d, %r10d    /* nospec   r10 */
        pushq   %r11            /* pt_regs->r11 */
        xorl    %r11d, %r11d    /* nospec   r11 */
        pushq   %rbx            /* pt_regs->bx */
        xorl    %ebx, %ebx      /* nospec   rbx */
        pushq   %rbp            /* pt_regs->bp */
        xorl    %ebp, %ebp      /* nospec   rbp */
        pushq   %r12            /* pt_regs->r12 */
        xorl    %r12d, %r12d    /* nospec   r12 */
        pushq   %r13            /* pt_regs->r13 */
        xorl    %r13d, %r13d    /* nospec   r13 */
        pushq   %r14            /* pt_regs->r14 */
        xorl    %r14d, %r14d    /* nospec   r14 */
        pushq   %r15            /* pt_regs->r15 */
        xorl    %r15d, %r15d    /* nospec   r15 */
        UNWIND_HINT_REGS
        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
.endm

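/*
 * Pop the registers saved by PUSH_AND_CLEAR_REGS (descriptive note: with
 * skip_r11rcx=1 the saved r11 and rcx slots are popped into %rsi and
 * discarded, for return paths such as sysret that reload those registers
 * themselves; with pop_rdi=0 the final popq %rdi is left to the caller).
 */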
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
        .if \skip_r11rcx
        popq %rsi
        .else
        popq %r11
        .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
        .if \skip_r11rcx
        popq %rsi
        .else
        popq %rcx
        .endif
        popq %rdx
        popq %rsi
        .if \pop_rdi
        popq %rdi
        .endif
.endm

/*
 * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
 * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
 * is just setting the LSB, which makes it an invalid stack address and is also
 * a signal to the unwinder that it's a pt_regs pointer in disguise.
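 *
 * (Illustrative note: the unwinder can recover the real pointer simply by
 * clearing that low bit again, e.g. regs = (void *)(frame_pointer & ~1UL).)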
 *
 * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
 * the original rbp.
 */
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
        leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm

#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT            PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK           (1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT               X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK              (1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)

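/*
 * Illustrative note: with 4K pages (PAGE_SHIFT == 12) and
 * X86_CR3_PTI_PCID_USER_BIT == 11 the combined mask works out to 0x1800.
 * ADJUST_KERNEL_CR3 clears both bits to select the kernel PGD half and the
 * kernel PCID; the SWITCH_TO_USER_CR3_* macros set the PGD bit again (and
 * the PCID bit too when PCID is in use).
 */
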
.macro SET_NOFLUSH_BIT  reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
        andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg
        ADJUST_KERNEL_CR3 \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm

#define THIS_CPU_user_pcid_flush_mask   \
        PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * Test if the ASID needs a flush.
         */
        movq    \scratch_reg, \scratch_reg2
        andq    $(0x7FF), \scratch_reg          /* mask ASID */
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
        jmp     .Lwrcr3_pcid_\@

.Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
        /* Flip the ASID to the user version */
        orq     $(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
        /* Flip the PGD to the user version */
        orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
        pushq   %rax
        SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
        popq    %rax
.endm

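/*
 * Switch to the kernel CR3 and stash the CR3 value found on entry in
 * \save_reg so that a later RESTORE_CR3 can put it back.  (Descriptive
 * note: if the user page-table bit is already clear, CR3 is left
 * untouched.)
 */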
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
         * Test the user pagetable bit. If set, then the user page tables
         * are active. If clear, CR3 already has the kernel page tables
         * active.
         */
        bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
        jnc     .Ldone_\@

        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * KERNEL pages can always resume with NOFLUSH as we do
         * explicit flushes.
         */
        bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lnoflush_\@

        /*
         * Check if there's a pending flush for the user ASID we're
         * about to set.
         */
        movq    \save_reg, \scratch_reg
        andq    $(0x7FF), \scratch_reg
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jmp     .Lwrcr3_\@

.Lnoflush_\@:
        SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
        /*
         * The CR3 write could be avoided when not changing its value,
         * but would require a CR3 read *and* a scratch register.
         */
        movq    \save_reg, %cr3
.Lend_\@:
.endm

#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif /* CONFIG_PAGE_TABLE_ISOLATION */

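/*
 * Erase the used part of the kernel stack without clobbering the
 * general-purpose registers: stackleak_erase() follows the normal C calling
 * convention, so the full register set is saved with PUSH_AND_CLEAR_REGS and
 * restored with POP_REGS around the call.
 */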
.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        PUSH_AND_CLEAR_REGS
        call stackleak_erase
        POP_REGS
#endif
.endm

#endif /* CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        call stackleak_erase
#endif
.endm

/*
 * This does 'call enter_from_user_mode' unless we can avoid it based on
 * kernel config or using the static jump infrastructure.
 */
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef CONFIG_JUMP_LABEL
        STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
        call enter_from_user_mode
.Lafter_call_\@:
#endif
.endm
