linux/arch/x86/entry/calling.h
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities) rflags is
   clobbered. Leftover arguments are passed over the stack frame.)

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] for struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

For 32-bit we have the following conventions - kernel is built with
-mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed over the stack frame. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/

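/*
 * Illustration (hypothetical structs, not from this file): how the 64-bit
 * struct-return rule above works out in practice:
 *
 *      struct pair   { long a, b; };     128 bits -> returned in rax:rdx
 *      struct triple { long a, b, c; };  wider    -> the caller allocates the
 *                                        struct in its own frame and passes
 *                                        its address as a hidden first
 *                                        argument in rdi; the callee fills it
 *                                        in and returns that address in rax.
 */
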
#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
        movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
        .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
        pushq   %rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        pushq   %r9             /* pt_regs->r9 */
        pushq   %r10            /* pt_regs->r10 */
        pushq   %r11            /* pt_regs->r11 */
        pushq   %rbx            /* pt_regs->rbx */
        pushq   %rbp            /* pt_regs->rbp */
        pushq   %r12            /* pt_regs->r12 */
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */
        UNWIND_HINT_REGS

        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
.endm

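/*
 * Stack layout sketch after PUSH_REGS with save_ret=1 (the macro is used
 * from stubs reached via a call, so a return address was on top of the
 * stack on entry):
 *
 *      return address                          <- %rsp
 *      pt_regs->r15 .. r12, bp, bx, r11 .. r8
 *      pt_regs->ax, cx, dx, si
 *      pt_regs->di     (stored in the slot that held the return address)
 *
 * Everything below the saved return address lines up with struct pt_regs,
 * and the stub can still return to its caller.
 */
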
.macro CLEAR_REGS
        /*
         * Sanitize registers of values that a speculation attack might
         * otherwise want to exploit. The lower registers are likely clobbered
         * well before they could be put to use in a speculative execution
         * gadget.
         */
        xorl    %edx,  %edx     /* nospec dx  */
        xorl    %ecx,  %ecx     /* nospec cx  */
        xorl    %r8d,  %r8d     /* nospec r8  */
        xorl    %r9d,  %r9d     /* nospec r9  */
        xorl    %r10d, %r10d    /* nospec r10 */
        xorl    %r11d, %r11d    /* nospec r11 */
        xorl    %ebx,  %ebx     /* nospec rbx */
        xorl    %ebp,  %ebp     /* nospec rbp */
        xorl    %r12d, %r12d    /* nospec r12 */
        xorl    %r13d, %r13d    /* nospec r13 */
        xorl    %r14d, %r14d    /* nospec r14 */
        xorl    %r15d, %r15d    /* nospec r15 */

.endm

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
        PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
        CLEAR_REGS
.endm

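/*
 * Typical use (sketch, modelled on the syscall entry path): the hardware
 * frame and orig_ax are pushed first, then PUSH_AND_CLEAR_REGS completes
 * struct pt_regs and sanitizes the GPRs before calling into C:
 *
 *      pushq   %rax                            pt_regs->orig_ax
 *      PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 *      ...                                     load the C arguments
 *      call    do_syscall_64
 */
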
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
        .if \skip_r11rcx
        popq %rsi
        .else
        popq %r11
        .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
        .if \skip_r11rcx
        popq %rsi
        .else
        popq %rcx
        .endif
        popq %rdx
        popq %rsi
        .if \pop_rdi
        popq %rdi
        .endif
.endm

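/*
 * Sketch of the SYSRET return path use: by that point rcx and r11 already
 * hold the user RIP and RFLAGS that SYSRET will consume, so their stale
 * pt_regs slots are simply discarded (popped into %rsi), and %rdi is left
 * on the stack so it stays usable as a scratch register for the CR3 switch:
 *
 *      POP_REGS pop_rdi=0 skip_r11rcx=1
 */
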
#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT            PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK           (1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT               X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK              (1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)

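/*
 * Example (made-up physical address): with the 8k-aligned PGD pair at
 * 0x2000000, the kernel half lives at 0x2000000 and the user half at
 * 0x2001000, so switching halves is just setting or clearing bit 12
 * (PTI_USER_PGTABLE_BIT) in CR3.  With PCID enabled, the low 12 bits of
 * CR3 select the ASID, and PTI_USER_PCID_BIT likewise flips between the
 * kernel and the user ASID of the same mm.
 */
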
.macro SET_NOFLUSH_BIT  reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
        andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg
        ADJUST_KERNEL_CR3 \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm

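/*
 * Typical use at kernel entry (sketch, as on the syscall path, where the
 * user %rsp has already been stashed and is about to be reloaded, so it
 * can double as the scratch register):
 *
 *      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 *      movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 */
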
#define THIS_CPU_user_pcid_flush_mask   \
        PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * Test if the ASID needs a flush.
         */
        movq    \scratch_reg, \scratch_reg2
        andq    $(0x7FF), \scratch_reg          /* mask ASID */
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
        jmp     .Lwrcr3_pcid_\@

.Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
        /* Flip the ASID to the user version */
        orq     $(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
        /* Flip the PGD to the user version */
        orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
        pushq   %rax
        SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
        popq    %rax
.endm

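/*
 * Typical use on the return-to-user path (sketch): the user GPRs have been
 * restored except for one register (e.g. %rdi) kept live as scratch; CR3 is
 * switched back to the user page tables and the scratch register is
 * restored last:
 *
 *      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 *      popq    %rdi
 *      ...
 *      sysretq
 */
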
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
         * Test the user pagetable bit. If set, then the user page tables
         * are active. If clear, CR3 already has the kernel page tables
         * active.
         */
        bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
        jnc     .Ldone_\@

        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * KERNEL pages can always resume with NOFLUSH as we do
         * explicit flushes.
         */
        bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lnoflush_\@

        /*
         * Check if there's a pending flush for the user ASID we're
         * about to set.
         */
        movq    \save_reg, \scratch_reg
        andq    $(0x7FF), \scratch_reg
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jmp     .Lwrcr3_\@

.Lnoflush_\@:
        SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
        /*
         * The CR3 write could be avoided when not changing its value,
         * but would require a CR3 read *and* a scratch register.
         */
        movq    \save_reg, %cr3
.Lend_\@:
.endm

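/*
 * SAVE_AND_SWITCH_TO_KERNEL_CR3 and RESTORE_CR3 are used as a pair on
 * paranoid entry paths (NMI, #MC, #DB) which can interrupt the kernel with
 * either CR3 value live.  Sketch:
 *
 *      SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *      ...     handle the exception
 *      RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
 *
 * save_reg must be a callee-saved register, since it carries the original
 * CR3 value from entry all the way to exit.
 */
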
#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif

/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm

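/*
 * Usage sketch (labels are illustrative): a conditional-swapgs entry path
 * fences both branches, per the rules above:
 *
 *      testb   $3, CS(%rsp)            did we come from user mode?
 *      jz      .Lfrom_kernel
 *      swapgs
 *      FENCE_SWAPGS_USER_ENTRY
 *      jmp     .Lentry_done
 * .Lfrom_kernel:
 *      FENCE_SWAPGS_KERNEL_ENTRY
 * .Lentry_done:
 */
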
.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        PUSH_AND_CLEAR_REGS
        call stackleak_erase
        POP_REGS
#endif
.endm

.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
        rdgsbase \save_reg
        GET_PERCPU_BASE \scratch_reg
        wrgsbase \scratch_reg
.endm

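/*
 * Used on FSGSBASE-capable CPUs in paranoid entry paths (sketch): the
 * interrupted context's GSBASE is stashed in a callee-saved register and
 * unconditionally replaced with this CPU's per-CPU base, e.g.
 *
 *      SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
 *      ...
 *      wrgsbase %rbx           restore the saved GSBASE on the way out
 */
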
#else /* CONFIG_X86_64 */
# undef         UNWIND_HINT_IRET_REGS
# define        UNWIND_HINT_IRET_REGS
#endif /* !CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        call stackleak_erase
#endif
.endm

#ifdef CONFIG_SMP

/*
 * The CPU/node NR is loaded from the limit (size) field of a special
 * segment descriptor entry in the GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
        movq    $__CPUNODE_SEG, \reg
        lsl     \reg, \reg
.endm

/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously can not use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads the guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume the guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
        LOAD_CPU_AND_NODE_SEG_LIMIT \reg
        andq    $VDSO_CPUNODE_MASK, \reg
        movq    __per_cpu_offset(, \reg, 8), \reg
.endm

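/*
 * Worked example (illustrative numbers): the per-CPU GDT entry encodes
 * (node << VDSO_CPUNODE_BITS) | cpu in its segment limit.  For CPU 5 on
 * node 1, LSL reads back (1 << 12) | 5 = 0x1005; masking with
 * VDSO_CPUNODE_MASK (0xfff) leaves 5, which indexes __per_cpu_offset[]
 * to yield this CPU's per-CPU base address.
 */
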
#else

.macro GET_PERCPU_BASE reg:req
        movq    pcpu_unit_offsets(%rip), \reg
.endm

#endif /* CONFIG_SMP */
