linux/arch/x86/include/asm/processor.h
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _ASM_X86_PROCESSOR_H
   3#define _ASM_X86_PROCESSOR_H
   4
   5#include <asm/processor-flags.h>
   6
   7/* Forward declaration, a strange C thing */
   8struct task_struct;
   9struct mm_struct;
  10struct vm86;
  11
  12#include <asm/math_emu.h>
  13#include <asm/segment.h>
  14#include <asm/types.h>
  15#include <uapi/asm/sigcontext.h>
  16#include <asm/current.h>
  17#include <asm/cpufeatures.h>
  18#include <asm/page.h>
  19#include <asm/pgtable_types.h>
  20#include <asm/percpu.h>
  21#include <asm/msr.h>
  22#include <asm/desc_defs.h>
  23#include <asm/nops.h>
  24#include <asm/special_insns.h>
  25#include <asm/fpu/types.h>
  26#include <asm/unwind_hints.h>
  27
  28#include <linux/personality.h>
  29#include <linux/cache.h>
  30#include <linux/threads.h>
  31#include <linux/math64.h>
  32#include <linux/err.h>
  33#include <linux/irqflags.h>
  34#include <linux/mem_encrypt.h>
  35
  36/*
  37 * We handle most unaligned accesses in hardware.  On the other hand
  38 * unaligned DMA can be quite expensive on some Nehalem processors.
  39 *
  40 * Based on this we disable the IP header alignment in network drivers.
  41 */
  42#define NET_IP_ALIGN    0
  43
  44#define HBP_NUM 4
  45/*
   46 * Default implementation of the helper that returns the current
   47 * instruction pointer ("program counter").
  48 */
  49static inline void *current_text_addr(void)
  50{
  51        void *pc;
  52
  53        asm volatile("mov $1f, %0; 1:":"=r" (pc));
  54
  55        return pc;
  56}
  57
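/*
 * Illustrative note (not part of the upstream header): the asm above loads
 * the address of the local label "1:", i.e. the instruction immediately
 * following the mov at the (inlined) call site, and that address is what
 * gets returned.
 */
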
  58/*
  59 * These alignment constraints are for performance in the vSMP case,
  60 * but in the task_struct case we must also meet hardware imposed
  61 * alignment requirements of the FPU state:
  62 */
  63#ifdef CONFIG_X86_VSMP
  64# define ARCH_MIN_TASKALIGN             (1 << INTERNODE_CACHE_SHIFT)
  65# define ARCH_MIN_MMSTRUCT_ALIGN        (1 << INTERNODE_CACHE_SHIFT)
  66#else
  67# define ARCH_MIN_TASKALIGN             __alignof__(union fpregs_state)
  68# define ARCH_MIN_MMSTRUCT_ALIGN        0
  69#endif
  70
  71enum tlb_infos {
  72        ENTRIES,
  73        NR_INFO
  74};
  75
  76extern u16 __read_mostly tlb_lli_4k[NR_INFO];
  77extern u16 __read_mostly tlb_lli_2m[NR_INFO];
  78extern u16 __read_mostly tlb_lli_4m[NR_INFO];
  79extern u16 __read_mostly tlb_lld_4k[NR_INFO];
  80extern u16 __read_mostly tlb_lld_2m[NR_INFO];
  81extern u16 __read_mostly tlb_lld_4m[NR_INFO];
  82extern u16 __read_mostly tlb_lld_1g[NR_INFO];
  83
  84/*
  85 *  CPU type and hardware bug flags. Kept separately for each CPU.
  86 *  Members of this structure are referenced in head_32.S, so think twice
  87 *  before touching them. [mj]
  88 */
  89
  90struct cpuinfo_x86 {
  91        __u8                    x86;            /* CPU family */
  92        __u8                    x86_vendor;     /* CPU vendor */
  93        __u8                    x86_model;
  94        __u8                    x86_stepping;
  95#ifdef CONFIG_X86_64
   96        /* Number of 4K pages in DTLB/ITLB combined (in pages): */
  97        int                     x86_tlbsize;
  98#endif
  99        __u8                    x86_virt_bits;
 100        __u8                    x86_phys_bits;
 101        /* CPUID returned core id bits: */
 102        __u8                    x86_coreid_bits;
 103        __u8                    cu_id;
 104        /* Max extended CPUID function supported: */
 105        __u32                   extended_cpuid_level;
 106        /* Maximum supported CPUID level, -1=no CPUID: */
 107        int                     cpuid_level;
 108        __u32                   x86_capability[NCAPINTS + NBUGINTS];
 109        char                    x86_vendor_id[16];
 110        char                    x86_model_id[64];
  111        /* In KB - valid for CPUs which support this call: */
 112        unsigned int            x86_cache_size;
 113        int                     x86_cache_alignment;    /* In bytes */
 114        /* Cache QoS architectural values: */
 115        int                     x86_cache_max_rmid;     /* max index */
 116        int                     x86_cache_occ_scale;    /* scale to bytes */
 117        int                     x86_power;
 118        unsigned long           loops_per_jiffy;
 119        /* cpuid returned max cores value: */
  120        u16                     x86_max_cores;
 121        u16                     apicid;
 122        u16                     initial_apicid;
 123        u16                     x86_clflush_size;
 124        /* number of cores as seen by the OS: */
 125        u16                     booted_cores;
 126        /* Physical processor id: */
 127        u16                     phys_proc_id;
 128        /* Logical processor id: */
 129        u16                     logical_proc_id;
 130        /* Core id: */
 131        u16                     cpu_core_id;
 132        /* Index into per_cpu list: */
 133        u16                     cpu_index;
 134        u32                     microcode;
 135        /* Address space bits used by the cache internally */
 136        u8                      x86_cache_bits;
 137        unsigned                initialized : 1;
 138} __randomize_layout;
 139
 140struct cpuid_regs {
 141        u32 eax, ebx, ecx, edx;
 142};
 143
 144enum cpuid_regs_idx {
 145        CPUID_EAX = 0,
 146        CPUID_EBX,
 147        CPUID_ECX,
 148        CPUID_EDX,
 149};
 150
 151#define X86_VENDOR_INTEL        0
 152#define X86_VENDOR_CYRIX        1
 153#define X86_VENDOR_AMD          2
 154#define X86_VENDOR_UMC          3
 155#define X86_VENDOR_CENTAUR      5
 156#define X86_VENDOR_TRANSMETA    7
 157#define X86_VENDOR_NSC          8
 158#define X86_VENDOR_NUM          9
 159
 160#define X86_VENDOR_UNKNOWN      0xff
 161
 162/*
 163 * capabilities of CPUs
 164 */
 165extern struct cpuinfo_x86       boot_cpu_data;
 166extern struct cpuinfo_x86       new_cpu_data;
 167
 168extern struct x86_hw_tss        doublefault_tss;
 169extern __u32                    cpu_caps_cleared[NCAPINTS + NBUGINTS];
 170extern __u32                    cpu_caps_set[NCAPINTS + NBUGINTS];
 171
 172#ifdef CONFIG_SMP
 173DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 174#define cpu_data(cpu)           per_cpu(cpu_info, cpu)
 175#else
 176#define cpu_info                boot_cpu_data
 177#define cpu_data(cpu)           boot_cpu_data
 178#endif
 179
 180extern const struct seq_operations cpuinfo_op;
 181
 182#define cache_line_size()       (boot_cpu_data.x86_cache_alignment)
 183
 184extern void cpu_detect(struct cpuinfo_x86 *c);
 185
 186static inline unsigned long long l1tf_pfn_limit(void)
 187{
 188        return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
 189}
 190
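/*
 * Worked example (illustrative, not part of the upstream header): with,
 * say, x86_cache_bits == 36 and PAGE_SHIFT == 12, the limit above is
 * BIT_ULL(23), i.e. PFNs below 2^23 (the first 32GB of physical memory)
 * are the ones the L1TF mitigation checks treat as safe.
 */
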
 191extern void early_cpu_init(void);
 192extern void identify_boot_cpu(void);
 193extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 194extern void print_cpu_info(struct cpuinfo_x86 *);
 195void print_cpu_msr(struct cpuinfo_x86 *);
 196
 197#ifdef CONFIG_X86_32
 198extern int have_cpuid_p(void);
 199#else
 200static inline int have_cpuid_p(void)
 201{
 202        return 1;
 203}
 204#endif
 205static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 206                                unsigned int *ecx, unsigned int *edx)
 207{
 208        /* ecx is often an input as well as an output. */
 209        asm volatile("cpuid"
 210            : "=a" (*eax),
 211              "=b" (*ebx),
 212              "=c" (*ecx),
 213              "=d" (*edx)
 214            : "0" (*eax), "2" (*ecx)
 215            : "memory");
 216}
 217
 218#define native_cpuid_reg(reg)                                   \
 219static inline unsigned int native_cpuid_##reg(unsigned int op)  \
 220{                                                               \
 221        unsigned int eax = op, ebx, ecx = 0, edx;               \
 222                                                                \
 223        native_cpuid(&eax, &ebx, &ecx, &edx);                   \
 224                                                                \
 225        return reg;                                             \
 226}
 227
 228/*
 229 * Native CPUID functions returning a single datum.
 230 */
 231native_cpuid_reg(eax)
 232native_cpuid_reg(ebx)
 233native_cpuid_reg(ecx)
 234native_cpuid_reg(edx)
 235
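/*
 * Usage sketch (illustrative, not part of the upstream header): the macro
 * above generates one helper per register, so for example
 *
 *	unsigned int max_basic_leaf = native_cpuid_eax(0);
 *
 * executes CPUID with EAX=0/ECX=0 and returns only the EAX output.
 */
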
 236/*
 237 * Friendlier CR3 helpers.
 238 */
 239static inline unsigned long read_cr3_pa(void)
 240{
 241        return __read_cr3() & CR3_ADDR_MASK;
 242}
 243
 244static inline unsigned long native_read_cr3_pa(void)
 245{
 246        return __native_read_cr3() & CR3_ADDR_MASK;
 247}
 248
 249static inline void load_cr3(pgd_t *pgdir)
 250{
 251        write_cr3(__sme_pa(pgdir));
 252}
 253
 254/*
 255 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
 256 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
 257 * unrelated to the task-switch mechanism:
 258 */
 259#ifdef CONFIG_X86_32
 260/* This is the TSS defined by the hardware. */
 261struct x86_hw_tss {
 262        unsigned short          back_link, __blh;
 263        unsigned long           sp0;
 264        unsigned short          ss0, __ss0h;
 265        unsigned long           sp1;
 266
 267        /*
 268         * We don't use ring 1, so ss1 is a convenient scratch space in
 269         * the same cacheline as sp0.  We use ss1 to cache the value in
 270         * MSR_IA32_SYSENTER_CS.  When we context switch
 271         * MSR_IA32_SYSENTER_CS, we first check if the new value being
 272         * written matches ss1, and, if it's not, then we wrmsr the new
  273         * written matches ss1 and, if it doesn't, we wrmsr the new
 274         *
 275         * The only reason we context switch MSR_IA32_SYSENTER_CS is
 276         * that we set it to zero in vm86 tasks to avoid corrupting the
 277         * stack if we were to go through the sysenter path from vm86
 278         * mode.
 279         */
 280        unsigned short          ss1;    /* MSR_IA32_SYSENTER_CS */
 281
 282        unsigned short          __ss1h;
 283        unsigned long           sp2;
 284        unsigned short          ss2, __ss2h;
 285        unsigned long           __cr3;
 286        unsigned long           ip;
 287        unsigned long           flags;
 288        unsigned long           ax;
 289        unsigned long           cx;
 290        unsigned long           dx;
 291        unsigned long           bx;
 292        unsigned long           sp;
 293        unsigned long           bp;
 294        unsigned long           si;
 295        unsigned long           di;
 296        unsigned short          es, __esh;
 297        unsigned short          cs, __csh;
 298        unsigned short          ss, __ssh;
 299        unsigned short          ds, __dsh;
 300        unsigned short          fs, __fsh;
 301        unsigned short          gs, __gsh;
 302        unsigned short          ldt, __ldth;
 303        unsigned short          trace;
 304        unsigned short          io_bitmap_base;
 305
 306} __attribute__((packed));
 307#else
 308struct x86_hw_tss {
 309        u32                     reserved1;
 310        u64                     sp0;
 311
 312        /*
 313         * We store cpu_current_top_of_stack in sp1 so it's always accessible.
 314         * Linux does not use ring 1, so sp1 is not otherwise needed.
 315         */
 316        u64                     sp1;
 317
 318        u64                     sp2;
 319        u64                     reserved2;
 320        u64                     ist[7];
 321        u32                     reserved3;
 322        u32                     reserved4;
 323        u16                     reserved5;
 324        u16                     io_bitmap_base;
 325
 326} __attribute__((packed));
 327#endif
 328
 329/*
 330 * IO-bitmap sizes:
 331 */
 332#define IO_BITMAP_BITS                  65536
 333#define IO_BITMAP_BYTES                 (IO_BITMAP_BITS/8)
 334#define IO_BITMAP_LONGS                 (IO_BITMAP_BYTES/sizeof(long))
 335#define IO_BITMAP_OFFSET                (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 336#define INVALID_IO_BITMAP_OFFSET        0x8000
 337
 338struct entry_stack {
 339        unsigned long           words[64];
 340};
 341
 342struct entry_stack_page {
 343        struct entry_stack stack;
 344} __aligned(PAGE_SIZE);
 345
 346struct tss_struct {
 347        /*
  348         * The fixed hardware portion.  This must not cross a page boundary;
  349         * doing so would violate the SDM's advice and could potentially
  350         * trigger errata.
 351         */
 352        struct x86_hw_tss       x86_tss;
 353
 354        /*
 355         * The extra 1 is there because the CPU will access an
 356         * additional byte beyond the end of the IO permission
 357         * bitmap. The extra byte must be all 1 bits, and must
 358         * be within the limit.
 359         */
 360        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
 361} __aligned(PAGE_SIZE);
 362
 363DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 364
 365/*
  366 * The sizeof(unsigned long) comes from the extra "long" at the end
  367 * of the io_bitmap.
  368 *
  369 * The -1 is there because the segment base+limit should point to the
  370 * address of the last valid byte.
 371 */
 372#define __KERNEL_TSS_LIMIT      \
 373        (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
 374
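/*
 * Worked example (illustrative, not part of the upstream header): with
 * IO_BITMAP_BYTES == 65536 / 8 == 8192 and an 8-byte unsigned long, the
 * limit is IO_BITMAP_OFFSET + 8192 + 8 - 1, i.e. the offset of the last
 * byte of the terminating all-ones long, which is exactly what a segment
 * limit must point at.
 */
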
 375#ifdef CONFIG_X86_32
 376DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 377#else
 378/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
 379#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 380#endif
 381
 382/*
 383 * Save the original ist values for checking stack pointers during debugging
 384 */
 385struct orig_ist {
 386        unsigned long           ist[7];
 387};
 388
 389#ifdef CONFIG_X86_64
 390DECLARE_PER_CPU(struct orig_ist, orig_ist);
 391
 392union irq_stack_union {
 393        char irq_stack[IRQ_STACK_SIZE];
 394        /*
 395         * GCC hardcodes the stack canary as %gs:40.  Since the
 396         * irq_stack is the object at %gs:0, we reserve the bottom
 397         * 48 bytes of the irq stack for the canary.
 398         */
 399        struct {
 400                char gs_base[40];
 401                unsigned long stack_canary;
 402        };
 403};
 404
 405DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
 406DECLARE_INIT_PER_CPU(irq_stack_union);
 407
 408static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 409{
 410        return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
 411}
 412
 413DECLARE_PER_CPU(char *, irq_stack_ptr);
 414DECLARE_PER_CPU(unsigned int, irq_count);
 415extern asmlinkage void ignore_sysret(void);
 416
 417#if IS_ENABLED(CONFIG_KVM)
 418/* Save actual FS/GS selectors and bases to current->thread */
 419void save_fsgs_for_kvm(void);
 420#endif
 421#else   /* X86_64 */
 422#ifdef CONFIG_STACKPROTECTOR
 423/*
  424 * Make sure the stack canary segment base is cache-line aligned:
 425 *   "For Intel Atom processors, avoid non zero segment base address
 426 *    that is not aligned to cache line boundary at all cost."
 427 * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
 428 */
 429struct stack_canary {
 430        char __pad[20];         /* canary at %gs:20 */
 431        unsigned long canary;
 432};
 433DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 434#endif
 435/*
 436 * per-CPU IRQ handling stacks
 437 */
 438struct irq_stack {
 439        u32                     stack[THREAD_SIZE/sizeof(u32)];
 440} __aligned(THREAD_SIZE);
 441
 442DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 443DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 444#endif  /* X86_64 */
 445
 446extern unsigned int fpu_kernel_xstate_size;
 447extern unsigned int fpu_user_xstate_size;
 448
 449struct perf_event;
 450
 451typedef struct {
 452        unsigned long           seg;
 453} mm_segment_t;
 454
 455struct thread_struct {
 456        /* Cached TLS descriptors: */
 457        struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
 458#ifdef CONFIG_X86_32
 459        unsigned long           sp0;
 460#endif
 461        unsigned long           sp;
 462#ifdef CONFIG_X86_32
 463        unsigned long           sysenter_cs;
 464#else
 465        unsigned short          es;
 466        unsigned short          ds;
 467        unsigned short          fsindex;
 468        unsigned short          gsindex;
 469#endif
 470
 471#ifdef CONFIG_X86_64
 472        unsigned long           fsbase;
 473        unsigned long           gsbase;
 474#else
 475        /*
 476         * XXX: this could presumably be unsigned short.  Alternatively,
 477         * 32-bit kernels could be taught to use fsindex instead.
 478         */
 479        unsigned long fs;
 480        unsigned long gs;
 481#endif
 482
  483        /* Saved state of ptrace hardware breakpoints */
 484        struct perf_event       *ptrace_bps[HBP_NUM];
 485        /* Debug status used for traps, single steps, etc... */
 486        unsigned long           debugreg6;
 487        /* Keep track of the exact dr7 value set by the user */
 488        unsigned long           ptrace_dr7;
 489        /* Fault info: */
 490        unsigned long           cr2;
 491        unsigned long           trap_nr;
 492        unsigned long           error_code;
 493#ifdef CONFIG_VM86
 494        /* Virtual 86 mode info */
 495        struct vm86             *vm86;
 496#endif
 497        /* IO permissions: */
 498        unsigned long           *io_bitmap_ptr;
 499        unsigned long           iopl;
 500        /* Max allowed port in the bitmap, in bytes: */
 501        unsigned                io_bitmap_max;
 502
 503        mm_segment_t            addr_limit;
 504
 505        unsigned int            sig_on_uaccess_err:1;
 506        unsigned int            uaccess_err:1;  /* uaccess failed */
 507
 508        /* Floating point and extended processor state */
 509        struct fpu              fpu;
 510        /*
 511         * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
 512         * the end.
 513         */
 514};
 515
 516/* Whitelist the FPU state from the task_struct for hardened usercopy. */
 517static inline void arch_thread_struct_whitelist(unsigned long *offset,
 518                                                unsigned long *size)
 519{
 520        *offset = offsetof(struct thread_struct, fpu.state);
 521        *size = fpu_kernel_xstate_size;
 522}
 523
 524/*
 525 * Thread-synchronous status.
 526 *
 527 * This is different from the flags in that nobody else
 528 * ever touches our thread-synchronous status, so we don't
 529 * have to worry about atomic accesses.
 530 */
 531#define TS_COMPAT               0x0002  /* 32bit syscall active (64BIT)*/
 532
 533/*
 534 * Set IOPL bits in EFLAGS from given mask
 535 */
 536static inline void native_set_iopl_mask(unsigned mask)
 537{
 538#ifdef CONFIG_X86_32
 539        unsigned int reg;
 540
 541        asm volatile ("pushfl;"
 542                      "popl %0;"
 543                      "andl %1, %0;"
 544                      "orl %2, %0;"
 545                      "pushl %0;"
 546                      "popfl"
 547                      : "=&r" (reg)
 548                      : "i" (~X86_EFLAGS_IOPL), "r" (mask));
 549#endif
 550}
 551
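/*
 * Illustrative note (not part of the upstream header): X86_EFLAGS_IOPL
 * covers EFLAGS bits 12-13, so on 32-bit the sequence above reads EFLAGS,
 * clears the old IOPL field, ORs in the caller-supplied mask and writes
 * EFLAGS back; on 64-bit the function is a no-op.
 */
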
 552static inline void
 553native_load_sp0(unsigned long sp0)
 554{
 555        this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 556}
 557
 558static inline void native_swapgs(void)
 559{
 560#ifdef CONFIG_X86_64
 561        asm volatile("swapgs" ::: "memory");
 562#endif
 563}
 564
 565static inline unsigned long current_top_of_stack(void)
 566{
 567        /*
 568         *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
  569         *  and around vm86 mode, and sp0 on x86_64 is special because of the
 570         *  entry trampoline.
 571         */
 572        return this_cpu_read_stable(cpu_current_top_of_stack);
 573}
 574
 575static inline bool on_thread_stack(void)
 576{
 577        return (unsigned long)(current_top_of_stack() -
 578                               current_stack_pointer) < THREAD_SIZE;
 579}
 580
 581#ifdef CONFIG_PARAVIRT
 582#include <asm/paravirt.h>
 583#else
 584#define __cpuid                 native_cpuid
 585
 586static inline void load_sp0(unsigned long sp0)
 587{
 588        native_load_sp0(sp0);
 589}
 590
 591#define set_iopl_mask native_set_iopl_mask
 592#endif /* CONFIG_PARAVIRT */
 593
 594/* Free all resources held by a thread. */
 595extern void release_thread(struct task_struct *);
 596
 597unsigned long get_wchan(struct task_struct *p);
 598
 599/*
 600 * Generic CPUID function
  601 * clear %ecx since some CPUs (Cyrix MII) do not set or clear %ecx,
 602 * resulting in stale register contents being returned.
 603 */
 604static inline void cpuid(unsigned int op,
 605                         unsigned int *eax, unsigned int *ebx,
 606                         unsigned int *ecx, unsigned int *edx)
 607{
 608        *eax = op;
 609        *ecx = 0;
 610        __cpuid(eax, ebx, ecx, edx);
 611}
 612
 613/* Some CPUID calls want 'count' to be placed in ecx */
 614static inline void cpuid_count(unsigned int op, int count,
 615                               unsigned int *eax, unsigned int *ebx,
 616                               unsigned int *ecx, unsigned int *edx)
 617{
 618        *eax = op;
 619        *ecx = count;
 620        __cpuid(eax, ebx, ecx, edx);
 621}
 622
 623/*
 624 * CPUID functions returning a single datum
 625 */
 626static inline unsigned int cpuid_eax(unsigned int op)
 627{
 628        unsigned int eax, ebx, ecx, edx;
 629
 630        cpuid(op, &eax, &ebx, &ecx, &edx);
 631
 632        return eax;
 633}
 634
 635static inline unsigned int cpuid_ebx(unsigned int op)
 636{
 637        unsigned int eax, ebx, ecx, edx;
 638
 639        cpuid(op, &eax, &ebx, &ecx, &edx);
 640
 641        return ebx;
 642}
 643
 644static inline unsigned int cpuid_ecx(unsigned int op)
 645{
 646        unsigned int eax, ebx, ecx, edx;
 647
 648        cpuid(op, &eax, &ebx, &ecx, &edx);
 649
 650        return ecx;
 651}
 652
 653static inline unsigned int cpuid_edx(unsigned int op)
 654{
 655        unsigned int eax, ebx, ecx, edx;
 656
 657        cpuid(op, &eax, &ebx, &ecx, &edx);
 658
 659        return edx;
 660}
 661
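/*
 * Usage sketch (illustrative, not part of the upstream header):
 *
 *	unsigned int eax, ebx, ecx, edx;
 *
 *	cpuid(0, &eax, &ebx, &ecx, &edx);
 *
 * leaves the highest basic leaf in eax and the 12-byte vendor string in
 * ebx/edx/ecx, while e.g. cpuid_eax(0x80000000) returns just the highest
 * extended leaf.
 */
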
 662/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 663static __always_inline void rep_nop(void)
 664{
 665        asm volatile("rep; nop" ::: "memory");
 666}
 667
 668static __always_inline void cpu_relax(void)
 669{
 670        rep_nop();
 671}
 672
 673/*
 674 * This function forces the icache and prefetched instruction stream to
 675 * catch up with reality in two very specific cases:
 676 *
 677 *  a) Text was modified using one virtual address and is about to be executed
 678 *     from the same physical page at a different virtual address.
 679 *
 680 *  b) Text was modified on a different CPU, may subsequently be
 681 *     executed on this CPU, and you want to make sure the new version
  682 *     gets executed.  This generally means you're calling this in an IPI.
 683 *
 684 * If you're calling this for a different reason, you're probably doing
 685 * it wrong.
 686 */
 687static inline void sync_core(void)
 688{
 689        /*
 690         * There are quite a few ways to do this.  IRET-to-self is nice
 691         * because it works on every CPU, at any CPL (so it's compatible
 692         * with paravirtualization), and it never exits to a hypervisor.
 693         * The only down sides are that it's a bit slow (it seems to be
 694         * a bit more than 2x slower than the fastest options) and that
 695         * it unmasks NMIs.  The "push %cs" is needed because, in
 696         * paravirtual environments, __KERNEL_CS may not be a valid CS
 697         * value when we do IRET directly.
 698         *
 699         * In case NMI unmasking or performance ever becomes a problem,
 700         * the next best option appears to be MOV-to-CR2 and an
 701         * unconditional jump.  That sequence also works on all CPUs,
 702         * but it will fault at CPL3 (i.e. Xen PV).
 703         *
 704         * CPUID is the conventional way, but it's nasty: it doesn't
 705         * exist on some 486-like CPUs, and it usually exits to a
 706         * hypervisor.
 707         *
 708         * Like all of Linux's memory ordering operations, this is a
 709         * compiler barrier as well.
 710         */
 711#ifdef CONFIG_X86_32
 712        asm volatile (
 713                "pushfl\n\t"
 714                "pushl %%cs\n\t"
 715                "pushl $1f\n\t"
 716                "iret\n\t"
 717                "1:"
 718                : ASM_CALL_CONSTRAINT : : "memory");
 719#else
 720        unsigned int tmp;
 721
 722        asm volatile (
 723                UNWIND_HINT_SAVE
 724                "mov %%ss, %0\n\t"
 725                "pushq %q0\n\t"
 726                "pushq %%rsp\n\t"
 727                "addq $8, (%%rsp)\n\t"
 728                "pushfq\n\t"
 729                "mov %%cs, %0\n\t"
 730                "pushq %q0\n\t"
 731                "pushq $1f\n\t"
 732                "iretq\n\t"
 733                UNWIND_HINT_RESTORE
 734                "1:"
 735                : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 736#endif
 737}
 738
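/*
 * Illustrative note (not part of the upstream header): the 64-bit sequence
 * above hand-builds the five-word IRET frame -- SS, RSP (fixed up by the
 * addq to its pre-push value), RFLAGS, CS and RIP (label 1) -- so that
 * "iretq" serializes the CPU and resumes at the next instruction.
 */
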
 739extern void select_idle_routine(const struct cpuinfo_x86 *c);
 740extern void amd_e400_c1e_apic_setup(void);
 741
 742extern unsigned long            boot_option_idle_override;
 743
 744enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 745                         IDLE_POLL};
 746
 747extern void enable_sep_cpu(void);
 748extern int sysenter_setup(void);
 749
 750void early_trap_pf_init(void);
 751
 752/* Defined in head.S */
 753extern struct desc_ptr          early_gdt_descr;
 754
 755extern void switch_to_new_gdt(int);
 756extern void load_direct_gdt(int);
 757extern void load_fixmap_gdt(int);
 758extern void load_percpu_segment(int);
 759extern void cpu_init(void);
 760
 761static inline unsigned long get_debugctlmsr(void)
 762{
 763        unsigned long debugctlmsr = 0;
 764
 765#ifndef CONFIG_X86_DEBUGCTLMSR
 766        if (boot_cpu_data.x86 < 6)
 767                return 0;
 768#endif
 769        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 770
 771        return debugctlmsr;
 772}
 773
 774static inline void update_debugctlmsr(unsigned long debugctlmsr)
 775{
 776#ifndef CONFIG_X86_DEBUGCTLMSR
 777        if (boot_cpu_data.x86 < 6)
 778                return;
 779#endif
 780        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 781}
 782
 783extern void set_task_blockstep(struct task_struct *task, bool on);
 784
 785/* Boot loader type from the setup header: */
 786extern int                      bootloader_type;
 787extern int                      bootloader_version;
 788
 789extern char                     ignore_fpu_irq;
 790
 791#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 792#define ARCH_HAS_PREFETCHW
 793#define ARCH_HAS_SPINLOCK_PREFETCH
 794
 795#ifdef CONFIG_X86_32
 796# define BASE_PREFETCH          ""
 797# define ARCH_HAS_PREFETCH
 798#else
 799# define BASE_PREFETCH          "prefetcht0 %P1"
 800#endif
 801
 802/*
 803 * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
 804 *
  805 * It's not worth caring about 3dnow prefetches for the K6
 806 * because they are microcoded there and very slow.
 807 */
 808static inline void prefetch(const void *x)
 809{
 810        alternative_input(BASE_PREFETCH, "prefetchnta %P1",
 811                          X86_FEATURE_XMM,
 812                          "m" (*(const char *)x));
 813}
 814
 815/*
 816 * 3dnow prefetch to get an exclusive cache line.
 817 * Useful for spinlocks to avoid one state transition in the
 818 * cache coherency protocol:
 819 */
 820static inline void prefetchw(const void *x)
 821{
 822        alternative_input(BASE_PREFETCH, "prefetchw %P1",
 823                          X86_FEATURE_3DNOWPREFETCH,
 824                          "m" (*(const char *)x));
 825}
 826
 827static inline void spin_lock_prefetch(const void *x)
 828{
 829        prefetchw(x);
 830}
 831
 832#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
 833                           TOP_OF_KERNEL_STACK_PADDING)
 834
 835#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
 836
 837#define task_pt_regs(task) \
 838({                                                                      \
 839        unsigned long __ptr = (unsigned long)task_stack_page(task);     \
 840        __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
 841        ((struct pt_regs *)__ptr) - 1;                                  \
 842})
 843
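/*
 * Illustrative layout sketch (not part of the upstream header):
 *
 *	task_stack_page(task) + THREAD_SIZE      top of the stack area
 *	  - TOP_OF_KERNEL_STACK_PADDING          task_top_of_stack(task)
 *	  - sizeof(struct pt_regs)               task_pt_regs(task)
 *
 * i.e. the user-mode register frame sits just below the (possibly padded)
 * top of the kernel stack.
 */
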
 844#ifdef CONFIG_X86_32
 845/*
 846 * User space process size: 3GB (default).
 847 */
 848#define IA32_PAGE_OFFSET        PAGE_OFFSET
 849#define TASK_SIZE               PAGE_OFFSET
 850#define TASK_SIZE_LOW           TASK_SIZE
 851#define TASK_SIZE_MAX           TASK_SIZE
 852#define DEFAULT_MAP_WINDOW      TASK_SIZE
 853#define STACK_TOP               TASK_SIZE
 854#define STACK_TOP_MAX           STACK_TOP
 855
 856#define INIT_THREAD  {                                                    \
 857        .sp0                    = TOP_OF_INIT_STACK,                      \
 858        .sysenter_cs            = __KERNEL_CS,                            \
 859        .io_bitmap_ptr          = NULL,                                   \
 860        .addr_limit             = KERNEL_DS,                              \
 861}
 862
 863#define KSTK_ESP(task)          (task_pt_regs(task)->sp)
 864
 865#else
 866/*
 867 * User space process size.  This is the first address outside the user range.
 868 * There are a few constraints that determine this:
 869 *
 870 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 871 * address, then that syscall will enter the kernel with a
 872 * non-canonical return address, and SYSRET will explode dangerously.
 873 * We avoid this particular problem by preventing anything executable
 874 * from being mapped at the maximum canonical address.
 875 *
 876 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 877 * CPUs malfunction if they execute code from the highest canonical page.
 878 * They'll speculate right off the end of the canonical space, and
 879 * bad things happen.  This is worked around in the same way as the
 880 * Intel problem.
 881 *
 882 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 883 */
 884#define TASK_SIZE_MAX   ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 885
 886#define DEFAULT_MAP_WINDOW      ((1UL << 47) - PAGE_SIZE)
 887
 888/* This decides where the kernel will search for a free chunk of vm
 889 * space during mmap's.
 890 */
 891#define IA32_PAGE_OFFSET        ((current->personality & ADDR_LIMIT_3GB) ? \
 892                                        0xc0000000 : 0xFFFFe000)
 893
 894#define TASK_SIZE_LOW           (test_thread_flag(TIF_ADDR32) ? \
 895                                        IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
 896#define TASK_SIZE               (test_thread_flag(TIF_ADDR32) ? \
 897                                        IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 898#define TASK_SIZE_OF(child)     ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
 899                                        IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 900
 901#define STACK_TOP               TASK_SIZE_LOW
 902#define STACK_TOP_MAX           TASK_SIZE_MAX
 903
 904#define INIT_THREAD  {                                          \
 905        .addr_limit             = KERNEL_DS,                    \
 906}
 907
 908extern unsigned long KSTK_ESP(struct task_struct *task);
 909
 910#endif /* CONFIG_X86_64 */
 911
 912extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 913                                               unsigned long new_sp);
 914
 915/*
 916 * This decides where the kernel will search for a free chunk of vm
 917 * space during mmap's.
 918 */
 919#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
 920#define TASK_UNMAPPED_BASE              __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
 921
 922#define KSTK_EIP(task)          (task_pt_regs(task)->ip)
 923
 924/* Get/set a process' ability to use the timestamp counter instruction */
 925#define GET_TSC_CTL(adr)        get_tsc_mode((adr))
 926#define SET_TSC_CTL(val)        set_tsc_mode((val))
 927
 928extern int get_tsc_mode(unsigned long adr);
 929extern int set_tsc_mode(unsigned int val);
 930
 931DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 932
  933/* Register/unregister a process' MPX-related resource */
 934#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
 935#define MPX_DISABLE_MANAGEMENT()        mpx_disable_management()
 936
 937#ifdef CONFIG_X86_INTEL_MPX
 938extern int mpx_enable_management(void);
 939extern int mpx_disable_management(void);
 940#else
 941static inline int mpx_enable_management(void)
 942{
 943        return -EINVAL;
 944}
 945static inline int mpx_disable_management(void)
 946{
 947        return -EINVAL;
 948}
 949#endif /* CONFIG_X86_INTEL_MPX */
 950
 951#ifdef CONFIG_CPU_SUP_AMD
 952extern u16 amd_get_nb_id(int cpu);
 953extern u32 amd_get_nodes_per_socket(void);
 954#else
 955static inline u16 amd_get_nb_id(int cpu)                { return 0; }
 956static inline u32 amd_get_nodes_per_socket(void)        { return 0; }
 957#endif
 958
 959static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 960{
 961        uint32_t base, eax, signature[3];
 962
 963        for (base = 0x40000000; base < 0x40010000; base += 0x100) {
 964                cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
 965
 966                if (!memcmp(sig, signature, 12) &&
 967                    (leaves == 0 || ((eax - base) >= leaves)))
 968                        return base;
 969        }
 970
 971        return 0;
 972}
 973
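/*
 * Usage sketch (illustrative, not part of the upstream header): a guest can
 * probe for a particular hypervisor by its 12-byte signature, e.g.
 *
 *	if (hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0))
 *		pr_info("running as a KVM guest\n");
 *
 * The signature is matched against EBX/ECX/EDX of every candidate base leaf
 * in the 0x40000000-0x4000ff00 range.
 */
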
 974extern unsigned long arch_align_stack(unsigned long sp);
 975extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 976extern void free_kernel_image_pages(void *begin, void *end);
 977
 978void default_idle(void);
 979#ifdef  CONFIG_XEN
 980bool xen_set_default_idle(void);
 981#else
 982#define xen_set_default_idle 0
 983#endif
 984
 985void stop_this_cpu(void *dummy);
 986void df_debug(struct pt_regs *regs, long error_code);
 987void microcode_check(void);
 988
 989enum l1tf_mitigations {
 990        L1TF_MITIGATION_OFF,
 991        L1TF_MITIGATION_FLUSH_NOWARN,
 992        L1TF_MITIGATION_FLUSH,
 993        L1TF_MITIGATION_FLUSH_NOSMT,
 994        L1TF_MITIGATION_FULL,
 995        L1TF_MITIGATION_FULL_FORCE
 996};
 997
 998extern enum l1tf_mitigations l1tf_mitigation;
 999
1000#endif /* _ASM_X86_PROCESSOR_H */
1001