linux/arch/x86/include/asm/processor.h
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _ASM_X86_PROCESSOR_H
   3#define _ASM_X86_PROCESSOR_H
   4
   5#include <asm/processor-flags.h>
   6
   7/* Forward declaration, a strange C thing */
   8struct task_struct;
   9struct mm_struct;
  10struct vm86;
  11
  12#include <asm/math_emu.h>
  13#include <asm/segment.h>
  14#include <asm/types.h>
  15#include <uapi/asm/sigcontext.h>
  16#include <asm/current.h>
  17#include <asm/cpufeatures.h>
  18#include <asm/page.h>
  19#include <asm/pgtable_types.h>
  20#include <asm/percpu.h>
  21#include <asm/msr.h>
  22#include <asm/desc_defs.h>
  23#include <asm/nops.h>
  24#include <asm/special_insns.h>
  25#include <asm/fpu/types.h>
  26#include <asm/unwind_hints.h>
  27
  28#include <linux/personality.h>
  29#include <linux/cache.h>
  30#include <linux/threads.h>
  31#include <linux/math64.h>
  32#include <linux/err.h>
  33#include <linux/irqflags.h>
  34#include <linux/mem_encrypt.h>
  35
  36/*
  37 * We handle most unaligned accesses in hardware.  On the other hand
  38 * unaligned DMA can be quite expensive on some Nehalem processors.
  39 *
  40 * Based on this we disable the IP header alignment in network drivers.
  41 */
  42#define NET_IP_ALIGN    0
  43
  44#define HBP_NUM 4
  45/*
   46 * Default implementation of the helper that returns the current
   47 * instruction pointer ("program counter").
  48 */
  49static inline void *current_text_addr(void)
  50{
  51        void *pc;
  52
  53        asm volatile("mov $1f, %0; 1:":"=r" (pc));
  54
  55        return pc;
  56}
  57
  58/*
  59 * These alignment constraints are for performance in the vSMP case,
  60 * but in the task_struct case we must also meet hardware imposed
  61 * alignment requirements of the FPU state:
  62 */
  63#ifdef CONFIG_X86_VSMP
  64# define ARCH_MIN_TASKALIGN             (1 << INTERNODE_CACHE_SHIFT)
  65# define ARCH_MIN_MMSTRUCT_ALIGN        (1 << INTERNODE_CACHE_SHIFT)
  66#else
  67# define ARCH_MIN_TASKALIGN             __alignof__(union fpregs_state)
  68# define ARCH_MIN_MMSTRUCT_ALIGN        0
  69#endif
  70
  71enum tlb_infos {
  72        ENTRIES,
  73        NR_INFO
  74};
  75
  76extern u16 __read_mostly tlb_lli_4k[NR_INFO];
  77extern u16 __read_mostly tlb_lli_2m[NR_INFO];
  78extern u16 __read_mostly tlb_lli_4m[NR_INFO];
  79extern u16 __read_mostly tlb_lld_4k[NR_INFO];
  80extern u16 __read_mostly tlb_lld_2m[NR_INFO];
  81extern u16 __read_mostly tlb_lld_4m[NR_INFO];
  82extern u16 __read_mostly tlb_lld_1g[NR_INFO];
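/*
 * Illustrative example (not part of the original header): the tlb_infos
 * enum above is the index into these arrays, so a hypothetical caller
 * reporting the 4K data-TLB capacity could do:
 *
 *	unsigned int dtlb_4k_entries = tlb_lld_4k[ENTRIES];
 *
 *	pr_info("4K DTLB entries: %u\n", dtlb_4k_entries);
 */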
  83
  84/*
  85 *  CPU type and hardware bug flags. Kept separately for each CPU.
  86 *  Members of this structure are referenced in head_32.S, so think twice
  87 *  before touching them. [mj]
  88 */
  89
  90struct cpuinfo_x86 {
  91        __u8                    x86;            /* CPU family */
  92        __u8                    x86_vendor;     /* CPU vendor */
  93        __u8                    x86_model;
  94        __u8                    x86_stepping;
  95#ifdef CONFIG_X86_64
   96        /* Number of 4K pages in DTLB/ITLB combined: */
  97        int                     x86_tlbsize;
  98#endif
  99        __u8                    x86_virt_bits;
 100        __u8                    x86_phys_bits;
 101        /* CPUID returned core id bits: */
 102        __u8                    x86_coreid_bits;
 103        __u8                    cu_id;
 104        /* Max extended CPUID function supported: */
 105        __u32                   extended_cpuid_level;
 106        /* Maximum supported CPUID level, -1=no CPUID: */
 107        int                     cpuid_level;
 108        __u32                   x86_capability[NCAPINTS + NBUGINTS];
 109        char                    x86_vendor_id[16];
 110        char                    x86_model_id[64];
  111        /* In KB - valid for CPUs which support this call: */
 112        unsigned int            x86_cache_size;
 113        int                     x86_cache_alignment;    /* In bytes */
 114        /* Cache QoS architectural values: */
 115        int                     x86_cache_max_rmid;     /* max index */
 116        int                     x86_cache_occ_scale;    /* scale to bytes */
 117        int                     x86_power;
 118        unsigned long           loops_per_jiffy;
 119        /* cpuid returned max cores value: */
 120        u16                      x86_max_cores;
 121        u16                     apicid;
 122        u16                     initial_apicid;
 123        u16                     x86_clflush_size;
 124        /* number of cores as seen by the OS: */
 125        u16                     booted_cores;
 126        /* Physical processor id: */
 127        u16                     phys_proc_id;
 128        /* Logical processor id: */
 129        u16                     logical_proc_id;
 130        /* Core id: */
 131        u16                     cpu_core_id;
 132        /* Index into per_cpu list: */
 133        u16                     cpu_index;
 134        u32                     microcode;
 135        unsigned                initialized : 1;
 136} __randomize_layout;
 137
 138struct cpuid_regs {
 139        u32 eax, ebx, ecx, edx;
 140};
 141
 142enum cpuid_regs_idx {
 143        CPUID_EAX = 0,
 144        CPUID_EBX,
 145        CPUID_ECX,
 146        CPUID_EDX,
 147};
 148
 149#define X86_VENDOR_INTEL        0
 150#define X86_VENDOR_CYRIX        1
 151#define X86_VENDOR_AMD          2
 152#define X86_VENDOR_UMC          3
 153#define X86_VENDOR_CENTAUR      5
 154#define X86_VENDOR_TRANSMETA    7
 155#define X86_VENDOR_NSC          8
 156#define X86_VENDOR_NUM          9
 157
 158#define X86_VENDOR_UNKNOWN      0xff
 159
 160/*
 161 * capabilities of CPUs
 162 */
 163extern struct cpuinfo_x86       boot_cpu_data;
 164extern struct cpuinfo_x86       new_cpu_data;
 165
 166extern struct x86_hw_tss        doublefault_tss;
 167extern __u32                    cpu_caps_cleared[NCAPINTS + NBUGINTS];
 168extern __u32                    cpu_caps_set[NCAPINTS + NBUGINTS];
 169
 170#ifdef CONFIG_SMP
 171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 172#define cpu_data(cpu)           per_cpu(cpu_info, cpu)
 173#else
 174#define cpu_info                boot_cpu_data
 175#define cpu_data(cpu)           boot_cpu_data
 176#endif
 177
 178extern const struct seq_operations cpuinfo_op;
 179
 180#define cache_line_size()       (boot_cpu_data.x86_cache_alignment)
 181
 182extern void cpu_detect(struct cpuinfo_x86 *c);
 183
 184extern void early_cpu_init(void);
 185extern void identify_boot_cpu(void);
 186extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 187extern void print_cpu_info(struct cpuinfo_x86 *);
 188void print_cpu_msr(struct cpuinfo_x86 *);
 189extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 190extern u32 get_scattered_cpuid_leaf(unsigned int level,
 191                                    unsigned int sub_leaf,
 192                                    enum cpuid_regs_idx reg);
 193extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 194extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 195
 196extern void detect_extended_topology(struct cpuinfo_x86 *c);
 197extern void detect_ht(struct cpuinfo_x86 *c);
 198
 199#ifdef CONFIG_X86_32
 200extern int have_cpuid_p(void);
 201#else
 202static inline int have_cpuid_p(void)
 203{
 204        return 1;
 205}
 206#endif
 207static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 208                                unsigned int *ecx, unsigned int *edx)
 209{
 210        /* ecx is often an input as well as an output. */
 211        asm volatile("cpuid"
 212            : "=a" (*eax),
 213              "=b" (*ebx),
 214              "=c" (*ecx),
 215              "=d" (*edx)
 216            : "0" (*eax), "2" (*ecx)
 217            : "memory");
 218}
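/*
 * Usage sketch (illustrative, not part of the original header): leaf 0
 * returns the maximum standard CPUID level in EAX and the vendor string
 * in EBX/EDX/ECX:
 *
 *	unsigned int eax = 0, ebx, ecx = 0, edx;
 *
 *	native_cpuid(&eax, &ebx, &ecx, &edx);
 *	max_std_leaf = eax;	(the vendor string sits in ebx, edx, ecx)
 */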
 219
 220#define native_cpuid_reg(reg)                                   \
 221static inline unsigned int native_cpuid_##reg(unsigned int op)  \
 222{                                                               \
 223        unsigned int eax = op, ebx, ecx = 0, edx;               \
 224                                                                \
 225        native_cpuid(&eax, &ebx, &ecx, &edx);                   \
 226                                                                \
 227        return reg;                                             \
 228}
 229
 230/*
 231 * Native CPUID functions returning a single datum.
 232 */
 233native_cpuid_reg(eax)
 234native_cpuid_reg(ebx)
 235native_cpuid_reg(ecx)
 236native_cpuid_reg(edx)
 237
 238/*
 239 * Friendlier CR3 helpers.
 240 */
 241static inline unsigned long read_cr3_pa(void)
 242{
 243        return __read_cr3() & CR3_ADDR_MASK;
 244}
 245
 246static inline unsigned long native_read_cr3_pa(void)
 247{
 248        return __native_read_cr3() & CR3_ADDR_MASK;
 249}
 250
 251static inline void load_cr3(pgd_t *pgdir)
 252{
 253        write_cr3(__sme_pa(pgdir));
 254}
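/*
 * Illustrative note (not part of the original header): read_cr3_pa() strips
 * the PCID/flag bits, so a hypothetical caller can turn the current CR3
 * back into a usable pgd pointer with something like:
 *
 *	pgd_t *pgd = __va(read_cr3_pa());
 */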
 255
 256/*
 257 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
 258 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
 259 * unrelated to the task-switch mechanism:
 260 */
 261#ifdef CONFIG_X86_32
 262/* This is the TSS defined by the hardware. */
 263struct x86_hw_tss {
 264        unsigned short          back_link, __blh;
 265        unsigned long           sp0;
 266        unsigned short          ss0, __ss0h;
 267        unsigned long           sp1;
 268
 269        /*
 270         * We don't use ring 1, so ss1 is a convenient scratch space in
 271         * the same cacheline as sp0.  We use ss1 to cache the value in
 272         * MSR_IA32_SYSENTER_CS.  When we context switch
 273         * MSR_IA32_SYSENTER_CS, we first check if the new value being
 274         * written matches ss1, and, if it's not, then we wrmsr the new
 275         * value and update ss1.
 276         *
 277         * The only reason we context switch MSR_IA32_SYSENTER_CS is
 278         * that we set it to zero in vm86 tasks to avoid corrupting the
 279         * stack if we were to go through the sysenter path from vm86
 280         * mode.
 281         */
 282        unsigned short          ss1;    /* MSR_IA32_SYSENTER_CS */
 283
 284        unsigned short          __ss1h;
 285        unsigned long           sp2;
 286        unsigned short          ss2, __ss2h;
 287        unsigned long           __cr3;
 288        unsigned long           ip;
 289        unsigned long           flags;
 290        unsigned long           ax;
 291        unsigned long           cx;
 292        unsigned long           dx;
 293        unsigned long           bx;
 294        unsigned long           sp;
 295        unsigned long           bp;
 296        unsigned long           si;
 297        unsigned long           di;
 298        unsigned short          es, __esh;
 299        unsigned short          cs, __csh;
 300        unsigned short          ss, __ssh;
 301        unsigned short          ds, __dsh;
 302        unsigned short          fs, __fsh;
 303        unsigned short          gs, __gsh;
 304        unsigned short          ldt, __ldth;
 305        unsigned short          trace;
 306        unsigned short          io_bitmap_base;
 307
 308} __attribute__((packed));
 309#else
 310struct x86_hw_tss {
 311        u32                     reserved1;
 312        u64                     sp0;
 313
 314        /*
 315         * We store cpu_current_top_of_stack in sp1 so it's always accessible.
 316         * Linux does not use ring 1, so sp1 is not otherwise needed.
 317         */
 318        u64                     sp1;
 319
 320        u64                     sp2;
 321        u64                     reserved2;
 322        u64                     ist[7];
 323        u32                     reserved3;
 324        u32                     reserved4;
 325        u16                     reserved5;
 326        u16                     io_bitmap_base;
 327
 328} __attribute__((packed));
 329#endif
 330
 331/*
 332 * IO-bitmap sizes:
 333 */
 334#define IO_BITMAP_BITS                  65536
 335#define IO_BITMAP_BYTES                 (IO_BITMAP_BITS/8)
 336#define IO_BITMAP_LONGS                 (IO_BITMAP_BYTES/sizeof(long))
 337#define IO_BITMAP_OFFSET                (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 338#define INVALID_IO_BITMAP_OFFSET        0x8000
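/*
 * Worked numbers (illustrative): with IO_BITMAP_BITS = 65536 the bitmap
 * covers the whole 16-bit I/O port space, i.e.
 *
 *	IO_BITMAP_BYTES = 65536 / 8           = 8192 bytes
 *	IO_BITMAP_LONGS = 8192 / sizeof(long) = 1024 on 64-bit (2048 on 32-bit)
 */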
 339
 340struct entry_stack {
 341        unsigned long           words[64];
 342};
 343
 344struct entry_stack_page {
 345        struct entry_stack stack;
 346} __aligned(PAGE_SIZE);
 347
 348struct tss_struct {
 349        /*
 350         * The fixed hardware portion.  This must not cross a page boundary
 351         * at risk of violating the SDM's advice and potentially triggering
 352         * errata.
 353         */
 354        struct x86_hw_tss       x86_tss;
 355
 356        /*
 357         * The extra 1 is there because the CPU will access an
 358         * additional byte beyond the end of the IO permission
 359         * bitmap. The extra byte must be all 1 bits, and must
 360         * be within the limit.
 361         */
 362        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
 363} __aligned(PAGE_SIZE);
 364
 365DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 366
 367/*
  368 * The extra sizeof(unsigned long) accounts for the additional "long"
  369 * at the end of the io_bitmap[] array.
  370 *
  371 * The -1 is because the segment base + limit must point to the address
  372 * of the last valid byte.
 373 */
 374#define __KERNEL_TSS_LIMIT      \
 375        (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
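/*
 * Worked example (illustrative, 64-bit case): the packed hardware TSS is
 * 104 bytes and io_bitmap follows it directly, so
 *
 *	__KERNEL_TSS_LIMIT = 104 + 8192 + 8 - 1 = 8303
 *
 * i.e. the limit points at the last byte of the extra "long" that follows
 * the bitmap proper.
 */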
 376
 377#ifdef CONFIG_X86_32
 378DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 379#else
 380/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
 381#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 382#endif
 383
 384/*
  385 * Save the original IST values for checking stack pointers during debugging.
 386 */
 387struct orig_ist {
 388        unsigned long           ist[7];
 389};
 390
 391#ifdef CONFIG_X86_64
 392DECLARE_PER_CPU(struct orig_ist, orig_ist);
 393
 394union irq_stack_union {
 395        char irq_stack[IRQ_STACK_SIZE];
 396        /*
 397         * GCC hardcodes the stack canary as %gs:40.  Since the
 398         * irq_stack is the object at %gs:0, we reserve the bottom
 399         * 48 bytes of the irq stack for the canary.
 400         */
 401        struct {
 402                char gs_base[40];
 403                unsigned long stack_canary;
 404        };
 405};
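/*
 * Layout sketch (illustrative): with this union the per-CPU area looks like
 *
 *	%gs:0  - %gs:39		bottom of irq_stack (aliased by gs_base[40])
 *	%gs:40 - %gs:47		stack_canary, where GCC-generated
 *				stack-protector code expects to find it
 */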
 406
 407DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
 408DECLARE_INIT_PER_CPU(irq_stack_union);
 409
 410DECLARE_PER_CPU(char *, irq_stack_ptr);
 411DECLARE_PER_CPU(unsigned int, irq_count);
 412extern asmlinkage void ignore_sysret(void);
 413#else   /* X86_64 */
 414#ifdef CONFIG_CC_STACKPROTECTOR
 415/*
  416 * Make sure the stack canary segment base is cache-aligned:
 417 *   "For Intel Atom processors, avoid non zero segment base address
 418 *    that is not aligned to cache line boundary at all cost."
 419 * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
 420 */
 421struct stack_canary {
 422        char __pad[20];         /* canary at %gs:20 */
 423        unsigned long canary;
 424};
 425DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 426#endif
 427/*
 428 * per-CPU IRQ handling stacks
 429 */
 430struct irq_stack {
 431        u32                     stack[THREAD_SIZE/sizeof(u32)];
 432} __aligned(THREAD_SIZE);
 433
 434DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 435DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 436#endif  /* X86_64 */
 437
 438extern unsigned int fpu_kernel_xstate_size;
 439extern unsigned int fpu_user_xstate_size;
 440
 441struct perf_event;
 442
 443typedef struct {
 444        unsigned long           seg;
 445} mm_segment_t;
 446
 447struct thread_struct {
 448        /* Cached TLS descriptors: */
 449        struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
 450#ifdef CONFIG_X86_32
 451        unsigned long           sp0;
 452#endif
 453        unsigned long           sp;
 454#ifdef CONFIG_X86_32
 455        unsigned long           sysenter_cs;
 456#else
 457        unsigned short          es;
 458        unsigned short          ds;
 459        unsigned short          fsindex;
 460        unsigned short          gsindex;
 461#endif
 462
 463#ifdef CONFIG_X86_64
 464        unsigned long           fsbase;
 465        unsigned long           gsbase;
 466#else
 467        /*
 468         * XXX: this could presumably be unsigned short.  Alternatively,
 469         * 32-bit kernels could be taught to use fsindex instead.
 470         */
 471        unsigned long fs;
 472        unsigned long gs;
 473#endif
 474
  475        /* Saved state of the ptrace hardware breakpoints */
 476        struct perf_event       *ptrace_bps[HBP_NUM];
 477        /* Debug status used for traps, single steps, etc... */
 478        unsigned long           debugreg6;
 479        /* Keep track of the exact dr7 value set by the user */
 480        unsigned long           ptrace_dr7;
 481        /* Fault info: */
 482        unsigned long           cr2;
 483        unsigned long           trap_nr;
 484        unsigned long           error_code;
 485#ifdef CONFIG_VM86
 486        /* Virtual 86 mode info */
 487        struct vm86             *vm86;
 488#endif
 489        /* IO permissions: */
 490        unsigned long           *io_bitmap_ptr;
 491        unsigned long           iopl;
 492        /* Max allowed port in the bitmap, in bytes: */
 493        unsigned                io_bitmap_max;
 494
 495        mm_segment_t            addr_limit;
 496
 497        unsigned int            sig_on_uaccess_err:1;
 498        unsigned int            uaccess_err:1;  /* uaccess failed */
 499
 500        /* Floating point and extended processor state */
 501        struct fpu              fpu;
 502        /*
 503         * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
 504         * the end.
 505         */
 506};
 507
 508/* Whitelist the FPU state from the task_struct for hardened usercopy. */
 509static inline void arch_thread_struct_whitelist(unsigned long *offset,
 510                                                unsigned long *size)
 511{
 512        *offset = offsetof(struct thread_struct, fpu.state);
 513        *size = fpu_kernel_xstate_size;
 514}
 515
 516/*
 517 * Thread-synchronous status.
 518 *
 519 * This is different from the flags in that nobody else
 520 * ever touches our thread-synchronous status, so we don't
 521 * have to worry about atomic accesses.
 522 */
  523#define TS_COMPAT               0x0002  /* 32-bit syscall active (64BIT) */
 524
 525/*
 526 * Set IOPL bits in EFLAGS from given mask
 527 */
 528static inline void native_set_iopl_mask(unsigned mask)
 529{
 530#ifdef CONFIG_X86_32
 531        unsigned int reg;
 532
 533        asm volatile ("pushfl;"
 534                      "popl %0;"
 535                      "andl %1, %0;"
 536                      "orl %2, %0;"
 537                      "pushl %0;"
 538                      "popfl"
 539                      : "=&r" (reg)
 540                      : "i" (~X86_EFLAGS_IOPL), "r" (mask));
 541#endif
 542}
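/*
 * Illustrative note (not part of the original header): X86_EFLAGS_IOPL is
 * the two-bit field at EFLAGS bits 12-13, so granting IOPL 3 amounts to
 *
 *	native_set_iopl_mask(3 << 12);	(i.e. X86_EFLAGS_IOPL itself)
 *
 * which is a no-op on 64-bit kernels, as the #ifdef above shows.
 */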
 543
 544static inline void
 545native_load_sp0(unsigned long sp0)
 546{
 547        this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 548}
 549
 550static inline void native_swapgs(void)
 551{
 552#ifdef CONFIG_X86_64
 553        asm volatile("swapgs" ::: "memory");
 554#endif
 555}
 556
 557static inline unsigned long current_top_of_stack(void)
 558{
 559        /*
 560         *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
 561         *  and around vm86 mode and sp0 on x86_64 is special because of the
 562         *  entry trampoline.
 563         */
 564        return this_cpu_read_stable(cpu_current_top_of_stack);
 565}
 566
 567static inline bool on_thread_stack(void)
 568{
 569        return (unsigned long)(current_top_of_stack() -
 570                               current_stack_pointer) < THREAD_SIZE;
 571}
 572
 573#ifdef CONFIG_PARAVIRT
 574#include <asm/paravirt.h>
 575#else
 576#define __cpuid                 native_cpuid
 577
 578static inline void load_sp0(unsigned long sp0)
 579{
 580        native_load_sp0(sp0);
 581}
 582
 583#define set_iopl_mask native_set_iopl_mask
 584#endif /* CONFIG_PARAVIRT */
 585
 586/* Free all resources held by a thread. */
 587extern void release_thread(struct task_struct *);
 588
 589unsigned long get_wchan(struct task_struct *p);
 590
 591/*
  592 * Generic CPUID function.
  593 * Clear %ecx, since some CPUs (Cyrix MII) do not set or clear %ecx,
  594 * resulting in stale register contents being returned.
 595 */
 596static inline void cpuid(unsigned int op,
 597                         unsigned int *eax, unsigned int *ebx,
 598                         unsigned int *ecx, unsigned int *edx)
 599{
 600        *eax = op;
 601        *ecx = 0;
 602        __cpuid(eax, ebx, ecx, edx);
 603}
 604
 605/* Some CPUID calls want 'count' to be placed in ecx */
 606static inline void cpuid_count(unsigned int op, int count,
 607                               unsigned int *eax, unsigned int *ebx,
 608                               unsigned int *ecx, unsigned int *edx)
 609{
 610        *eax = op;
 611        *ecx = count;
 612        __cpuid(eax, ebx, ecx, edx);
 613}
 614
 615/*
 616 * CPUID functions returning a single datum
 617 */
 618static inline unsigned int cpuid_eax(unsigned int op)
 619{
 620        unsigned int eax, ebx, ecx, edx;
 621
 622        cpuid(op, &eax, &ebx, &ecx, &edx);
 623
 624        return eax;
 625}
 626
 627static inline unsigned int cpuid_ebx(unsigned int op)
 628{
 629        unsigned int eax, ebx, ecx, edx;
 630
 631        cpuid(op, &eax, &ebx, &ecx, &edx);
 632
 633        return ebx;
 634}
 635
 636static inline unsigned int cpuid_ecx(unsigned int op)
 637{
 638        unsigned int eax, ebx, ecx, edx;
 639
 640        cpuid(op, &eax, &ebx, &ecx, &edx);
 641
 642        return ecx;
 643}
 644
 645static inline unsigned int cpuid_edx(unsigned int op)
 646{
 647        unsigned int eax, ebx, ecx, edx;
 648
 649        cpuid(op, &eax, &ebx, &ecx, &edx);
 650
 651        return edx;
 652}
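/*
 * Usage sketch (illustrative, not part of the original header): the
 * single-datum helpers are handy for one-off probes, e.g. reading the
 * physical address width advertised by extended leaf 0x80000008:
 *
 *	if (cpuid_eax(0x80000000) >= 0x80000008)
 *		phys_bits = cpuid_eax(0x80000008) & 0xff;
 */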
 653
 654/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 655static __always_inline void rep_nop(void)
 656{
 657        asm volatile("rep; nop" ::: "memory");
 658}
 659
 660static __always_inline void cpu_relax(void)
 661{
 662        rep_nop();
 663}
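/*
 * Usage sketch (illustrative): cpu_relax() belongs in the body of any
 * busy-wait loop so the pipeline and a sibling hyperthread are not
 * starved while spinning, e.g.:
 *
 *	while (!READ_ONCE(flag))
 *		cpu_relax();
 */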
 664
 665/*
 666 * This function forces the icache and prefetched instruction stream to
 667 * catch up with reality in two very specific cases:
 668 *
 669 *  a) Text was modified using one virtual address and is about to be executed
 670 *     from the same physical page at a different virtual address.
 671 *
 672 *  b) Text was modified on a different CPU, may subsequently be
 673 *     executed on this CPU, and you want to make sure the new version
  674 *     gets executed.  This generally means you're calling this in an IPI.
 675 *
 676 * If you're calling this for a different reason, you're probably doing
 677 * it wrong.
 678 */
 679static inline void sync_core(void)
 680{
 681        /*
 682         * There are quite a few ways to do this.  IRET-to-self is nice
 683         * because it works on every CPU, at any CPL (so it's compatible
 684         * with paravirtualization), and it never exits to a hypervisor.
  685         * The only downsides are that it's a bit slow (it seems to be
 686         * a bit more than 2x slower than the fastest options) and that
 687         * it unmasks NMIs.  The "push %cs" is needed because, in
 688         * paravirtual environments, __KERNEL_CS may not be a valid CS
 689         * value when we do IRET directly.
 690         *
 691         * In case NMI unmasking or performance ever becomes a problem,
 692         * the next best option appears to be MOV-to-CR2 and an
 693         * unconditional jump.  That sequence also works on all CPUs,
 694         * but it will fault at CPL3 (i.e. Xen PV).
 695         *
 696         * CPUID is the conventional way, but it's nasty: it doesn't
 697         * exist on some 486-like CPUs, and it usually exits to a
 698         * hypervisor.
 699         *
 700         * Like all of Linux's memory ordering operations, this is a
 701         * compiler barrier as well.
 702         */
 703#ifdef CONFIG_X86_32
 704        asm volatile (
 705                "pushfl\n\t"
 706                "pushl %%cs\n\t"
 707                "pushl $1f\n\t"
 708                "iret\n\t"
 709                "1:"
 710                : ASM_CALL_CONSTRAINT : : "memory");
 711#else
 712        unsigned int tmp;
 713
 714        asm volatile (
 715                UNWIND_HINT_SAVE
 716                "mov %%ss, %0\n\t"
 717                "pushq %q0\n\t"
 718                "pushq %%rsp\n\t"
 719                "addq $8, (%%rsp)\n\t"
 720                "pushfq\n\t"
 721                "mov %%cs, %0\n\t"
 722                "pushq %q0\n\t"
 723                "pushq $1f\n\t"
 724                "iretq\n\t"
 725                UNWIND_HINT_RESTORE
 726                "1:"
 727                : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 728#endif
 729}
 730
 731extern void select_idle_routine(const struct cpuinfo_x86 *c);
 732extern void amd_e400_c1e_apic_setup(void);
 733
 734extern unsigned long            boot_option_idle_override;
 735
 736enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 737                         IDLE_POLL};
 738
 739extern void enable_sep_cpu(void);
 740extern int sysenter_setup(void);
 741
 742extern void early_trap_init(void);
 743void early_trap_pf_init(void);
 744
 745/* Defined in head.S */
 746extern struct desc_ptr          early_gdt_descr;
 747
 748extern void cpu_set_gdt(int);
 749extern void switch_to_new_gdt(int);
 750extern void load_direct_gdt(int);
 751extern void load_fixmap_gdt(int);
 752extern void load_percpu_segment(int);
 753extern void cpu_init(void);
 754
 755static inline unsigned long get_debugctlmsr(void)
 756{
 757        unsigned long debugctlmsr = 0;
 758
 759#ifndef CONFIG_X86_DEBUGCTLMSR
 760        if (boot_cpu_data.x86 < 6)
 761                return 0;
 762#endif
 763        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 764
 765        return debugctlmsr;
 766}
 767
 768static inline void update_debugctlmsr(unsigned long debugctlmsr)
 769{
 770#ifndef CONFIG_X86_DEBUGCTLMSR
 771        if (boot_cpu_data.x86 < 6)
 772                return;
 773#endif
 774        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 775}
 776
 777extern void set_task_blockstep(struct task_struct *task, bool on);
 778
 779/* Boot loader type from the setup header: */
 780extern int                      bootloader_type;
 781extern int                      bootloader_version;
 782
 783extern char                     ignore_fpu_irq;
 784
 785#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 786#define ARCH_HAS_PREFETCHW
 787#define ARCH_HAS_SPINLOCK_PREFETCH
 788
 789#ifdef CONFIG_X86_32
 790# define BASE_PREFETCH          ""
 791# define ARCH_HAS_PREFETCH
 792#else
 793# define BASE_PREFETCH          "prefetcht0 %P1"
 794#endif
 795
 796/*
 797 * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
 798 *
  799 * It's not worth caring about 3dnow prefetches for the K6
 800 * because they are microcoded there and very slow.
 801 */
 802static inline void prefetch(const void *x)
 803{
 804        alternative_input(BASE_PREFETCH, "prefetchnta %P1",
 805                          X86_FEATURE_XMM,
 806                          "m" (*(const char *)x));
 807}
 808
 809/*
 810 * 3dnow prefetch to get an exclusive cache line.
 811 * Useful for spinlocks to avoid one state transition in the
 812 * cache coherency protocol:
 813 */
 814static inline void prefetchw(const void *x)
 815{
 816        alternative_input(BASE_PREFETCH, "prefetchw %P1",
 817                          X86_FEATURE_3DNOWPREFETCH,
 818                          "m" (*(const char *)x));
 819}
 820
 821static inline void spin_lock_prefetch(const void *x)
 822{
 823        prefetchw(x);
 824}
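/*
 * Usage sketch (illustrative, not part of the original header): a common
 * pattern is to prefetch the next node of a list while the current one is
 * still being processed; process() is a hypothetical helper here:
 *
 *	for (pos = head->next; pos != head; pos = pos->next) {
 *		prefetch(pos->next);
 *		process(pos);
 *	}
 */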
 825
 826#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
 827                           TOP_OF_KERNEL_STACK_PADDING)
 828
 829#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
 830
 831#define task_pt_regs(task) \
 832({                                                                      \
 833        unsigned long __ptr = (unsigned long)task_stack_page(task);     \
 834        __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
 835        ((struct pt_regs *)__ptr) - 1;                                  \
 836})
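/*
 * Layout sketch (illustrative): task_pt_regs() relies on the fixed layout
 * at the top of a task's kernel stack:
 *
 *	task_stack_page(task) + THREAD_SIZE	end of the stack area
 *	  - TOP_OF_KERNEL_STACK_PADDING		task_top_of_stack(task)
 *	  - sizeof(struct pt_regs)		task_pt_regs(task)
 *
 * i.e. the user-mode register frame always sits just below the reserved
 * padding at the very top of the stack.
 */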
 837
 838#ifdef CONFIG_X86_32
 839/*
 840 * User space process size: 3GB (default).
 841 */
 842#define IA32_PAGE_OFFSET        PAGE_OFFSET
 843#define TASK_SIZE               PAGE_OFFSET
 844#define TASK_SIZE_LOW           TASK_SIZE
 845#define TASK_SIZE_MAX           TASK_SIZE
 846#define DEFAULT_MAP_WINDOW      TASK_SIZE
 847#define STACK_TOP               TASK_SIZE
 848#define STACK_TOP_MAX           STACK_TOP
 849
 850#define INIT_THREAD  {                                                    \
 851        .sp0                    = TOP_OF_INIT_STACK,                      \
 852        .sysenter_cs            = __KERNEL_CS,                            \
 853        .io_bitmap_ptr          = NULL,                                   \
 854        .addr_limit             = KERNEL_DS,                              \
 855}
 856
 857#define KSTK_ESP(task)          (task_pt_regs(task)->sp)
 858
 859#else
 860/*
 861 * User space process size.  This is the first address outside the user range.
 862 * There are a few constraints that determine this:
 863 *
 864 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 865 * address, then that syscall will enter the kernel with a
 866 * non-canonical return address, and SYSRET will explode dangerously.
 867 * We avoid this particular problem by preventing anything executable
 868 * from being mapped at the maximum canonical address.
 869 *
 870 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 871 * CPUs malfunction if they execute code from the highest canonical page.
 872 * They'll speculate right off the end of the canonical space, and
 873 * bad things happen.  This is worked around in the same way as the
 874 * Intel problem.
 875 *
 876 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 877 */
 878#define TASK_SIZE_MAX   ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 879
 880#define DEFAULT_MAP_WINDOW      ((1UL << 47) - PAGE_SIZE)
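/*
 * Worked numbers (illustrative): with 4-level paging __VIRTUAL_MASK_SHIFT
 * is 47, so both limits come out the same:
 *
 *	TASK_SIZE_MAX = DEFAULT_MAP_WINDOW = (1UL << 47) - 4096
 *	              = 0x00007ffffffff000
 *
 * Only with 5-level paging (__VIRTUAL_MASK_SHIFT == 56) does TASK_SIZE_MAX
 * grow beyond the default map window.
 */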
 881
 882/* This decides where the kernel will search for a free chunk of vm
 883 * space during mmap's.
 884 */
 885#define IA32_PAGE_OFFSET        ((current->personality & ADDR_LIMIT_3GB) ? \
 886                                        0xc0000000 : 0xFFFFe000)
 887
 888#define TASK_SIZE_LOW           (test_thread_flag(TIF_ADDR32) ? \
 889                                        IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
 890#define TASK_SIZE               (test_thread_flag(TIF_ADDR32) ? \
 891                                        IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 892#define TASK_SIZE_OF(child)     ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
 893                                        IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 894
 895#define STACK_TOP               TASK_SIZE_LOW
 896#define STACK_TOP_MAX           TASK_SIZE_MAX
 897
 898#define INIT_THREAD  {                                          \
 899        .addr_limit             = KERNEL_DS,                    \
 900}
 901
 902extern unsigned long KSTK_ESP(struct task_struct *task);
 903
 904#endif /* CONFIG_X86_64 */
 905
 906extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 907                                               unsigned long new_sp);
 908
 909/*
 910 * This decides where the kernel will search for a free chunk of vm
 911 * space during mmap's.
 912 */
 913#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
 914#define TASK_UNMAPPED_BASE              __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
 915
 916#define KSTK_EIP(task)          (task_pt_regs(task)->ip)
 917
 918/* Get/set a process' ability to use the timestamp counter instruction */
 919#define GET_TSC_CTL(adr)        get_tsc_mode((adr))
 920#define SET_TSC_CTL(val)        set_tsc_mode((val))
 921
 922extern int get_tsc_mode(unsigned long adr);
 923extern int set_tsc_mode(unsigned int val);
 924
 925DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 926
 927/* Register/unregister a process' MPX related resource */
 928#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
 929#define MPX_DISABLE_MANAGEMENT()        mpx_disable_management()
 930
 931#ifdef CONFIG_X86_INTEL_MPX
 932extern int mpx_enable_management(void);
 933extern int mpx_disable_management(void);
 934#else
 935static inline int mpx_enable_management(void)
 936{
 937        return -EINVAL;
 938}
 939static inline int mpx_disable_management(void)
 940{
 941        return -EINVAL;
 942}
 943#endif /* CONFIG_X86_INTEL_MPX */
 944
 945#ifdef CONFIG_CPU_SUP_AMD
 946extern u16 amd_get_nb_id(int cpu);
 947extern u32 amd_get_nodes_per_socket(void);
 948#else
 949static inline u16 amd_get_nb_id(int cpu)                { return 0; }
 950static inline u32 amd_get_nodes_per_socket(void)        { return 0; }
 951#endif
 952
 953static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 954{
 955        uint32_t base, eax, signature[3];
 956
 957        for (base = 0x40000000; base < 0x40010000; base += 0x100) {
 958                cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
 959
 960                if (!memcmp(sig, signature, 12) &&
 961                    (leaves == 0 || ((eax - base) >= leaves)))
 962                        return base;
 963        }
 964
 965        return 0;
 966}
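/*
 * Usage sketch (illustrative, not part of the original header): guests probe
 * for a particular hypervisor by scanning the 0x40000000 leaf range for its
 * 12-byte signature, e.g. KVM's "KVMKVMKVM\0\0\0":
 *
 *	if (hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0))
 *		... running as a KVM guest ...
 */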
 967
 968extern unsigned long arch_align_stack(unsigned long sp);
 969extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 970
 971void default_idle(void);
 972#ifdef  CONFIG_XEN
 973bool xen_set_default_idle(void);
 974#else
 975#define xen_set_default_idle 0
 976#endif
 977
 978void stop_this_cpu(void *dummy);
 979void df_debug(struct pt_regs *regs, long error_code);
 980void microcode_check(void);
 981#endif /* _ASM_X86_PROCESSOR_H */
 982