linux/arch/x86/include/asm/fpu/types.h
<<
>>
Prefs
   1/*
   2 * FPU data structures:
   3 */
   4#ifndef _ASM_X86_FPU_H
   5#define _ASM_X86_FPU_H
   6
   7/*
   8 * The legacy x87 FPU state format, as saved by FSAVE and
   9 * restored by the FRSTOR instructions:
  10 */
  11struct fregs_state {
  12        u32                     cwd;    /* FPU Control Word             */
  13        u32                     swd;    /* FPU Status Word              */
  14        u32                     twd;    /* FPU Tag Word                 */
  15        u32                     fip;    /* FPU IP Offset                */
  16        u32                     fcs;    /* FPU IP Selector              */
  17        u32                     foo;    /* FPU Operand Pointer Offset   */
  18        u32                     fos;    /* FPU Operand Pointer Selector */
  19
  20        /* 8*10 bytes for each FP-reg = 80 bytes:                       */
  21        u32                     st_space[20];
  22
  23        /* Software status information [not touched by FSAVE]:          */
  24        u32                     status;
  25};
  26
  27/*
  28 * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and
  29 * restored by the FXRSTOR instructions. It's similar to the FSAVE
  30 * format, but differs in some areas, plus has extensions at
  31 * the end for the XMM registers.
  32 */
  33struct fxregs_state {
  34        u16                     cwd; /* Control Word                    */
  35        u16                     swd; /* Status Word                     */
  36        u16                     twd; /* Tag Word                        */
  37        u16                     fop; /* Last Instruction Opcode         */
  38        union {
  39                struct {
  40                        u64     rip; /* Instruction Pointer             */
  41                        u64     rdp; /* Data Pointer                    */
  42                };
  43                struct {
  44                        u32     fip; /* FPU IP Offset                   */
  45                        u32     fcs; /* FPU IP Selector                 */
  46                        u32     foo; /* FPU Operand Offset              */
  47                        u32     fos; /* FPU Operand Selector            */
  48                };
  49        };
  50        u32                     mxcsr;          /* MXCSR Register State */
  51        u32                     mxcsr_mask;     /* MXCSR Mask           */
  52
  53        /* 8*16 bytes for each FP-reg = 128 bytes:                      */
  54        u32                     st_space[32];
  55
  56        /* 16*16 bytes for each XMM-reg = 256 bytes:                    */
  57        u32                     xmm_space[64];
  58
  59        u32                     padding[12];
  60
  61        union {
  62                u32             padding1[12];
  63                u32             sw_reserved[12];
  64        };
  65
  66} __attribute__((aligned(16)));
  67
  68/* Default value for fxregs_state.mxcsr: */
  69#define MXCSR_DEFAULT           0x1f80
  70
  71/*
  72 * Software based FPU emulation state. This is arbitrary really,
  73 * it matches the x87 format to make it easier to understand:
  74 */
  75struct swregs_state {
  76        u32                     cwd;
  77        u32                     swd;
  78        u32                     twd;
  79        u32                     fip;
  80        u32                     fcs;
  81        u32                     foo;
  82        u32                     fos;
  83        /* 8*10 bytes for each FP-reg = 80 bytes: */
  84        u32                     st_space[20];
  85        u8                      ftop;
  86        u8                      changed;
  87        u8                      lookahead;
  88        u8                      no_update;
  89        u8                      rm;
  90        u8                      alimit;
  91        struct math_emu_info    *info;
  92        u32                     entry_eip;
  93};
  94
  95/*
  96 * List of XSAVE features Linux knows about:
  97 */
  98enum xfeature {
  99        XFEATURE_FP,
 100        XFEATURE_SSE,
 101        /*
 102         * Values above here are "legacy states".
 103         * Those below are "extended states".
 104         */
 105        XFEATURE_YMM,
 106        XFEATURE_BNDREGS,
 107        XFEATURE_BNDCSR,
 108        XFEATURE_OPMASK,
 109        XFEATURE_ZMM_Hi256,
 110        XFEATURE_Hi16_ZMM,
 111
 112        XFEATURE_MAX,
 113};
 114
 115#define XFEATURE_MASK_FP                (1 << XFEATURE_FP)
 116#define XFEATURE_MASK_SSE               (1 << XFEATURE_SSE)
 117#define XFEATURE_MASK_YMM               (1 << XFEATURE_YMM)
 118#define XFEATURE_MASK_BNDREGS           (1 << XFEATURE_BNDREGS)
 119#define XFEATURE_MASK_BNDCSR            (1 << XFEATURE_BNDCSR)
 120#define XFEATURE_MASK_OPMASK            (1 << XFEATURE_OPMASK)
 121#define XFEATURE_MASK_ZMM_Hi256         (1 << XFEATURE_ZMM_Hi256)
 122#define XFEATURE_MASK_Hi16_ZMM          (1 << XFEATURE_Hi16_ZMM)
 123
 124#define XFEATURE_MASK_FPSSE             (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 125#define XFEATURE_MASK_AVX512            (XFEATURE_MASK_OPMASK \
 126                                         | XFEATURE_MASK_ZMM_Hi256 \
 127                                         | XFEATURE_MASK_Hi16_ZMM)
 128
 129#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
 130
 131struct reg_128_bit {
 132        u8      regbytes[128/8];
 133};
 134struct reg_256_bit {
 135        u8      regbytes[256/8];
 136};
 137struct reg_512_bit {
 138        u8      regbytes[512/8];
 139};
 140
 141/*
 142 * State component 2:
 143 *
 144 * There are 16x 256-bit AVX registers named YMM0-YMM15.
 145 * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
 146 * and are stored in 'struct fxregs_state::xmm_space[]' in the
 147 * "legacy" area.
 148 *
 149 * The high 128 bits are stored here.
 150 */
 151struct ymmh_struct {
 152        struct reg_128_bit              hi_ymm[16];
 153} __packed;
 154
 155/* Intel MPX support: */
 156
 157struct mpx_bndreg {
 158        u64                             lower_bound;
 159        u64                             upper_bound;
 160} __packed;
 161/*
 162 * State component 3 is used for the 4 128-bit bounds registers
 163 */
 164struct mpx_bndreg_state {
 165        struct mpx_bndreg               bndreg[4];
 166} __packed;
 167
 168/*
 169 * State component 4 is used for the 64-bit user-mode MPX
 170 * configuration register BNDCFGU and the 64-bit MPX status
 171 * register BNDSTATUS.  We call the pair "BNDCSR".
 172 */
 173struct mpx_bndcsr {
 174        u64                             bndcfgu;
 175        u64                             bndstatus;
 176} __packed;
 177
 178/*
 179 * The BNDCSR state is padded out to be 64-bytes in size.
 180 */
 181struct mpx_bndcsr_state {
 182        union {
 183                struct mpx_bndcsr               bndcsr;
 184                u8                              pad_to_64_bytes[64];
 185        };
 186} __packed;
 187
 188/* AVX-512 Components: */
 189
 190/*
 191 * State component 5 is used for the 8 64-bit opmask registers
 192 * k0-k7 (opmask state).
 193 */
 194struct avx_512_opmask_state {
 195        u64                             opmask_reg[8];
 196} __packed;
 197
 198/*
 199 * State component 6 is used for the upper 256 bits of the
 200 * registers ZMM0-ZMM15. These 16 256-bit values are denoted
 201 * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
 202 */
 203struct avx_512_zmm_uppers_state {
 204        struct reg_256_bit              zmm_upper[16];
 205} __packed;
 206
 207/*
 208 * State component 7 is used for the 16 512-bit registers
 209 * ZMM16-ZMM31 (Hi16_ZMM state).
 210 */
 211struct avx_512_hi16_state {
 212        struct reg_512_bit              hi16_zmm[16];
 213} __packed;
 214
 215struct xstate_header {
 216        u64                             xfeatures;
 217        u64                             xcomp_bv;
 218        u64                             reserved[6];
 219} __attribute__((packed));
 220
 221/*
 222 * This is our most modern FPU state format, as saved by the XSAVE
 223 * and restored by the XRSTOR instructions.
 224 *
 225 * It consists of a legacy fxregs portion, an xstate header and
 226 * subsequent areas as defined by the xstate header.  Not all CPUs
 227 * support all the extensions, so the size of the extended area
 228 * can vary quite a bit between CPUs.
 229 */
 230struct xregs_state {
 231        struct fxregs_state             i387;
 232        struct xstate_header            header;
 233        u8                              extended_state_area[0];
 234} __attribute__ ((packed, aligned (64)));
 235
 236/*
 237 * This is a union of all the possible FPU state formats
 238 * put together, so that we can pick the right one runtime.
 239 *
 240 * The size of the structure is determined by the largest
 241 * member - which is the xsave area.  The padding is there
 242 * to ensure that statically-allocated task_structs (just
 243 * the init_task today) have enough space.
 244 */
 245union fpregs_state {
 246        struct fregs_state              fsave;
 247        struct fxregs_state             fxsave;
 248        struct swregs_state             soft;
 249        struct xregs_state              xsave;
 250        u8 __padding[PAGE_SIZE];
 251};
 252
 253/*
 254 * Highest level per task FPU state data structure that
 255 * contains the FPU register state plus various FPU
 256 * state fields:
 257 */
 258struct fpu {
 259        /*
 260         * @last_cpu:
 261         *
 262         * Records the last CPU on which this context was loaded into
 263         * FPU registers. (In the lazy-restore case we might be
 264         * able to reuse FPU registers across multiple context switches
 265         * this way, if no intermediate task used the FPU.)
 266         *
 267         * A value of -1 is used to indicate that the FPU state in context
 268         * memory is newer than the FPU state in registers, and that the
 269         * FPU state should be reloaded next time the task is run.
 270         */
 271        unsigned int                    last_cpu;
 272
 273        /*
 274         * @fpstate_active:
 275         *
 276         * This flag indicates whether this context is active: if the task
 277         * is not running then we can restore from this context, if the task
 278         * is running then we should save into this context.
 279         */
 280        unsigned char                   fpstate_active;
 281
 282        /*
 283         * @fpregs_active:
 284         *
 285         * This flag determines whether a given context is actively
 286         * loaded into the FPU's registers and that those registers
 287         * represent the task's current FPU state.
 288         *
 289         * Note the interaction with fpstate_active:
 290         *
 291         *   # task does not use the FPU:
 292         *   fpstate_active == 0
 293         *
 294         *   # task uses the FPU and regs are active:
 295         *   fpstate_active == 1 && fpregs_active == 1
 296         *
 297         *   # the regs are inactive but still match fpstate:
 298         *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
 299         *
 300         * The third state is what we use for the lazy restore optimization
 301         * on lazy-switching CPUs.
 302         */
 303        unsigned char                   fpregs_active;
 304
 305        /*
 306         * @counter:
 307         *
 308         * This counter contains the number of consecutive context switches
 309         * during which the FPU stays used. If this is over a threshold, the
 310         * lazy FPU restore logic becomes eager, to save the trap overhead.
 311         * This is an unsigned char so that after 256 iterations the counter
 312         * wraps and the context switch behavior turns lazy again; this is to
 313         * deal with bursty apps that only use the FPU for a short time:
 314         */
 315        unsigned char                   counter;
 316        /*
 317         * @state:
 318         *
 319         * In-memory copy of all FPU registers that we save/restore
 320         * over context switches. If the task is using the FPU then
 321         * the registers in the FPU are more recent than this state
 322         * copy. If the task context-switches away then they get
 323         * saved here and represent the FPU state.
 324         *
 325         * After context switches there may be a (short) time period
 326         * during which the in-FPU hardware registers are unchanged
 327         * and still perfectly match this state, if the tasks
 328         * scheduled afterwards are not using the FPU.
 329         *
 330         * This is the 'lazy restore' window of optimization, which
 331         * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
 332         *
 333         * We detect whether a subsequent task uses the FPU via setting
 334         * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
 335         *
 336         * During this window, if the task gets scheduled again, we
 337         * might be able to skip having to do a restore from this
 338         * memory buffer to the hardware registers - at the cost of
 339         * incurring the overhead of #NM fault traps.
 340         *
 341         * Note that on modern CPUs that support the XSAVEOPT (or other
 342         * optimized XSAVE instructions), we don't use #NM traps anymore,
 343         * as the hardware can track whether FPU registers need saving
 344         * or not. On such CPUs we activate the non-lazy ('eagerfpu')
 345         * logic, which unconditionally saves/restores all FPU state
 346         * across context switches. (if FPU state exists.)
 347         */
 348        union fpregs_state              state;
 349        /*
 350         * WARNING: 'state' is dynamically-sized.  Do not put
 351         * anything after it here.
 352         */
 353};
 354
 355#endif /* _ASM_X86_FPU_H */
 356