linux/arch/x86/include/asm/fpu/types.h
<<
>>
Prefs
   1/*
   2 * FPU data structures:
   3 */
   4#ifndef _ASM_X86_FPU_H
   5#define _ASM_X86_FPU_H
   6
   7/*
   8 * The legacy x87 FPU state format, as saved by the FSAVE and
   9 * restored by the FRSTOR instruction:
  10 */
  11struct fregs_state {
  12        u32                     cwd;    /* FPU Control Word             */
  13        u32                     swd;    /* FPU Status Word              */
  14        u32                     twd;    /* FPU Tag Word                 */
  15        u32                     fip;    /* FPU IP Offset                */
  16        u32                     fcs;    /* FPU IP Selector              */
  17        u32                     foo;    /* FPU Operand Pointer Offset   */
  18        u32                     fos;    /* FPU Operand Pointer Selector */
  19
  20        /* 8 FP-regs * 10 bytes each = 80 bytes (20 u32 words):         */
  21        u32                     st_space[20];
  22
  23        /* Software status information [not touched by FSAVE]:          */
  24        u32                     status;
  25};
  26
  27/*
  28 * The legacy fx SSE/MMX FPU state format, as saved by the FXSAVE and
  29 * restored by the FXRSTOR instruction. It's similar to the FSAVE
  30 * format, but differs in some areas, plus has extensions at
  31 * the end for the XMM registers.
  32 */
  33struct fxregs_state {
  34        u16                     cwd; /* Control Word                    */
  35        u16                     swd; /* Status Word                     */
  36        u16                     twd; /* Tag Word                        */
  37        u16                     fop; /* Last Instruction Opcode         */
  38        union { /* two overlapping views of the same 16 bytes:          */
  39                struct {
  40                        u64     rip; /* Instruction Pointer             */
  41                        u64     rdp; /* Data Pointer                    */
  42                };
  43                struct {
  44                        u32     fip; /* FPU IP Offset                   */
  45                        u32     fcs; /* FPU IP Selector                 */
  46                        u32     foo; /* FPU Operand Offset              */
  47                        u32     fos; /* FPU Operand Selector            */
  48                };
  49        };
  50        u32                     mxcsr;          /* MXCSR Register State */
  51        u32                     mxcsr_mask;     /* MXCSR Mask           */
  52
  53        /* 8 FP-regs * 16 bytes each = 128 bytes:                       */
  54        u32                     st_space[32];
  55
  56        /* 16 XMM-regs * 16 bytes each = 256 bytes:                     */
  57        u32                     xmm_space[64];
  58
  59        u32                     padding[12];
  60
  61        union { /* one 12-word area reachable under two names:          */
  62                u32             padding1[12];
  63                u32             sw_reserved[12];
  64        };
  65
  66} __attribute__((aligned(16)));
  67
  68/* Default value for fxregs_state.mxcsr: */
  69#define MXCSR_DEFAULT           0x1f80
  70
  71/*
  72 * Software based FPU emulation state. The layout is arbitrary really;
  73 * its leading fields match the x87 (FSAVE) format for easier reading:
  74 */
  75struct swregs_state {
  76        u32                     cwd;
  77        u32                     swd;
  78        u32                     twd;
  79        u32                     fip;
  80        u32                     fcs;
  81        u32                     foo;
  82        u32                     fos;
  83        /* 8 FP-regs * 10 bytes each = 80 bytes: */
  84        u32                     st_space[20];
  85        u8                      ftop;   /* from here on: no counterpart in fregs_state */
  86        u8                      changed;
  87        u8                      lookahead;
  88        u8                      no_update;
  89        u8                      rm;
  90        u8                      alimit;
  91        struct math_emu_info    *info;
  92        u32                     entry_eip;
  93};
  94
  95/*
  96 * List of XSAVE features Linux knows about:
  97 */
  98enum xfeature {
  99        XFEATURE_FP,                            /* 0 */
 100        XFEATURE_SSE,                           /* 1 */
 101        /*
 102         * Values above here are "legacy states".
 103         * Those below are "extended states".
 104         */
 105        XFEATURE_YMM,                           /* 2 */
 106        XFEATURE_BNDREGS,                       /* 3 */
 107        XFEATURE_BNDCSR,                        /* 4 */
 108        XFEATURE_OPMASK,                        /* 5 */
 109        XFEATURE_ZMM_Hi256,                     /* 6 */
 110        XFEATURE_Hi16_ZMM,                      /* 7 */
 111        XFEATURE_PT_UNIMPLEMENTED_SO_FAR,       /* 8 */
 112        XFEATURE_PKRU,                          /* 9 */
 113
 114        XFEATURE_MAX,                           /* 10: count of the entries above */
 115};
 116
 117#define XFEATURE_MASK_FP                (1 << XFEATURE_FP)  /* each mask: bit n set for enum xfeature value n */
 118#define XFEATURE_MASK_SSE               (1 << XFEATURE_SSE)
 119#define XFEATURE_MASK_YMM               (1 << XFEATURE_YMM)
 120#define XFEATURE_MASK_BNDREGS           (1 << XFEATURE_BNDREGS)
 121#define XFEATURE_MASK_BNDCSR            (1 << XFEATURE_BNDCSR)
 122#define XFEATURE_MASK_OPMASK            (1 << XFEATURE_OPMASK)
 123#define XFEATURE_MASK_ZMM_Hi256         (1 << XFEATURE_ZMM_Hi256)
 124#define XFEATURE_MASK_Hi16_ZMM          (1 << XFEATURE_Hi16_ZMM)
 125#define XFEATURE_MASK_PT                (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 126#define XFEATURE_MASK_PKRU              (1 << XFEATURE_PKRU)
 127
 128#define XFEATURE_MASK_FPSSE             (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 129#define XFEATURE_MASK_AVX512            (XFEATURE_MASK_OPMASK \
 130                                         | XFEATURE_MASK_ZMM_Hi256 \
 131                                         | XFEATURE_MASK_Hi16_ZMM)
 132
 133#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM    /* first enum xfeature value after the legacy FP/SSE states */
 134
 135struct reg_128_bit {
 136        u8      regbytes[128/8];        /* 16 bytes */
 137};
 138struct reg_256_bit {
 139        u8      regbytes[256/8];        /* 32 bytes */
 140};
 141struct reg_512_bit {
 142        u8      regbytes[512/8];        /* 64 bytes */
 143};
 144
 145/*
 146 * State component 2:
 147 *
 148 * There are 16x 256-bit AVX registers named YMM0-YMM15.
 149 * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
 150 * and are stored in 'struct fxregs_state::xmm_space[]' in the
 151 * "legacy" area.
 152 *
 153 * The high 128 bits are stored here.
 154 */
 155struct ymmh_struct {
 156        struct reg_128_bit              hi_ymm[16];     /* 16 regs * 16 bytes = 256 bytes */
 157} __packed;
 158
 159/* Intel MPX support: */
 160
 161struct mpx_bndreg {
 162        u64                             lower_bound;
 163        u64                             upper_bound;
 164} __packed;
 165/*
 166 * State component 3 holds the four 128-bit bounds registers:
 167 */
 168struct mpx_bndreg_state {
 169        struct mpx_bndreg               bndreg[4];      /* 4 regs * 16 bytes = 64 bytes */
 170} __packed;
 171
 172/*
 173 * State component 4 is used for the 64-bit user-mode MPX
 174 * configuration register BNDCFGU and the 64-bit MPX status
 175 * register BNDSTATUS.  We call the pair "BNDCSR".
 176 */
 177struct mpx_bndcsr {
 178        u64                             bndcfgu;
 179        u64                             bndstatus;
 180} __packed;
 181
 182/*
 183 * The BNDCSR state is padded out to be 64 bytes in size.
 184 */
 185struct mpx_bndcsr_state {
 186        union { /* the 16-byte bndcsr overlays the start of the 64-byte pad */
 187                struct mpx_bndcsr               bndcsr;
 188                u8                              pad_to_64_bytes[64];
 189        };
 190} __packed;
 191
 192/* AVX-512 Components: */
 193
 194/*
 195 * State component 5 is used for the 8 64-bit opmask registers
 196 * k0-k7 (opmask state).
 197 */
 198struct avx_512_opmask_state {
 199        u64                             opmask_reg[8];  /* 8 regs * 8 bytes = 64 bytes */
 200} __packed;
 201
 202/*
 203 * State component 6 is used for the upper 256 bits of the
 204 * registers ZMM0-ZMM15. These 16 256-bit values are denoted
 205 * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
 206 */
 207struct avx_512_zmm_uppers_state {
 208        struct reg_256_bit              zmm_upper[16];  /* 16 regs * 32 bytes = 512 bytes */
 209} __packed;
 210
 211/*
 212 * State component 7 is used for the 16 512-bit registers
 213 * ZMM16-ZMM31 (Hi16_ZMM state).
 214 */
 215struct avx_512_hi16_state {
 216        struct reg_512_bit              hi16_zmm[16];   /* 16 regs * 64 bytes = 1024 bytes */
 217} __packed;
 218
 219/*
 220 * State component 9: 32-bit PKRU register.  The state is
 221 * 8 bytes long but only 4 bytes are used currently.
 222 */
 223struct pkru_state {
 224        u32                             pkru;
 225        u32                             pad;    /* the 4 currently unused bytes */
 226} __packed;
 227
 228struct xstate_header {
 229        u64                             xfeatures;
 230        u64                             xcomp_bv;       /* bit 63: XCOMP_BV_COMPACTED_FORMAT */
 231        u64                             reserved[6];    /* 6 * 8 bytes; the header is 64 bytes in total */
 232} __attribute__((packed));
 233
 234/*
 235 * Bit 63 of xstate_header.xcomp_bv indicates that the
 236 * extended_state_area is in compacted format.
 237 */
 238#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63)
 239
 240/*
 241 * This is our most modern FPU state format, as saved by the XSAVE
 242 * and restored by the XRSTOR instruction.
 243 *
 244 * It consists of a legacy fxregs portion, an xstate header and
 245 * subsequent areas as defined by the xstate header.  Not all CPUs
 246 * support all the extensions, so the size of the extended area
 247 * can vary quite a bit between CPUs.
 248 */
 249struct xregs_state {
 250        struct fxregs_state             i387;
 251        struct xstate_header            header;
 252        u8                              extended_state_area[];  /* flexible array member: variable-size tail, adds nothing to sizeof */
 253} __attribute__ ((packed, aligned (64)));
 254
 255/*
 256 * This is a union of all the possible FPU state formats
 257 * put together, so that we can pick the right one at runtime.
 258 *
 259 * The size of the union is determined by the largest
 260 * member - which is the xsave area.  The padding is there
 261 * to ensure that statically-allocated task_structs (just
 262 * the init_task today) have enough space.
 263 */
 264union fpregs_state {
 265        struct fregs_state              fsave;
 266        struct fxregs_state             fxsave;
 267        struct swregs_state             soft;
 268        struct xregs_state              xsave;
 269        u8 __padding[PAGE_SIZE];        /* makes the union at least PAGE_SIZE bytes */
 270};
 271
 272/*
 273 * Highest level per task FPU state data structure that
 274 * contains the FPU register state plus various FPU
 275 * state fields:
 276 */
 277struct fpu {
 278        /*
 279         * @last_cpu:
 280         *
 281         * Records the last CPU on which this context was loaded into
 282         * FPU registers. (In the lazy-restore case we might be
 283         * able to reuse FPU registers across multiple context switches
 284         * this way, if no intermediate task used the FPU.)
 285         *
 286         * A value of -1 is used to indicate that the FPU state in context
 287         * memory is newer than the FPU state in registers, and that the
 288         * FPU state should be reloaded next time the task is run.
 289         */
 290        unsigned int                    last_cpu;       /* unsigned: the -1 above is held as (unsigned int)-1 */
 291
 292        /*
 293         * @fpstate_active:
 294         *
 295         * This flag indicates whether this context is active: if the task
 296         * is not running then we can restore from this context, if the task
 297         * is running then we should save into this context.
 298         */
 299        unsigned char                   fpstate_active;
 300
 301        /*
 302         * @fpregs_active:
 303         *
 304         * This flag determines whether a given context is actively
 305         * loaded into the FPU's registers and that those registers
 306         * represent the task's current FPU state.
 307         *
 308         * Note the interaction with fpstate_active:
 309         *
 310         *   # task does not use the FPU:
 311         *   fpstate_active == 0
 312         *
 313         *   # task uses the FPU and regs are active:
 314         *   fpstate_active == 1 && fpregs_active == 1
 315         *
 316         *   # the regs are inactive but still match fpstate:
 317         *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
 318         *
 319         * The third state is what we use for the lazy restore optimization
 320         * on lazy-switching CPUs.
 321         */
 322        unsigned char                   fpregs_active;
 323
 324        /*
 325         * @counter:
 326         *
 327         * This counter contains the number of consecutive context switches
 328         * during which the FPU stays used. If this is over a threshold, the
 329         * lazy FPU restore logic becomes eager, to save the trap overhead.
 330         * This is an unsigned char so that after 256 iterations the counter
 331         * wraps and the context switch behavior turns lazy again; this is to
 332         * deal with bursty apps that only use the FPU for a short time:
 333         */
 334        unsigned char                   counter;
 335        /*
 336         * @state:
 337         *
 338         * In-memory copy of all FPU registers that we save/restore
 339         * over context switches. If the task is using the FPU then
 340         * the registers in the FPU are more recent than this state
 341         * copy. If the task context-switches away then they get
 342         * saved here and represent the FPU state.
 343         *
 344         * After context switches there may be a (short) time period
 345         * during which the in-FPU hardware registers are unchanged
 346         * and still perfectly match this state, if the tasks
 347         * scheduled afterwards are not using the FPU.
 348         *
 349         * This is the 'lazy restore' window of optimization, which
 350         * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
 351         *
 352         * We detect whether a subsequent task uses the FPU via setting
 353         * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
 354         *
 355         * During this window, if the task gets scheduled again, we
 356         * might be able to skip having to do a restore from this
 357         * memory buffer to the hardware registers - at the cost of
 358         * incurring the overhead of #NM fault traps.
 359         *
 360         * Note that on modern CPUs that support the XSAVEOPT (or other
 361         * optimized XSAVE instructions), we don't use #NM traps anymore,
 362         * as the hardware can track whether FPU registers need saving
 363         * or not. On such CPUs we activate the non-lazy ('eagerfpu')
 364         * logic, which unconditionally saves/restores all FPU state
 365         * across context switches. (if FPU state exists.)
 366         */
 367        union fpregs_state              state;
 368        /*
 369         * WARNING: 'state' is dynamically-sized.  Do not put
 370         * anything after it here.
 371         */
 372};
 373
 374#endif /* _ASM_X86_FPU_H */
 375