linux/arch/x86/include/asm/fpu/internal.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H

#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>

/*
 * High level FPU state handling functions:
 */
extern void fpu__initialize(struct fpu *fpu);
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int  fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
extern void fpu__clear(struct fpu *fpu);
extern int  fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int  dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);

/*
 * Boot time FPU initialization functions:
 */
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
extern u64 fpu__get_supported_xfeatures_mask(void);

/*
 * Debugging facility:
 */
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif

/*
 * FPU related CPU feature flag helper routines:
 */
static __always_inline __pure bool use_xsaveopt(void)
{
        return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
        return static_cpu_has(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
        return static_cpu_has(X86_FEATURE_FXSR);
}

/*
 * fpstate handling functions:
 */

extern union fpregs_state init_fpstate;

extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif

static inline void fpstate_init_xstate(struct xregs_state *xsave)
{
        /*
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
}

static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
}
extern void fpstate_sanitize_xstate(struct fpu *fpu);

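/*
 * Illustrative sketch only (hypothetical helper; the real fpstate_init()
 * lives in arch/x86/kernel/fpu/core.c): how the per-format init helpers
 * above are typically selected, assuming the caller has already zeroed
 * the buffer. The xcomp_bv setup is only needed for the compacted
 * (XSAVES) format.
 */
static inline void example_fpstate_init(union fpregs_state *state)
{
        if (static_cpu_has(X86_FEATURE_XSAVES))
                fpstate_init_xstate(&state->xsave);
        if (use_fxsr())
                fpstate_init_fxstate(&state->fxsave);
}
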
#define user_insn(insn, output, input...)                               \
({                                                                      \
        int err;                                                        \
        asm volatile(ASM_STAC "\n"                                      \
                     "1:" #insn "\n\t"                                  \
                     "2: " ASM_CLAC "\n"                                \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:  movl $-1,%[err]\n"                            \
                     "    jmp  2b\n"                                    \
                     ".previous\n"                                      \
                     _ASM_EXTABLE(1b, 3b)                               \
                     : [err] "=r" (err), output                         \
                     : "0"(0), input);                                  \
        err;                                                            \
})

#define kernel_insn(insn, output, input...)                             \
        asm volatile("1:" #insn "\n\t"                                  \
                     "2:\n"                                             \
                     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)  \
                     : output : input)

static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
        return user_insn(fnsave %[fx]; fwait,  [fx] "=m" (*fx), "m" (*fx));
}

static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}

static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
        if (IS_ENABLED(CONFIG_X86_32)) {
                kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        } else {
                if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
                        kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
                } else {
                        /* See comment in copy_fxregs_to_kernel() below. */
                        kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
                }
        }
}

static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
                          "m" (*fx));
}

static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
        kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
        return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
        if (IS_ENABLED(CONFIG_X86_32))
                asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else {
                /* Using "rex64; fxsave %0" is broken because, if the memory
                 * operand uses any extended registers for addressing, a second
                 * REX prefix will be generated (to the assembler, rex64
                 * followed by semicolon is a separate instruction), and hence
                 * the 64-bitness is lost.
                 *
                 * Using "fxsaveq %0" would be the ideal choice, but is only
                 * supported starting with gas 2.16.
                 *
                 * Using, as a workaround, the properly prefixed form below
                 * isn't accepted by any binutils version so far released,
                 * complaining that the same type of prefix is used twice if
                 * an extended register is needed for addressing (fix submitted
                 * to mainline 2005-11-21).
                 *
                 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
                 *
                 * This, however, we can work around by forcing the compiler to
                 * select an addressing mode that doesn't require extended
                 * registers.
                 */
                asm volatile( "rex64/fxsave (%[fx])"
                             : "=m" (fpu->state.fxsave)
                             : [fx] "R" (&fpu->state.fxsave));
        }
}

/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE           ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT        ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES          ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR          ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS         ".byte " REX_PREFIX "0x0f,0xc7,0x1f"

#define XSTATE_OP(op, st, lmask, hmask, err)                            \
        asm volatile("1:" op "\n\t"                                     \
                     "xor %[err], %[err]\n"                             \
                     "2:\n\t"                                           \
                     ".pushsection .fixup,\"ax\"\n\t"                   \
                     "3: movl $-2,%[err]\n\t"                           \
                     "jmp 2b\n\t"                                       \
                     ".popsection\n\t"                                  \
                     _ASM_EXTABLE(1b, 3b)                               \
                     : [err] "=r" (err)                                 \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

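/*
 * Note that the XSAVE instruction family takes the requested feature bitmap
 * in EDX:EAX, which is why the callers below split a 64-bit mask into
 * lmask (low 32 bits, passed in EAX) and hmask (high 32 bits, passed in EDX).
 */
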
/*
 * If XSAVES is enabled, it replaces XSAVEOPT because it supports the compacted
 * format and supervisor states in addition to the modified optimization that
 * XSAVEOPT provides.
 *
 * Otherwise, if XSAVEOPT is enabled, it replaces XSAVE because XSAVEOPT
 * supports the modified optimization, which XSAVE does not.
 *
 * We use XSAVE as a fallback.
 *
 * The 661 label is defined in the ALTERNATIVE* macros as the address of the
 * original instruction which gets replaced. We need to use it here as the
 * address of the instruction at which we might take an exception.
 */
#define XSTATE_XSAVE(st, lmask, hmask, err)                             \
        asm volatile(ALTERNATIVE_2(XSAVE,                               \
                                   XSAVEOPT, X86_FEATURE_XSAVEOPT,      \
                                   XSAVES,   X86_FEATURE_XSAVES)        \
                     "\n"                                               \
                     "xor %[err], %[err]\n"                             \
                     "3:\n"                                             \
                     ".pushsection .fixup,\"ax\"\n"                     \
                     "4: movl $-2, %[err]\n"                            \
                     "jmp 3b\n"                                         \
                     ".popsection\n"                                    \
                     _ASM_EXTABLE(661b, 4b)                             \
                     : [err] "=r" (err)                                 \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

/*
 * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
 * XSAVE area format.
 */
#define XSTATE_XRESTORE(st, lmask, hmask)                               \
        asm volatile(ALTERNATIVE(XRSTOR,                                \
                                 XRSTORS, X86_FEATURE_XSAVES)           \
                     "\n"                                               \
                     "3:\n"                                             \
                     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
                     :                                                  \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

/*
 * This function is called only during boot, when the x86 capability bits
 * are not yet set up and alternatives cannot be used yet.
 */
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (static_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XSAVE, xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * This function is called only during boot, when the x86 capability bits
 * are not yet set up and alternatives cannot be used yet.
 */
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (static_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

        /*
         * We should never fault when copying from a kernel buffer, and the FPU
         * state we set at boot time should be valid.
         */
        WARN_ON_FPU(err);
}

/*
 * Save processor xstate to xsave area.
 */
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON_FPU(!alternatives_patched);

        XSTATE_XSAVE(xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * Restore processor xstate from xsave area.
 */
static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
        u32 lmask = mask;
        u32 hmask = mask >> 32;

        XSTATE_XRESTORE(xstate, lmask, hmask);
}

/*
 * Save xstate to the user space xsave area.
 *
 * We don't use the modified optimization because XRSTOR/XRSTORS might track
 * a different application.
 *
 * We don't use the compacted xsave area format, for backward compatibility
 * with old applications which don't understand it.
 */
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
        int err;

        /*
         * Clear the xsave header first, so that reserved fields are
         * initialized to zero.
         */
        err = __clear_user(&buf->header, sizeof(buf->header));
        if (unlikely(err))
                return -EFAULT;

        stac();
        XSTATE_OP(XSAVE, buf, -1, -1, err);
        clac();

        return err;
}

/*
 * Restore xstate from user space xsave area.
 */
static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
{
        struct xregs_state *xstate = ((__force struct xregs_state *)buf);
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        stac();
        XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
        clac();

        return err;
}

/*
 * This must be called with preempt disabled. Returns
 * 'true' if the FPU state is still intact and we can
 * keep registers active.
 *
 * The legacy FNSAVE instruction clears all FPU state
 * unconditionally, so registers are essentially destroyed.
 * Modern FPU state can be kept in registers, if there are
 * no pending FP exceptions.
 */
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
        if (likely(use_xsave())) {
                copy_xregs_to_kernel(&fpu->state.xsave);
                return 1;
        }

        if (likely(use_fxsr())) {
                copy_fxregs_to_kernel(fpu);
                return 1;
        }

        /*
         * Legacy FPU register saving: FNSAVE always clears the FPU registers,
         * so we have to mark them inactive:
         */
        asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));

        return 0;
}

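/*
 * Illustrative sketch (hypothetical helper, not a kernel API): how a caller
 * typically acts on the return value of copy_fpregs_to_fpstate(); the real
 * user is switch_fpu_prepare() below.
 */
static inline void example_save_fpu_state(struct fpu *fpu, int cpu)
{
        if (copy_fpregs_to_fpstate(fpu))
                fpu->last_cpu = cpu;    /* registers are still live on this CPU */
        else
                fpu->last_cpu = -1;     /* FNSAVE destroyed the register state */
}
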
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
        if (use_xsave()) {
                copy_kernel_to_xregs(&fpstate->xsave, mask);
        } else {
                if (use_fxsr())
                        copy_kernel_to_fxregs(&fpstate->fxsave);
                else
                        copy_kernel_to_fregs(&fpstate->fsave);
        }
}

static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
        /*
         * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
         * pending. Clear the x87 state here by setting it to fixed values.
         * "m" is a random variable that should be in L1.
         */
        if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
                asm volatile(
                        "fnclex\n\t"
                        "emms\n\t"
                        "fildl %P[addr]"        /* set F?P to defined value */
                        : : [addr] "m" (fpstate));
        }

        __copy_kernel_to_fpregs(fpstate, -1);
}

extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);

/*
 * FPU context switch related helper methods:
 */

DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * The in-register FPU state for an FPU context on a CPU is assumed to be
 * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
 * matches the FPU.
 *
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Either one of these invalidation functions is enough. Invalidate
 * a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */
static inline void __cpu_invalidate_fpregs_state(void)
{
        __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
        fpu->last_cpu = -1;
}

static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
        return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}

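/*
 * Illustrative sketch (hypothetical helper, not a kernel API): code that
 * clobbers the FPU registers on the local CPU for its own purposes must
 * invalidate the cached ownership, so that the next switch to a user task
 * restores that task's state from memory instead of trusting the registers.
 */
static inline void example_clobber_fpregs_on_this_cpu(void)
{
        preempt_disable();
        /* ... scribble over the FPU registers here ... */
        __cpu_invalidate_fpregs_state();
        preempt_enable();
}
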
/*
 * These generally need preemption protection to work;
 * try to avoid using them on their own:
 */
static inline void fpregs_deactivate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, NULL);
        trace_x86_fpu_regs_deactivated(fpu);
}

static inline void fpregs_activate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, fpu);
        trace_x86_fpu_regs_activated(fpu);
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state.
 *    This is done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 *
 * See the illustrative sketch after switch_fpu_finish() below for how the
 * two halves fit together.
 */
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
        if (old_fpu->initialized) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
                        old_fpu->last_cpu = cpu;

                /* But leave fpu_fpregs_owner_ctx! */
                trace_x86_fpu_regs_deactivated(old_fpu);
        } else
                old_fpu->last_cpu = -1;
}

/*
 * Misc helper functions:
 */

/*
 * Set up the userspace FPU context for the new task, if the task
 * has used the FPU.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
        bool preload = static_cpu_has(X86_FEATURE_FPU) &&
                       new_fpu->initialized;

        if (preload) {
                if (!fpregs_state_valid(new_fpu, cpu))
                        copy_kernel_to_fpregs(&new_fpu->state);
                fpregs_activate(new_fpu);
        }
}

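/*
 * Illustrative sketch of the two-stage protocol described above (hypothetical
 * wrapper, not a kernel API; the real call sites are in the context switch
 * code, see __switch_to()):
 */
static inline void
example_switch_fpu(struct fpu *prev_fpu, struct fpu *next_fpu, int cpu)
{
        switch_fpu_prepare(prev_fpu, cpu);
        /* ... the rest of the task switch (stack, segments, ...) runs here ... */
        switch_fpu_finish(next_fpu, cpu);
}
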
/*
 * Needs to be preemption-safe.
 *
 * NOTE! user_fpu_begin() must be used only immediately before restoring
 * the saved state. It does not do any saving/restoring on its own. In
 * lazy FPU mode, it is just an optimization to avoid a #NM exception;
 * the task can lose the FPU right after preempt_enable().
 */
static inline void user_fpu_begin(void)
{
        struct fpu *fpu = &current->thread.fpu;

        preempt_disable();
        fpregs_activate(fpu);
        preempt_enable();
}

/*
 * MXCSR and XCR definitions:
 */

extern unsigned int mxcsr_feature_mask;

#define XCR_XFEATURE_ENABLED_MASK       0x00000000

static inline u64 xgetbv(u32 index)
{
        u32 eax, edx;

        asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
                     : "=a" (eax), "=d" (edx)
                     : "c" (index));
        return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
        u32 eax = value;
        u32 edx = value >> 32;

        asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
                     : : "a" (eax), "d" (edx), "c" (index));
}

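/*
 * Illustrative sketch (hypothetical helper): reading XCR0, the XFEATURE
 * enabled mask register, to see which xstate components the OS has enabled.
 * Only valid on CPUs with OSXSAVE enabled.
 */
static inline u64 example_read_xcr0(void)
{
        return xgetbv(XCR_XFEATURE_ENABLED_MASK);
}
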
#endif /* _ASM_X86_FPU_INTERNAL_H */