linux/arch/x86/kernel/fpu/core.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>
#include <asm/irq_regs.h>

#include <linux/hardirq.h>
#include <linux/pkeys.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
union fpregs_state init_fpstate __ro_after_init;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

static bool kernel_fpu_disabled(void)
{
        return this_cpu_read(in_kernel_fpu);
}

static bool interrupted_kernel_fpu_idle(void)
{
        return !kernel_fpu_disabled();
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
        struct pt_regs *regs = get_irq_regs();
        return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
        return !in_interrupt() ||
                interrupted_user_mode() ||
                interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
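
/*
 * Example (illustrative sketch, not referenced by this file): the
 * canonical caller pattern for the API above.  Kernel code that wants
 * to touch FPU/SIMD registers wraps that use in kernel_fpu_begin()/
 * kernel_fpu_end(); callers that may run in interrupt context check
 * irq_fpu_usable() first and take an integer-only fallback when it
 * returns false.  The function name and the placeholder bodies below
 * are hypothetical.
 */
static void __maybe_unused example_simd_section(void)
{
        if (!irq_fpu_usable()) {
                /* e.g. we interrupted a kernel_fpu_begin() region */
                /* ... scalar/integer fallback would go here ... */
                return;
        }

        kernel_fpu_begin();
        /* ... FPU/SIMD register use is only legal in this section ... */
        kernel_fpu_end();
}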

/*
 * Save the FPU register state in fpu->state. The register state is
 * preserved.
 *
 * Must be called with fpregs_lock() held.
 *
 * The legacy FNSAVE instruction clears all FPU state unconditionally, so
 * register state has to be reloaded. That might be a pointless exercise
 * when the FPU is going to be used by another task right after that. But
 * this only affects 20+ years old 32bit systems and avoids conditionals all
 * over the place.
 *
 * FXSAVE and all XSAVE variants preserve the FPU register state.
 */
void save_fpregs_to_fpstate(struct fpu *fpu)
{
        if (likely(use_xsave())) {
                os_xsave(&fpu->state.xsave);

                /*
                 * AVX512 state is tracked here because its use is
                 * known to slow the max clock speed of the core.
                 */
                if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
                        fpu->avx512_timestamp = jiffies;
                return;
        }

        if (likely(use_fxsr())) {
                fxsave(&fpu->state.fxsave);
                return;
        }

        /*
         * Legacy FPU register saving, FNSAVE always clears FPU registers,
         * so we have to reload them from the memory state.
         */
        asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
        frstor(&fpu->state.fsave);
}
EXPORT_SYMBOL(save_fpregs_to_fpstate);

void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
{
        /*
         * AMD K7/K8 and later CPUs up to Zen don't save/restore
         * FDP/FIP/FOP unless an exception is pending. Clear the x87 state
         * here by setting it to fixed values.  "m" is a random variable
         * that should be in L1.
         */
        if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
                asm volatile(
                        "fnclex\n\t"
                        "emms\n\t"
                        "fildl %P[addr]"        /* set F?P to defined value */
                        : : [addr] "m" (fpstate));
        }

        if (use_xsave()) {
                os_xrstor(&fpstate->xsave, mask);
        } else {
                if (use_fxsr())
                        fxrstor(&fpstate->fxsave);
                else
                        frstor(&fpstate->fsave);
        }
}
EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate);

void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
        preempt_disable();

        WARN_ON_FPU(!irq_fpu_usable());
        WARN_ON_FPU(this_cpu_read(in_kernel_fpu));

        this_cpu_write(in_kernel_fpu, true);

        if (!(current->flags & PF_KTHREAD) &&
            !test_thread_flag(TIF_NEED_FPU_LOAD)) {
                set_thread_flag(TIF_NEED_FPU_LOAD);
                save_fpregs_to_fpstate(&current->thread.fpu);
        }
        __cpu_invalidate_fpregs_state();

        /* Put sane initial values into the control registers. */
        if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
                ldmxcsr(MXCSR_DEFAULT);

        if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
                asm volatile ("fninit");
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);

void kernel_fpu_end(void)
{
        WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));

        this_cpu_write(in_kernel_fpu, false);
        preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
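
/*
 * Example (illustrative sketch, not referenced by this file): choosing
 * the mask for kernel_fpu_begin_mask().  A caller that only touches
 * SSE/AVX state needs a sane MXCSR but not a reinitialized x87 unit,
 * so it can pass KFPU_MXCSR alone and skip the comparatively slow
 * FNINIT; legacy x87 users pass KFPU_387 as well.  The plain
 * kernel_fpu_begin() wrapper in <asm/fpu/api.h> picks a default mask
 * for the caller.  The function below is a hypothetical placeholder.
 */
static void __maybe_unused example_sse_only_section(void)
{
        kernel_fpu_begin_mask(KFPU_MXCSR);
        /* ... SSE/AVX work, no x87 instructions ... */
        kernel_fpu_end();
}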

/*
 * Sync the FPU register state to current's memory register state when the
 * current task owns the FPU. The hardware register state is preserved.
 */
void fpu_sync_fpstate(struct fpu *fpu)
{
        WARN_ON_FPU(fpu != &current->thread.fpu);

        fpregs_lock();
        trace_x86_fpu_before_save(fpu);

        if (!test_thread_flag(TIF_NEED_FPU_LOAD))
                save_fpregs_to_fpstate(fpu);

        trace_x86_fpu_after_save(fpu);
        fpregs_unlock();
}

static inline void fpstate_init_xstate(struct xregs_state *xsave)
{
        /*
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
}

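/*
 * Note on the initial control values used below: 0x37f sets the x87
 * control word to mask all exceptions, select extended precision and
 * round-to-nearest; MXCSR_DEFAULT (0x1f80) likewise masks all SSE
 * exceptions with round-to-nearest.
 */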
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
}

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fregs_state *fp)
{
        fp->cwd = 0xffff037fu;
        fp->swd = 0xffff0000u;
        fp->twd = 0xffffffffu;
        fp->fos = 0xffff0000u;
}

void fpstate_init(union fpregs_state *state)
{
        if (!static_cpu_has(X86_FEATURE_FPU)) {
                fpstate_init_soft(&state->soft);
                return;
        }

        memset(state, 0, fpu_kernel_xstate_size);

        if (static_cpu_has(X86_FEATURE_XSAVES))
                fpstate_init_xstate(&state->xsave);
        if (static_cpu_has(X86_FEATURE_FXSR))
                fpstate_init_fxstate(&state->fxsave);
        else
                fpstate_init_fstate(&state->fsave);
}
EXPORT_SYMBOL_GPL(fpstate_init);

/* Clone current's FPU state on fork */
int fpu_clone(struct task_struct *dst)
{
        struct fpu *src_fpu = &current->thread.fpu;
        struct fpu *dst_fpu = &dst->thread.fpu;

        /* The new task's FPU state cannot be valid in the hardware. */
        dst_fpu->last_cpu = -1;

        if (!cpu_feature_enabled(X86_FEATURE_FPU))
                return 0;

        /*
         * Don't let 'init optimized' areas of the XSAVE area
         * leak into the child task:
         */
        memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

        /*
         * If the FPU registers are not owned by current just memcpy() the
         * state.  Otherwise save the FPU registers directly into the
         * child's FPU context, without any memory-to-memory copying.
         */
        fpregs_lock();
        if (test_thread_flag(TIF_NEED_FPU_LOAD))
                memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
        else
                save_fpregs_to_fpstate(dst_fpu);
        fpregs_unlock();

        set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);

        trace_x86_fpu_copy_src(src_fpu);
        trace_x86_fpu_copy_dst(dst_fpu);

        return 0;
}

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
        preempt_disable();

        if (fpu == &current->thread.fpu) {
                /* Ignore delayed exceptions from user space */
                asm volatile("1: fwait\n"
                             "2:\n"
                             _ASM_EXTABLE(1b, 2b));
                fpregs_deactivate(fpu);
        }

        trace_x86_fpu_dropped(fpu);

        preempt_enable();
}

/*
 * Clear FPU registers by setting them up from the init fpstate.
 * Caller must do fpregs_[un]lock() around it.
 */
static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
{
        if (use_xsave())
                os_xrstor(&init_fpstate.xsave, features_mask);
        else if (use_fxsr())
                fxrstor(&init_fpstate.fxsave);
        else
                frstor(&init_fpstate.fsave);

        pkru_write_default();
}

static inline unsigned int init_fpstate_copy_size(void)
{
        if (!use_xsave())
                return fpu_kernel_xstate_size;

        /* XSAVE(S) just needs the legacy and the xstate header part */
        return sizeof(init_fpstate.xsave);
}

/*
 * Reset current->fpu memory state to the init values.
 */
static void fpu_reset_fpstate(void)
{
        struct fpu *fpu = &current->thread.fpu;

        fpregs_lock();
        fpu__drop(fpu);
        /*
         * This does not change the actual hardware registers. It just
         * resets the memory image and sets TIF_NEED_FPU_LOAD so a
         * subsequent return to usermode will reload the registers from the
         * task's memory image.
         *
         * Do not use fpstate_init() here. Just copy init_fpstate which has
         * the correct content already except for PKRU.
         *
         * PKRU handling does not rely on the xstate when restoring for
         * user space as PKRU is eagerly written in switch_to() and
         * flush_thread().
         */
        memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size());
        set_thread_flag(TIF_NEED_FPU_LOAD);
        fpregs_unlock();
}

/*
 * Reset current's user FPU states to the init states.  current's
 * supervisor states, if any, are not modified by this function.  The
 * caller guarantees that the XSTATE header in memory is intact.
 */
void fpu__clear_user_states(struct fpu *fpu)
{
        WARN_ON_FPU(fpu != &current->thread.fpu);

        fpregs_lock();
        if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
                fpu_reset_fpstate();
                fpregs_unlock();
                return;
        }

        /*
         * Ensure that current's supervisor states are loaded into their
         * corresponding registers.
         */
        if (xfeatures_mask_supervisor() &&
            !fpregs_state_valid(fpu, smp_processor_id())) {
                os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
        }

        /* Reset user states in registers. */
        restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user());

        /*
         * Now all FPU registers have their desired values.  Inform the FPU
         * state machine that current's FPU registers are in the hardware
         * registers. The memory image does not need to be updated because
         * any operation relying on it has to save the registers first when
         * current's FPU is marked active.
         */
        fpregs_mark_activate();
        fpregs_unlock();
}

void fpu_flush_thread(void)
{
        fpu_reset_fpstate();
}

/*
 * Load FPU context before returning to userspace.
 */
void switch_fpu_return(void)
{
        if (!static_cpu_has(X86_FEATURE_FPU))
                return;

        fpregs_restore_userregs();
}
EXPORT_SYMBOL_GPL(switch_fpu_return);

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * If current FPU state according to its tracking (loaded FPU context on this
 * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
 * loaded on return to userland.
 */
void fpregs_assert_state_consistent(void)
{
        struct fpu *fpu = &current->thread.fpu;

        if (test_thread_flag(TIF_NEED_FPU_LOAD))
                return;

        WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
}
EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
#endif

void fpregs_mark_activate(void)
{
        struct fpu *fpu = &current->thread.fpu;

        fpregs_activate(fpu);
        fpu->last_cpu = smp_processor_id();
        clear_thread_flag(TIF_NEED_FPU_LOAD);
}
EXPORT_SYMBOL_GPL(fpregs_mark_activate);

/*
 * x87 math exception handling:
 */

int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
        int err;

        if (trap_nr == X86_TRAP_MF) {
                unsigned short cwd, swd;
                /*
                 * (~cwd & swd) will mask out exceptions that are not set to unmasked
                 * status.  0x3f is the exception bits in these regs, 0x200 is the
                 * C1 reg you need in case of a stack fault, 0x040 is the stack
                 * fault bit.  We should only be taking one exception at a time,
                 * so if this combination doesn't produce any single exception,
                 * then we have a bad program that isn't synchronizing its FPU usage
                 * and it will suffer the consequences since we won't be able to
                 * fully reproduce the context of the exception.
                 */
                if (boot_cpu_has(X86_FEATURE_FXSR)) {
                        cwd = fpu->state.fxsave.cwd;
                        swd = fpu->state.fxsave.swd;
                } else {
                        cwd = (unsigned short)fpu->state.fsave.cwd;
                        swd = (unsigned short)fpu->state.fsave.swd;
                }

                err = swd & ~cwd;
        } else {
                /*
                 * The SIMD FPU exceptions are handled a little differently, as there
                 * is only a single status/control register.  Thus, to determine which
                 * unmasked exception was caught we must mask the exception mask bits
                 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
                 */
                unsigned short mxcsr = MXCSR_DEFAULT;

                if (boot_cpu_has(X86_FEATURE_XMM))
                        mxcsr = fpu->state.fxsave.mxcsr;

                err = ~(mxcsr >> 7) & mxcsr;
        }

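        /*
         * Worked example: a program that unmasks the divide-by-zero
         * exception (clears ZM in the x87 cwd or in mxcsr) and then
         * divides by zero ends up with the ZE flag set in swd or mxcsr.
         * The masking above leaves 0x004 set in err, which is decoded
         * below as FPE_FLTDIV.
         */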
        if (err & 0x001) {      /* Invalid op */
                /*
                 * swd & 0x240 == 0x040: Stack Underflow
                 * swd & 0x240 == 0x240: Stack Overflow
                 * User must clear the SF bit (0x40) if set
                 */
                return FPE_FLTINV;
        } else if (err & 0x004) { /* Divide by Zero */
                return FPE_FLTDIV;
        } else if (err & 0x008) { /* Overflow */
                return FPE_FLTOVF;
        } else if (err & 0x012) { /* Denormal, Underflow */
                return FPE_FLTUND;
        } else if (err & 0x020) { /* Precision */
                return FPE_FLTRES;
        }

        /*
         * If we're using IRQ 13, or supposedly even some trap
         * X86_TRAP_MF implementations, it's possible
         * we get a spurious trap, which is not an error.
         */
        return 0;
}