linux/arch/x86/kernel/fpu/core.c
   1/*
   2 *  Copyright (C) 1994 Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *  General FPU state handling cleanups
   6 *      Gareth Hughes <gareth@valinux.com>, May 2000
   7 */
   8#include <asm/fpu/internal.h>
   9#include <asm/fpu/regset.h>
  10#include <asm/fpu/signal.h>
  11#include <asm/fpu/types.h>
  12#include <asm/traps.h>
  13
  14#include <linux/hardirq.h>
  15#include <linux/pkeys.h>
  16
  17#define CREATE_TRACE_POINTS
  18#include <asm/trace/fpu.h>
  19
  20/*
  21 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
  22 * depending on the FPU hardware format:
  23 */
  24union fpregs_state init_fpstate __read_mostly;
  25
  26/*
   27 * Track whether the kernel is currently using the
   28 * FPU state.
  29 *
  30 * This flag is used:
  31 *
  32 *   - by IRQ context code to potentially use the FPU
  33 *     if it's unused.
  34 *
  35 *   - to debug kernel_fpu_begin()/end() correctness
  36 */
  37static DEFINE_PER_CPU(bool, in_kernel_fpu);
  38
  39/*
  40 * Track which context is using the FPU on the CPU:
  41 */
  42DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
  43
  44static void kernel_fpu_disable(void)
  45{
  46        WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
  47        this_cpu_write(in_kernel_fpu, true);
  48}
  49
  50static void kernel_fpu_enable(void)
  51{
  52        WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
  53        this_cpu_write(in_kernel_fpu, false);
  54}
  55
  56static bool kernel_fpu_disabled(void)
  57{
  58        return this_cpu_read(in_kernel_fpu);
  59}
  60
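/*
 * Were we in a kernel context that had not claimed the FPU via
 * kernel_fpu_begin() when the interrupt hit? If so, the registers
 * are free for the interrupt handler to use.
 */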
  61static bool interrupted_kernel_fpu_idle(void)
  62{
  63        return !kernel_fpu_disabled();
  64}
  65
  66/*
  67 * Were we in user mode (or vm86 mode) when we were
  68 * interrupted?
  69 *
  70 * Doing kernel_fpu_begin/end() is ok if we are running
  71 * in an interrupt context from user mode - we'll just
  72 * save the FPU state as required.
  73 */
  74static bool interrupted_user_mode(void)
  75{
  76        struct pt_regs *regs = get_irq_regs();
  77        return regs && user_mode(regs);
  78}
  79
  80/*
  81 * Can we use the FPU in kernel mode with the
  82 * whole "kernel_fpu_begin/end()" sequence?
  83 *
  84 * It's always ok in process context (ie "not interrupt")
  85 * but it is sometimes ok even from an irq.
  86 */
  87bool irq_fpu_usable(void)
  88{
  89        return !in_interrupt() ||
  90                interrupted_user_mode() ||
  91                interrupted_kernel_fpu_idle();
  92}
  93EXPORT_SYMBOL(irq_fpu_usable);
  94
  95void __kernel_fpu_begin(void)
  96{
  97        struct fpu *fpu = &current->thread.fpu;
  98
  99        WARN_ON_FPU(!irq_fpu_usable());
 100
 101        kernel_fpu_disable();
 102
 103        if (fpu->initialized) {
 104                /*
 105                 * Ignore return value -- we don't care if reg state
 106                 * is clobbered.
 107                 */
 108                copy_fpregs_to_fpstate(fpu);
 109        } else {
 110                __cpu_invalidate_fpregs_state();
 111        }
 112}
 113EXPORT_SYMBOL(__kernel_fpu_begin);
 114
 115void __kernel_fpu_end(void)
 116{
 117        struct fpu *fpu = &current->thread.fpu;
 118
 119        if (fpu->initialized)
 120                copy_kernel_to_fpregs(&fpu->state);
 121
 122        kernel_fpu_enable();
 123}
 124EXPORT_SYMBOL(__kernel_fpu_end);
 125
 126void kernel_fpu_begin(void)
 127{
 128        preempt_disable();
 129        __kernel_fpu_begin();
 130}
 131EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 132
 133void kernel_fpu_end(void)
 134{
 135        __kernel_fpu_end();
 136        preempt_enable();
 137}
 138EXPORT_SYMBOL_GPL(kernel_fpu_end);
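
/*
 * Illustrative sketch (not part of this file): the usual pattern for
 * using the FPU from kernel code built on the helpers above. The
 * example_xor_block() function and its scalar fallback are made up
 * for illustration; only irq_fpu_usable(), kernel_fpu_begin() and
 * kernel_fpu_end() are the real interfaces.
 */
#if 0
static void example_xor_block(u8 *dst, const u8 *src, size_t len)
{
	size_t i;

	if (!irq_fpu_usable()) {
		/* FPU not usable in this context: fall back to integer code */
		for (i = 0; i < len; i++)
			dst[i] ^= src[i];
		return;
	}

	kernel_fpu_begin();	/* disables preemption, saves the user FPU state */
	/* ... SSE/AVX instructions may be used here ... */
	kernel_fpu_end();	/* restores the user FPU state, re-enables preemption */
}
#endif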
 139
 140/*
 141 * Save the FPU state (mark it for reload if necessary):
 142 *
 143 * This only ever gets called for the current task.
 144 */
 145void fpu__save(struct fpu *fpu)
 146{
 147        WARN_ON_FPU(fpu != &current->thread.fpu);
 148
 149        preempt_disable();
 150        trace_x86_fpu_before_save(fpu);
 151        if (fpu->initialized) {
 152                if (!copy_fpregs_to_fpstate(fpu)) {
 153                        copy_kernel_to_fpregs(&fpu->state);
 154                }
 155        }
 156        trace_x86_fpu_after_save(fpu);
 157        preempt_enable();
 158}
 159EXPORT_SYMBOL_GPL(fpu__save);
 160
 161/*
 162 * Legacy x87 fpstate state init:
 163 */
 164static inline void fpstate_init_fstate(struct fregs_state *fp)
 165{
  166        fp->cwd = 0xffff037fu;  /* all exceptions masked, extended precision, round to nearest */
  167        fp->swd = 0xffff0000u;  /* clear status word */
  168        fp->twd = 0xffffffffu;  /* all x87 registers tagged empty */
  169        fp->fos = 0xffff0000u;  /* FPU operand segment selector */
 170}
 171
 172void fpstate_init(union fpregs_state *state)
 173{
 174        if (!static_cpu_has(X86_FEATURE_FPU)) {
 175                fpstate_init_soft(&state->soft);
 176                return;
 177        }
 178
 179        memset(state, 0, fpu_kernel_xstate_size);
 180
 181        if (static_cpu_has(X86_FEATURE_XSAVES))
 182                fpstate_init_xstate(&state->xsave);
 183        if (static_cpu_has(X86_FEATURE_FXSR))
 184                fpstate_init_fxstate(&state->fxsave);
 185        else
 186                fpstate_init_fstate(&state->fsave);
 187}
 188EXPORT_SYMBOL_GPL(fpstate_init);
 189
 190int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 191{
 192        dst_fpu->last_cpu = -1;
 193
 194        if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
 195                return 0;
 196
 197        WARN_ON_FPU(src_fpu != &current->thread.fpu);
 198
 199        /*
 200         * Don't let 'init optimized' areas of the XSAVE area
 201         * leak into the child task:
 202         */
 203        memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 204
 205        /*
 206         * Save current FPU registers directly into the child
 207         * FPU context, without any memory-to-memory copying.
 208         *
  209         * ( The function 'fails' in the FNSAVE case, which destroys
  210         *   the register contents, so we have to copy them back. )
 211         */
 212        if (!copy_fpregs_to_fpstate(dst_fpu)) {
 213                memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 214                copy_kernel_to_fpregs(&src_fpu->state);
 215        }
 216
 217        trace_x86_fpu_copy_src(src_fpu);
 218        trace_x86_fpu_copy_dst(dst_fpu);
 219
 220        return 0;
 221}
 222
 223/*
 224 * Activate the current task's in-memory FPU context,
 225 * if it has not been used before:
 226 */
 227void fpu__initialize(struct fpu *fpu)
 228{
 229        WARN_ON_FPU(fpu != &current->thread.fpu);
 230
 231        if (!fpu->initialized) {
 232                fpstate_init(&fpu->state);
 233                trace_x86_fpu_init_state(fpu);
 234
 235                trace_x86_fpu_activate_state(fpu);
 236                /* Safe to do for the current task: */
 237                fpu->initialized = 1;
 238        }
 239}
 240EXPORT_SYMBOL_GPL(fpu__initialize);
 241
 242/*
 243 * This function must be called before we read a task's fpstate.
 244 *
  245 * There are two cases where this gets called:
 246 *
 247 * - for the current task (when coredumping), in which case we have
 248 *   to save the latest FPU registers into the fpstate,
 249 *
  250 * - or it's called for stopped tasks (ptrace), in which case the
  251 *   registers were already saved by the context-switch code when
  252 *   the task scheduled out - we only have to initialize the fpstate
  253 *   if it has never been initialized.
 254 *
 255 * If the task has used the FPU before then save it.
 256 */
 257void fpu__prepare_read(struct fpu *fpu)
 258{
 259        if (fpu == &current->thread.fpu) {
 260                fpu__save(fpu);
 261        } else {
 262                if (!fpu->initialized) {
 263                        fpstate_init(&fpu->state);
 264                        trace_x86_fpu_init_state(fpu);
 265
 266                        trace_x86_fpu_activate_state(fpu);
 267                        /* Safe to do for current and for stopped child tasks: */
 268                        fpu->initialized = 1;
 269                }
 270        }
 271}
 272
 273/*
 274 * This function must be called before we write a task's fpstate.
 275 *
 276 * If the task has used the FPU before then invalidate any cached FPU registers.
 277 * If the task has not used the FPU before then initialize its fpstate.
 278 *
  279 * After this function call, once the registers in the fpstate have
  280 * been modified and the child task has woken up, the child task will
  281 * restore the modified FPU state from the modified context. If we
  282 * didn't invalidate its cached state here, the stale in-register
  283 * state still pending on its former CPU could be restored instead,
  284 * corrupting the modifications.
 285 */
 286void fpu__prepare_write(struct fpu *fpu)
 287{
 288        /*
 289         * Only stopped child tasks can be used to modify the FPU
 290         * state in the fpstate buffer:
 291         */
 292        WARN_ON_FPU(fpu == &current->thread.fpu);
 293
 294        if (fpu->initialized) {
 295                /* Invalidate any cached state: */
 296                __fpu_invalidate_fpregs_state(fpu);
 297        } else {
 298                fpstate_init(&fpu->state);
 299                trace_x86_fpu_init_state(fpu);
 300
 301                trace_x86_fpu_activate_state(fpu);
 302                /* Safe to do for stopped child tasks: */
 303                fpu->initialized = 1;
 304        }
 305}
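
/*
 * Illustrative sketch (not part of this file): how a ptrace/regset-style
 * accessor would be expected to use the two helpers above before touching
 * a stopped task's fpstate buffer. The example_*_fxsave() functions are
 * made up for illustration; only fpu__prepare_read() and
 * fpu__prepare_write() are the real interfaces.
 */
#if 0
static void example_peek_fxsave(struct task_struct *target,
				struct fxregs_state *out)
{
	struct fpu *fpu = &target->thread.fpu;

	/* Make sure the fpstate buffer holds valid, up-to-date contents: */
	fpu__prepare_read(fpu);
	memcpy(out, &fpu->state.fxsave, sizeof(*out));
}

static void example_poke_fxsave(struct task_struct *target,
				const struct fxregs_state *new_state)
{
	struct fpu *fpu = &target->thread.fpu;

	/* Init the fpstate or invalidate any cached registers first: */
	fpu__prepare_write(fpu);
	memcpy(&fpu->state.fxsave, new_state, sizeof(*new_state));
}
#endif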
 306
 307/*
 308 * 'fpu__restore()' is called to copy FPU registers from
 309 * the FPU fpstate to the live hw registers and to activate
 310 * access to the hardware registers, so that FPU instructions
 311 * can be used afterwards.
 312 *
 313 * Must be called with kernel preemption disabled (for example
 314 * with local interrupts disabled, as it is in the case of
 315 * do_device_not_available()).
 316 */
 317void fpu__restore(struct fpu *fpu)
 318{
 319        fpu__initialize(fpu);
 320
 321        /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 322        kernel_fpu_disable();
 323        trace_x86_fpu_before_restore(fpu);
 324        fpregs_activate(fpu);
 325        copy_kernel_to_fpregs(&fpu->state);
 326        trace_x86_fpu_after_restore(fpu);
 327        kernel_fpu_enable();
 328}
 329EXPORT_SYMBOL_GPL(fpu__restore);
 330
 331/*
 332 * Drops current FPU state: deactivates the fpregs and
 333 * the fpstate. NOTE: it still leaves previous contents
 334 * in the fpregs in the eager-FPU case.
 335 *
 336 * This function can be used in cases where we know that
 337 * a state-restore is coming: either an explicit one,
 338 * or a reschedule.
 339 */
 340void fpu__drop(struct fpu *fpu)
 341{
 342        preempt_disable();
 343
 344        if (fpu == &current->thread.fpu) {
 345                if (fpu->initialized) {
 346                        /* Ignore delayed exceptions from user space */
 347                        asm volatile("1: fwait\n"
 348                                     "2:\n"
 349                                     _ASM_EXTABLE(1b, 2b));
 350                        fpregs_deactivate(fpu);
 351                }
 352        }
 353
 354        fpu->initialized = 0;
 355
 356        trace_x86_fpu_dropped(fpu);
 357
 358        preempt_enable();
 359}
 360
 361/*
 362 * Clear FPU registers by setting them up from
 363 * the init fpstate:
 364 */
 365static inline void copy_init_fpstate_to_fpregs(void)
 366{
 367        if (use_xsave())
 368                copy_kernel_to_xregs(&init_fpstate.xsave, -1);
 369        else if (static_cpu_has(X86_FEATURE_FXSR))
 370                copy_kernel_to_fxregs(&init_fpstate.fxsave);
 371        else
 372                copy_kernel_to_fregs(&init_fpstate.fsave);
 373
 374        if (boot_cpu_has(X86_FEATURE_OSPKE))
 375                copy_init_pkru_to_fpregs();
 376}
 377
 378/*
 379 * Clear the FPU state back to init state.
 380 *
 381 * Called by sys_execve(), by the signal handler code and by various
 382 * error paths.
 383 */
 384void fpu__clear(struct fpu *fpu)
 385{
 386        WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 387
 388        fpu__drop(fpu);
 389
 390        /*
 391         * Make sure fpstate is cleared and initialized.
 392         */
 393        if (static_cpu_has(X86_FEATURE_FPU)) {
 394                preempt_disable();
 395                fpu__initialize(fpu);
 396                user_fpu_begin();
 397                copy_init_fpstate_to_fpregs();
 398                preempt_enable();
 399        }
 400}
 401
 402/*
 403 * x87 math exception handling:
 404 */
 405
 406int fpu__exception_code(struct fpu *fpu, int trap_nr)
 407{
 408        int err;
 409
 410        if (trap_nr == X86_TRAP_MF) {
 411                unsigned short cwd, swd;
 412                /*
  413                 * (~cwd & swd) masks out exceptions that are not unmasked, leaving
  414                 * only the raised, unmasked ones.  0x3f covers the exception bits in
  415                 * these regs, 0x200 is the C1 bit you need in case of a stack fault,
  416                 * 0x040 is the stack fault bit.  We should only be taking one
  417                 * exception at a time, so if this combination doesn't produce any
  418                 * single exception, then we have a bad program that isn't
  419                 * synchronizing its FPU usage and it will suffer the consequences,
  420                 * since we won't be able to fully reproduce the exception context.
 421                 */
 422                if (boot_cpu_has(X86_FEATURE_FXSR)) {
 423                        cwd = fpu->state.fxsave.cwd;
 424                        swd = fpu->state.fxsave.swd;
 425                } else {
 426                        cwd = (unsigned short)fpu->state.fsave.cwd;
 427                        swd = (unsigned short)fpu->state.fsave.swd;
 428                }
 429
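                /*
                 * Worked example (illustrative): with divide-by-zero
                 * unmasked, cwd might be 0x037b (the 0x037f default with
                 * ZM, bit 2, cleared) and swd 0x0004 (ZE raised), so
                 * swd & ~cwd == 0x0004 and FPE_FLTDIV is returned below.
                 */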
 430                err = swd & ~cwd;
 431        } else {
 432                /*
 433                 * The SIMD FPU exceptions are handled a little differently, as there
 434                 * is only a single status/control register.  Thus, to determine which
 435                 * unmasked exception was caught we must mask the exception mask bits
 436                 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 437                 */
 438                unsigned short mxcsr = MXCSR_DEFAULT;
 439
 440                if (boot_cpu_has(X86_FEATURE_XMM))
 441                        mxcsr = fpu->state.fxsave.mxcsr;
 442
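                /*
                 * Worked example (illustrative): with divide-by-zero
                 * unmasked, mxcsr might read 0x1d84 (the 0x1f80 default
                 * with ZM, bit 9, cleared, plus ZE, bit 2, raised).
                 * Shifting right by 7 aligns the mask bits with the flag
                 * bits, so ~(mxcsr >> 7) & mxcsr leaves bit 2 set and
                 * FPE_FLTDIV is returned below.
                 */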
  443                err = ~(mxcsr >> 7) & mxcsr;
 444        }
 445
 446        if (err & 0x001) {      /* Invalid op */
 447                /*
 448                 * swd & 0x240 == 0x040: Stack Underflow
 449                 * swd & 0x240 == 0x240: Stack Overflow
 450                 * User must clear the SF bit (0x40) if set
 451                 */
 452                return FPE_FLTINV;
 453        } else if (err & 0x004) { /* Divide by Zero */
 454                return FPE_FLTDIV;
 455        } else if (err & 0x008) { /* Overflow */
 456                return FPE_FLTOVF;
 457        } else if (err & 0x012) { /* Denormal, Underflow */
 458                return FPE_FLTUND;
 459        } else if (err & 0x020) { /* Precision */
 460                return FPE_FLTRES;
 461        }
 462
 463        /*
  464         * If we're using IRQ 13, or supposedly even some
  465         * X86_TRAP_MF trap implementations, it's possible that
  466         * we get a spurious trap, which is not an error.
 467         */
 468        return 0;
 469}
 470