linux/arch/x86/kernel/i387.c
/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/cmdline.h>
#include <asm/sigcontext.h>
#include <asm/processor.h>
#include <asm/math_emu.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/user.h>

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * With lazy FPU restore, we can do a kernel_fpu_begin/end() pair
 * *ONLY* if that pair does nothing at all: the thread must not have
 * fpu (so that we don't try to save the FPU state), and TS must be
 * set (so that the clts/stts pair does nothing that is visible in
 * the interrupted kernel thread).
 *
 * Except for the eagerfpu case, when we return true unless we've
 * already been eager and saved the state in kernel_fpu_begin().
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
        if (use_eager_fpu())
                return __thread_has_fpu(current);

        return !__thread_has_fpu(current) &&
                (read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static inline bool interrupted_user_mode(void)
{
        struct pt_regs *regs = get_irq_regs();
        return regs && user_mode_vm(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
        return !in_interrupt() ||
                interrupted_user_mode() ||
                interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);

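/*
 * Enter a kernel FPU-use region; callers normally reach this through
 * the preempt-disabling kernel_fpu_begin()/kernel_fpu_end() wrappers.
 * A typical caller looks roughly like this (a sketch, not taken from
 * this file):
 *
 *      if (irq_fpu_usable()) {
 *              kernel_fpu_begin();
 *              ... use SSE/AVX registers ...
 *              kernel_fpu_end();
 *      }
 *
 * If the current thread owns the FPU we first save its register state;
 * in the lazy case we additionally drop fpu_owner_task and clear CR0.TS
 * so that kernel FPU instructions do not fault.
 */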
void __kernel_fpu_begin(void)
{
        struct task_struct *me = current;

        if (__thread_has_fpu(me)) {
                __thread_clear_has_fpu(me);
                __save_init_fpu(me);
                /* We do 'stts()' in __kernel_fpu_end() */
        } else if (!use_eager_fpu()) {
                this_cpu_write(fpu_owner_task, NULL);
                clts();
        }
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
        if (use_eager_fpu()) {
                /*
                 * For eager fpu, most of the time tsk_used_math() is true.
                 * Restore the user math as we are done with the kernel usage.
                 * In a few instances during thread exit, signal handling etc,
                 * tsk_used_math() is false. Those few places will take proper
                 * actions, so we don't need to restore the math here.
                 */
                if (likely(tsk_used_math(current)))
                        math_state_restore();
        } else {
                stts();
        }
}
EXPORT_SYMBOL(__kernel_fpu_end);

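/*
 * Force tsk's FPU state out of the registers: if tsk currently owns the
 * FPU, save the live register contents into its thread struct and drop
 * ownership; otherwise just reset fpu_counter, the heuristic counter
 * that decides whether the state is preloaded on the next context
 * switch.
 */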
void unlazy_fpu(struct task_struct *tsk)
{
        preempt_disable();
        if (__thread_has_fpu(tsk)) {
                __save_init_fpu(tsk);
                __thread_fpu_end(tsk);
        } else
                tsk->fpu_counter = 0;
        preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);

unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
static struct i387_fxsave_struct fx_scratch;

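/*
 * Ask the hardware which MXCSR bits are writable: FXSAVE deposits the
 * supported-bit mask in mxcsr_mask, and a value of zero there means
 * the architectural default mask of 0x0000ffbf applies.
 */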
static void mxcsr_feature_mask_init(void)
{
        unsigned long mask = 0;

        if (cpu_has_fxsr) {
                memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
                asm volatile("fxsave %0" : "+m" (fx_scratch));
                mask = fx_scratch.mxcsr_mask;
                if (mask == 0)
                        mask = 0x0000ffbf;
        }
        mxcsr_feature_mask &= mask;
}

static void init_thread_xstate(void)
{
        /*
         * Note that xstate_size might be overwritten later during
         * xsave_init().
         */

        if (!HAVE_HWFP) {
                /*
                 * Disable xsave as we do not support it if i387
                 * emulation is enabled.
                 */
                setup_clear_cpu_cap(X86_FEATURE_XSAVE);
                setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
                xstate_size = sizeof(struct i387_soft_struct);
                return;
        }

        if (cpu_has_fxsr)
                xstate_size = sizeof(struct i387_fxsave_struct);
        else
                xstate_size = sizeof(struct i387_fsave_struct);

        /*
         * Quirk: we don't yet handle the XSAVES* instructions
         * correctly, as we don't correctly convert between
         * standard and compacted format when interfacing
         * with user-space - so disable it for now.
         *
         * The difference is small: with recent CPUs the
         * compacted format is only marginally smaller than
         * the standard FPU state format.
         *
         * ( This is easy to backport while we are fixing
         *   XSAVES* support. )
         */
        setup_clear_cpu_cap(X86_FEATURE_XSAVES);
}

/*
 * We parse fpu parameters early because fpu_init() is executed
 * before parse_early_param().
 */
static int __init x86_clearcpuid_setup(char *s)
{
        int bit;

        if (get_option(&s, &bit) &&
            bit >= 0 &&
            bit < NCAPINTS * 32)
                setup_clear_cpu_cap(bit);

        return 0;
}
early_param("clearcpuid", x86_clearcpuid_setup);
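
/*
 * For example, booting with "clearcpuid=154" would clear
 * X86_FEATURE_XSAVE (CPUID word 4, bit 26), assuming the usual
 * feature-bit layout of this kernel generation.
 */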

/*
 * Called at bootup to set up the initial FPU state that is later cloned
 * into all processes.
 */
void fpu_init(void)
{
        unsigned long cr0;
        unsigned long cr4_mask = 0;

        if (cpu_has_fxsr)
                cr4_mask |= X86_CR4_OSFXSR;
        if (cpu_has_xmm)
                cr4_mask |= X86_CR4_OSXMMEXCPT;
        if (cr4_mask)
                set_in_cr4(cr4_mask);

        cr0 = read_cr0();
        cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
        if (!HAVE_HWFP)
                cr0 |= X86_CR0_EM;
        write_cr0(cr0);

        /*
         * init_thread_xstate is only called once to avoid overriding
         * xstate_size during boot time or during CPU hotplug.
         */
        if (xstate_size == 0)
                init_thread_xstate();

        mxcsr_feature_mask_init();
        xsave_init();
        eager_fpu_init();
}

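/*
 * Set an FPU state area to the architectural reset defaults. For the
 * FSAVE-only case the magic numbers are the i387 power-on values:
 * cwd 0x037f masks all exceptions and twd 0xffff marks every register
 * empty (the set high halves pad the 16-bit values out to the 32-bit
 * slots of the FSAVE memory image).
 */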
void fpu_finit(struct fpu *fpu)
{
        if (!HAVE_HWFP) {
                finit_soft_fpu(&fpu->state->soft);
                return;
        }

        if (cpu_has_fxsr) {
                fx_finit(&fpu->state->fxsave);
        } else {
                struct i387_fsave_struct *fp = &fpu->state->fsave;
                memset(fp, 0, xstate_size);
                fp->cwd = 0xffff037fu;
                fp->swd = 0xffff0000u;
                fp->twd = 0xffffffffu;
                fp->fos = 0xffff0000u;
        }
}
EXPORT_SYMBOL_GPL(fpu_finit);

/*
 * The _current_ task is using the FPU for the first time,
 * so initialize it, set the mxcsr to its default
 * value at reset if we support XMM instructions, and then
 * remember that the current task has used the FPU.
 */
int init_fpu(struct task_struct *tsk)
{
        int ret;

        if (tsk_used_math(tsk)) {
                if (HAVE_HWFP && tsk == current)
                        unlazy_fpu(tsk);
                tsk->thread.fpu.last_cpu = ~0;
                return 0;
        }

        /*
         * Memory allocation at the first usage of the FPU and other state.
         */
        ret = fpu_alloc(&tsk->thread.fpu);
        if (ret)
                return ret;

        fpu_finit(&tsk->thread.fpu);

        set_stopped_child_used_math(tsk);
        return 0;
}
EXPORT_SYMBOL_GPL(init_fpu);

/*
 * The xstateregs_active() routine is the same as the fpregs_active() routine,
 * as the "regset->n" for the xstate regset will be updated based on the
 * feature capabilities supported by the xsave.
 */
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
        return tsk_used_math(target) ? regset->n : 0;
}

int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
{
        return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
}

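/*
 * Regset accessors for the raw FXSAVE area. sanitize_i387_state() is
 * needed because xsaveopt may have skipped writing components that
 * were in their init state, so the memory image is first brought to a
 * fully initialized form before being copied out or modified.
 */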
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
                unsigned int pos, unsigned int count,
                void *kbuf, void __user *ubuf)
{
        int ret;

        if (!cpu_has_fxsr)
                return -ENODEV;

        ret = init_fpu(target);
        if (ret)
                return ret;

        sanitize_i387_state(target);

        return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                   &target->thread.fpu.state->fxsave, 0, -1);
}

int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
                unsigned int pos, unsigned int count,
                const void *kbuf, const void __user *ubuf)
{
        int ret;

        if (!cpu_has_fxsr)
                return -ENODEV;

        ret = init_fpu(target);
        if (ret)
                return ret;

        sanitize_i387_state(target);

        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                 &target->thread.fpu.state->fxsave, 0, -1);

        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
        target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

        /*
         * update the header bits in the xsave header, indicating the
         * presence of FP and SSE state.
         */
        if (cpu_has_xsave)
                target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

        return ret;
}

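/*
 * Accessors for the full xstate buffer. The sw_reserved bytes of the
 * fxsave area are filled with the kernel's software description of the
 * xstate layout (xstate_fx_sw_bytes), the same marker the signal frame
 * code uses, so user space can parse the extended state.
 */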
int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                unsigned int pos, unsigned int count,
                void *kbuf, void __user *ubuf)
{
        int ret;

        if (!cpu_has_xsave)
                return -ENODEV;

        ret = init_fpu(target);
        if (ret)
                return ret;

        /*
         * Copy the 48 bytes defined by software first into the xstate
         * memory layout in the thread struct, so that we can copy the
         * entire xstateregs to the user using one user_regset_copyout().
         */
        memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));

        /*
         * Copy the xstate memory layout.
         */
        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                  &target->thread.fpu.state->xsave, 0, -1);
        return ret;
}

int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                  unsigned int pos, unsigned int count,
                  const void *kbuf, const void __user *ubuf)
{
        int ret;
        struct xsave_hdr_struct *xsave_hdr;

        if (!cpu_has_xsave)
                return -ENODEV;

        ret = init_fpu(target);
        if (ret)
                return ret;

        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                 &target->thread.fpu.state->xsave, 0, -1);

        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
        target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

        xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;

        xsave_hdr->xstate_bv &= pcntxt_mask;
        /*
         * These bits must be zero.
         */
        memset(xsave_hdr->reserved, 0, 48);

        return ret;
}

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION

/*
 * FPU tag word conversions.
 */

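/*
 * FSAVE keeps two tag bits per register (00 valid, 01 zero, 10 special,
 * 11 empty) while FXSAVE keeps only one (0 empty, 1 in use). Shrinking
 * the tags is therefore a pure bit manipulation, but expanding them
 * requires re-deriving each tag from the register contents.
 */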
static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
        unsigned int tmp; /* to avoid 16 bit prefixes in the code */

        /* Transform each pair of bits into 01 (valid) or 00 (empty) */
        tmp = ~twd;
        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
        /* and move the valid bits to the lower byte. */
        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */

        return tmp;
}

#define FPREG_ADDR(f, n)        ((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID        0
#define FP_EXP_TAG_ZERO         1
#define FP_EXP_TAG_SPECIAL      2
#define FP_EXP_TAG_EMPTY        3

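/*
 * Rebuild the two-bit i387 tag word from an FXSAVE image. The register
 * file in st_space is stored relative to the top-of-stack field in swd,
 * so physical register i lives in slot (i - tos) & 7, and its tag is
 * classified by inspecting the exponent and significand.
 */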
static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
        struct _fpxreg *st;
        u32 tos = (fxsave->swd >> 11) & 7;
        u32 twd = (unsigned long) fxsave->twd;
        u32 tag;
        u32 ret = 0xffff0000u;
        int i;

        for (i = 0; i < 8; i++, twd >>= 1) {
                if (twd & 0x1) {
                        st = FPREG_ADDR(fxsave, (i - tos) & 7);

                        switch (st->exponent & 0x7fff) {
                        case 0x7fff:
                                tag = FP_EXP_TAG_SPECIAL;
                                break;
                        case 0x0000:
                                if (!st->significand[0] &&
                                    !st->significand[1] &&
                                    !st->significand[2] &&
                                    !st->significand[3])
                                        tag = FP_EXP_TAG_ZERO;
                                else
                                        tag = FP_EXP_TAG_SPECIAL;
                                break;
                        default:
                                if (st->significand[3] & 0x8000)
                                        tag = FP_EXP_TAG_VALID;
                                else
                                        tag = FP_EXP_TAG_SPECIAL;
                                break;
                        }
                } else {
                        tag = FP_EXP_TAG_EMPTY;
                }
                ret |= tag << (2 * i);
        }
        return ret;
}

/*
 * FXSR floating point environment conversions.
 */

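/*
 * Translate between the legacy 32-bit FSAVE environment
 * (user_i387_ia32_struct, with 10-byte packed st_space entries) and
 * the FXSAVE image, whose registers sit in 16-byte aligned slots.
 */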
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
        struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
        struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
        struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
        int i;

        env->cwd = fxsave->cwd | 0xffff0000u;
        env->swd = fxsave->swd | 0xffff0000u;
        env->twd = twd_fxsr_to_i387(fxsave);

#ifdef CONFIG_X86_64
        env->fip = fxsave->rip;
        env->foo = fxsave->rdp;
        /*
         * These should actually be ds/cs at FPU exception time, but
         * that information is not available in 64-bit mode.
         */
        env->fcs = task_pt_regs(tsk)->cs;
        if (tsk == current) {
                savesegment(ds, env->fos);
        } else {
                env->fos = tsk->thread.ds;
        }
        env->fos |= 0xffff0000;
#else
        env->fip = fxsave->fip;
        env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
        env->foo = fxsave->foo;
        env->fos = fxsave->fos;
#endif

        for (i = 0; i < 8; ++i)
                memcpy(&to[i], &from[i], sizeof(to[0]));
}

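/*
 * The inverse of convert_from_fxsr(). Note that the opcode word was
 * packed into the high half of env->fcs by the 32-bit path above,
 * which is why fop is recovered from there, and that cs/ds cannot be
 * restored on 64-bit.
 */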
void convert_to_fxsr(struct task_struct *tsk,
                     const struct user_i387_ia32_struct *env)
{
        struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
        struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
        struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
        int i;

        fxsave->cwd = env->cwd;
        fxsave->swd = env->swd;
        fxsave->twd = twd_i387_to_fxsr(env->twd);
        fxsave->fop = (u16) ((u32) env->fcs >> 16);
#ifdef CONFIG_X86_64
        fxsave->rip = env->fip;
        fxsave->rdp = env->foo;
        /* cs and ds ignored */
#else
        fxsave->fip = env->fip;
        fxsave->fcs = (env->fcs & 0xffff);
        fxsave->foo = env->foo;
        fxsave->fos = env->fos;
#endif

        for (i = 0; i < 8; ++i)
                memcpy(&to[i], &from[i], sizeof(from[0]));
}

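/*
 * Regset accessors for the legacy user_i387_ia32_struct view, used by
 * 32-bit ptrace and a.out core dumps; they convert to and from the
 * FXSAVE layout when the CPU has FXSR.
 */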
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
               unsigned int pos, unsigned int count,
               void *kbuf, void __user *ubuf)
{
        struct user_i387_ia32_struct env;
        int ret;

        ret = init_fpu(target);
        if (ret)
                return ret;

        if (!HAVE_HWFP)
                return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

        if (!cpu_has_fxsr) {
                return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                           &target->thread.fpu.state->fsave, 0,
                                           -1);
        }

        sanitize_i387_state(target);

        if (kbuf && pos == 0 && count == sizeof(env)) {
                convert_from_fxsr(kbuf, target);
                return 0;
        }

        convert_from_fxsr(&env, target);

        return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}

int fpregs_set(struct task_struct *target, const struct user_regset *regset,
               unsigned int pos, unsigned int count,
               const void *kbuf, const void __user *ubuf)
{
        struct user_i387_ia32_struct env;
        int ret;

        ret = init_fpu(target);
        if (ret)
                return ret;

        sanitize_i387_state(target);

        if (!HAVE_HWFP)
                return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

        if (!cpu_has_fxsr) {
                return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &target->thread.fpu.state->fsave, 0, -1);
        }

        if (pos > 0 || count < sizeof(env))
                convert_from_fxsr(&env, target);

        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
        if (!ret)
                convert_to_fxsr(target, &env);

        /*
         * update the header bit in the xsave header, indicating the
         * presence of FP.
         */
        if (cpu_has_xsave)
                target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
        return ret;
}

/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.
 * It is declared generically using elf_fpregset_t (which is
 * struct user_i387_struct) but is in fact only used for 32-bit
 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
 */
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
        struct task_struct *tsk = current;
        int fpvalid;

        fpvalid = !!used_math();
        if (fpvalid)
                fpvalid = !fpregs_get(tsk, NULL,
                                      0, sizeof(struct user_i387_ia32_struct),
                                      fpu, NULL);

        return fpvalid;
}
EXPORT_SYMBOL(dump_fpu);

#endif  /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */