linux/arch/powerpc/math-emu/math_efp.c
<<
>>
Prefs
   1/*
   2 * arch/powerpc/math-emu/math_efp.c
   3 *
   4 * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
   5 *
   6 * Author: Ebony Zhu,   <ebony.zhu@freescale.com>
   7 *         Yu Liu,      <yu.liu@freescale.com>
   8 *
   9 * Derived from arch/alpha/math-emu/math.c
  10 *              arch/powerpc/math-emu/math.c
  11 *
  12 * Description:
   13 * This file contains the exception handler that makes E500 SPE
   14 * instructions fully comply with the IEEE-754 floating-point standard.
  15 *
  16 * This program is free software; you can redistribute it and/or
  17 * modify it under the terms of the GNU General Public License
  18 * as published by the Free Software Foundation; either version
  19 * 2 of the License, or (at your option) any later version.
  20 */
  21
  22#include <linux/types.h>
  23#include <linux/prctl.h>
  24
  25#include <linux/uaccess.h>
  26#include <asm/reg.h>
  27
  28#define FP_EX_BOOKE_E500_SPE
  29#include <asm/sfp-machine.h>
  30
  31#include <math-emu/soft-fp.h>
  32#include <math-emu/single.h>
  33#include <math-emu/double.h>
  34
  35#define EFAPU           0x4
  36
  37#define VCT             0x4
  38#define SPFP            0x6
  39#define DPFP            0x7
  40
  41#define EFSADD          0x2c0
  42#define EFSSUB          0x2c1
  43#define EFSABS          0x2c4
  44#define EFSNABS         0x2c5
  45#define EFSNEG          0x2c6
  46#define EFSMUL          0x2c8
  47#define EFSDIV          0x2c9
  48#define EFSCMPGT        0x2cc
  49#define EFSCMPLT        0x2cd
  50#define EFSCMPEQ        0x2ce
  51#define EFSCFD          0x2cf
  52#define EFSCFSI         0x2d1
  53#define EFSCTUI         0x2d4
  54#define EFSCTSI         0x2d5
  55#define EFSCTUF         0x2d6
  56#define EFSCTSF         0x2d7
  57#define EFSCTUIZ        0x2d8
  58#define EFSCTSIZ        0x2da
  59
  60#define EVFSADD         0x280
  61#define EVFSSUB         0x281
  62#define EVFSABS         0x284
  63#define EVFSNABS        0x285
  64#define EVFSNEG         0x286
  65#define EVFSMUL         0x288
  66#define EVFSDIV         0x289
  67#define EVFSCMPGT       0x28c
  68#define EVFSCMPLT       0x28d
  69#define EVFSCMPEQ       0x28e
  70#define EVFSCTUI        0x294
  71#define EVFSCTSI        0x295
  72#define EVFSCTUF        0x296
  73#define EVFSCTSF        0x297
  74#define EVFSCTUIZ       0x298
  75#define EVFSCTSIZ       0x29a
  76
  77#define EFDADD          0x2e0
  78#define EFDSUB          0x2e1
  79#define EFDABS          0x2e4
  80#define EFDNABS         0x2e5
  81#define EFDNEG          0x2e6
  82#define EFDMUL          0x2e8
  83#define EFDDIV          0x2e9
  84#define EFDCTUIDZ       0x2ea
  85#define EFDCTSIDZ       0x2eb
  86#define EFDCMPGT        0x2ec
  87#define EFDCMPLT        0x2ed
  88#define EFDCMPEQ        0x2ee
  89#define EFDCFS          0x2ef
  90#define EFDCTUI         0x2f4
  91#define EFDCTSI         0x2f5
  92#define EFDCTUF         0x2f6
  93#define EFDCTSF         0x2f7
  94#define EFDCTUIZ        0x2f8
  95#define EFDCTSIZ        0x2fa
  96
  97#define AB      2
  98#define XA      3
  99#define XB      4
 100#define XCR     5
 101#define NOTYPE  0
 102
 103#define SIGN_BIT_S      (1UL << 31)
 104#define SIGN_BIT_D      (1ULL << 63)
 105#define FP_EX_MASK      (FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
 106                        FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
 107
 108static int have_e500_cpu_a005_erratum;
 109
 110union dw_union {
 111        u64 dp[1];
 112        u32 wp[2];
 113};
 114
 115static unsigned long insn_type(unsigned long speinsn)
 116{
 117        unsigned long ret = NOTYPE;
 118
 119        switch (speinsn & 0x7ff) {
 120        case EFSABS:    ret = XA;       break;
 121        case EFSADD:    ret = AB;       break;
 122        case EFSCFD:    ret = XB;       break;
 123        case EFSCMPEQ:  ret = XCR;      break;
 124        case EFSCMPGT:  ret = XCR;      break;
 125        case EFSCMPLT:  ret = XCR;      break;
 126        case EFSCTSF:   ret = XB;       break;
 127        case EFSCTSI:   ret = XB;       break;
 128        case EFSCTSIZ:  ret = XB;       break;
 129        case EFSCTUF:   ret = XB;       break;
 130        case EFSCTUI:   ret = XB;       break;
 131        case EFSCTUIZ:  ret = XB;       break;
 132        case EFSDIV:    ret = AB;       break;
 133        case EFSMUL:    ret = AB;       break;
 134        case EFSNABS:   ret = XA;       break;
 135        case EFSNEG:    ret = XA;       break;
 136        case EFSSUB:    ret = AB;       break;
 137        case EFSCFSI:   ret = XB;       break;
 138
 139        case EVFSABS:   ret = XA;       break;
 140        case EVFSADD:   ret = AB;       break;
 141        case EVFSCMPEQ: ret = XCR;      break;
 142        case EVFSCMPGT: ret = XCR;      break;
 143        case EVFSCMPLT: ret = XCR;      break;
 144        case EVFSCTSF:  ret = XB;       break;
 145        case EVFSCTSI:  ret = XB;       break;
 146        case EVFSCTSIZ: ret = XB;       break;
 147        case EVFSCTUF:  ret = XB;       break;
 148        case EVFSCTUI:  ret = XB;       break;
 149        case EVFSCTUIZ: ret = XB;       break;
 150        case EVFSDIV:   ret = AB;       break;
 151        case EVFSMUL:   ret = AB;       break;
 152        case EVFSNABS:  ret = XA;       break;
 153        case EVFSNEG:   ret = XA;       break;
 154        case EVFSSUB:   ret = AB;       break;
 155
 156        case EFDABS:    ret = XA;       break;
 157        case EFDADD:    ret = AB;       break;
 158        case EFDCFS:    ret = XB;       break;
 159        case EFDCMPEQ:  ret = XCR;      break;
 160        case EFDCMPGT:  ret = XCR;      break;
 161        case EFDCMPLT:  ret = XCR;      break;
 162        case EFDCTSF:   ret = XB;       break;
 163        case EFDCTSI:   ret = XB;       break;
 164        case EFDCTSIDZ: ret = XB;       break;
 165        case EFDCTSIZ:  ret = XB;       break;
 166        case EFDCTUF:   ret = XB;       break;
 167        case EFDCTUI:   ret = XB;       break;
 168        case EFDCTUIDZ: ret = XB;       break;
 169        case EFDCTUIZ:  ret = XB;       break;
 170        case EFDDIV:    ret = AB;       break;
 171        case EFDMUL:    ret = AB;       break;
 172        case EFDNABS:   ret = XA;       break;
 173        case EFDNEG:    ret = XA;       break;
 174        case EFDSUB:    ret = AB;       break;
 175        }
 176
 177        return ret;
 178}
 179
 180int do_spe_mathemu(struct pt_regs *regs)
 181{
 182        FP_DECL_EX;
 183        int IR, cmp;
 184
 185        unsigned long type, func, fc, fa, fb, src, speinsn;
 186        union dw_union vc, va, vb;
 187
 188        if (get_user(speinsn, (unsigned int __user *) regs->nip))
 189                return -EFAULT;
 190        if ((speinsn >> 26) != EFAPU)
 191                return -EINVAL;         /* not an spe instruction */
 192
 193        type = insn_type(speinsn);
 194        if (type == NOTYPE)
 195                goto illegal;
 196
 197        func = speinsn & 0x7ff;
 198        fc = (speinsn >> 21) & 0x1f;
 199        fa = (speinsn >> 16) & 0x1f;
 200        fb = (speinsn >> 11) & 0x1f;
 201        src = (speinsn >> 5) & 0x7;
 202
 203        vc.wp[0] = current->thread.evr[fc];
 204        vc.wp[1] = regs->gpr[fc];
 205        va.wp[0] = current->thread.evr[fa];
 206        va.wp[1] = regs->gpr[fa];
 207        vb.wp[0] = current->thread.evr[fb];
 208        vb.wp[1] = regs->gpr[fb];
 209
 210        __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
 211
 212        pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
 213        pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
 214        pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
 215        pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
 216
 217        switch (src) {
 218        case SPFP: {
 219                FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
 220
 221                switch (type) {
 222                case AB:
 223                case XCR:
 224                        FP_UNPACK_SP(SA, va.wp + 1);
 225                case XB:
 226                        FP_UNPACK_SP(SB, vb.wp + 1);
 227                        break;
 228                case XA:
 229                        FP_UNPACK_SP(SA, va.wp + 1);
 230                        break;
 231                }
 232
 233                pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
 234                pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
 235
 236                switch (func) {
 237                case EFSABS:
 238                        vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
 239                        goto update_regs;
 240
 241                case EFSNABS:
 242                        vc.wp[1] = va.wp[1] | SIGN_BIT_S;
 243                        goto update_regs;
 244
 245                case EFSNEG:
 246                        vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
 247                        goto update_regs;
 248
 249                case EFSADD:
 250                        FP_ADD_S(SR, SA, SB);
 251                        goto pack_s;
 252
 253                case EFSSUB:
 254                        FP_SUB_S(SR, SA, SB);
 255                        goto pack_s;
 256
 257                case EFSMUL:
 258                        FP_MUL_S(SR, SA, SB);
 259                        goto pack_s;
 260
 261                case EFSDIV:
 262                        FP_DIV_S(SR, SA, SB);
 263                        goto pack_s;
 264
 265                case EFSCMPEQ:
 266                        cmp = 0;
 267                        goto cmp_s;
 268
 269                case EFSCMPGT:
 270                        cmp = 1;
 271                        goto cmp_s;
 272
 273                case EFSCMPLT:
 274                        cmp = -1;
 275                        goto cmp_s;
 276
 277                case EFSCTSF:
 278                case EFSCTUF:
 279                        if (SB_c == FP_CLS_NAN) {
 280                                vc.wp[1] = 0;
 281                                FP_SET_EXCEPTION(FP_EX_INVALID);
 282                        } else {
 283                                SB_e += (func == EFSCTSF ? 31 : 32);
 284                                FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
 285                                                (func == EFSCTSF));
 286                        }
 287                        goto update_regs;
 288
 289                case EFSCFD: {
 290                        FP_DECL_D(DB);
 291                        FP_CLEAR_EXCEPTIONS;
 292                        FP_UNPACK_DP(DB, vb.dp);
 293
 294                        pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
 295                                        DB_s, DB_f1, DB_f0, DB_e, DB_c);
 296
 297                        FP_CONV(S, D, 1, 2, SR, DB);
 298                        goto pack_s;
 299                }
 300
 301                case EFSCTSI:
 302                case EFSCTUI:
 303                        if (SB_c == FP_CLS_NAN) {
 304                                vc.wp[1] = 0;
 305                                FP_SET_EXCEPTION(FP_EX_INVALID);
 306                        } else {
 307                                FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
 308                                                ((func & 0x3) != 0));
 309                        }
 310                        goto update_regs;
 311
 312                case EFSCTSIZ:
 313                case EFSCTUIZ:
 314                        if (SB_c == FP_CLS_NAN) {
 315                                vc.wp[1] = 0;
 316                                FP_SET_EXCEPTION(FP_EX_INVALID);
 317                        } else {
 318                                FP_TO_INT_S(vc.wp[1], SB, 32,
 319                                                ((func & 0x3) != 0));
 320                        }
 321                        goto update_regs;
 322
 323                default:
 324                        goto illegal;
 325                }
 326                break;
 327
 328pack_s:
 329                pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
 330
 331                FP_PACK_SP(vc.wp + 1, SR);
 332                goto update_regs;
 333
 334cmp_s:
 335                FP_CMP_S(IR, SA, SB, 3);
 336                if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
 337                        FP_SET_EXCEPTION(FP_EX_INVALID);
 338                if (IR == cmp) {
 339                        IR = 0x4;
 340                } else {
 341                        IR = 0;
 342                }
 343                goto update_ccr;
 344        }
 345
 346        case DPFP: {
 347                FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
 348
 349                switch (type) {
 350                case AB:
 351                case XCR:
 352                        FP_UNPACK_DP(DA, va.dp);
 353                case XB:
 354                        FP_UNPACK_DP(DB, vb.dp);
 355                        break;
 356                case XA:
 357                        FP_UNPACK_DP(DA, va.dp);
 358                        break;
 359                }
 360
 361                pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
 362                                DA_s, DA_f1, DA_f0, DA_e, DA_c);
 363                pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
 364                                DB_s, DB_f1, DB_f0, DB_e, DB_c);
 365
 366                switch (func) {
 367                case EFDABS:
 368                        vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
 369                        goto update_regs;
 370
 371                case EFDNABS:
 372                        vc.dp[0] = va.dp[0] | SIGN_BIT_D;
 373                        goto update_regs;
 374
 375                case EFDNEG:
 376                        vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
 377                        goto update_regs;
 378
 379                case EFDADD:
 380                        FP_ADD_D(DR, DA, DB);
 381                        goto pack_d;
 382
 383                case EFDSUB:
 384                        FP_SUB_D(DR, DA, DB);
 385                        goto pack_d;
 386
 387                case EFDMUL:
 388                        FP_MUL_D(DR, DA, DB);
 389                        goto pack_d;
 390
 391                case EFDDIV:
 392                        FP_DIV_D(DR, DA, DB);
 393                        goto pack_d;
 394
 395                case EFDCMPEQ:
 396                        cmp = 0;
 397                        goto cmp_d;
 398
 399                case EFDCMPGT:
 400                        cmp = 1;
 401                        goto cmp_d;
 402
 403                case EFDCMPLT:
 404                        cmp = -1;
 405                        goto cmp_d;
 406
 407                case EFDCTSF:
 408                case EFDCTUF:
 409                        if (DB_c == FP_CLS_NAN) {
 410                                vc.wp[1] = 0;
 411                                FP_SET_EXCEPTION(FP_EX_INVALID);
 412                        } else {
 413                                DB_e += (func == EFDCTSF ? 31 : 32);
 414                                FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
 415                                                (func == EFDCTSF));
 416                        }
 417                        goto update_regs;
 418
 419                case EFDCFS: {
 420                        FP_DECL_S(SB);
 421                        FP_CLEAR_EXCEPTIONS;
 422                        FP_UNPACK_SP(SB, vb.wp + 1);
 423
 424                        pr_debug("SB: %ld %08lx %ld (%ld)\n",
 425                                        SB_s, SB_f, SB_e, SB_c);
 426
 427                        FP_CONV(D, S, 2, 1, DR, SB);
 428                        goto pack_d;
 429                }
 430
 431                case EFDCTUIDZ:
 432                case EFDCTSIDZ:
 433                        if (DB_c == FP_CLS_NAN) {
 434                                vc.dp[0] = 0;
 435                                FP_SET_EXCEPTION(FP_EX_INVALID);
 436                        } else {
 437                                FP_TO_INT_D(vc.dp[0], DB, 64,
 438                                                ((func & 0x1) == 0));
 439                        }
 440                        goto update_regs;
 441
 442                case EFDCTUI:
 443                case EFDCTSI:
 444                        if (DB_c == FP_CLS_NAN) {
 445                                vc.wp[1] = 0;
 446                                FP_SET_EXCEPTION(FP_EX_INVALID);
 447                        } else {
 448                                FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
 449                                                ((func & 0x3) != 0));
 450                        }
 451                        goto update_regs;
 452
 453                case EFDCTUIZ:
 454                case EFDCTSIZ:
 455                        if (DB_c == FP_CLS_NAN) {
 456                                vc.wp[1] = 0;
 457                                FP_SET_EXCEPTION(FP_EX_INVALID);
 458                        } else {
 459                                FP_TO_INT_D(vc.wp[1], DB, 32,
 460                                                ((func & 0x3) != 0));
 461                        }
 462                        goto update_regs;
 463
 464                default:
 465                        goto illegal;
 466                }
 467                break;
 468
 469pack_d:
 470                pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
 471                                DR_s, DR_f1, DR_f0, DR_e, DR_c);
 472
 473                FP_PACK_DP(vc.dp, DR);
 474                goto update_regs;
 475
 476cmp_d:
 477                FP_CMP_D(IR, DA, DB, 3);
 478                if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
 479                        FP_SET_EXCEPTION(FP_EX_INVALID);
 480                if (IR == cmp) {
 481                        IR = 0x4;
 482                } else {
 483                        IR = 0;
 484                }
 485                goto update_ccr;
 486
 487        }
 488
 489        case VCT: {
 490                FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
 491                FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
 492                int IR0, IR1;
 493
 494                switch (type) {
 495                case AB:
 496                case XCR:
 497                        FP_UNPACK_SP(SA0, va.wp);
 498                        FP_UNPACK_SP(SA1, va.wp + 1);
 499                case XB:
 500                        FP_UNPACK_SP(SB0, vb.wp);
 501                        FP_UNPACK_SP(SB1, vb.wp + 1);
 502                        break;
 503                case XA:
 504                        FP_UNPACK_SP(SA0, va.wp);
 505                        FP_UNPACK_SP(SA1, va.wp + 1);
 506                        break;
 507                }
 508
 509                pr_debug("SA0: %ld %08lx %ld (%ld)\n",
 510                                SA0_s, SA0_f, SA0_e, SA0_c);
 511                pr_debug("SA1: %ld %08lx %ld (%ld)\n",
 512                                SA1_s, SA1_f, SA1_e, SA1_c);
 513                pr_debug("SB0: %ld %08lx %ld (%ld)\n",
 514                                SB0_s, SB0_f, SB0_e, SB0_c);
 515                pr_debug("SB1: %ld %08lx %ld (%ld)\n",
 516                                SB1_s, SB1_f, SB1_e, SB1_c);
 517
 518                switch (func) {
 519                case EVFSABS:
 520                        vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
 521                        vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
 522                        goto update_regs;
 523
 524                case EVFSNABS:
 525                        vc.wp[0] = va.wp[0] | SIGN_BIT_S;
 526                        vc.wp[1] = va.wp[1] | SIGN_BIT_S;
 527                        goto update_regs;
 528
 529                case EVFSNEG:
 530                        vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
 531                        vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
 532                        goto update_regs;
 533
 534                case EVFSADD:
 535                        FP_ADD_S(SR0, SA0, SB0);
 536                        FP_ADD_S(SR1, SA1, SB1);
 537                        goto pack_vs;
 538
 539                case EVFSSUB:
 540                        FP_SUB_S(SR0, SA0, SB0);
 541                        FP_SUB_S(SR1, SA1, SB1);
 542                        goto pack_vs;
 543
 544                case EVFSMUL:
 545                        FP_MUL_S(SR0, SA0, SB0);
 546                        FP_MUL_S(SR1, SA1, SB1);
 547                        goto pack_vs;
 548
 549                case EVFSDIV:
 550                        FP_DIV_S(SR0, SA0, SB0);
 551                        FP_DIV_S(SR1, SA1, SB1);
 552                        goto pack_vs;
 553
 554                case EVFSCMPEQ:
 555                        cmp = 0;
 556                        goto cmp_vs;
 557
 558                case EVFSCMPGT:
 559                        cmp = 1;
 560                        goto cmp_vs;
 561
 562                case EVFSCMPLT:
 563                        cmp = -1;
 564                        goto cmp_vs;
 565
 566                case EVFSCTUF:
 567                case EVFSCTSF:
 568                        if (SB0_c == FP_CLS_NAN) {
 569                                vc.wp[0] = 0;
 570                                FP_SET_EXCEPTION(FP_EX_INVALID);
 571                        } else {
 572                                SB0_e += (func == EVFSCTSF ? 31 : 32);
 573                                FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
 574                                                (func == EVFSCTSF));
 575                        }
 576                        if (SB1_c == FP_CLS_NAN) {
 577                                vc.wp[1] = 0;
 578                                FP_SET_EXCEPTION(FP_EX_INVALID);
 579                        } else {
 580                                SB1_e += (func == EVFSCTSF ? 31 : 32);
 581                                FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
 582                                                (func == EVFSCTSF));
 583                        }
 584                        goto update_regs;
 585
 586                case EVFSCTUI:
 587                case EVFSCTSI:
 588                        if (SB0_c == FP_CLS_NAN) {
 589                                vc.wp[0] = 0;
 590                                FP_SET_EXCEPTION(FP_EX_INVALID);
 591                        } else {
 592                                FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
 593                                                ((func & 0x3) != 0));
 594                        }
 595                        if (SB1_c == FP_CLS_NAN) {
 596                                vc.wp[1] = 0;
 597                                FP_SET_EXCEPTION(FP_EX_INVALID);
 598                        } else {
 599                                FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
 600                                                ((func & 0x3) != 0));
 601                        }
 602                        goto update_regs;
 603
 604                case EVFSCTUIZ:
 605                case EVFSCTSIZ:
 606                        if (SB0_c == FP_CLS_NAN) {
 607                                vc.wp[0] = 0;
 608                                FP_SET_EXCEPTION(FP_EX_INVALID);
 609                        } else {
 610                                FP_TO_INT_S(vc.wp[0], SB0, 32,
 611                                                ((func & 0x3) != 0));
 612                        }
 613                        if (SB1_c == FP_CLS_NAN) {
 614                                vc.wp[1] = 0;
 615                                FP_SET_EXCEPTION(FP_EX_INVALID);
 616                        } else {
 617                                FP_TO_INT_S(vc.wp[1], SB1, 32,
 618                                                ((func & 0x3) != 0));
 619                        }
 620                        goto update_regs;
 621
 622                default:
 623                        goto illegal;
 624                }
 625                break;
 626
 627pack_vs:
 628                pr_debug("SR0: %ld %08lx %ld (%ld)\n",
 629                                SR0_s, SR0_f, SR0_e, SR0_c);
 630                pr_debug("SR1: %ld %08lx %ld (%ld)\n",
 631                                SR1_s, SR1_f, SR1_e, SR1_c);
 632
 633                FP_PACK_SP(vc.wp, SR0);
 634                FP_PACK_SP(vc.wp + 1, SR1);
 635                goto update_regs;
 636
 637cmp_vs:
 638                {
 639                        int ch, cl;
 640
 641                        FP_CMP_S(IR0, SA0, SB0, 3);
 642                        FP_CMP_S(IR1, SA1, SB1, 3);
 643                        if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
 644                                FP_SET_EXCEPTION(FP_EX_INVALID);
 645                        if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
 646                                FP_SET_EXCEPTION(FP_EX_INVALID);
 647                        ch = (IR0 == cmp) ? 1 : 0;
 648                        cl = (IR1 == cmp) ? 1 : 0;
 649                        IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
 650                                ((ch & cl) << 0);
 651                        goto update_ccr;
 652                }
 653        }
 654        default:
 655                return -EINVAL;
 656        }
 657
 658update_ccr:
 659        regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
 660        regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
 661
 662update_regs:
 663        /*
 664         * If the "invalid" exception sticky bit was set by the
 665         * processor for non-finite input, but was not set before the
 666         * instruction being emulated, clear it.  Likewise for the
 667         * "underflow" bit, which may have been set by the processor
 668         * for exact underflow, not just inexact underflow when the
 669         * flag should be set for IEEE 754 semantics.  Other sticky
 670         * exceptions will only be set by the processor when they are
 671         * correct according to IEEE 754 semantics, and we must not
 672         * clear sticky bits that were already set before the emulated
 673         * instruction as they represent the user-visible sticky
 674         * exception status.  "inexact" traps to kernel are not
 675         * required for IEEE semantics and are not enabled by default,
 676         * so the "inexact" sticky bit may have been set by a previous
 677         * instruction without the kernel being aware of it.
 678         */
 679        __FPU_FPSCR
 680          &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
 681        __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
 682        mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
 683        current->thread.spefscr_last = __FPU_FPSCR;
 684
 685        current->thread.evr[fc] = vc.wp[0];
 686        regs->gpr[fc] = vc.wp[1];
 687
 688        pr_debug("ccr = %08lx\n", regs->ccr);
 689        pr_debug("cur exceptions = %08x spefscr = %08lx\n",
 690                        FP_CUR_EXCEPTIONS, __FPU_FPSCR);
 691        pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
 692        pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
 693        pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
 694
 695        if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
 696                if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
 697                    && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
 698                        return 1;
 699                if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
 700                    && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
 701                        return 1;
 702                if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
 703                    && (current->thread.fpexc_mode & PR_FP_EXC_UND))
 704                        return 1;
 705                if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
 706                    && (current->thread.fpexc_mode & PR_FP_EXC_RES))
 707                        return 1;
 708                if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
 709                    && (current->thread.fpexc_mode & PR_FP_EXC_INV))
 710                        return 1;
 711        }
 712        return 0;
 713
 714illegal:
 715        if (have_e500_cpu_a005_erratum) {
 716                /* according to e500 cpu a005 erratum, reissue efp inst */
 717                regs->nip -= 4;
 718                pr_debug("re-issue efp inst: %08lx\n", speinsn);
 719                return 0;
 720        }
 721
 722        printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
 723        return -ENOSYS;
 724}
 725
 726int speround_handler(struct pt_regs *regs)
 727{
 728        union dw_union fgpr;
 729        int s_lo, s_hi;
 730        int lo_inexact, hi_inexact;
 731        int fp_result;
 732        unsigned long speinsn, type, fb, fc, fptype, func;
 733
 734        if (get_user(speinsn, (unsigned int __user *) regs->nip))
 735                return -EFAULT;
 736        if ((speinsn >> 26) != 4)
 737                return -EINVAL;         /* not an spe instruction */
 738
 739        func = speinsn & 0x7ff;
 740        type = insn_type(func);
 741        if (type == XCR) return -ENOSYS;
 742
 743        __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
 744        pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
 745
 746        fptype = (speinsn >> 5) & 0x7;
 747
 748        /* No need to round if the result is exact */
 749        lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
 750        hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
 751        if (!(lo_inexact || (hi_inexact && fptype == VCT)))
 752                return 0;
 753
 754        fc = (speinsn >> 21) & 0x1f;
 755        s_lo = regs->gpr[fc] & SIGN_BIT_S;
 756        s_hi = current->thread.evr[fc] & SIGN_BIT_S;
 757        fgpr.wp[0] = current->thread.evr[fc];
 758        fgpr.wp[1] = regs->gpr[fc];
 759
 760        fb = (speinsn >> 11) & 0x1f;
 761        switch (func) {
 762        case EFSCTUIZ:
 763        case EFSCTSIZ:
 764        case EVFSCTUIZ:
 765        case EVFSCTSIZ:
 766        case EFDCTUIDZ:
 767        case EFDCTSIDZ:
 768        case EFDCTUIZ:
 769        case EFDCTSIZ:
 770                /*
 771                 * These instructions always round to zero,
 772                 * independent of the rounding mode.
 773                 */
 774                return 0;
 775
 776        case EFSCTUI:
 777        case EFSCTUF:
 778        case EVFSCTUI:
 779        case EVFSCTUF:
 780        case EFDCTUI:
 781        case EFDCTUF:
 782                fp_result = 0;
 783                s_lo = 0;
 784                s_hi = 0;
 785                break;
 786
 787        case EFSCTSI:
 788        case EFSCTSF:
 789                fp_result = 0;
 790                /* Recover the sign of a zero result if possible.  */
 791                if (fgpr.wp[1] == 0)
 792                        s_lo = regs->gpr[fb] & SIGN_BIT_S;
 793                break;
 794
 795        case EVFSCTSI:
 796        case EVFSCTSF:
 797                fp_result = 0;
 798                /* Recover the sign of a zero result if possible.  */
 799                if (fgpr.wp[1] == 0)
 800                        s_lo = regs->gpr[fb] & SIGN_BIT_S;
 801                if (fgpr.wp[0] == 0)
 802                        s_hi = current->thread.evr[fb] & SIGN_BIT_S;
 803                break;
 804
 805        case EFDCTSI:
 806        case EFDCTSF:
 807                fp_result = 0;
 808                s_hi = s_lo;
 809                /* Recover the sign of a zero result if possible.  */
 810                if (fgpr.wp[1] == 0)
 811                        s_hi = current->thread.evr[fb] & SIGN_BIT_S;
 812                break;
 813
 814        default:
 815                fp_result = 1;
 816                break;
 817        }
 818
 819        pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
 820
 821        switch (fptype) {
 822        /* Since SPE instructions on E500 core can handle round to nearest
 823         * and round toward zero with IEEE-754 complied, we just need
 824         * to handle round toward +Inf and round toward -Inf by software.
 825         */
 826        case SPFP:
 827                if ((FP_ROUNDMODE) == FP_RND_PINF) {
 828                        if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
 829                } else { /* round to -Inf */
 830                        if (s_lo) {
 831                                if (fp_result)
 832                                        fgpr.wp[1]++; /* Z < 0, choose Z2 */
 833                                else
 834                                        fgpr.wp[1]--; /* Z < 0, choose Z2 */
 835                        }
 836                }
 837                break;
 838
 839        case DPFP:
 840                if (FP_ROUNDMODE == FP_RND_PINF) {
 841                        if (!s_hi) {
 842                                if (fp_result)
 843                                        fgpr.dp[0]++; /* Z > 0, choose Z1 */
 844                                else
 845                                        fgpr.wp[1]++; /* Z > 0, choose Z1 */
 846                        }
 847                } else { /* round to -Inf */
 848                        if (s_hi) {
 849                                if (fp_result)
 850                                        fgpr.dp[0]++; /* Z < 0, choose Z2 */
 851                                else
 852                                        fgpr.wp[1]--; /* Z < 0, choose Z2 */
 853                        }
 854                }
 855                break;
 856
 857        case VCT:
 858                if (FP_ROUNDMODE == FP_RND_PINF) {
 859                        if (lo_inexact && !s_lo)
 860                                fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
 861                        if (hi_inexact && !s_hi)
 862                                fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
 863                } else { /* round to -Inf */
 864                        if (lo_inexact && s_lo) {
 865                                if (fp_result)
 866                                        fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
 867                                else
 868                                        fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
 869                        }
 870                        if (hi_inexact && s_hi) {
 871                                if (fp_result)
 872                                        fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
 873                                else
 874                                        fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
 875                        }
 876                }
 877                break;
 878
 879        default:
 880                return -EINVAL;
 881        }
 882
 883        current->thread.evr[fc] = fgpr.wp[0];
 884        regs->gpr[fc] = fgpr.wp[1];
 885
 886        pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
 887
 888        if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
 889                return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
 890        return 0;
 891}
 892
 893int __init spe_mathemu_init(void)
 894{
 895        u32 pvr, maj, min;
 896
 897        pvr = mfspr(SPRN_PVR);
 898
 899        if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
 900            (PVR_VER(pvr) == PVR_VER_E500V2)) {
 901                maj = PVR_MAJ(pvr);
 902                min = PVR_MIN(pvr);
 903
 904                /*
 905                 * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1
 906                 * need cpu a005 errata workaround
 907                 */
 908                switch (maj) {
 909                case 1:
 910                        if (min < 1)
 911                                have_e500_cpu_a005_erratum = 1;
 912                        break;
 913                case 2:
 914                        if (min < 3)
 915                                have_e500_cpu_a005_erratum = 1;
 916                        break;
 917                case 3:
 918                case 4:
 919                case 5:
 920                        if (min < 1)
 921                                have_e500_cpu_a005_erratum = 1;
 922                        break;
 923                default:
 924                        break;
 925                }
 926        }
 927
 928        return 0;
 929}
 930
 /*
  * Hand spe_mathemu_init() to module_init().  Given the function's __init
  * marking it is presumably meant to run once during startup -- confirm
  * against the initcall setup.
  */
 931module_init(spe_mathemu_init);
 932