linux/arch/powerpc/math-emu/math_efp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * arch/powerpc/math-emu/math_efp.c
   4 *
   5 * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
   6 *
   7 * Author: Ebony Zhu,   <ebony.zhu@freescale.com>
   8 *         Yu Liu,      <yu.liu@freescale.com>
   9 *
  10 * Derived from arch/alpha/math-emu/math.c
  11 *              arch/powerpc/math-emu/math.c
  12 *
  13 * Description:
  14 * This file is the exception handler to make E500 SPE instructions
  15 * fully comply with IEEE-754 floating point standard.
  16 */
  17
  18#include <linux/types.h>
  19#include <linux/prctl.h>
  20
  21#include <linux/uaccess.h>
  22#include <asm/reg.h>
  23
  24#define FP_EX_BOOKE_E500_SPE
  25#include <asm/sfp-machine.h>
  26
  27#include <math-emu/soft-fp.h>
  28#include <math-emu/single.h>
  29#include <math-emu/double.h>
  30
  31#define EFAPU           0x4
  32
  33#define VCT             0x4
  34#define SPFP            0x6
  35#define DPFP            0x7
  36
  37#define EFSADD          0x2c0
  38#define EFSSUB          0x2c1
  39#define EFSABS          0x2c4
  40#define EFSNABS         0x2c5
  41#define EFSNEG          0x2c6
  42#define EFSMUL          0x2c8
  43#define EFSDIV          0x2c9
  44#define EFSCMPGT        0x2cc
  45#define EFSCMPLT        0x2cd
  46#define EFSCMPEQ        0x2ce
  47#define EFSCFD          0x2cf
  48#define EFSCFSI         0x2d1
  49#define EFSCTUI         0x2d4
  50#define EFSCTSI         0x2d5
  51#define EFSCTUF         0x2d6
  52#define EFSCTSF         0x2d7
  53#define EFSCTUIZ        0x2d8
  54#define EFSCTSIZ        0x2da
  55
  56#define EVFSADD         0x280
  57#define EVFSSUB         0x281
  58#define EVFSABS         0x284
  59#define EVFSNABS        0x285
  60#define EVFSNEG         0x286
  61#define EVFSMUL         0x288
  62#define EVFSDIV         0x289
  63#define EVFSCMPGT       0x28c
  64#define EVFSCMPLT       0x28d
  65#define EVFSCMPEQ       0x28e
  66#define EVFSCTUI        0x294
  67#define EVFSCTSI        0x295
  68#define EVFSCTUF        0x296
  69#define EVFSCTSF        0x297
  70#define EVFSCTUIZ       0x298
  71#define EVFSCTSIZ       0x29a
  72
  73#define EFDADD          0x2e0
  74#define EFDSUB          0x2e1
  75#define EFDABS          0x2e4
  76#define EFDNABS         0x2e5
  77#define EFDNEG          0x2e6
  78#define EFDMUL          0x2e8
  79#define EFDDIV          0x2e9
  80#define EFDCTUIDZ       0x2ea
  81#define EFDCTSIDZ       0x2eb
  82#define EFDCMPGT        0x2ec
  83#define EFDCMPLT        0x2ed
  84#define EFDCMPEQ        0x2ee
  85#define EFDCFS          0x2ef
  86#define EFDCTUI         0x2f4
  87#define EFDCTSI         0x2f5
  88#define EFDCTUF         0x2f6
  89#define EFDCTSF         0x2f7
  90#define EFDCTUIZ        0x2f8
  91#define EFDCTSIZ        0x2fa
  92
  93#define AB      2
  94#define XA      3
  95#define XB      4
  96#define XCR     5
  97#define NOTYPE  0
  98
  99#define SIGN_BIT_S      (1UL << 31)
 100#define SIGN_BIT_D      (1ULL << 63)
 101#define FP_EX_MASK      (FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
 102                        FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
 103
 104static int have_e500_cpu_a005_erratum;
 105
 106union dw_union {
 107        u64 dp[1];
 108        u32 wp[2];
 109};
 110
 111static unsigned long insn_type(unsigned long speinsn)
 112{
 113        unsigned long ret = NOTYPE;
 114
 115        switch (speinsn & 0x7ff) {
 116        case EFSABS:    ret = XA;       break;
 117        case EFSADD:    ret = AB;       break;
 118        case EFSCFD:    ret = XB;       break;
 119        case EFSCMPEQ:  ret = XCR;      break;
 120        case EFSCMPGT:  ret = XCR;      break;
 121        case EFSCMPLT:  ret = XCR;      break;
 122        case EFSCTSF:   ret = XB;       break;
 123        case EFSCTSI:   ret = XB;       break;
 124        case EFSCTSIZ:  ret = XB;       break;
 125        case EFSCTUF:   ret = XB;       break;
 126        case EFSCTUI:   ret = XB;       break;
 127        case EFSCTUIZ:  ret = XB;       break;
 128        case EFSDIV:    ret = AB;       break;
 129        case EFSMUL:    ret = AB;       break;
 130        case EFSNABS:   ret = XA;       break;
 131        case EFSNEG:    ret = XA;       break;
 132        case EFSSUB:    ret = AB;       break;
 133        case EFSCFSI:   ret = XB;       break;
 134
 135        case EVFSABS:   ret = XA;       break;
 136        case EVFSADD:   ret = AB;       break;
 137        case EVFSCMPEQ: ret = XCR;      break;
 138        case EVFSCMPGT: ret = XCR;      break;
 139        case EVFSCMPLT: ret = XCR;      break;
 140        case EVFSCTSF:  ret = XB;       break;
 141        case EVFSCTSI:  ret = XB;       break;
 142        case EVFSCTSIZ: ret = XB;       break;
 143        case EVFSCTUF:  ret = XB;       break;
 144        case EVFSCTUI:  ret = XB;       break;
 145        case EVFSCTUIZ: ret = XB;       break;
 146        case EVFSDIV:   ret = AB;       break;
 147        case EVFSMUL:   ret = AB;       break;
 148        case EVFSNABS:  ret = XA;       break;
 149        case EVFSNEG:   ret = XA;       break;
 150        case EVFSSUB:   ret = AB;       break;
 151
 152        case EFDABS:    ret = XA;       break;
 153        case EFDADD:    ret = AB;       break;
 154        case EFDCFS:    ret = XB;       break;
 155        case EFDCMPEQ:  ret = XCR;      break;
 156        case EFDCMPGT:  ret = XCR;      break;
 157        case EFDCMPLT:  ret = XCR;      break;
 158        case EFDCTSF:   ret = XB;       break;
 159        case EFDCTSI:   ret = XB;       break;
 160        case EFDCTSIDZ: ret = XB;       break;
 161        case EFDCTSIZ:  ret = XB;       break;
 162        case EFDCTUF:   ret = XB;       break;
 163        case EFDCTUI:   ret = XB;       break;
 164        case EFDCTUIDZ: ret = XB;       break;
 165        case EFDCTUIZ:  ret = XB;       break;
 166        case EFDDIV:    ret = AB;       break;
 167        case EFDMUL:    ret = AB;       break;
 168        case EFDNABS:   ret = XA;       break;
 169        case EFDNEG:    ret = XA;       break;
 170        case EFDSUB:    ret = AB;       break;
 171        }
 172
 173        return ret;
 174}
 175
 176int do_spe_mathemu(struct pt_regs *regs)
 177{
 178        FP_DECL_EX;
 179        int IR, cmp;
 180
 181        unsigned long type, func, fc, fa, fb, src, speinsn;
 182        union dw_union vc, va, vb;
 183
 184        if (get_user(speinsn, (unsigned int __user *) regs->nip))
 185                return -EFAULT;
 186        if ((speinsn >> 26) != EFAPU)
 187                return -EINVAL;         /* not an spe instruction */
 188
 189        type = insn_type(speinsn);
 190        if (type == NOTYPE)
 191                goto illegal;
 192
 193        func = speinsn & 0x7ff;
 194        fc = (speinsn >> 21) & 0x1f;
 195        fa = (speinsn >> 16) & 0x1f;
 196        fb = (speinsn >> 11) & 0x1f;
 197        src = (speinsn >> 5) & 0x7;
 198
 199        vc.wp[0] = current->thread.evr[fc];
 200        vc.wp[1] = regs->gpr[fc];
 201        va.wp[0] = current->thread.evr[fa];
 202        va.wp[1] = regs->gpr[fa];
 203        vb.wp[0] = current->thread.evr[fb];
 204        vb.wp[1] = regs->gpr[fb];
 205
 206        __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
 207
 208        pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
 209        pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
 210        pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
 211        pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
 212
 213        switch (src) {
 214        case SPFP: {
 215                FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
 216
 217                switch (type) {
 218                case AB:
 219                case XCR:
 220                        FP_UNPACK_SP(SA, va.wp + 1);
 221                case XB:
 222                        FP_UNPACK_SP(SB, vb.wp + 1);
 223                        break;
 224                case XA:
 225                        FP_UNPACK_SP(SA, va.wp + 1);
 226                        break;
 227                }
 228
 229                pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
 230                pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
 231
 232                switch (func) {
 233                case EFSABS:
 234                        vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
 235                        goto update_regs;
 236
 237                case EFSNABS:
 238                        vc.wp[1] = va.wp[1] | SIGN_BIT_S;
 239                        goto update_regs;
 240
 241                case EFSNEG:
 242                        vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
 243                        goto update_regs;
 244
 245                case EFSADD:
 246                        FP_ADD_S(SR, SA, SB);
 247                        goto pack_s;
 248
 249                case EFSSUB:
 250                        FP_SUB_S(SR, SA, SB);
 251                        goto pack_s;
 252
 253                case EFSMUL:
 254                        FP_MUL_S(SR, SA, SB);
 255                        goto pack_s;
 256
 257                case EFSDIV:
 258                        FP_DIV_S(SR, SA, SB);
 259                        goto pack_s;
 260
 261                case EFSCMPEQ:
 262                        cmp = 0;
 263                        goto cmp_s;
 264
 265                case EFSCMPGT:
 266                        cmp = 1;
 267                        goto cmp_s;
 268
 269                case EFSCMPLT:
 270                        cmp = -1;
 271                        goto cmp_s;
 272
 273                case EFSCTSF:
 274                case EFSCTUF:
 275                        if (SB_c == FP_CLS_NAN) {
 276                                vc.wp[1] = 0;
 277                                FP_SET_EXCEPTION(FP_EX_INVALID);
 278                        } else {
 279                                SB_e += (func == EFSCTSF ? 31 : 32);
 280                                FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
 281                                                (func == EFSCTSF));
 282                        }
 283                        goto update_regs;
 284
 285                case EFSCFD: {
 286                        FP_DECL_D(DB);
 287                        FP_CLEAR_EXCEPTIONS;
 288                        FP_UNPACK_DP(DB, vb.dp);
 289
 290                        pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
 291                                        DB_s, DB_f1, DB_f0, DB_e, DB_c);
 292
 293                        FP_CONV(S, D, 1, 2, SR, DB);
 294                        goto pack_s;
 295                }
 296
 297                case EFSCTSI:
 298                case EFSCTUI:
 299                        if (SB_c == FP_CLS_NAN) {
 300                                vc.wp[1] = 0;
 301                                FP_SET_EXCEPTION(FP_EX_INVALID);
 302                        } else {
 303                                FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
 304                                                ((func & 0x3) != 0));
 305                        }
 306                        goto update_regs;
 307
 308                case EFSCTSIZ:
 309                case EFSCTUIZ:
 310                        if (SB_c == FP_CLS_NAN) {
 311                                vc.wp[1] = 0;
 312                                FP_SET_EXCEPTION(FP_EX_INVALID);
 313                        } else {
 314                                FP_TO_INT_S(vc.wp[1], SB, 32,
 315                                                ((func & 0x3) != 0));
 316                        }
 317                        goto update_regs;
 318
 319                default:
 320                        goto illegal;
 321                }
 322                break;
 323
 324pack_s:
 325                pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
 326
 327                FP_PACK_SP(vc.wp + 1, SR);
 328                goto update_regs;
 329
 330cmp_s:
 331                FP_CMP_S(IR, SA, SB, 3);
 332                if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
 333                        FP_SET_EXCEPTION(FP_EX_INVALID);
 334                if (IR == cmp) {
 335                        IR = 0x4;
 336                } else {
 337                        IR = 0;
 338                }
 339                goto update_ccr;
 340        }
 341
 342        case DPFP: {
 343                FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
 344
 345                switch (type) {
 346                case AB:
 347                case XCR:
 348                        FP_UNPACK_DP(DA, va.dp);
 349                case XB:
 350                        FP_UNPACK_DP(DB, vb.dp);
 351                        break;
 352                case XA:
 353                        FP_UNPACK_DP(DA, va.dp);
 354                        break;
 355                }
 356
 357                pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
 358                                DA_s, DA_f1, DA_f0, DA_e, DA_c);
 359                pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
 360                                DB_s, DB_f1, DB_f0, DB_e, DB_c);
 361
 362                switch (func) {
 363                case EFDABS:
 364                        vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
 365                        goto update_regs;
 366
 367                case EFDNABS:
 368                        vc.dp[0] = va.dp[0] | SIGN_BIT_D;
 369                        goto update_regs;
 370
 371                case EFDNEG:
 372                        vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
 373                        goto update_regs;
 374
 375                case EFDADD:
 376                        FP_ADD_D(DR, DA, DB);
 377                        goto pack_d;
 378
 379                case EFDSUB:
 380                        FP_SUB_D(DR, DA, DB);
 381                        goto pack_d;
 382
 383                case EFDMUL:
 384                        FP_MUL_D(DR, DA, DB);
 385                        goto pack_d;
 386
 387                case EFDDIV:
 388                        FP_DIV_D(DR, DA, DB);
 389                        goto pack_d;
 390
 391                case EFDCMPEQ:
 392                        cmp = 0;
 393                        goto cmp_d;
 394
 395                case EFDCMPGT:
 396                        cmp = 1;
 397                        goto cmp_d;
 398
 399                case EFDCMPLT:
 400                        cmp = -1;
 401                        goto cmp_d;
 402
 403                case EFDCTSF:
 404                case EFDCTUF:
 405                        if (DB_c == FP_CLS_NAN) {
 406                                vc.wp[1] = 0;
 407                                FP_SET_EXCEPTION(FP_EX_INVALID);
 408                        } else {
 409                                DB_e += (func == EFDCTSF ? 31 : 32);
 410                                FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
 411                                                (func == EFDCTSF));
 412                        }
 413                        goto update_regs;
 414
 415                case EFDCFS: {
 416                        FP_DECL_S(SB);
 417                        FP_CLEAR_EXCEPTIONS;
 418                        FP_UNPACK_SP(SB, vb.wp + 1);
 419
 420                        pr_debug("SB: %ld %08lx %ld (%ld)\n",
 421                                        SB_s, SB_f, SB_e, SB_c);
 422
 423                        FP_CONV(D, S, 2, 1, DR, SB);
 424                        goto pack_d;
 425                }
 426
 427                case EFDCTUIDZ:
 428                case EFDCTSIDZ:
 429                        if (DB_c == FP_CLS_NAN) {
 430                                vc.dp[0] = 0;
 431                                FP_SET_EXCEPTION(FP_EX_INVALID);
 432                        } else {
 433                                FP_TO_INT_D(vc.dp[0], DB, 64,
 434                                                ((func & 0x1) == 0));
 435                        }
 436                        goto update_regs;
 437
 438                case EFDCTUI:
 439                case EFDCTSI:
 440                        if (DB_c == FP_CLS_NAN) {
 441                                vc.wp[1] = 0;
 442                                FP_SET_EXCEPTION(FP_EX_INVALID);
 443                        } else {
 444                                FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
 445                                                ((func & 0x3) != 0));
 446                        }
 447                        goto update_regs;
 448
 449                case EFDCTUIZ:
 450                case EFDCTSIZ:
 451                        if (DB_c == FP_CLS_NAN) {
 452                                vc.wp[1] = 0;
 453                                FP_SET_EXCEPTION(FP_EX_INVALID);
 454                        } else {
 455                                FP_TO_INT_D(vc.wp[1], DB, 32,
 456                                                ((func & 0x3) != 0));
 457                        }
 458                        goto update_regs;
 459
 460                default:
 461                        goto illegal;
 462                }
 463                break;
 464
 465pack_d:
 466                pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
 467                                DR_s, DR_f1, DR_f0, DR_e, DR_c);
 468
 469                FP_PACK_DP(vc.dp, DR);
 470                goto update_regs;
 471
 472cmp_d:
 473                FP_CMP_D(IR, DA, DB, 3);
 474                if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
 475                        FP_SET_EXCEPTION(FP_EX_INVALID);
 476                if (IR == cmp) {
 477                        IR = 0x4;
 478                } else {
 479                        IR = 0;
 480                }
 481                goto update_ccr;
 482
 483        }
 484
 485        case VCT: {
 486                FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
 487                FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
 488                int IR0, IR1;
 489
 490                switch (type) {
 491                case AB:
 492                case XCR:
 493                        FP_UNPACK_SP(SA0, va.wp);
 494                        FP_UNPACK_SP(SA1, va.wp + 1);
 495                case XB:
 496                        FP_UNPACK_SP(SB0, vb.wp);
 497                        FP_UNPACK_SP(SB1, vb.wp + 1);
 498                        break;
 499                case XA:
 500                        FP_UNPACK_SP(SA0, va.wp);
 501                        FP_UNPACK_SP(SA1, va.wp + 1);
 502                        break;
 503                }
 504
 505                pr_debug("SA0: %ld %08lx %ld (%ld)\n",
 506                                SA0_s, SA0_f, SA0_e, SA0_c);
 507                pr_debug("SA1: %ld %08lx %ld (%ld)\n",
 508                                SA1_s, SA1_f, SA1_e, SA1_c);
 509                pr_debug("SB0: %ld %08lx %ld (%ld)\n",
 510                                SB0_s, SB0_f, SB0_e, SB0_c);
 511                pr_debug("SB1: %ld %08lx %ld (%ld)\n",
 512                                SB1_s, SB1_f, SB1_e, SB1_c);
 513
 514                switch (func) {
 515                case EVFSABS:
 516                        vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
 517                        vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
 518                        goto update_regs;
 519
 520                case EVFSNABS:
 521                        vc.wp[0] = va.wp[0] | SIGN_BIT_S;
 522                        vc.wp[1] = va.wp[1] | SIGN_BIT_S;
 523                        goto update_regs;
 524
 525                case EVFSNEG:
 526                        vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
 527                        vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
 528                        goto update_regs;
 529
 530                case EVFSADD:
 531                        FP_ADD_S(SR0, SA0, SB0);
 532                        FP_ADD_S(SR1, SA1, SB1);
 533                        goto pack_vs;
 534
 535                case EVFSSUB:
 536                        FP_SUB_S(SR0, SA0, SB0);
 537                        FP_SUB_S(SR1, SA1, SB1);
 538                        goto pack_vs;
 539
 540                case EVFSMUL:
 541                        FP_MUL_S(SR0, SA0, SB0);
 542                        FP_MUL_S(SR1, SA1, SB1);
 543                        goto pack_vs;
 544
 545                case EVFSDIV:
 546                        FP_DIV_S(SR0, SA0, SB0);
 547                        FP_DIV_S(SR1, SA1, SB1);
 548                        goto pack_vs;
 549
 550                case EVFSCMPEQ:
 551                        cmp = 0;
 552                        goto cmp_vs;
 553
 554                case EVFSCMPGT:
 555                        cmp = 1;
 556                        goto cmp_vs;
 557
 558                case EVFSCMPLT:
 559                        cmp = -1;
 560                        goto cmp_vs;
 561
 562                case EVFSCTUF:
 563                case EVFSCTSF:
 564                        if (SB0_c == FP_CLS_NAN) {
 565                                vc.wp[0] = 0;
 566                                FP_SET_EXCEPTION(FP_EX_INVALID);
 567                        } else {
 568                                SB0_e += (func == EVFSCTSF ? 31 : 32);
 569                                FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
 570                                                (func == EVFSCTSF));
 571                        }
 572                        if (SB1_c == FP_CLS_NAN) {
 573                                vc.wp[1] = 0;
 574                                FP_SET_EXCEPTION(FP_EX_INVALID);
 575                        } else {
 576                                SB1_e += (func == EVFSCTSF ? 31 : 32);
 577                                FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
 578                                                (func == EVFSCTSF));
 579                        }
 580                        goto update_regs;
 581
 582                case EVFSCTUI:
 583                case EVFSCTSI:
 584                        if (SB0_c == FP_CLS_NAN) {
 585                                vc.wp[0] = 0;
 586                                FP_SET_EXCEPTION(FP_EX_INVALID);
 587                        } else {
 588                                FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
 589                                                ((func & 0x3) != 0));
 590                        }
 591                        if (SB1_c == FP_CLS_NAN) {
 592                                vc.wp[1] = 0;
 593                                FP_SET_EXCEPTION(FP_EX_INVALID);
 594                        } else {
 595                                FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
 596                                                ((func & 0x3) != 0));
 597                        }
 598                        goto update_regs;
 599
 600                case EVFSCTUIZ:
 601                case EVFSCTSIZ:
 602                        if (SB0_c == FP_CLS_NAN) {
 603                                vc.wp[0] = 0;
 604                                FP_SET_EXCEPTION(FP_EX_INVALID);
 605                        } else {
 606                                FP_TO_INT_S(vc.wp[0], SB0, 32,
 607                                                ((func & 0x3) != 0));
 608                        }
 609                        if (SB1_c == FP_CLS_NAN) {
 610                                vc.wp[1] = 0;
 611                                FP_SET_EXCEPTION(FP_EX_INVALID);
 612                        } else {
 613                                FP_TO_INT_S(vc.wp[1], SB1, 32,
 614                                                ((func & 0x3) != 0));
 615                        }
 616                        goto update_regs;
 617
 618                default:
 619                        goto illegal;
 620                }
 621                break;
 622
 623pack_vs:
 624                pr_debug("SR0: %ld %08lx %ld (%ld)\n",
 625                                SR0_s, SR0_f, SR0_e, SR0_c);
 626                pr_debug("SR1: %ld %08lx %ld (%ld)\n",
 627                                SR1_s, SR1_f, SR1_e, SR1_c);
 628
 629                FP_PACK_SP(vc.wp, SR0);
 630                FP_PACK_SP(vc.wp + 1, SR1);
 631                goto update_regs;
 632
 633cmp_vs:
 634                {
 635                        int ch, cl;
 636
 637                        FP_CMP_S(IR0, SA0, SB0, 3);
 638                        FP_CMP_S(IR1, SA1, SB1, 3);
 639                        if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
 640                                FP_SET_EXCEPTION(FP_EX_INVALID);
 641                        if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
 642                                FP_SET_EXCEPTION(FP_EX_INVALID);
 643                        ch = (IR0 == cmp) ? 1 : 0;
 644                        cl = (IR1 == cmp) ? 1 : 0;
 645                        IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
 646                                ((ch & cl) << 0);
 647                        goto update_ccr;
 648                }
 649        }
 650        default:
 651                return -EINVAL;
 652        }
 653
 654update_ccr:
 655        regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
 656        regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
 657
 658update_regs:
 659        /*
 660         * If the "invalid" exception sticky bit was set by the
 661         * processor for non-finite input, but was not set before the
 662         * instruction being emulated, clear it.  Likewise for the
 663         * "underflow" bit, which may have been set by the processor
 664         * for exact underflow, not just inexact underflow when the
 665         * flag should be set for IEEE 754 semantics.  Other sticky
 666         * exceptions will only be set by the processor when they are
 667         * correct according to IEEE 754 semantics, and we must not
 668         * clear sticky bits that were already set before the emulated
 669         * instruction as they represent the user-visible sticky
 670         * exception status.  "inexact" traps to kernel are not
 671         * required for IEEE semantics and are not enabled by default,
 672         * so the "inexact" sticky bit may have been set by a previous
 673         * instruction without the kernel being aware of it.
 674         */
 675        __FPU_FPSCR
 676          &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
 677        __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
 678        mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
 679        current->thread.spefscr_last = __FPU_FPSCR;
 680
 681        current->thread.evr[fc] = vc.wp[0];
 682        regs->gpr[fc] = vc.wp[1];
 683
 684        pr_debug("ccr = %08lx\n", regs->ccr);
 685        pr_debug("cur exceptions = %08x spefscr = %08lx\n",
 686                        FP_CUR_EXCEPTIONS, __FPU_FPSCR);
 687        pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
 688        pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
 689        pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
 690
 691        if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
 692                if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
 693                    && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
 694                        return 1;
 695                if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
 696                    && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
 697                        return 1;
 698                if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
 699                    && (current->thread.fpexc_mode & PR_FP_EXC_UND))
 700                        return 1;
 701                if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
 702                    && (current->thread.fpexc_mode & PR_FP_EXC_RES))
 703                        return 1;
 704                if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
 705                    && (current->thread.fpexc_mode & PR_FP_EXC_INV))
 706                        return 1;
 707        }
 708        return 0;
 709
 710illegal:
 711        if (have_e500_cpu_a005_erratum) {
 712                /* according to e500 cpu a005 erratum, reissue efp inst */
 713                regs->nip -= 4;
 714                pr_debug("re-issue efp inst: %08lx\n", speinsn);
 715                return 0;
 716        }
 717
 718        printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
 719        return -ENOSYS;
 720}
 721
 722int speround_handler(struct pt_regs *regs)
 723{
 724        union dw_union fgpr;
 725        int s_lo, s_hi;
 726        int lo_inexact, hi_inexact;
 727        int fp_result;
 728        unsigned long speinsn, type, fb, fc, fptype, func;
 729
 730        if (get_user(speinsn, (unsigned int __user *) regs->nip))
 731                return -EFAULT;
 732        if ((speinsn >> 26) != 4)
 733                return -EINVAL;         /* not an spe instruction */
 734
 735        func = speinsn & 0x7ff;
 736        type = insn_type(func);
 737        if (type == XCR) return -ENOSYS;
 738
 739        __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
 740        pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
 741
 742        fptype = (speinsn >> 5) & 0x7;
 743
 744        /* No need to round if the result is exact */
 745        lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
 746        hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
 747        if (!(lo_inexact || (hi_inexact && fptype == VCT)))
 748                return 0;
 749
 750        fc = (speinsn >> 21) & 0x1f;
 751        s_lo = regs->gpr[fc] & SIGN_BIT_S;
 752        s_hi = current->thread.evr[fc] & SIGN_BIT_S;
 753        fgpr.wp[0] = current->thread.evr[fc];
 754        fgpr.wp[1] = regs->gpr[fc];
 755
 756        fb = (speinsn >> 11) & 0x1f;
 757        switch (func) {
 758        case EFSCTUIZ:
 759        case EFSCTSIZ:
 760        case EVFSCTUIZ:
 761        case EVFSCTSIZ:
 762        case EFDCTUIDZ:
 763        case EFDCTSIDZ:
 764        case EFDCTUIZ:
 765        case EFDCTSIZ:
 766                /*
 767                 * These instructions always round to zero,
 768                 * independent of the rounding mode.
 769                 */
 770                return 0;
 771
 772        case EFSCTUI:
 773        case EFSCTUF:
 774        case EVFSCTUI:
 775        case EVFSCTUF:
 776        case EFDCTUI:
 777        case EFDCTUF:
 778                fp_result = 0;
 779                s_lo = 0;
 780                s_hi = 0;
 781                break;
 782
 783        case EFSCTSI:
 784        case EFSCTSF:
 785                fp_result = 0;
 786                /* Recover the sign of a zero result if possible.  */
 787                if (fgpr.wp[1] == 0)
 788                        s_lo = regs->gpr[fb] & SIGN_BIT_S;
 789                break;
 790
 791        case EVFSCTSI:
 792        case EVFSCTSF:
 793                fp_result = 0;
 794                /* Recover the sign of a zero result if possible.  */
 795                if (fgpr.wp[1] == 0)
 796                        s_lo = regs->gpr[fb] & SIGN_BIT_S;
 797                if (fgpr.wp[0] == 0)
 798                        s_hi = current->thread.evr[fb] & SIGN_BIT_S;
 799                break;
 800
 801        case EFDCTSI:
 802        case EFDCTSF:
 803                fp_result = 0;
 804                s_hi = s_lo;
 805                /* Recover the sign of a zero result if possible.  */
 806                if (fgpr.wp[1] == 0)
 807                        s_hi = current->thread.evr[fb] & SIGN_BIT_S;
 808                break;
 809
 810        default:
 811                fp_result = 1;
 812                break;
 813        }
 814
 815        pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
 816
 817        switch (fptype) {
 818        /* Since SPE instructions on E500 core can handle round to nearest
 819         * and round toward zero with IEEE-754 complied, we just need
 820         * to handle round toward +Inf and round toward -Inf by software.
 821         */
 822        case SPFP:
 823                if ((FP_ROUNDMODE) == FP_RND_PINF) {
 824                        if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
 825                } else { /* round to -Inf */
 826                        if (s_lo) {
 827                                if (fp_result)
 828                                        fgpr.wp[1]++; /* Z < 0, choose Z2 */
 829                                else
 830                                        fgpr.wp[1]--; /* Z < 0, choose Z2 */
 831                        }
 832                }
 833                break;
 834
 835        case DPFP:
 836                if (FP_ROUNDMODE == FP_RND_PINF) {
 837                        if (!s_hi) {
 838                                if (fp_result)
 839                                        fgpr.dp[0]++; /* Z > 0, choose Z1 */
 840                                else
 841                                        fgpr.wp[1]++; /* Z > 0, choose Z1 */
 842                        }
 843                } else { /* round to -Inf */
 844                        if (s_hi) {
 845                                if (fp_result)
 846                                        fgpr.dp[0]++; /* Z < 0, choose Z2 */
 847                                else
 848                                        fgpr.wp[1]--; /* Z < 0, choose Z2 */
 849                        }
 850                }
 851                break;
 852
 853        case VCT:
 854                if (FP_ROUNDMODE == FP_RND_PINF) {
 855                        if (lo_inexact && !s_lo)
 856                                fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
 857                        if (hi_inexact && !s_hi)
 858                                fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
 859                } else { /* round to -Inf */
 860                        if (lo_inexact && s_lo) {
 861                                if (fp_result)
 862                                        fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
 863                                else
 864                                        fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
 865                        }
 866                        if (hi_inexact && s_hi) {
 867                                if (fp_result)
 868                                        fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
 869                                else
 870                                        fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
 871                        }
 872                }
 873                break;
 874
 875        default:
 876                return -EINVAL;
 877        }
 878
 879        current->thread.evr[fc] = fgpr.wp[0];
 880        regs->gpr[fc] = fgpr.wp[1];
 881
 882        pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
 883
 884        if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
 885                return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
 886        return 0;
 887}
 888
 889int __init spe_mathemu_init(void)
 890{
 891        u32 pvr, maj, min;
 892
 893        pvr = mfspr(SPRN_PVR);
 894
 895        if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
 896            (PVR_VER(pvr) == PVR_VER_E500V2)) {
 897                maj = PVR_MAJ(pvr);
 898                min = PVR_MIN(pvr);
 899
 900                /*
 901                 * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1
 902                 * need cpu a005 errata workaround
 903                 */
 904                switch (maj) {
 905                case 1:
 906                        if (min < 1)
 907                                have_e500_cpu_a005_erratum = 1;
 908                        break;
 909                case 2:
 910                        if (min < 3)
 911                                have_e500_cpu_a005_erratum = 1;
 912                        break;
 913                case 3:
 914                case 4:
 915                case 5:
 916                        if (min < 1)
 917                                have_e500_cpu_a005_erratum = 1;
 918                        break;
 919                default:
 920                        break;
 921                }
 922        }
 923
 924        return 0;
 925}
 926
/* Register spe_mathemu_init() through module_init(). */
module_init(spe_mathemu_init);
 928