linux/arch/arm/vfp/vfpsingle.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/arm/vfp/vfpsingle.c
   3 *
   4 * This code is derived in part from John R. Hauser's SoftFloat library, which
   5 * carries the following notice:
   6 *
   7 * ===========================================================================
   8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9 * Arithmetic Package, Release 2.
  10 *
  11 * Written by John R. Hauser.  This work was made possible in part by the
  12 * International Computer Science Institute, located at Suite 600, 1947 Center
  13 * Street, Berkeley, California 94704.  Funding was partially provided by the
  14 * National Science Foundation under grant MIP-9311980.  The original version
  15 * of this code was written as part of a project to build a fixed-point vector
  16 * processor in collaboration with the University of California at Berkeley,
  17 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19 * arithmetic/softfloat.html'.
  20 *
  21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26 *
  27 * Derivative works are acceptable, even for commercial purposes, so long as
  28 * (1) they include prominent notice that the work is derivative, and (2) they
  29 * include prominent notice akin to these three paragraphs for those parts of
  30 * this code that are retained.
  31 * ===========================================================================
  32 */
  33#include <linux/kernel.h>
  34#include <linux/bitops.h>
  35
  36#include <asm/div64.h>
  37#include <asm/vfp.h>
  38
  39#include "vfpinstr.h"
  40#include "vfp.h"
  41
  42static struct vfp_single vfp_single_default_qnan = {
  43        .exponent       = 255,
  44        .sign           = 0,
  45        .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
  46};
  47
  48static void vfp_single_dump(const char *str, struct vfp_single *s)
  49{
  50        pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  51                 str, s->sign != 0, s->exponent, s->significand);
  52}
  53
  54static void vfp_single_normalise_denormal(struct vfp_single *vs)
  55{
  56        int bits = 31 - fls(vs->significand);
  57
  58        vfp_single_dump("normalise_denormal: in", vs);
  59
  60        if (bits) {
  61                vs->exponent -= bits - 1;
  62                vs->significand <<= bits;
  63        }
  64
  65        vfp_single_dump("normalise_denormal: out", vs);
  66}
  67
  68#ifndef DEBUG
  69#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  70u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
  71#else
  72u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
  73#endif
  74{
  75        u32 significand, incr, rmode;
  76        int exponent, shift, underflow;
  77
  78        vfp_single_dump("pack: in", vs);
  79
  80        /*
  81         * Infinities and NaNs are a special case.
  82         */
  83        if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
  84                goto pack;
  85
  86        /*
  87         * Special-case zero.
  88         */
  89        if (vs->significand == 0) {
  90                vs->exponent = 0;
  91                goto pack;
  92        }
  93
  94        exponent = vs->exponent;
  95        significand = vs->significand;
  96
  97        /*
  98         * Normalise first.  Note that we shift the significand up to
  99         * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
 100         * significant bit.
 101         */
 102        shift = 32 - fls(significand);
 103        if (shift < 32 && shift) {
 104                exponent -= shift;
 105                significand <<= shift;
 106        }
 107
 108#ifdef DEBUG
 109        vs->exponent = exponent;
 110        vs->significand = significand;
 111        vfp_single_dump("pack: normalised", vs);
 112#endif
 113
 114        /*
 115         * Tiny number?
 116         */
 117        underflow = exponent < 0;
 118        if (underflow) {
 119                significand = vfp_shiftright32jamming(significand, -exponent);
 120                exponent = 0;
 121#ifdef DEBUG
 122                vs->exponent = exponent;
 123                vs->significand = significand;
 124                vfp_single_dump("pack: tiny number", vs);
 125#endif
 126                if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
 127                        underflow = 0;
 128        }
 129
 130        /*
 131         * Select rounding increment.
 132         */
 133        incr = 0;
 134        rmode = fpscr & FPSCR_RMODE_MASK;
 135
 136        if (rmode == FPSCR_ROUND_NEAREST) {
 137                incr = 1 << VFP_SINGLE_LOW_BITS;
 138                if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
 139                        incr -= 1;
 140        } else if (rmode == FPSCR_ROUND_TOZERO) {
 141                incr = 0;
 142        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
 143                incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
 144
 145        pr_debug("VFP: rounding increment = 0x%08x\n", incr);
 146
 147        /*
 148         * Is our rounding going to overflow?
 149         */
 150        if ((significand + incr) < significand) {
 151                exponent += 1;
 152                significand = (significand >> 1) | (significand & 1);
 153                incr >>= 1;
 154#ifdef DEBUG
 155                vs->exponent = exponent;
 156                vs->significand = significand;
 157                vfp_single_dump("pack: overflow", vs);
 158#endif
 159        }
 160
 161        /*
 162         * If any of the low bits (which will be shifted out of the
 163         * number) are non-zero, the result is inexact.
 164         */
 165        if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
 166                exceptions |= FPSCR_IXC;
 167
 168        /*
 169         * Do our rounding.
 170         */
 171        significand += incr;
 172
 173        /*
 174         * Infinity?
 175         */
 176        if (exponent >= 254) {
 177                exceptions |= FPSCR_OFC | FPSCR_IXC;
 178                if (incr == 0) {
 179                        vs->exponent = 253;
 180                        vs->significand = 0x7fffffff;
 181                } else {
 182                        vs->exponent = 255;             /* infinity */
 183                        vs->significand = 0;
 184                }
 185        } else {
 186                if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
 187                        exponent = 0;
 188                if (exponent || significand > 0x80000000)
 189                        underflow = 0;
 190                if (underflow)
 191                        exceptions |= FPSCR_UFC;
 192                vs->exponent = exponent;
 193                vs->significand = significand >> 1;
 194        }
 195
 196 pack:
 197        vfp_single_dump("pack: final", vs);
 198        {
 199                s32 d = vfp_single_pack(vs);
 200#ifdef DEBUG
 201                pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
 202                         sd, d, exceptions);
 203#endif
 204                vfp_put_float(d, sd);
 205        }
 206
 207        return exceptions;
 208}
 209
 210/*
 211 * Propagate the NaN, setting exceptions if it is signalling.
 212 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 213 */
 214static u32
 215vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 216                  struct vfp_single *vsm, u32 fpscr)
 217{
 218        struct vfp_single *nan;
 219        int tn, tm = 0;
 220
 221        tn = vfp_single_type(vsn);
 222
 223        if (vsm)
 224                tm = vfp_single_type(vsm);
 225
 226        if (fpscr & FPSCR_DEFAULT_NAN)
 227                /*
 228                 * Default NaN mode - always returns a quiet NaN
 229                 */
 230                nan = &vfp_single_default_qnan;
 231        else {
 232                /*
 233                 * Contemporary mode - select the first signalling
 234                 * NAN, or if neither are signalling, the first
 235                 * quiet NAN.
 236                 */
 237                if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 238                        nan = vsn;
 239                else
 240                        nan = vsm;
 241                /*
 242                 * Make the NaN quiet.
 243                 */
 244                nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 245        }
 246
 247        *vsd = *nan;
 248
 249        /*
 250         * If one was a signalling NAN, raise invalid operation.
 251         */
 252        return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 253}
 254
 255
 256/*
 257 * Extended operations
 258 */
 259static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 260{
 261        vfp_put_float(vfp_single_packed_abs(m), sd);
 262        return 0;
 263}
 264
 265static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 266{
 267        vfp_put_float(m, sd);
 268        return 0;
 269}
 270
 271static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 272{
 273        vfp_put_float(vfp_single_packed_negate(m), sd);
 274        return 0;
 275}
 276
 277static const u16 sqrt_oddadjust[] = {
 278        0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
 279        0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
 280};
 281
 282static const u16 sqrt_evenadjust[] = {
 283        0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
 284        0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
 285};
 286
 287u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 288{
 289        int index;
 290        u32 z, a;
 291
 292        if ((significand & 0xc0000000) != 0x40000000) {
 293                pr_warn("VFP: estimate_sqrt: invalid significand\n");
 294        }
 295
 296        a = significand << 1;
 297        index = (a >> 27) & 15;
 298        if (exponent & 1) {
 299                z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 300                z = ((a / z) << 14) + (z << 15);
 301                a >>= 1;
 302        } else {
 303                z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 304                z = a / z + z;
 305                z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 306                if (z <= a)
 307                        return (s32)a >> 1;
 308        }
 309        {
 310                u64 v = (u64)a << 31;
 311                do_div(v, z);
 312                return v + (z >> 1);
 313        }
 314}
 315
 316static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
 317{
 318        struct vfp_single vsm, vsd;
 319        int ret, tm;
 320
 321        vfp_single_unpack(&vsm, m);
 322        tm = vfp_single_type(&vsm);
 323        if (tm & (VFP_NAN|VFP_INFINITY)) {
 324                struct vfp_single *vsp = &vsd;
 325
 326                if (tm & VFP_NAN)
 327                        ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
 328                else if (vsm.sign == 0) {
 329 sqrt_copy:
 330                        vsp = &vsm;
 331                        ret = 0;
 332                } else {
 333 sqrt_invalid:
 334                        vsp = &vfp_single_default_qnan;
 335                        ret = FPSCR_IOC;
 336                }
 337                vfp_put_float(vfp_single_pack(vsp), sd);
 338                return ret;
 339        }
 340
 341        /*
 342         * sqrt(+/- 0) == +/- 0
 343         */
 344        if (tm & VFP_ZERO)
 345                goto sqrt_copy;
 346
 347        /*
 348         * Normalise a denormalised number
 349         */
 350        if (tm & VFP_DENORMAL)
 351                vfp_single_normalise_denormal(&vsm);
 352
 353        /*
 354         * sqrt(<0) = invalid
 355         */
 356        if (vsm.sign)
 357                goto sqrt_invalid;
 358
 359        vfp_single_dump("sqrt", &vsm);
 360
 361        /*
 362         * Estimate the square root.
 363         */
 364        vsd.sign = 0;
 365        vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
 366        vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
 367
 368        vfp_single_dump("sqrt estimate", &vsd);
 369
 370        /*
 371         * And now adjust.
 372         */
 373        if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
 374                if (vsd.significand < 2) {
 375                        vsd.significand = 0xffffffff;
 376                } else {
 377                        u64 term;
 378                        s64 rem;
 379                        vsm.significand <<= !(vsm.exponent & 1);
 380                        term = (u64)vsd.significand * vsd.significand;
 381                        rem = ((u64)vsm.significand << 32) - term;
 382
 383                        pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
 384
 385                        while (rem < 0) {
 386                                vsd.significand -= 1;
 387                                rem += ((u64)vsd.significand << 1) | 1;
 388                        }
 389                        vsd.significand |= rem != 0;
 390                }
 391        }
 392        vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
 393
 394        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
 395}
 396
 397/*
 398 * Equal        := ZC
 399 * Less than    := N
 400 * Greater than := C
 401 * Unordered    := CV
 402 */
 403static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 404{
 405        s32 d;
 406        u32 ret = 0;
 407
 408        d = vfp_get_float(sd);
 409        if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 410                ret |= FPSCR_C | FPSCR_V;
 411                if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 412                        /*
 413                         * Signalling NaN, or signalling on quiet NaN
 414                         */
 415                        ret |= FPSCR_IOC;
 416        }
 417
 418        if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 419                ret |= FPSCR_C | FPSCR_V;
 420                if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 421                        /*
 422                         * Signalling NaN, or signalling on quiet NaN
 423                         */
 424                        ret |= FPSCR_IOC;
 425        }
 426
 427        if (ret == 0) {
 428                if (d == m || vfp_single_packed_abs(d | m) == 0) {
 429                        /*
 430                         * equal
 431                         */
 432                        ret |= FPSCR_Z | FPSCR_C;
 433                } else if (vfp_single_packed_sign(d ^ m)) {
 434                        /*
 435                         * different signs
 436                         */
 437                        if (vfp_single_packed_sign(d))
 438                                /*
 439                                 * d is negative, so d < m
 440                                 */
 441                                ret |= FPSCR_N;
 442                        else
 443                                /*
 444                                 * d is positive, so d > m
 445                                 */
 446                                ret |= FPSCR_C;
 447                } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 448                        /*
 449                         * d < m
 450                         */
 451                        ret |= FPSCR_N;
 452                } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 453                        /*
 454                         * d > m
 455                         */
 456                        ret |= FPSCR_C;
 457                }
 458        }
 459        return ret;
 460}
 461
 462static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 463{
 464        return vfp_compare(sd, 0, m, fpscr);
 465}
 466
 467static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 468{
 469        return vfp_compare(sd, 1, m, fpscr);
 470}
 471
 472static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 473{
 474        return vfp_compare(sd, 0, 0, fpscr);
 475}
 476
 477static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 478{
 479        return vfp_compare(sd, 1, 0, fpscr);
 480}
 481
 482static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 483{
 484        struct vfp_single vsm;
 485        struct vfp_double vdd;
 486        int tm;
 487        u32 exceptions = 0;
 488
 489        vfp_single_unpack(&vsm, m);
 490
 491        tm = vfp_single_type(&vsm);
 492
 493        /*
 494         * If we have a signalling NaN, signal invalid operation.
 495         */
 496        if (tm == VFP_SNAN)
 497                exceptions = FPSCR_IOC;
 498
 499        if (tm & VFP_DENORMAL)
 500                vfp_single_normalise_denormal(&vsm);
 501
 502        vdd.sign = vsm.sign;
 503        vdd.significand = (u64)vsm.significand << 32;
 504
 505        /*
 506         * If we have an infinity or NaN, the exponent must be 2047.
 507         */
 508        if (tm & (VFP_INFINITY|VFP_NAN)) {
 509                vdd.exponent = 2047;
 510                if (tm == VFP_QNAN)
 511                        vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 512                goto pack_nan;
 513        } else if (tm & VFP_ZERO)
 514                vdd.exponent = 0;
 515        else
 516                vdd.exponent = vsm.exponent + (1023 - 127);
 517
 518        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 519
 520 pack_nan:
 521        vfp_put_double(vfp_double_pack(&vdd), dd);
 522        return exceptions;
 523}
 524
 525static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 526{
 527        struct vfp_single vs;
 528
 529        vs.sign = 0;
 530        vs.exponent = 127 + 31 - 1;
 531        vs.significand = (u32)m;
 532
 533        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 534}
 535
 536static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 537{
 538        struct vfp_single vs;
 539
 540        vs.sign = (m & 0x80000000) >> 16;
 541        vs.exponent = 127 + 31 - 1;
 542        vs.significand = vs.sign ? -m : m;
 543
 544        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 545}
 546
 547static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
 548{
 549        struct vfp_single vsm;
 550        u32 d, exceptions = 0;
 551        int rmode = fpscr & FPSCR_RMODE_MASK;
 552        int tm;
 553
 554        vfp_single_unpack(&vsm, m);
 555        vfp_single_dump("VSM", &vsm);
 556
 557        /*
 558         * Do we have a denormalised number?
 559         */
 560        tm = vfp_single_type(&vsm);
 561        if (tm & VFP_DENORMAL)
 562                exceptions |= FPSCR_IDC;
 563
 564        if (tm & VFP_NAN)
 565                vsm.sign = 0;
 566
 567        if (vsm.exponent >= 127 + 32) {
 568                d = vsm.sign ? 0 : 0xffffffff;
 569                exceptions = FPSCR_IOC;
 570        } else if (vsm.exponent >= 127 - 1) {
 571                int shift = 127 + 31 - vsm.exponent;
 572                u32 rem, incr = 0;
 573
 574                /*
 575                 * 2^0 <= m < 2^32-2^8
 576                 */
 577                d = (vsm.significand << 1) >> shift;
 578                rem = vsm.significand << (33 - shift);
 579
 580                if (rmode == FPSCR_ROUND_NEAREST) {
 581                        incr = 0x80000000;
 582                        if ((d & 1) == 0)
 583                                incr -= 1;
 584                } else if (rmode == FPSCR_ROUND_TOZERO) {
 585                        incr = 0;
 586                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 587                        incr = ~0;
 588                }
 589
 590                if ((rem + incr) < rem) {
 591                        if (d < 0xffffffff)
 592                                d += 1;
 593                        else
 594                                exceptions |= FPSCR_IOC;
 595                }
 596
 597                if (d && vsm.sign) {
 598                        d = 0;
 599                        exceptions |= FPSCR_IOC;
 600                } else if (rem)
 601                        exceptions |= FPSCR_IXC;
 602        } else {
 603                d = 0;
 604                if (vsm.exponent | vsm.significand) {
 605                        exceptions |= FPSCR_IXC;
 606                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 607                                d = 1;
 608                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
 609                                d = 0;
 610                                exceptions |= FPSCR_IOC;
 611                        }
 612                }
 613        }
 614
 615        pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 616
 617        vfp_put_float(d, sd);
 618
 619        return exceptions;
 620}
 621
 622static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 623{
 624        return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 625}
 626
 627static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
 628{
 629        struct vfp_single vsm;
 630        u32 d, exceptions = 0;
 631        int rmode = fpscr & FPSCR_RMODE_MASK;
 632        int tm;
 633
 634        vfp_single_unpack(&vsm, m);
 635        vfp_single_dump("VSM", &vsm);
 636
 637        /*
 638         * Do we have a denormalised number?
 639         */
 640        tm = vfp_single_type(&vsm);
 641        if (vfp_single_type(&vsm) & VFP_DENORMAL)
 642                exceptions |= FPSCR_IDC;
 643
 644        if (tm & VFP_NAN) {
 645                d = 0;
 646                exceptions |= FPSCR_IOC;
 647        } else if (vsm.exponent >= 127 + 32) {
 648                /*
 649                 * m >= 2^31-2^7: invalid
 650                 */
 651                d = 0x7fffffff;
 652                if (vsm.sign)
 653                        d = ~d;
 654                exceptions |= FPSCR_IOC;
 655        } else if (vsm.exponent >= 127 - 1) {
 656                int shift = 127 + 31 - vsm.exponent;
 657                u32 rem, incr = 0;
 658
 659                /* 2^0 <= m <= 2^31-2^7 */
 660                d = (vsm.significand << 1) >> shift;
 661                rem = vsm.significand << (33 - shift);
 662
 663                if (rmode == FPSCR_ROUND_NEAREST) {
 664                        incr = 0x80000000;
 665                        if ((d & 1) == 0)
 666                                incr -= 1;
 667                } else if (rmode == FPSCR_ROUND_TOZERO) {
 668                        incr = 0;
 669                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 670                        incr = ~0;
 671                }
 672
 673                if ((rem + incr) < rem && d < 0xffffffff)
 674                        d += 1;
 675                if (d > 0x7fffffff + (vsm.sign != 0)) {
 676                        d = 0x7fffffff + (vsm.sign != 0);
 677                        exceptions |= FPSCR_IOC;
 678                } else if (rem)
 679                        exceptions |= FPSCR_IXC;
 680
 681                if (vsm.sign)
 682                        d = -d;
 683        } else {
 684                d = 0;
 685                if (vsm.exponent | vsm.significand) {
 686                        exceptions |= FPSCR_IXC;
 687                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 688                                d = 1;
 689                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
 690                                d = -1;
 691                }
 692        }
 693
 694        pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 695
 696        vfp_put_float((s32)d, sd);
 697
 698        return exceptions;
 699}
 700
 701static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 702{
 703        return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 704}
 705
 706static struct op fops_ext[32] = {
 707        [FEXT_TO_IDX(FEXT_FCPY)]        = { vfp_single_fcpy,   0 },
 708        [FEXT_TO_IDX(FEXT_FABS)]        = { vfp_single_fabs,   0 },
 709        [FEXT_TO_IDX(FEXT_FNEG)]        = { vfp_single_fneg,   0 },
 710        [FEXT_TO_IDX(FEXT_FSQRT)]       = { vfp_single_fsqrt,  0 },
 711        [FEXT_TO_IDX(FEXT_FCMP)]        = { vfp_single_fcmp,   OP_SCALAR },
 712        [FEXT_TO_IDX(FEXT_FCMPE)]       = { vfp_single_fcmpe,  OP_SCALAR },
 713        [FEXT_TO_IDX(FEXT_FCMPZ)]       = { vfp_single_fcmpz,  OP_SCALAR },
 714        [FEXT_TO_IDX(FEXT_FCMPEZ)]      = { vfp_single_fcmpez, OP_SCALAR },
 715        [FEXT_TO_IDX(FEXT_FCVT)]        = { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
 716        [FEXT_TO_IDX(FEXT_FUITO)]       = { vfp_single_fuito,  OP_SCALAR },
 717        [FEXT_TO_IDX(FEXT_FSITO)]       = { vfp_single_fsito,  OP_SCALAR },
 718        [FEXT_TO_IDX(FEXT_FTOUI)]       = { vfp_single_ftoui,  OP_SCALAR },
 719        [FEXT_TO_IDX(FEXT_FTOUIZ)]      = { vfp_single_ftouiz, OP_SCALAR },
 720        [FEXT_TO_IDX(FEXT_FTOSI)]       = { vfp_single_ftosi,  OP_SCALAR },
 721        [FEXT_TO_IDX(FEXT_FTOSIZ)]      = { vfp_single_ftosiz, OP_SCALAR },
 722};
 723
 724
 725
 726
 727
 728static u32
 729vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 730                          struct vfp_single *vsm, u32 fpscr)
 731{
 732        struct vfp_single *vsp;
 733        u32 exceptions = 0;
 734        int tn, tm;
 735
 736        tn = vfp_single_type(vsn);
 737        tm = vfp_single_type(vsm);
 738
 739        if (tn & tm & VFP_INFINITY) {
 740                /*
 741                 * Two infinities.  Are they different signs?
 742                 */
 743                if (vsn->sign ^ vsm->sign) {
 744                        /*
 745                         * different signs -> invalid
 746                         */
 747                        exceptions = FPSCR_IOC;
 748                        vsp = &vfp_single_default_qnan;
 749                } else {
 750                        /*
 751                         * same signs -> valid
 752                         */
 753                        vsp = vsn;
 754                }
 755        } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 756                /*
 757                 * One infinity and one number -> infinity
 758                 */
 759                vsp = vsn;
 760        } else {
 761                /*
 762                 * 'n' is a NaN of some type
 763                 */
 764                return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 765        }
 766        *vsd = *vsp;
 767        return exceptions;
 768}
 769
 770static u32
 771vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 772               struct vfp_single *vsm, u32 fpscr)
 773{
 774        u32 exp_diff, m_sig;
 775
 776        if (vsn->significand & 0x80000000 ||
 777            vsm->significand & 0x80000000) {
 778                pr_info("VFP: bad FP values in %s\n", __func__);
 779                vfp_single_dump("VSN", vsn);
 780                vfp_single_dump("VSM", vsm);
 781        }
 782
 783        /*
 784         * Ensure that 'n' is the largest magnitude number.  Note that
 785         * if 'n' and 'm' have equal exponents, we do not swap them.
 786         * This ensures that NaN propagation works correctly.
 787         */
 788        if (vsn->exponent < vsm->exponent) {
 789                struct vfp_single *t = vsn;
 790                vsn = vsm;
 791                vsm = t;
 792        }
 793
 794        /*
 795         * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 796         * infinity or a NaN here.
 797         */
 798        if (vsn->exponent == 255)
 799                return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 800
 801        /*
 802         * We have two proper numbers, where 'vsn' is the larger magnitude.
 803         *
 804         * Copy 'n' to 'd' before doing the arithmetic.
 805         */
 806        *vsd = *vsn;
 807
 808        /*
 809         * Align both numbers.
 810         */
 811        exp_diff = vsn->exponent - vsm->exponent;
 812        m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 813
 814        /*
 815         * If the signs are different, we are really subtracting.
 816         */
 817        if (vsn->sign ^ vsm->sign) {
 818                m_sig = vsn->significand - m_sig;
 819                if ((s32)m_sig < 0) {
 820                        vsd->sign = vfp_sign_negate(vsd->sign);
 821                        m_sig = -m_sig;
 822                } else if (m_sig == 0) {
 823                        vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 824                                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 825                }
 826        } else {
 827                m_sig = vsn->significand + m_sig;
 828        }
 829        vsd->significand = m_sig;
 830
 831        return 0;
 832}
 833
 834static u32
 835vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 836{
 837        vfp_single_dump("VSN", vsn);
 838        vfp_single_dump("VSM", vsm);
 839
 840        /*
 841         * Ensure that 'n' is the largest magnitude number.  Note that
 842         * if 'n' and 'm' have equal exponents, we do not swap them.
 843         * This ensures that NaN propagation works correctly.
 844         */
 845        if (vsn->exponent < vsm->exponent) {
 846                struct vfp_single *t = vsn;
 847                vsn = vsm;
 848                vsm = t;
 849                pr_debug("VFP: swapping M <-> N\n");
 850        }
 851
 852        vsd->sign = vsn->sign ^ vsm->sign;
 853
 854        /*
 855         * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 856         */
 857        if (vsn->exponent == 255) {
 858                if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 859                        return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 860                if ((vsm->exponent | vsm->significand) == 0) {
 861                        *vsd = vfp_single_default_qnan;
 862                        return FPSCR_IOC;
 863                }
 864                vsd->exponent = vsn->exponent;
 865                vsd->significand = 0;
 866                return 0;
 867        }
 868
 869        /*
 870         * If 'm' is zero, the result is always zero.  In this case,
 871         * 'n' may be zero or a number, but it doesn't matter which.
 872         */
 873        if ((vsm->exponent | vsm->significand) == 0) {
 874                vsd->exponent = 0;
 875                vsd->significand = 0;
 876                return 0;
 877        }
 878
 879        /*
 880         * We add 2 to the destination exponent for the same reason as
 881         * the addition case - though this time we have +1 from each
 882         * input operand.
 883         */
 884        vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 885        vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 886
 887        vfp_single_dump("VSD", vsd);
 888        return 0;
 889}
 890
 891#define NEG_MULTIPLY    (1 << 0)
 892#define NEG_SUBTRACT    (1 << 1)
 893
 894static u32
 895vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 896{
 897        struct vfp_single vsd, vsp, vsn, vsm;
 898        u32 exceptions;
 899        s32 v;
 900
 901        v = vfp_get_float(sn);
 902        pr_debug("VFP: s%u = %08x\n", sn, v);
 903        vfp_single_unpack(&vsn, v);
 904        if (vsn.exponent == 0 && vsn.significand)
 905                vfp_single_normalise_denormal(&vsn);
 906
 907        vfp_single_unpack(&vsm, m);
 908        if (vsm.exponent == 0 && vsm.significand)
 909                vfp_single_normalise_denormal(&vsm);
 910
 911        exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 912        if (negate & NEG_MULTIPLY)
 913                vsp.sign = vfp_sign_negate(vsp.sign);
 914
 915        v = vfp_get_float(sd);
 916        pr_debug("VFP: s%u = %08x\n", sd, v);
 917        vfp_single_unpack(&vsn, v);
 918        if (vsn.exponent == 0 && vsn.significand)
 919                vfp_single_normalise_denormal(&vsn);
 920        if (negate & NEG_SUBTRACT)
 921                vsn.sign = vfp_sign_negate(vsn.sign);
 922
 923        exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 924
 925        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 926}
 927
 928/*
 929 * Standard operations
 930 */
 931
 932/*
 933 * sd = sd + (sn * sm)
 934 */
 935static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 936{
 937        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 938}
 939
 940/*
 941 * sd = sd - (sn * sm)
 942 */
 943static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 944{
 945        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 946}
 947
 948/*
 949 * sd = -sd + (sn * sm)
 950 */
 951static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 952{
 953        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 954}
 955
 956/*
 957 * sd = -sd - (sn * sm)
 958 */
 959static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 960{
 961        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 962}
 963
 964/*
 965 * sd = sn * sm
 966 */
 967static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 968{
 969        struct vfp_single vsd, vsn, vsm;
 970        u32 exceptions;
 971        s32 n = vfp_get_float(sn);
 972
 973        pr_debug("VFP: s%u = %08x\n", sn, n);
 974
 975        vfp_single_unpack(&vsn, n);
 976        if (vsn.exponent == 0 && vsn.significand)
 977                vfp_single_normalise_denormal(&vsn);
 978
 979        vfp_single_unpack(&vsm, m);
 980        if (vsm.exponent == 0 && vsm.significand)
 981                vfp_single_normalise_denormal(&vsm);
 982
 983        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 984        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 985}
 986
 987/*
 988 * sd = -(sn * sm)
 989 */
 990static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 991{
 992        struct vfp_single vsd, vsn, vsm;
 993        u32 exceptions;
 994        s32 n = vfp_get_float(sn);
 995
 996        pr_debug("VFP: s%u = %08x\n", sn, n);
 997
 998        vfp_single_unpack(&vsn, n);
 999        if (vsn.exponent == 0 && vsn.significand)
1000                vfp_single_normalise_denormal(&vsn);
1001
1002        vfp_single_unpack(&vsm, m);
1003        if (vsm.exponent == 0 && vsm.significand)
1004                vfp_single_normalise_denormal(&vsm);
1005
1006        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1007        vsd.sign = vfp_sign_negate(vsd.sign);
1008        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1009}
1010
1011/*
1012 * sd = sn + sm
1013 */
1014static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1015{
1016        struct vfp_single vsd, vsn, vsm;
1017        u32 exceptions;
1018        s32 n = vfp_get_float(sn);
1019
1020        pr_debug("VFP: s%u = %08x\n", sn, n);
1021
1022        /*
1023         * Unpack and normalise denormals.
1024         */
1025        vfp_single_unpack(&vsn, n);
1026        if (vsn.exponent == 0 && vsn.significand)
1027                vfp_single_normalise_denormal(&vsn);
1028
1029        vfp_single_unpack(&vsm, m);
1030        if (vsm.exponent == 0 && vsm.significand)
1031                vfp_single_normalise_denormal(&vsm);
1032
1033        exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1034
1035        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1036}
1037
1038/*
1039 * sd = sn - sm
1040 */
1041static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1042{
1043        /*
1044         * Subtraction is addition with one sign inverted.
1045         */
1046        return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1047}
1048
1049/*
1050 * sd = sn / sm
1051 */
1052static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1053{
1054        struct vfp_single vsd, vsn, vsm;
1055        u32 exceptions = 0;
1056        s32 n = vfp_get_float(sn);
1057        int tm, tn;
1058
1059        pr_debug("VFP: s%u = %08x\n", sn, n);
1060
1061        vfp_single_unpack(&vsn, n);
1062        vfp_single_unpack(&vsm, m);
1063
1064        vsd.sign = vsn.sign ^ vsm.sign;
1065
1066        tn = vfp_single_type(&vsn);
1067        tm = vfp_single_type(&vsm);
1068
1069        /*
1070         * Is n a NAN?
1071         */
1072        if (tn & VFP_NAN)
1073                goto vsn_nan;
1074
1075        /*
1076         * Is m a NAN?
1077         */
1078        if (tm & VFP_NAN)
1079                goto vsm_nan;
1080
1081        /*
1082         * If n and m are infinity, the result is invalid
1083         * If n and m are zero, the result is invalid
1084         */
1085        if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1086                goto invalid;
1087
1088        /*
1089         * If n is infinity, the result is infinity
1090         */
1091        if (tn & VFP_INFINITY)
1092                goto infinity;
1093
1094        /*
1095         * If m is zero, raise div0 exception
1096         */
1097        if (tm & VFP_ZERO)
1098                goto divzero;
1099
1100        /*
1101         * If m is infinity, or n is zero, the result is zero
1102         */
1103        if (tm & VFP_INFINITY || tn & VFP_ZERO)
1104                goto zero;
1105
1106        if (tn & VFP_DENORMAL)
1107                vfp_single_normalise_denormal(&vsn);
1108        if (tm & VFP_DENORMAL)
1109                vfp_single_normalise_denormal(&vsm);
1110
1111        /*
1112         * Ok, we have two numbers, we can perform division.
1113         */
1114        vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1115        vsm.significand <<= 1;
1116        if (vsm.significand <= (2 * vsn.significand)) {
1117                vsn.significand >>= 1;
1118                vsd.exponent++;
1119        }
1120        {
1121                u64 significand = (u64)vsn.significand << 32;
1122                do_div(significand, vsm.significand);
1123                vsd.significand = significand;
1124        }
1125        if ((vsd.significand & 0x3f) == 0)
1126                vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1127
1128        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1129
1130 vsn_nan:
1131        exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1132 pack:
1133        vfp_put_float(vfp_single_pack(&vsd), sd);
1134        return exceptions;
1135
1136 vsm_nan:
1137        exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1138        goto pack;
1139
1140 zero:
1141        vsd.exponent = 0;
1142        vsd.significand = 0;
1143        goto pack;
1144
1145 divzero:
1146        exceptions = FPSCR_DZC;
1147 infinity:
1148        vsd.exponent = 255;
1149        vsd.significand = 0;
1150        goto pack;
1151
1152 invalid:
1153        vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1154        return FPSCR_IOC;
1155}
1156
1157static struct op fops[16] = {
1158        [FOP_TO_IDX(FOP_FMAC)]  = { vfp_single_fmac,  0 },
1159        [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 },
1160        [FOP_TO_IDX(FOP_FMSC)]  = { vfp_single_fmsc,  0 },
1161        [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 },
1162        [FOP_TO_IDX(FOP_FMUL)]  = { vfp_single_fmul,  0 },
1163        [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 },
1164        [FOP_TO_IDX(FOP_FADD)]  = { vfp_single_fadd,  0 },
1165        [FOP_TO_IDX(FOP_FSUB)]  = { vfp_single_fsub,  0 },
1166        [FOP_TO_IDX(FOP_FDIV)]  = { vfp_single_fdiv,  0 },
1167};
1168
1169#define FREG_BANK(x)    ((x) & 0x18)
1170#define FREG_IDX(x)     ((x) & 7)
1171
1172u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1173{
1174        u32 op = inst & FOP_MASK;
1175        u32 exceptions = 0;
1176        unsigned int dest;
1177        unsigned int sn = vfp_get_sn(inst);
1178        unsigned int sm = vfp_get_sm(inst);
1179        unsigned int vecitr, veclen, vecstride;
1180        struct op *fop;
1181
1182        vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1183
1184        fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1185
1186        /*
1187         * fcvtsd takes a dN register number as destination, not sN.
1188         * Technically, if bit 0 of dd is set, this is an invalid
1189         * instruction.  However, we ignore this for efficiency.
1190         * It also only operates on scalars.
1191         */
1192        if (fop->flags & OP_DD)
1193                dest = vfp_get_dd(inst);
1194        else
1195                dest = vfp_get_sd(inst);
1196
1197        /*
1198         * If destination bank is zero, vector length is always '1'.
1199         * ARM DDI0100F C5.1.3, C5.3.2.
1200         */
1201        if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
1202                veclen = 0;
1203        else
1204                veclen = fpscr & FPSCR_LENGTH_MASK;
1205
1206        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1207                 (veclen >> FPSCR_LENGTH_BIT) + 1);
1208
1209        if (!fop->fn)
1210                goto invalid;
1211
1212        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1213                s32 m = vfp_get_float(sm);
1214                u32 except;
1215                char type;
1216
1217                type = fop->flags & OP_DD ? 'd' : 's';
1218                if (op == FOP_EXT)
1219                        pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
1220                                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1221                                 sm, m);
1222                else
1223                        pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
1224                                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1225                                 FOP_TO_IDX(op), sm, m);
1226
1227                except = fop->fn(dest, sn, m, fpscr);
1228                pr_debug("VFP: itr%d: exceptions=%08x\n",
1229                         vecitr >> FPSCR_LENGTH_BIT, except);
1230
1231                exceptions |= except;
1232
1233                /*
1234                 * CHECK: It appears to be undefined whether we stop when
1235                 * we encounter an exception.  We continue.
1236                 */
1237                dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1238                sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1239                if (FREG_BANK(sm) != 0)
1240                        sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1241        }
1242        return exceptions;
1243
1244 invalid:
1245        return (u32)-1;
1246}
1247