LXR linux/arch/arm/vfp/vfpsingle.c

   1/*
   2 *  linux/arch/arm/vfp/vfpsingle.c
   3 *
   4 * This code is derived in part from John R. Hauser's softfloat library, which
   5 * carries the following notice:
   6 *
   7 * ===========================================================================
   8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9 * Arithmetic Package, Release 2.
  10 *
  11 * Written by John R. Hauser.  This work was made possible in part by the
  12 * International Computer Science Institute, located at Suite 600, 1947 Center
  13 * Street, Berkeley, California 94704.  Funding was partially provided by the
  14 * National Science Foundation under grant MIP-9311980.  The original version
  15 * of this code was written as part of a project to build a fixed-point vector
  16 * processor in collaboration with the University of California at Berkeley,
  17 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19 * arithmetic/softfloat.html'.
  20 *
  21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26 *
  27 * Derivative works are acceptable, even for commercial purposes, so long as
  28 * (1) they include prominent notice that the work is derivative, and (2) they
  29 * include prominent notice akin to these three paragraphs for those parts of
  30 * this code that are retained.
  31 * ===========================================================================
  32 */
  33#include <linux/kernel.h>
  34#include <linux/bitops.h>
  35
  36#include <asm/div64.h>
  37#include <asm/vfp.h>
  38
  39#include "vfpinstr.h"
  40#include "vfp.h"
  41
  42static struct vfp_single vfp_single_default_qnan = {
  43        .exponent       = 255,
  44        .sign           = 0,
  45        .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
  46};
  47
  48static void vfp_single_dump(const char *str, struct vfp_single *s)
  49{
  50        pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  51                 str, s->sign != 0, s->exponent, s->significand);
  52}
  53
  54static void vfp_single_normalise_denormal(struct vfp_single *vs)
  55{
  56        int bits = 31 - fls(vs->significand);
  57
  58        vfp_single_dump("normalise_denormal: in", vs);
  59
  60        if (bits) {
  61                vs->exponent -= bits - 1;
  62                vs->significand <<= bits;
  63        }
  64
  65        vfp_single_dump("normalise_denormal: out", vs);
  66}
  67
  /*
   * Normalise, round and pack the unpacked single 'vs', write the packed
   * result to single-precision register 'sd' with vfp_put_float(), and
   * return 'exceptions' with any exception bits raised here (FPSCR_IXC,
   * FPSCR_OFC, FPSCR_UFC) ORed in.
   *
   * 'fpscr' supplies the rounding mode (fpscr & FPSCR_RMODE_MASK).
   * 'func' is only used in the DEBUG trace; without DEBUG the wrapper
   * macro below drops it and calls __vfp_single_normaliseround().
   *
   * Note that 'vs' is modified in place.
   */
  68#ifndef DEBUG
  69#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  70u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
  71#else
  72u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
  73#endif
  74{
  75        u32 significand, incr, rmode;
  76        int exponent, shift, underflow;
  77
  78        vfp_single_dump("pack: in", vs);
  79
  80        /*
  81         * Infinities and NaNs are a special case.
             *
             * An exponent of 255 is packed untouched when the significand
             * is zero (infinity) or when 'exceptions' is non-zero.
             * NOTE(review): the latter presumably covers NaNs, relying on
             * the caller passing a non-zero value such as the result of
             * vfp_propagate_nan() - confirm.
  82         */
  83        if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
  84                goto pack;
  85
  86        /*
  87         * Special-case zero.
  88         */
  89        if (vs->significand == 0) {
  90                vs->exponent = 0;
  91                goto pack;
  92        }
  93
  94        exponent = vs->exponent;
  95        significand = vs->significand;
  96
  97        /*
  98         * Normalise first.  Note that we shift the significand up to
  99         * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
 100         * significant bit.
 101         */
 102        shift = 32 - fls(significand);
 103        if (shift < 32 && shift) {
 104                exponent -= shift;
 105                significand <<= shift;
 106        }
 107
 108#ifdef DEBUG
 109        vs->exponent = exponent;
 110        vs->significand = significand;
 111        vfp_single_dump("pack: normalised", vs);
 112#endif
 113
 114        /*
 115         * Tiny number?
             *
             * A negative exponent is brought back to zero by shifting the
             * significand right with jamming.  The underflow indication is
             * dropped again if no bits remain below the result's least
             * significant bit.
 116         */
 117        underflow = exponent < 0;
 118        if (underflow) {
 119                significand = vfp_shiftright32jamming(significand, -exponent);
 120                exponent = 0;
 121#ifdef DEBUG
 122                vs->exponent = exponent;
 123                vs->significand = significand;
 124                vfp_single_dump("pack: tiny number", vs);
 125#endif
 126                if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
 127                        underflow = 0;
 128        }
 129
 130        /*
 131         * Select rounding increment.
             *
             * Round-to-nearest adds half of the result's least significant
             * bit, less one when that bit is currently clear.  The directed
             * modes add all of the low bits when rounding away from zero for
             * this sign, and nothing otherwise.
 132         */
 133        incr = 0;
 134        rmode = fpscr & FPSCR_RMODE_MASK;
 135
 136        if (rmode == FPSCR_ROUND_NEAREST) {
 137                incr = 1 << VFP_SINGLE_LOW_BITS;
 138                if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
 139                        incr -= 1;
 140        } else if (rmode == FPSCR_ROUND_TOZERO) {
 141                incr = 0;
 142        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
 143                incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
 144
 145        pr_debug("VFP: rounding increment = 0x%08x\n", incr);
 146
 147        /*
 148         * Is our rounding going to overflow?
             *
             * If adding the increment would wrap the 32-bit significand,
             * move both down one bit first; (significand & 1) keeps the bit
             * that is shifted out as a sticky bit.
 149         */
 150        if ((significand + incr) < significand) {
 151                exponent += 1;
 152                significand = (significand >> 1) | (significand & 1);
 153                incr >>= 1;
 154#ifdef DEBUG
 155                vs->exponent = exponent;
 156                vs->significand = significand;
 157                vfp_single_dump("pack: overflow", vs);
 158#endif
 159        }
 160
 161        /*
 162         * If any of the low bits (which will be shifted out of the
 163         * number) are non-zero, the result is inexact.
 164         */
 165        if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
 166                exceptions |= FPSCR_IXC;
 167
 168        /*
 169         * Do our rounding.
 170         */
 171        significand += incr;
 172
 173        /*
 174         * Infinity?
             *
             * A zero increment means the rounding mode does not round this
             * value away from zero, so the result is pinned to exponent 253
             * with an all-ones significand instead of becoming infinity.
 175         */
 176        if (exponent >= 254) {
 177                exceptions |= FPSCR_OFC | FPSCR_IXC;
 178                if (incr == 0) {
 179                        vs->exponent = 253;
 180                        vs->significand = 0x7fffffff;
 181                } else {
 182                        vs->exponent = 255;             /* infinity */
 183                        vs->significand = 0;
 184                }
 185        } else {
                    /*
                     * Underflow is only reported when the rounded result
                     * still has a zero exponent and did not round up past
                     * 0x80000000.
                     */
 186                if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
 187                        exponent = 0;
 188                if (exponent || significand > 0x80000000)
 189                        underflow = 0;
 190                if (underflow)
 191                        exceptions |= FPSCR_UFC;
 192                vs->exponent = exponent;
 193                vs->significand = significand >> 1;
 194        }
 195
            /*
             * Common exit: pack 'vs' and store it in register 'sd'.
             */
 196 pack:
 197        vfp_single_dump("pack: final", vs);
 198        {
 199                s32 d = vfp_single_pack(vs);
 200#ifdef DEBUG
 201                pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
 202                         sd, d, exceptions);
 203#endif
 204                vfp_put_float(d, sd);
 205        }
 206
 207        return exceptions;
 208}
 209
 210/*
 211 * Propagate the NaN, setting exceptions if it is signalling.
 212 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 213 */
 214static u32
 215vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 216                  struct vfp_single *vsm, u32 fpscr)
 217{
 218        struct vfp_single *nan;
 219        int tn, tm = 0;
 220
 221        tn = vfp_single_type(vsn);
 222
 223        if (vsm)
 224                tm = vfp_single_type(vsm);
 225
 226        if (fpscr & FPSCR_DEFAULT_NAN)
 227                /*
 228                 * Default NaN mode - always returns a quiet NaN
 229                 */
 230                nan = &vfp_single_default_qnan;
 231        else {
 232                /*
 233                 * Contemporary mode - select the first signalling
 234                 * NAN, or if neither are signalling, the first
 235                 * quiet NAN.
 236                 */
 237                if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 238                        nan = vsn;
 239                else
 240                        nan = vsm;
 241                /*
 242                 * Make the NaN quiet.
 243                 */
 244                nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 245        }
 246
 247        *vsd = *nan;
 248
 249        /*
 250         * If one was a signalling NAN, raise invalid operation.
 251         */
 252        return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 253}
 254
 255
 256/*
 257 * Extended operations
 258 */
 259static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 260{
 261        vfp_put_float(vfp_single_packed_abs(m), sd);
 262        return 0;
 263}
 264
 265static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 266{
 267        vfp_put_float(m, sd);
 268        return 0;
 269}
 270
 271static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 272{
 273        vfp_put_float(vfp_single_packed_negate(m), sd);
 274        return 0;
 275}
 276
 277static const u16 sqrt_oddadjust[] = {
 278        0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
 279        0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
 280};
 281
 282static const u16 sqrt_evenadjust[] = {
 283        0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
 284        0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
 285};
 286
 /*
  * Return an estimate of the square-root significand for a value with the
  * given biased 'exponent' and unpacked 'significand'.
  *
  * The significand is expected to have its top two bits equal to 01; any
  * other pattern only triggers a warning and the computation continues.
  * Only the parity of 'exponent' is used: it selects the adjustment table
  * and the form of the first refinement.
  */
 287u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 288{
 289        int index;
 290        u32 z, a;
 291
 292        if ((significand & 0xc0000000) != 0x40000000) {
 293                printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
 294        }
 295
            /*
             * 'a' is the significand moved up one bit; bits 30..27 of it
             * select one of the 16 table entries.
             */
 296        a = significand << 1;
 297        index = (a >> 27) & 15;
 298        if (exponent & 1) {
 299                z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 300                z = ((a / z) << 14) + (z << 15);
 301                a >>= 1;
 302        } else {
 303                z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 304                z = a / z + z;
                    /* Saturate rather than let (z << 15) overflow 32 bits. */
 305                z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 306                if (z <= a)
 307                        return (s32)a >> 1;
 308        }
            /*
             * Final refinement: (a << 31) / z + z / 2, with the division
             * carried out in 64 bits.  do_div() leaves the quotient in 'v',
             * which is what the return statement relies on.
             */
 309        {
 310                u64 v = (u64)a << 31;
 311                do_div(v, z);
 312                return v + (z >> 1);
 313        }
 314}
 315
 /*
  * fsqrt: compute the square root of the packed single 'm', store it in
  * register 'sd' and return the exception bits raised.
  *
  * Special cases, in the order they are tested:
  *   NaN                 -> propagated through vfp_propagate_nan()
  *   +infinity, +/-0     -> operand copied unchanged, no exception
  *   -infinity, negative -> default quiet NaN, FPSCR_IOC
  * Everything else is estimated, corrected and then rounded by
  * vfp_single_normaliseround().
  */
 316static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
 317{
 318        struct vfp_single vsm, vsd;
 319        int ret, tm;
 320
 321        vfp_single_unpack(&vsm, m);
 322        tm = vfp_single_type(&vsm);
 323        if (tm & (VFP_NAN|VFP_INFINITY)) {
 324                struct vfp_single *vsp = &vsd;
 325
                    /*
                     * The sqrt_copy and sqrt_invalid labels below are also
                     * jumped to from further down (zero and negative
                     * operands), so this block doubles as the shared
                     * "pack and return" exit for every special case.
                     */
 326                if (tm & VFP_NAN)
 327                        ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
 328                else if (vsm.sign == 0) {
 329 sqrt_copy:
 330                        vsp = &vsm;
 331                        ret = 0;
 332                } else {
 333 sqrt_invalid:
 334                        vsp = &vfp_single_default_qnan;
 335                        ret = FPSCR_IOC;
 336                }
 337                vfp_put_float(vfp_single_pack(vsp), sd);
 338                return ret;
 339        }
 340
 341        /*
 342         * sqrt(+/- 0) == +/- 0
 343         */
 344        if (tm & VFP_ZERO)
 345                goto sqrt_copy;
 346
 347        /*
 348         * Normalise a denormalised number
 349         */
 350        if (tm & VFP_DENORMAL)
 351                vfp_single_normalise_denormal(&vsm);
 352
 353        /*
 354         * sqrt(<0) = invalid
 355         */
 356        if (vsm.sign)
 357                goto sqrt_invalid;
 358
 359        vfp_single_dump("sqrt", &vsm);
 360
 361        /*
 362         * Estimate the square root.
             *
             * The unbiased exponent is halved and re-biased; the significand
             * estimate is increased by 2 before the correction step below.
 363         */
 364        vsd.sign = 0;
 365        vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
 366        vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
 367
 368        vfp_single_dump("sqrt estimate", &vsd);
 369
 370        /*
 371         * And now adjust.
             *
             * The exact correction only runs when the low bits of the
             * estimate are 5 or less.  The estimate is then stepped down
             * until its square no longer exceeds the operand (rem >= 0),
             * and a non-zero remainder is folded into bit 0 as a sticky bit.
             * A significand below 2 is replaced by 0xffffffff; presumably
             * this catches the "+ 2" above wrapping round - confirm.
 372         */
 373        if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
 374                if (vsd.significand < 2) {
 375                        vsd.significand = 0xffffffff;
 376                } else {
 377                        u64 term;
 378                        s64 rem;
 379                        vsm.significand <<= !(vsm.exponent & 1);
 380                        term = (u64)vsd.significand * vsd.significand;
 381                        rem = ((u64)vsm.significand << 32) - term;
 382
 383                        pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
 384
 385                        while (rem < 0) {
 386                                vsd.significand -= 1;
 387                                rem += ((u64)vsd.significand << 1) | 1;
 388                        }
 389                        vsd.significand |= rem != 0;
 390                }
 391        }
 392        vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
 393
 394        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
 395}
 396
 397/*
 398 * Equal        := ZC
 399 * Less than    := N
 400 * Greater than := C
 401 * Unordered    := CV
 402 */
 403static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 404{
 405        s32 d;
 406        u32 ret = 0;
 407
 408        d = vfp_get_float(sd);
 409        if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 410                ret |= FPSCR_C | FPSCR_V;
 411                if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 412                        /*
 413                         * Signalling NaN, or signalling on quiet NaN
 414                         */
 415                        ret |= FPSCR_IOC;
 416        }
 417
 418        if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 419                ret |= FPSCR_C | FPSCR_V;
 420                if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 421                        /*
 422                         * Signalling NaN, or signalling on quiet NaN
 423                         */
 424                        ret |= FPSCR_IOC;
 425        }
 426
 427        if (ret == 0) {
 428                if (d == m || vfp_single_packed_abs(d | m) == 0) {
 429                        /*
 430                         * equal
 431                         */
 432                        ret |= FPSCR_Z | FPSCR_C;
 433                } else if (vfp_single_packed_sign(d ^ m)) {
 434                        /*
 435                         * different signs
 436                         */
 437                        if (vfp_single_packed_sign(d))
 438                                /*
 439                                 * d is negative, so d < m
 440                                 */
 441                                ret |= FPSCR_N;
 442                        else
 443                                /*
 444                                 * d is positive, so d > m
 445                                 */
 446                                ret |= FPSCR_C;
 447                } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 448                        /*
 449                         * d < m
 450                         */
 451                        ret |= FPSCR_N;
 452                } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 453                        /*
 454                         * d > m
 455                         */
 456                        ret |= FPSCR_C;
 457                }
 458        }
 459        return ret;
 460}
 461
 462static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 463{
 464        return vfp_compare(sd, 0, m, fpscr);
 465}
 466
 467static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 468{
 469        return vfp_compare(sd, 1, m, fpscr);
 470}
 471
 472static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 473{
 474        return vfp_compare(sd, 0, 0, fpscr);
 475}
 476
 477static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 478{
 479        return vfp_compare(sd, 1, 0, fpscr);
 480}
 481
 482static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 483{
 484        struct vfp_single vsm;
 485        struct vfp_double vdd;
 486        int tm;
 487        u32 exceptions = 0;
 488
 489        vfp_single_unpack(&vsm, m);
 490
 491        tm = vfp_single_type(&vsm);
 492
 493        /*
 494         * If we have a signalling NaN, signal invalid operation.
 495         */
 496        if (tm == VFP_SNAN)
 497                exceptions = FPSCR_IOC;
 498
 499        if (tm & VFP_DENORMAL)
 500                vfp_single_normalise_denormal(&vsm);
 501
 502        vdd.sign = vsm.sign;
 503        vdd.significand = (u64)vsm.significand << 32;
 504
 505        /*
 506         * If we have an infinity or NaN, the exponent must be 2047.
 507         */
 508        if (tm & (VFP_INFINITY|VFP_NAN)) {
 509                vdd.exponent = 2047;
 510                if (tm == VFP_QNAN)
 511                        vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 512                goto pack_nan;
 513        } else if (tm & VFP_ZERO)
 514                vdd.exponent = 0;
 515        else
 516                vdd.exponent = vsm.exponent + (1023 - 127);
 517
 518        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 519
 520 pack_nan:
 521        vfp_put_double(vfp_double_pack(&vdd), dd);
 522        return exceptions;
 523}
 524
 525static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 526{
 527        struct vfp_single vs;
 528
 529        vs.sign = 0;
 530        vs.exponent = 127 + 31 - 1;
 531        vs.significand = (u32)m;
 532
 533        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 534}
 535
 536static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 537{
 538        struct vfp_single vs;
 539
 540        vs.sign = (m & 0x80000000) >> 16;
 541        vs.exponent = 127 + 31 - 1;
 542        vs.significand = vs.sign ? -m : m;
 543
 544        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 545}
 546
 547static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
 548{
 549        struct vfp_single vsm;
 550        u32 d, exceptions = 0;
 551        int rmode = fpscr & FPSCR_RMODE_MASK;
 552        int tm;
 553
 554        vfp_single_unpack(&vsm, m);
 555        vfp_single_dump("VSM", &vsm);
 556
 557        /*
 558         * Do we have a denormalised number?
 559         */
 560        tm = vfp_single_type(&vsm);
 561        if (tm & VFP_DENORMAL)
 562                exceptions |= FPSCR_IDC;
 563
 564        if (tm & VFP_NAN)
 565                vsm.sign = 0;
 566
 567        if (vsm.exponent >= 127 + 32) {
 568                d = vsm.sign ? 0 : 0xffffffff;
 569                exceptions = FPSCR_IOC;
 570        } else if (vsm.exponent >= 127 - 1) {
 571                int shift = 127 + 31 - vsm.exponent;
 572                u32 rem, incr = 0;
 573
 574                /*
 575                 * 2^0 <= m < 2^32-2^8
 576                 */
 577                d = (vsm.significand << 1) >> shift;
 578                rem = vsm.significand << (33 - shift);
 579
 580                if (rmode == FPSCR_ROUND_NEAREST) {
 581                        incr = 0x80000000;
 582                        if ((d & 1) == 0)
 583                                incr -= 1;
 584                } else if (rmode == FPSCR_ROUND_TOZERO) {
 585                        incr = 0;
 586                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 587                        incr = ~0;
 588                }
 589
 590                if ((rem + incr) < rem) {
 591                        if (d < 0xffffffff)
 592                                d += 1;
 593                        else
 594                                exceptions |= FPSCR_IOC;
 595                }
 596
 597                if (d && vsm.sign) {
 598                        d = 0;
 599                        exceptions |= FPSCR_IOC;
 600                } else if (rem)
 601                        exceptions |= FPSCR_IXC;
 602        } else {
 603                d = 0;
 604                if (vsm.exponent | vsm.significand) {
 605                        exceptions |= FPSCR_IXC;
 606                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 607                                d = 1;
 608                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
 609                                d = 0;
 610                                exceptions |= FPSCR_IOC;
 611                        }
 612                }
 613        }
 614
 615        pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 616
 617        vfp_put_float(d, sd);
 618
 619        return exceptions;
 620}
 621
 622static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 623{
 624        return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 625}
 626
 627static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
 628{
 629        struct vfp_single vsm;
 630        u32 d, exceptions = 0;
 631        int rmode = fpscr & FPSCR_RMODE_MASK;
 632        int tm;
 633
 634        vfp_single_unpack(&vsm, m);
 635        vfp_single_dump("VSM", &vsm);
 636
 637        /*
 638         * Do we have a denormalised number?
 639         */
 640        tm = vfp_single_type(&vsm);
 641        if (vfp_single_type(&vsm) & VFP_DENORMAL)
 642                exceptions |= FPSCR_IDC;
 643
 644        if (tm & VFP_NAN) {
 645                d = 0;
 646                exceptions |= FPSCR_IOC;
 647        } else if (vsm.exponent >= 127 + 32) {
 648                /*
 649                 * m >= 2^31-2^7: invalid
 650                 */
 651                d = 0x7fffffff;
 652                if (vsm.sign)
 653                        d = ~d;
 654                exceptions |= FPSCR_IOC;
 655        } else if (vsm.exponent >= 127 - 1) {
 656                int shift = 127 + 31 - vsm.exponent;
 657                u32 rem, incr = 0;
 658
 659                /* 2^0 <= m <= 2^31-2^7 */
 660                d = (vsm.significand << 1) >> shift;
 661                rem = vsm.significand << (33 - shift);
 662
 663                if (rmode == FPSCR_ROUND_NEAREST) {
 664                        incr = 0x80000000;
 665                        if ((d & 1) == 0)
 666                                incr -= 1;
 667                } else if (rmode == FPSCR_ROUND_TOZERO) {
 668                        incr = 0;
 669                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 670                        incr = ~0;
 671                }
 672
 673                if ((rem + incr) < rem && d < 0xffffffff)
 674                        d += 1;
 675                if (d > 0x7fffffff + (vsm.sign != 0)) {
 676                        d = 0x7fffffff + (vsm.sign != 0);
 677                        exceptions |= FPSCR_IOC;
 678                } else if (rem)
 679                        exceptions |= FPSCR_IXC;
 680
 681                if (vsm.sign)
 682                        d = -d;
 683        } else {
 684                d = 0;
 685                if (vsm.exponent | vsm.significand) {
 686                        exceptions |= FPSCR_IXC;
 687                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 688                                d = 1;
 689                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
 690                                d = -1;
 691                }
 692        }
 693
 694        pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 695
 696        vfp_put_float((s32)d, sd);
 697
 698        return exceptions;
 699}
 700
 701static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 702{
 703        return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 704}
 705
 706static struct op fops_ext[32] = {
 707        [FEXT_TO_IDX(FEXT_FCPY)]        = { vfp_single_fcpy,   0 },
 708        [FEXT_TO_IDX(FEXT_FABS)]        = { vfp_single_fabs,   0 },
 709        [FEXT_TO_IDX(FEXT_FNEG)]        = { vfp_single_fneg,   0 },
 710        [FEXT_TO_IDX(FEXT_FSQRT)]       = { vfp_single_fsqrt,  0 },
 711        [FEXT_TO_IDX(FEXT_FCMP)]        = { vfp_single_fcmp,   OP_SCALAR },
 712        [FEXT_TO_IDX(FEXT_FCMPE)]       = { vfp_single_fcmpe,  OP_SCALAR },
 713        [FEXT_TO_IDX(FEXT_FCMPZ)]       = { vfp_single_fcmpz,  OP_SCALAR },
 714        [FEXT_TO_IDX(FEXT_FCMPEZ)]      = { vfp_single_fcmpez, OP_SCALAR },
 715        [FEXT_TO_IDX(FEXT_FCVT)]        = { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
 716        [FEXT_TO_IDX(FEXT_FUITO)]       = { vfp_single_fuito,  OP_SCALAR },
 717        [FEXT_TO_IDX(FEXT_FSITO)]       = { vfp_single_fsito,  OP_SCALAR },
 718        [FEXT_TO_IDX(FEXT_FTOUI)]       = { vfp_single_ftoui,  OP_SCALAR },
 719        [FEXT_TO_IDX(FEXT_FTOUIZ)]      = { vfp_single_ftouiz, OP_SCALAR },
 720        [FEXT_TO_IDX(FEXT_FTOSI)]       = { vfp_single_ftosi,  OP_SCALAR },
 721        [FEXT_TO_IDX(FEXT_FTOSIZ)]      = { vfp_single_ftosiz, OP_SCALAR },
 722};
 723
 724
 725
 726
 727
 728static u32
 729vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 730                          struct vfp_single *vsm, u32 fpscr)
 731{
 732        struct vfp_single *vsp;
 733        u32 exceptions = 0;
 734        int tn, tm;
 735
 736        tn = vfp_single_type(vsn);
 737        tm = vfp_single_type(vsm);
 738
 739        if (tn & tm & VFP_INFINITY) {
 740                /*
 741                 * Two infinities.  Are they different signs?
 742                 */
 743                if (vsn->sign ^ vsm->sign) {
 744                        /*
 745                         * different signs -> invalid
 746                         */
 747                        exceptions = FPSCR_IOC;
 748                        vsp = &vfp_single_default_qnan;
 749                } else {
 750                        /*
 751                         * same signs -> valid
 752                         */
 753                        vsp = vsn;
 754                }
 755        } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 756                /*
 757                 * One infinity and one number -> infinity
 758                 */
 759                vsp = vsn;
 760        } else {
 761                /*
 762                 * 'n' is a NaN of some type
 763                 */
 764                return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 765        }
 766        *vsd = *vsp;
 767        return exceptions;
 768}
 769
 770static u32
 771vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 772               struct vfp_single *vsm, u32 fpscr)
 773{
 774        u32 exp_diff, m_sig;
 775
 776        if (vsn->significand & 0x80000000 ||
 777            vsm->significand & 0x80000000) {
 778                pr_info("VFP: bad FP values in %s\n", __func__);
 779                vfp_single_dump("VSN", vsn);
 780                vfp_single_dump("VSM", vsm);
 781        }
 782
 783        /*
 784         * Ensure that 'n' is the largest magnitude number.  Note that
 785         * if 'n' and 'm' have equal exponents, we do not swap them.
 786         * This ensures that NaN propagation works correctly.
 787         */
 788        if (vsn->exponent < vsm->exponent) {
 789                struct vfp_single *t = vsn;
 790                vsn = vsm;
 791                vsm = t;
 792        }
 793
 794        /*
 795         * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 796         * infinity or a NaN here.
 797         */
 798        if (vsn->exponent == 255)
 799                return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 800
 801        /*
 802         * We have two proper numbers, where 'vsn' is the larger magnitude.
 803         *
 804         * Copy 'n' to 'd' before doing the arithmetic.
 805         */
 806        *vsd = *vsn;
 807
 808        /*
 809         * Align both numbers.
 810         */
 811        exp_diff = vsn->exponent - vsm->exponent;
 812        m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 813
 814        /*
 815         * If the signs are different, we are really subtracting.
 816         */
 817        if (vsn->sign ^ vsm->sign) {
 818                m_sig = vsn->significand - m_sig;
 819                if ((s32)m_sig < 0) {
 820                        vsd->sign = vfp_sign_negate(vsd->sign);
 821                        m_sig = -m_sig;
 822                } else if (m_sig == 0) {
 823                        vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 824                                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 825                }
 826        } else {
 827                m_sig = vsn->significand + m_sig;
 828        }
 829        vsd->significand = m_sig;
 830
 831        return 0;
 832}
 833
 834static u32
 835vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 836{
 837        vfp_single_dump("VSN", vsn);
 838        vfp_single_dump("VSM", vsm);
 839
 840        /*
 841         * Ensure that 'n' is the largest magnitude number.  Note that
 842         * if 'n' and 'm' have equal exponents, we do not swap them.
 843         * This ensures that NaN propagation works correctly.
 844         */
 845        if (vsn->exponent < vsm->exponent) {
 846                struct vfp_single *t = vsn;
 847                vsn = vsm;
 848                vsm = t;
 849                pr_debug("VFP: swapping M <-> N\n");
 850        }
 851
 852        vsd->sign = vsn->sign ^ vsm->sign;
 853
 854        /*
 855         * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 856         */
 857        if (vsn->exponent == 255) {
 858                if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 859                        return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 860                if ((vsm->exponent | vsm->significand) == 0) {
 861                        *vsd = vfp_single_default_qnan;
 862                        return FPSCR_IOC;
 863                }
 864                vsd->exponent = vsn->exponent;
 865                vsd->significand = 0;
 866                return 0;
 867        }
 868
 869        /*
 870         * If 'm' is zero, the result is always zero.  In this case,
 871         * 'n' may be zero or a number, but it doesn't matter which.
 872         */
 873        if ((vsm->exponent | vsm->significand) == 0) {
 874                vsd->exponent = 0;
 875                vsd->significand = 0;
 876                return 0;
 877        }
 878
 879        /*
 880         * We add 2 to the destination exponent for the same reason as
 881         * the addition case - though this time we have +1 from each
 882         * input operand.
 883         */
 884        vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 885        vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 886
 887        vfp_single_dump("VSD", vsd);
 888        return 0;
 889}
 890
 891#define NEG_MULTIPLY    (1 << 0)
 892#define NEG_SUBTRACT    (1 << 1)
 893
 894static u32
 895vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 896{
 897        struct vfp_single vsd, vsp, vsn, vsm;
 898        u32 exceptions;
 899        s32 v;
 900
 901        v = vfp_get_float(sn);
 902        pr_debug("VFP: s%u = %08x\n", sn, v);
 903        vfp_single_unpack(&vsn, v);
 904        if (vsn.exponent == 0 && vsn.significand)
 905                vfp_single_normalise_denormal(&vsn);
 906
 907        vfp_single_unpack(&vsm, m);
 908        if (vsm.exponent == 0 && vsm.significand)
 909                vfp_single_normalise_denormal(&vsm);
 910
 911        exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 912        if (negate & NEG_MULTIPLY)
 913                vsp.sign = vfp_sign_negate(vsp.sign);
 914
 915        v = vfp_get_float(sd);
 916        pr_debug("VFP: s%u = %08x\n", sd, v);
 917        vfp_single_unpack(&vsn, v);
 918        if (negate & NEG_SUBTRACT)
 919                vsn.sign = vfp_sign_negate(vsn.sign);
 920
 921        exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 922
 923        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 924}
 925
 926/*
 927 * Standard operations
 928 */
 929
 930/*
 931 * sd = sd + (sn * sm)
 932 */
 933static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 934{
 935        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 936}
 937
 938/*
 939 * sd = sd - (sn * sm)
 940 */
 941static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 942{
 943        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 944}
 945
 946/*
 947 * sd = -sd + (sn * sm)
 948 */
 949static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 950{
 951        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 952}
 953
 954/*
 955 * sd = -sd - (sn * sm)
 956 */
 957static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 958{
 959        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 960}
 961
 962/*
 963 * sd = sn * sm
 964 */
 965static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 966{
 967        struct vfp_single vsd, vsn, vsm;
 968        u32 exceptions;
 969        s32 n = vfp_get_float(sn);
 970
 971        pr_debug("VFP: s%u = %08x\n", sn, n);
 972
 973        vfp_single_unpack(&vsn, n);
 974        if (vsn.exponent == 0 && vsn.significand)
 975                vfp_single_normalise_denormal(&vsn);
 976
 977        vfp_single_unpack(&vsm, m);
 978        if (vsm.exponent == 0 && vsm.significand)
 979                vfp_single_normalise_denormal(&vsm);
 980
 981        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 982        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 983}
 984
 985/*
 986 * sd = -(sn * sm)
 987 */
 988static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 989{
 990        struct vfp_single vsd, vsn, vsm;
 991        u32 exceptions;
 992        s32 n = vfp_get_float(sn);
 993
 994        pr_debug("VFP: s%u = %08x\n", sn, n);
 995
 996        vfp_single_unpack(&vsn, n);
 997        if (vsn.exponent == 0 && vsn.significand)
 998                vfp_single_normalise_denormal(&vsn);
 999
1000        vfp_single_unpack(&vsm, m);

1001        if (vsm.exponent == 0 && vsm.significand)
1002                vfp_single_normalise_denormal(&vsm);
1003
1004        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1005        vsd.sign = vfp_sign_negate(vsd.sign);
1006        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1007}
1008
1009/*
1010 * sd = sn + sm
1011 */
1012static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1013{
1014        struct vfp_single vsd, vsn, vsm;
1015        u32 exceptions;
1016        s32 n = vfp_get_float(sn);
1017
1018        pr_debug("VFP: s%u = %08x\n", sn, n);
1019
1020        /*
1021         * Unpack and normalise denormals.
1022         */
1023        vfp_single_unpack(&vsn, n);
1024        if (vsn.exponent == 0 && vsn.significand)
1025                vfp_single_normalise_denormal(&vsn);
1026
1027        vfp_single_unpack(&vsm, m);
1028        if (vsm.exponent == 0 && vsm.significand)
1029                vfp_single_normalise_denormal(&vsm);
1030
1031        exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1032
1033        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1034}
1035
1036/*
1037 * sd = sn - sm
1038 */
1039static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1040{
1041        /*
1042         * Subtraction is addition with one sign inverted.
1043         */
1044        return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1045}
1046
1047/*
1048 * sd = sn / sm
1049 */
1050static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1051{
1052        struct vfp_single vsd, vsn, vsm;
1053        u32 exceptions = 0;
1054        s32 n = vfp_get_float(sn);
1055        int tm, tn;
1056
1057        pr_debug("VFP: s%u = %08x\n", sn, n);
1058
1059        vfp_single_unpack(&vsn, n);
1060        vfp_single_unpack(&vsm, m);
1061
1062        vsd.sign = vsn.sign ^ vsm.sign;
1063
1064        tn = vfp_single_type(&vsn);
1065        tm = vfp_single_type(&vsm);
1066
1067        /*
1068         * Is n a NAN?
1069         */
1070        if (tn & VFP_NAN)
1071                goto vsn_nan;
1072
1073        /*
1074         * Is m a NAN?
1075         */
1076        if (tm & VFP_NAN)
1077                goto vsm_nan;
1078
1079        /*
1080         * If n and m are infinity, the result is invalid
1081         * If n and m are zero, the result is invalid
1082         */
1083        if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1084                goto invalid;
1085
1086        /*
1087         * If n is infinity, the result is infinity
1088         */
1089        if (tn & VFP_INFINITY)
1090                goto infinity;
1091
1092        /*
1093         * If m is zero, raise div0 exception
1094         */
1095        if (tm & VFP_ZERO)
1096                goto divzero;
1097
1098        /*
1099         * If m is infinity, or n is zero, the result is zero
1100         */
1101        if (tm & VFP_INFINITY || tn & VFP_ZERO)
1102                goto zero;
1103
1104        if (tn & VFP_DENORMAL)
1105                vfp_single_normalise_denormal(&vsn);
1106        if (tm & VFP_DENORMAL)
1107                vfp_single_normalise_denormal(&vsm);
1108
1109        /*
1110         * Ok, we have two numbers, we can perform division.
1111         */
1112        vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1113        vsm.significand <<= 1;
1114        if (vsm.significand <= (2 * vsn.significand)) {
1115                vsn.significand >>= 1;
1116                vsd.exponent++;
1117        }
1118        {
1119                u64 significand = (u64)vsn.significand << 32;
1120                do_div(significand, vsm.significand);
1121                vsd.significand = significand;
1122        }
1123        if ((vsd.significand & 0x3f) == 0)
1124                vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1125
1126        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1127
1128 vsn_nan:
1129        exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1130 pack:
1131        vfp_put_float(vfp_single_pack(&vsd), sd);
1132        return exceptions;
1133
1134 vsm_nan:
1135        exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1136        goto pack;
1137
1138 zero:
1139        vsd.exponent = 0;
1140        vsd.significand = 0;
1141        goto pack;
1142
1143 divzero:
1144        exceptions = FPSCR_DZC;
1145 infinity:
1146        vsd.exponent = 255;
1147        vsd.significand = 0;
1148        goto pack;
1149
1150 invalid:
1151        vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1152        return FPSCR_IOC;
1153}
1154
1155static struct op fops[16] = {
1156        [FOP_TO_IDX(FOP_FMAC)]  = { vfp_single_fmac,  0 },
1157        [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 },
1158        [FOP_TO_IDX(FOP_FMSC)]  = { vfp_single_fmsc,  0 },
1159        [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 },
1160        [FOP_TO_IDX(FOP_FMUL)]  = { vfp_single_fmul,  0 },
1161        [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 },
1162        [FOP_TO_IDX(FOP_FADD)]  = { vfp_single_fadd,  0 },
1163        [FOP_TO_IDX(FOP_FSUB)]  = { vfp_single_fsub,  0 },
1164        [FOP_TO_IDX(FOP_FDIV)]  = { vfp_single_fdiv,  0 },
1165};
1166
1167#define FREG_BANK(x)    ((x) & 0x18)
1168#define FREG_IDX(x)     ((x) & 7)
1169
1170u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1171{
1172        u32 op = inst & FOP_MASK;
1173        u32 exceptions = 0;
1174        unsigned int dest;
1175        unsigned int sn = vfp_get_sn(inst);
1176        unsigned int sm = vfp_get_sm(inst);
1177        unsigned int vecitr, veclen, vecstride;
1178        struct op *fop;
1179
1180        vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1181
1182        fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1183
1184        /*
1185         * fcvtsd takes a dN register number as destination, not sN.
1186         * Technically, if bit 0 of dd is set, this is an invalid
1187         * instruction.  However, we ignore this for efficiency.
1188         * It also only operates on scalars.
1189         */
1190        if (fop->flags & OP_DD)
1191                dest = vfp_get_dd(inst);
1192        else
1193                dest = vfp_get_sd(inst);
1194
1195        /*
1196         * If destination bank is zero, vector length is always '1'.
1197         * ARM DDI0100F C5.1.3, C5.3.2.
1198         */
1199        if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
1200                veclen = 0;
1201        else
1202                veclen = fpscr & FPSCR_LENGTH_MASK;
1203
1204        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1205                 (veclen >> FPSCR_LENGTH_BIT) + 1);
1206
1207        if (!fop->fn)
1208                goto invalid;
1209
1210        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1211                s32 m = vfp_get_float(sm);
1212                u32 except;
1213                char type;
1214
1215                type = fop->flags & OP_DD ? 'd' : 's';
1216                if (op == FOP_EXT)
1217                        pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
1218                                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1219                                 sm, m);
1220                else
1221                        pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
1222                                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1223                                 FOP_TO_IDX(op), sm, m);
1224
1225                except = fop->fn(dest, sn, m, fpscr);
1226                pr_debug("VFP: itr%d: exceptions=%08x\n",
1227                         vecitr >> FPSCR_LENGTH_BIT, except);
1228
1229                exceptions |= except;
1230
1231                /*
1232                 * CHECK: It appears to be undefined whether we stop when
1233                 * we encounter an exception.  We continue.
1234                 */
1235                dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1236                sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1237                if (FREG_BANK(sm) != 0)
1238                        sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1239        }
1240        return exceptions;
1241
1242 invalid:
1243        return (u32)-1;
1244}
1245